{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 21975, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00022753128555176336, "grad_norm": 12.279616595766896, "learning_rate": 1.25e-06, "loss": 0.1112, "step": 1 }, { "epoch": 0.0004550625711035267, "grad_norm": 7.2629005990045785, "learning_rate": 1.2499999936130725e-06, "loss": 0.1394, "step": 2 }, { "epoch": 0.0006825938566552901, "grad_norm": 9.061755966937882, "learning_rate": 1.2499999744522896e-06, "loss": 0.266, "step": 3 }, { "epoch": 0.0009101251422070534, "grad_norm": 4.366742138714415, "learning_rate": 1.2499999425176518e-06, "loss": 0.1258, "step": 4 }, { "epoch": 0.0011376564277588168, "grad_norm": 8.875041328214682, "learning_rate": 1.2499998978091598e-06, "loss": 0.1749, "step": 5 }, { "epoch": 0.0013651877133105802, "grad_norm": 8.800851302952648, "learning_rate": 1.2499998403268147e-06, "loss": 0.2, "step": 6 }, { "epoch": 0.0015927189988623437, "grad_norm": 3.6087293622719416, "learning_rate": 1.2499997700706173e-06, "loss": 0.1398, "step": 7 }, { "epoch": 0.0018202502844141069, "grad_norm": 3.42318784466345, "learning_rate": 1.2499996870405692e-06, "loss": 0.1226, "step": 8 }, { "epoch": 0.0020477815699658703, "grad_norm": 10.818031536677639, "learning_rate": 1.2499995912366722e-06, "loss": 0.171, "step": 9 }, { "epoch": 0.0022753128555176336, "grad_norm": 4.021373220825307, "learning_rate": 1.2499994826589282e-06, "loss": 0.1326, "step": 10 }, { "epoch": 0.002502844141069397, "grad_norm": 23.4957115965293, "learning_rate": 1.2499993613073393e-06, "loss": 0.1235, "step": 11 }, { "epoch": 0.0027303754266211604, "grad_norm": 21.545758763913824, "learning_rate": 1.2499992271819083e-06, "loss": 0.0925, "step": 12 }, { "epoch": 0.0029579067121729237, "grad_norm": 3.510359233156002, "learning_rate": 1.2499990802826377e-06, "loss": 0.0985, "step": 13 }, { "epoch": 0.0031854379977246873, "grad_norm": 3.9794392758691632, "learning_rate": 1.2499989206095304e-06, "loss": 0.0906, "step": 14 }, { "epoch": 0.0034129692832764505, "grad_norm": 2.4759900062021503, "learning_rate": 1.2499987481625899e-06, "loss": 0.0746, "step": 15 }, { "epoch": 0.0036405005688282138, "grad_norm": 7.91228269690243, "learning_rate": 1.2499985629418195e-06, "loss": 0.1381, "step": 16 }, { "epoch": 0.0038680318543799774, "grad_norm": 3.050431958605509, "learning_rate": 1.2499983649472233e-06, "loss": 0.0509, "step": 17 }, { "epoch": 0.004095563139931741, "grad_norm": 9.537504830414553, "learning_rate": 1.249998154178805e-06, "loss": 0.0658, "step": 18 }, { "epoch": 0.004323094425483504, "grad_norm": 5.663160798723299, "learning_rate": 1.2499979306365692e-06, "loss": 0.097, "step": 19 }, { "epoch": 0.004550625711035267, "grad_norm": 13.411158998212743, "learning_rate": 1.2499976943205202e-06, "loss": 0.1, "step": 20 }, { "epoch": 0.00477815699658703, "grad_norm": 3.6621965264316194, "learning_rate": 1.249997445230663e-06, "loss": 0.0698, "step": 21 }, { "epoch": 0.005005688282138794, "grad_norm": 31.354306922977063, "learning_rate": 1.2499971833670026e-06, "loss": 0.1129, "step": 22 }, { "epoch": 0.005233219567690558, "grad_norm": 4.025175447301565, "learning_rate": 1.2499969087295443e-06, "loss": 0.1076, "step": 23 }, { "epoch": 0.005460750853242321, "grad_norm": 3.7989823695443863, "learning_rate": 1.249996621318294e-06, "loss": 0.0902, "step": 24 }, { "epoch": 0.005688282138794084, "grad_norm": 2.2185511399815545, "learning_rate": 1.2499963211332573e-06, "loss": 0.0398, "step": 25 }, { "epoch": 0.005915813424345847, "grad_norm": 3.775038573479949, "learning_rate": 1.2499960081744405e-06, "loss": 0.1145, "step": 26 }, { "epoch": 0.0061433447098976105, "grad_norm": 7.295114622894012, "learning_rate": 1.24999568244185e-06, "loss": 0.1379, "step": 27 }, { "epoch": 0.006370875995449375, "grad_norm": 21.290899699279613, "learning_rate": 1.249995343935492e-06, "loss": 0.0995, "step": 28 }, { "epoch": 0.006598407281001138, "grad_norm": 3.593142181899087, "learning_rate": 1.2499949926553743e-06, "loss": 0.0973, "step": 29 }, { "epoch": 0.006825938566552901, "grad_norm": 7.737004888367107, "learning_rate": 1.2499946286015032e-06, "loss": 0.09, "step": 30 }, { "epoch": 0.007053469852104664, "grad_norm": 2.955413576195516, "learning_rate": 1.2499942517738867e-06, "loss": 0.0625, "step": 31 }, { "epoch": 0.0072810011376564275, "grad_norm": 4.138230802608221, "learning_rate": 1.2499938621725322e-06, "loss": 0.0749, "step": 32 }, { "epoch": 0.007508532423208191, "grad_norm": 2.2958240484880115, "learning_rate": 1.2499934597974478e-06, "loss": 0.0986, "step": 33 }, { "epoch": 0.007736063708759955, "grad_norm": 3.9060226417706256, "learning_rate": 1.2499930446486416e-06, "loss": 0.1296, "step": 34 }, { "epoch": 0.007963594994311717, "grad_norm": 1.5544518249405173, "learning_rate": 1.2499926167261224e-06, "loss": 0.0598, "step": 35 }, { "epoch": 0.008191126279863481, "grad_norm": 5.2971839668544884, "learning_rate": 1.2499921760298987e-06, "loss": 0.1216, "step": 36 }, { "epoch": 0.008418657565415245, "grad_norm": 6.168021178604936, "learning_rate": 1.2499917225599796e-06, "loss": 0.1257, "step": 37 }, { "epoch": 0.008646188850967008, "grad_norm": 5.087771081836934, "learning_rate": 1.2499912563163742e-06, "loss": 0.0695, "step": 38 }, { "epoch": 0.008873720136518772, "grad_norm": 7.633032375201886, "learning_rate": 1.249990777299092e-06, "loss": 0.0522, "step": 39 }, { "epoch": 0.009101251422070534, "grad_norm": 2.9414497498297734, "learning_rate": 1.249990285508143e-06, "loss": 0.081, "step": 40 }, { "epoch": 0.009328782707622298, "grad_norm": 4.449234015706431, "learning_rate": 1.2499897809435374e-06, "loss": 0.1104, "step": 41 }, { "epoch": 0.00955631399317406, "grad_norm": 17.29783348718493, "learning_rate": 1.249989263605285e-06, "loss": 0.0952, "step": 42 }, { "epoch": 0.009783845278725825, "grad_norm": 4.357226856628825, "learning_rate": 1.249988733493397e-06, "loss": 0.0639, "step": 43 }, { "epoch": 0.010011376564277589, "grad_norm": 4.248351802911978, "learning_rate": 1.2499881906078836e-06, "loss": 0.0845, "step": 44 }, { "epoch": 0.010238907849829351, "grad_norm": 2.230593274135571, "learning_rate": 1.2499876349487564e-06, "loss": 0.1031, "step": 45 }, { "epoch": 0.010466439135381115, "grad_norm": 5.02333552530202, "learning_rate": 1.2499870665160262e-06, "loss": 0.0763, "step": 46 }, { "epoch": 0.010693970420932878, "grad_norm": 1.7336741239976143, "learning_rate": 1.2499864853097054e-06, "loss": 0.0495, "step": 47 }, { "epoch": 0.010921501706484642, "grad_norm": 4.9863935956267325, "learning_rate": 1.2499858913298053e-06, "loss": 0.1674, "step": 48 }, { "epoch": 0.011149032992036406, "grad_norm": 2.8583968641628967, "learning_rate": 1.249985284576338e-06, "loss": 0.0728, "step": 49 }, { "epoch": 0.011376564277588168, "grad_norm": 5.82977520269559, "learning_rate": 1.2499846650493164e-06, "loss": 0.1076, "step": 50 }, { "epoch": 0.011604095563139932, "grad_norm": 2.407346961463521, "learning_rate": 1.2499840327487528e-06, "loss": 0.0542, "step": 51 }, { "epoch": 0.011831626848691695, "grad_norm": 1.7581308929371289, "learning_rate": 1.24998338767466e-06, "loss": 0.0851, "step": 52 }, { "epoch": 0.012059158134243459, "grad_norm": 3.6069730541490315, "learning_rate": 1.2499827298270515e-06, "loss": 0.0771, "step": 53 }, { "epoch": 0.012286689419795221, "grad_norm": 2.6384830282527965, "learning_rate": 1.2499820592059405e-06, "loss": 0.1336, "step": 54 }, { "epoch": 0.012514220705346985, "grad_norm": 3.1162890015900793, "learning_rate": 1.2499813758113409e-06, "loss": 0.1058, "step": 55 }, { "epoch": 0.01274175199089875, "grad_norm": 3.5247536707308966, "learning_rate": 1.2499806796432665e-06, "loss": 0.0717, "step": 56 }, { "epoch": 0.012969283276450512, "grad_norm": 2.2371436604921584, "learning_rate": 1.2499799707017315e-06, "loss": 0.07, "step": 57 }, { "epoch": 0.013196814562002276, "grad_norm": 5.912440334168246, "learning_rate": 1.2499792489867508e-06, "loss": 0.1588, "step": 58 }, { "epoch": 0.013424345847554038, "grad_norm": 1.6415965725415542, "learning_rate": 1.2499785144983386e-06, "loss": 0.0587, "step": 59 }, { "epoch": 0.013651877133105802, "grad_norm": 3.921979088226587, "learning_rate": 1.24997776723651e-06, "loss": 0.1403, "step": 60 }, { "epoch": 0.013879408418657566, "grad_norm": 4.854246136713843, "learning_rate": 1.2499770072012809e-06, "loss": 0.0706, "step": 61 }, { "epoch": 0.014106939704209329, "grad_norm": 2.249851975045142, "learning_rate": 1.2499762343926661e-06, "loss": 0.0988, "step": 62 }, { "epoch": 0.014334470989761093, "grad_norm": 2.5833858090734956, "learning_rate": 1.2499754488106817e-06, "loss": 0.0618, "step": 63 }, { "epoch": 0.014562002275312855, "grad_norm": 3.523489637935598, "learning_rate": 1.2499746504553436e-06, "loss": 0.0788, "step": 64 }, { "epoch": 0.01478953356086462, "grad_norm": 1.4862582066571077, "learning_rate": 1.2499738393266684e-06, "loss": 0.0574, "step": 65 }, { "epoch": 0.015017064846416382, "grad_norm": 3.5354676602669506, "learning_rate": 1.2499730154246726e-06, "loss": 0.0742, "step": 66 }, { "epoch": 0.015244596131968146, "grad_norm": 4.702124240777191, "learning_rate": 1.2499721787493726e-06, "loss": 0.0994, "step": 67 }, { "epoch": 0.01547212741751991, "grad_norm": 2.2735257693436854, "learning_rate": 1.2499713293007862e-06, "loss": 0.0728, "step": 68 }, { "epoch": 0.015699658703071672, "grad_norm": 1.7727324277447247, "learning_rate": 1.2499704670789301e-06, "loss": 0.0492, "step": 69 }, { "epoch": 0.015927189988623434, "grad_norm": 2.0842450205223977, "learning_rate": 1.2499695920838225e-06, "loss": 0.0552, "step": 70 }, { "epoch": 0.0161547212741752, "grad_norm": 3.5228642040697116, "learning_rate": 1.2499687043154809e-06, "loss": 0.0556, "step": 71 }, { "epoch": 0.016382252559726963, "grad_norm": 2.797328413443058, "learning_rate": 1.2499678037739235e-06, "loss": 0.0519, "step": 72 }, { "epoch": 0.016609783845278725, "grad_norm": 3.349987636757497, "learning_rate": 1.2499668904591688e-06, "loss": 0.1053, "step": 73 }, { "epoch": 0.01683731513083049, "grad_norm": 4.71488578944105, "learning_rate": 1.2499659643712356e-06, "loss": 0.1072, "step": 74 }, { "epoch": 0.017064846416382253, "grad_norm": 1.9864905040446388, "learning_rate": 1.2499650255101425e-06, "loss": 0.1129, "step": 75 }, { "epoch": 0.017292377701934016, "grad_norm": 3.9786000792659073, "learning_rate": 1.2499640738759088e-06, "loss": 0.0798, "step": 76 }, { "epoch": 0.017519908987485778, "grad_norm": 3.293845214395531, "learning_rate": 1.249963109468554e-06, "loss": 0.1235, "step": 77 }, { "epoch": 0.017747440273037544, "grad_norm": 5.489256171335804, "learning_rate": 1.2499621322880979e-06, "loss": 0.0439, "step": 78 }, { "epoch": 0.017974971558589306, "grad_norm": 5.65488778928505, "learning_rate": 1.2499611423345604e-06, "loss": 0.0715, "step": 79 }, { "epoch": 0.01820250284414107, "grad_norm": 1.7907281324160922, "learning_rate": 1.2499601396079617e-06, "loss": 0.0668, "step": 80 }, { "epoch": 0.018430034129692834, "grad_norm": 2.5432248323972377, "learning_rate": 1.2499591241083222e-06, "loss": 0.0836, "step": 81 }, { "epoch": 0.018657565415244597, "grad_norm": 2.6549098221779146, "learning_rate": 1.2499580958356628e-06, "loss": 0.0612, "step": 82 }, { "epoch": 0.01888509670079636, "grad_norm": 3.6428276701171685, "learning_rate": 1.2499570547900045e-06, "loss": 0.0713, "step": 83 }, { "epoch": 0.01911262798634812, "grad_norm": 3.8990952294988994, "learning_rate": 1.2499560009713684e-06, "loss": 0.1046, "step": 84 }, { "epoch": 0.019340159271899887, "grad_norm": 1.7241928848576353, "learning_rate": 1.2499549343797764e-06, "loss": 0.0759, "step": 85 }, { "epoch": 0.01956769055745165, "grad_norm": 2.2613238963696545, "learning_rate": 1.24995385501525e-06, "loss": 0.0931, "step": 86 }, { "epoch": 0.019795221843003412, "grad_norm": 2.3167104475270492, "learning_rate": 1.2499527628778116e-06, "loss": 0.0775, "step": 87 }, { "epoch": 0.020022753128555178, "grad_norm": 2.631127335335558, "learning_rate": 1.2499516579674831e-06, "loss": 0.0911, "step": 88 }, { "epoch": 0.02025028441410694, "grad_norm": 3.902893778838773, "learning_rate": 1.2499505402842872e-06, "loss": 0.1129, "step": 89 }, { "epoch": 0.020477815699658702, "grad_norm": 2.6988246720898905, "learning_rate": 1.2499494098282469e-06, "loss": 0.088, "step": 90 }, { "epoch": 0.020705346985210465, "grad_norm": 1.4451215893923708, "learning_rate": 1.2499482665993851e-06, "loss": 0.0521, "step": 91 }, { "epoch": 0.02093287827076223, "grad_norm": 3.920423356576455, "learning_rate": 1.2499471105977252e-06, "loss": 0.079, "step": 92 }, { "epoch": 0.021160409556313993, "grad_norm": 3.1520828274033486, "learning_rate": 1.249945941823291e-06, "loss": 0.1206, "step": 93 }, { "epoch": 0.021387940841865755, "grad_norm": 6.767303206340345, "learning_rate": 1.2499447602761063e-06, "loss": 0.2231, "step": 94 }, { "epoch": 0.02161547212741752, "grad_norm": 8.857984803581953, "learning_rate": 1.2499435659561954e-06, "loss": 0.2288, "step": 95 }, { "epoch": 0.021843003412969283, "grad_norm": 3.2203370791462302, "learning_rate": 1.2499423588635823e-06, "loss": 0.1181, "step": 96 }, { "epoch": 0.022070534698521046, "grad_norm": 2.3890790390173864, "learning_rate": 1.2499411389982919e-06, "loss": 0.0546, "step": 97 }, { "epoch": 0.02229806598407281, "grad_norm": 3.9314083232645953, "learning_rate": 1.2499399063603492e-06, "loss": 0.1202, "step": 98 }, { "epoch": 0.022525597269624574, "grad_norm": 1.534409734668097, "learning_rate": 1.2499386609497793e-06, "loss": 0.0575, "step": 99 }, { "epoch": 0.022753128555176336, "grad_norm": 2.6915769569708163, "learning_rate": 1.2499374027666078e-06, "loss": 0.0865, "step": 100 }, { "epoch": 0.0229806598407281, "grad_norm": 1.9363626293552272, "learning_rate": 1.2499361318108602e-06, "loss": 0.0691, "step": 101 }, { "epoch": 0.023208191126279865, "grad_norm": 1.706755813278197, "learning_rate": 1.2499348480825627e-06, "loss": 0.0694, "step": 102 }, { "epoch": 0.023435722411831627, "grad_norm": 2.4058707201091902, "learning_rate": 1.2499335515817413e-06, "loss": 0.0873, "step": 103 }, { "epoch": 0.02366325369738339, "grad_norm": 1.4799661409619025, "learning_rate": 1.2499322423084226e-06, "loss": 0.0489, "step": 104 }, { "epoch": 0.023890784982935155, "grad_norm": 1.5529996293769865, "learning_rate": 1.2499309202626336e-06, "loss": 0.0489, "step": 105 }, { "epoch": 0.024118316268486917, "grad_norm": 1.2094533521149893, "learning_rate": 1.249929585444401e-06, "loss": 0.0555, "step": 106 }, { "epoch": 0.02434584755403868, "grad_norm": 3.3515777105454805, "learning_rate": 1.2499282378537522e-06, "loss": 0.0869, "step": 107 }, { "epoch": 0.024573378839590442, "grad_norm": 3.049734475824735, "learning_rate": 1.2499268774907144e-06, "loss": 0.0436, "step": 108 }, { "epoch": 0.024800910125142208, "grad_norm": 1.466216427195955, "learning_rate": 1.249925504355316e-06, "loss": 0.0602, "step": 109 }, { "epoch": 0.02502844141069397, "grad_norm": 1.7889692240619717, "learning_rate": 1.2499241184475848e-06, "loss": 0.0485, "step": 110 }, { "epoch": 0.025255972696245733, "grad_norm": 1.601570429523166, "learning_rate": 1.249922719767549e-06, "loss": 0.0657, "step": 111 }, { "epoch": 0.0254835039817975, "grad_norm": 1.9895197648670602, "learning_rate": 1.2499213083152374e-06, "loss": 0.0613, "step": 112 }, { "epoch": 0.02571103526734926, "grad_norm": 1.559725191147961, "learning_rate": 1.2499198840906787e-06, "loss": 0.0376, "step": 113 }, { "epoch": 0.025938566552901023, "grad_norm": 1.8823702931097668, "learning_rate": 1.249918447093902e-06, "loss": 0.0441, "step": 114 }, { "epoch": 0.026166097838452786, "grad_norm": 2.8320457267761707, "learning_rate": 1.249916997324937e-06, "loss": 0.1287, "step": 115 }, { "epoch": 0.02639362912400455, "grad_norm": 4.382916227720041, "learning_rate": 1.2499155347838129e-06, "loss": 0.0828, "step": 116 }, { "epoch": 0.026621160409556314, "grad_norm": 3.200832910369845, "learning_rate": 1.2499140594705596e-06, "loss": 0.0621, "step": 117 }, { "epoch": 0.026848691695108076, "grad_norm": 1.6914803041014228, "learning_rate": 1.2499125713852076e-06, "loss": 0.0778, "step": 118 }, { "epoch": 0.027076222980659842, "grad_norm": 1.192120885936281, "learning_rate": 1.2499110705277869e-06, "loss": 0.0505, "step": 119 }, { "epoch": 0.027303754266211604, "grad_norm": 2.0573836463025716, "learning_rate": 1.2499095568983284e-06, "loss": 0.0802, "step": 120 }, { "epoch": 0.027531285551763367, "grad_norm": 1.4902545114620074, "learning_rate": 1.2499080304968634e-06, "loss": 0.0565, "step": 121 }, { "epoch": 0.027758816837315133, "grad_norm": 5.341407524309934, "learning_rate": 1.2499064913234222e-06, "loss": 0.0556, "step": 122 }, { "epoch": 0.027986348122866895, "grad_norm": 1.9959451584520156, "learning_rate": 1.249904939378037e-06, "loss": 0.0425, "step": 123 }, { "epoch": 0.028213879408418657, "grad_norm": 4.8411742944908225, "learning_rate": 1.2499033746607395e-06, "loss": 0.1189, "step": 124 }, { "epoch": 0.02844141069397042, "grad_norm": 2.6968922160166624, "learning_rate": 1.2499017971715614e-06, "loss": 0.0952, "step": 125 }, { "epoch": 0.028668941979522185, "grad_norm": 2.498689273522412, "learning_rate": 1.2499002069105348e-06, "loss": 0.0609, "step": 126 }, { "epoch": 0.028896473265073948, "grad_norm": 1.7926923107505222, "learning_rate": 1.2498986038776926e-06, "loss": 0.0674, "step": 127 }, { "epoch": 0.02912400455062571, "grad_norm": 2.951592426546495, "learning_rate": 1.2498969880730671e-06, "loss": 0.0581, "step": 128 }, { "epoch": 0.029351535836177476, "grad_norm": 2.546012031191214, "learning_rate": 1.249895359496692e-06, "loss": 0.0604, "step": 129 }, { "epoch": 0.02957906712172924, "grad_norm": 2.982654740949648, "learning_rate": 1.2498937181486e-06, "loss": 0.1317, "step": 130 }, { "epoch": 0.029806598407281, "grad_norm": 3.35692876580473, "learning_rate": 1.2498920640288248e-06, "loss": 0.1357, "step": 131 }, { "epoch": 0.030034129692832763, "grad_norm": 3.0581666005631694, "learning_rate": 1.2498903971374005e-06, "loss": 0.1404, "step": 132 }, { "epoch": 0.03026166097838453, "grad_norm": 2.8194208797307936, "learning_rate": 1.2498887174743606e-06, "loss": 0.1139, "step": 133 }, { "epoch": 0.03048919226393629, "grad_norm": 1.307812448511231, "learning_rate": 1.24988702503974e-06, "loss": 0.0439, "step": 134 }, { "epoch": 0.030716723549488054, "grad_norm": 2.1828107869380178, "learning_rate": 1.2498853198335728e-06, "loss": 0.0714, "step": 135 }, { "epoch": 0.03094425483503982, "grad_norm": 2.7687164066652588, "learning_rate": 1.2498836018558942e-06, "loss": 0.0588, "step": 136 }, { "epoch": 0.031171786120591582, "grad_norm": 2.0398223571287124, "learning_rate": 1.2498818711067392e-06, "loss": 0.0788, "step": 137 }, { "epoch": 0.031399317406143344, "grad_norm": 1.544887443922682, "learning_rate": 1.2498801275861433e-06, "loss": 0.0424, "step": 138 }, { "epoch": 0.03162684869169511, "grad_norm": 1.9943988491119002, "learning_rate": 1.2498783712941418e-06, "loss": 0.0509, "step": 139 }, { "epoch": 0.03185437997724687, "grad_norm": 2.2756991800533726, "learning_rate": 1.2498766022307709e-06, "loss": 0.077, "step": 140 }, { "epoch": 0.032081911262798635, "grad_norm": 2.111047377421262, "learning_rate": 1.2498748203960665e-06, "loss": 0.0698, "step": 141 }, { "epoch": 0.0323094425483504, "grad_norm": 5.455732324109059, "learning_rate": 1.2498730257900655e-06, "loss": 0.0464, "step": 142 }, { "epoch": 0.03253697383390216, "grad_norm": 5.8400695805570475, "learning_rate": 1.249871218412804e-06, "loss": 0.0808, "step": 143 }, { "epoch": 0.032764505119453925, "grad_norm": 1.6154990753356673, "learning_rate": 1.2498693982643192e-06, "loss": 0.0579, "step": 144 }, { "epoch": 0.03299203640500569, "grad_norm": 2.6179374503343644, "learning_rate": 1.2498675653446485e-06, "loss": 0.0539, "step": 145 }, { "epoch": 0.03321956769055745, "grad_norm": 1.8802237147910983, "learning_rate": 1.249865719653829e-06, "loss": 0.0562, "step": 146 }, { "epoch": 0.033447098976109216, "grad_norm": 1.9601893311780516, "learning_rate": 1.2498638611918985e-06, "loss": 0.0842, "step": 147 }, { "epoch": 0.03367463026166098, "grad_norm": 3.2621758627603814, "learning_rate": 1.249861989958895e-06, "loss": 0.0707, "step": 148 }, { "epoch": 0.03390216154721274, "grad_norm": 1.7359031480428844, "learning_rate": 1.2498601059548572e-06, "loss": 0.0552, "step": 149 }, { "epoch": 0.034129692832764506, "grad_norm": 1.9031742914383765, "learning_rate": 1.2498582091798228e-06, "loss": 0.0551, "step": 150 }, { "epoch": 0.034357224118316265, "grad_norm": 1.6839469596143435, "learning_rate": 1.2498562996338312e-06, "loss": 0.0549, "step": 151 }, { "epoch": 0.03458475540386803, "grad_norm": 4.770073312573169, "learning_rate": 1.249854377316921e-06, "loss": 0.0922, "step": 152 }, { "epoch": 0.0348122866894198, "grad_norm": 9.77865071194089, "learning_rate": 1.2498524422291319e-06, "loss": 0.0656, "step": 153 }, { "epoch": 0.035039817974971556, "grad_norm": 3.279720923464348, "learning_rate": 1.2498504943705033e-06, "loss": 0.0628, "step": 154 }, { "epoch": 0.03526734926052332, "grad_norm": 2.5593319197975086, "learning_rate": 1.249848533741075e-06, "loss": 0.1116, "step": 155 }, { "epoch": 0.03549488054607509, "grad_norm": 2.7020759561258796, "learning_rate": 1.2498465603408865e-06, "loss": 0.0818, "step": 156 }, { "epoch": 0.035722411831626846, "grad_norm": 1.1827259380181383, "learning_rate": 1.2498445741699792e-06, "loss": 0.0412, "step": 157 }, { "epoch": 0.03594994311717861, "grad_norm": 3.090835953132798, "learning_rate": 1.249842575228393e-06, "loss": 0.0922, "step": 158 }, { "epoch": 0.03617747440273038, "grad_norm": 4.286688880757478, "learning_rate": 1.249840563516169e-06, "loss": 0.0921, "step": 159 }, { "epoch": 0.03640500568828214, "grad_norm": 3.826111531217773, "learning_rate": 1.249838539033348e-06, "loss": 0.0711, "step": 160 }, { "epoch": 0.0366325369738339, "grad_norm": 1.286547340104366, "learning_rate": 1.2498365017799715e-06, "loss": 0.0363, "step": 161 }, { "epoch": 0.03686006825938567, "grad_norm": 0.9115912875215112, "learning_rate": 1.2498344517560815e-06, "loss": 0.0499, "step": 162 }, { "epoch": 0.03708759954493743, "grad_norm": 1.967900659157127, "learning_rate": 1.2498323889617198e-06, "loss": 0.0642, "step": 163 }, { "epoch": 0.03731513083048919, "grad_norm": 20.150143075771535, "learning_rate": 1.2498303133969281e-06, "loss": 0.0764, "step": 164 }, { "epoch": 0.03754266211604096, "grad_norm": 1.8715348615633682, "learning_rate": 1.2498282250617492e-06, "loss": 0.0756, "step": 165 }, { "epoch": 0.03777019340159272, "grad_norm": 1.4158902127506805, "learning_rate": 1.2498261239562257e-06, "loss": 0.0669, "step": 166 }, { "epoch": 0.037997724687144484, "grad_norm": 2.5083436605654583, "learning_rate": 1.2498240100804005e-06, "loss": 0.0642, "step": 167 }, { "epoch": 0.03822525597269624, "grad_norm": 4.201645953801033, "learning_rate": 1.249821883434317e-06, "loss": 0.0804, "step": 168 }, { "epoch": 0.03845278725824801, "grad_norm": 1.8056911269518585, "learning_rate": 1.2498197440180182e-06, "loss": 0.0826, "step": 169 }, { "epoch": 0.038680318543799774, "grad_norm": 2.5577509171659174, "learning_rate": 1.2498175918315484e-06, "loss": 0.0782, "step": 170 }, { "epoch": 0.03890784982935153, "grad_norm": 1.6347218920613042, "learning_rate": 1.2498154268749513e-06, "loss": 0.0526, "step": 171 }, { "epoch": 0.0391353811149033, "grad_norm": 2.089011530912283, "learning_rate": 1.249813249148271e-06, "loss": 0.0845, "step": 172 }, { "epoch": 0.039362912400455065, "grad_norm": 2.0840328778691557, "learning_rate": 1.2498110586515525e-06, "loss": 0.0642, "step": 173 }, { "epoch": 0.039590443686006824, "grad_norm": 2.63166064853352, "learning_rate": 1.2498088553848398e-06, "loss": 0.0896, "step": 174 }, { "epoch": 0.03981797497155859, "grad_norm": 1.7790239100772818, "learning_rate": 1.2498066393481787e-06, "loss": 0.0887, "step": 175 }, { "epoch": 0.040045506257110355, "grad_norm": 2.0484275395799374, "learning_rate": 1.249804410541614e-06, "loss": 0.0757, "step": 176 }, { "epoch": 0.040273037542662114, "grad_norm": 2.7339764522222674, "learning_rate": 1.2498021689651916e-06, "loss": 0.0752, "step": 177 }, { "epoch": 0.04050056882821388, "grad_norm": 1.7202036539272256, "learning_rate": 1.249799914618957e-06, "loss": 0.0571, "step": 178 }, { "epoch": 0.040728100113765646, "grad_norm": 0.9595120325924671, "learning_rate": 1.2497976475029566e-06, "loss": 0.0431, "step": 179 }, { "epoch": 0.040955631399317405, "grad_norm": 2.2881459955255283, "learning_rate": 1.2497953676172364e-06, "loss": 0.0714, "step": 180 }, { "epoch": 0.04118316268486917, "grad_norm": 1.1818626309445535, "learning_rate": 1.2497930749618431e-06, "loss": 0.0352, "step": 181 }, { "epoch": 0.04141069397042093, "grad_norm": 1.7233391740704784, "learning_rate": 1.2497907695368238e-06, "loss": 0.0469, "step": 182 }, { "epoch": 0.041638225255972695, "grad_norm": 1.260961347917145, "learning_rate": 1.2497884513422253e-06, "loss": 0.0545, "step": 183 }, { "epoch": 0.04186575654152446, "grad_norm": 1.350181559062471, "learning_rate": 1.249786120378095e-06, "loss": 0.0579, "step": 184 }, { "epoch": 0.04209328782707622, "grad_norm": 1.4312208218600422, "learning_rate": 1.2497837766444806e-06, "loss": 0.0671, "step": 185 }, { "epoch": 0.042320819112627986, "grad_norm": 3.0862470606278527, "learning_rate": 1.2497814201414304e-06, "loss": 0.1552, "step": 186 }, { "epoch": 0.04254835039817975, "grad_norm": 2.010075620155332, "learning_rate": 1.249779050868992e-06, "loss": 0.0819, "step": 187 }, { "epoch": 0.04277588168373151, "grad_norm": 1.5041508185690229, "learning_rate": 1.249776668827214e-06, "loss": 0.0555, "step": 188 }, { "epoch": 0.043003412969283276, "grad_norm": 15.160154885670842, "learning_rate": 1.249774274016145e-06, "loss": 0.1377, "step": 189 }, { "epoch": 0.04323094425483504, "grad_norm": 1.3900253472134594, "learning_rate": 1.2497718664358341e-06, "loss": 0.0573, "step": 190 }, { "epoch": 0.0434584755403868, "grad_norm": 1.5383922685465743, "learning_rate": 1.2497694460863307e-06, "loss": 0.0986, "step": 191 }, { "epoch": 0.04368600682593857, "grad_norm": 2.2430359603043266, "learning_rate": 1.2497670129676838e-06, "loss": 0.0771, "step": 192 }, { "epoch": 0.04391353811149033, "grad_norm": 2.1439327799346333, "learning_rate": 1.2497645670799436e-06, "loss": 0.0436, "step": 193 }, { "epoch": 0.04414106939704209, "grad_norm": 1.771166715500092, "learning_rate": 1.2497621084231595e-06, "loss": 0.0523, "step": 194 }, { "epoch": 0.04436860068259386, "grad_norm": 1.4299897341433283, "learning_rate": 1.2497596369973823e-06, "loss": 0.0401, "step": 195 }, { "epoch": 0.04459613196814562, "grad_norm": 2.3614356932574654, "learning_rate": 1.2497571528026623e-06, "loss": 0.0888, "step": 196 }, { "epoch": 0.04482366325369738, "grad_norm": 1.8304520866772858, "learning_rate": 1.2497546558390503e-06, "loss": 0.065, "step": 197 }, { "epoch": 0.04505119453924915, "grad_norm": 1.8912148609925916, "learning_rate": 1.2497521461065973e-06, "loss": 0.0601, "step": 198 }, { "epoch": 0.04527872582480091, "grad_norm": 3.522073583380884, "learning_rate": 1.2497496236053547e-06, "loss": 0.0841, "step": 199 }, { "epoch": 0.04550625711035267, "grad_norm": 1.3143196144497622, "learning_rate": 1.2497470883353738e-06, "loss": 0.0485, "step": 200 }, { "epoch": 0.04573378839590444, "grad_norm": 2.251264141887164, "learning_rate": 1.2497445402967068e-06, "loss": 0.1068, "step": 201 }, { "epoch": 0.0459613196814562, "grad_norm": 1.5072412944239943, "learning_rate": 1.2497419794894053e-06, "loss": 0.0685, "step": 202 }, { "epoch": 0.04618885096700796, "grad_norm": 3.468383203818734, "learning_rate": 1.249739405913522e-06, "loss": 0.0815, "step": 203 }, { "epoch": 0.04641638225255973, "grad_norm": 2.799795848526847, "learning_rate": 1.2497368195691095e-06, "loss": 0.0611, "step": 204 }, { "epoch": 0.04664391353811149, "grad_norm": 1.8317350709529971, "learning_rate": 1.2497342204562205e-06, "loss": 0.0645, "step": 205 }, { "epoch": 0.046871444823663254, "grad_norm": 1.8552065397519177, "learning_rate": 1.2497316085749081e-06, "loss": 0.0475, "step": 206 }, { "epoch": 0.04709897610921502, "grad_norm": 33.648509264650215, "learning_rate": 1.249728983925226e-06, "loss": 0.4463, "step": 207 }, { "epoch": 0.04732650739476678, "grad_norm": 3.6371540108131555, "learning_rate": 1.2497263465072274e-06, "loss": 0.1261, "step": 208 }, { "epoch": 0.047554038680318544, "grad_norm": 4.579077765212189, "learning_rate": 1.2497236963209663e-06, "loss": 0.1537, "step": 209 }, { "epoch": 0.04778156996587031, "grad_norm": 1.8349256873988717, "learning_rate": 1.2497210333664972e-06, "loss": 0.0905, "step": 210 }, { "epoch": 0.04800910125142207, "grad_norm": 1.0349470677126553, "learning_rate": 1.2497183576438743e-06, "loss": 0.0383, "step": 211 }, { "epoch": 0.048236632536973835, "grad_norm": 1.7552990958997892, "learning_rate": 1.2497156691531523e-06, "loss": 0.0667, "step": 212 }, { "epoch": 0.048464163822525594, "grad_norm": 1.1378619448565346, "learning_rate": 1.249712967894386e-06, "loss": 0.0494, "step": 213 }, { "epoch": 0.04869169510807736, "grad_norm": 2.098843760453871, "learning_rate": 1.2497102538676308e-06, "loss": 0.0683, "step": 214 }, { "epoch": 0.048919226393629126, "grad_norm": 2.4023627244072996, "learning_rate": 1.249707527072942e-06, "loss": 0.0752, "step": 215 }, { "epoch": 0.049146757679180884, "grad_norm": 3.0741883094346405, "learning_rate": 1.2497047875103757e-06, "loss": 0.1576, "step": 216 }, { "epoch": 0.04937428896473265, "grad_norm": 2.109066858277886, "learning_rate": 1.2497020351799875e-06, "loss": 0.0834, "step": 217 }, { "epoch": 0.049601820250284416, "grad_norm": 1.1860658004774445, "learning_rate": 1.2496992700818335e-06, "loss": 0.0487, "step": 218 }, { "epoch": 0.049829351535836175, "grad_norm": 2.78284096499592, "learning_rate": 1.249696492215971e-06, "loss": 0.0743, "step": 219 }, { "epoch": 0.05005688282138794, "grad_norm": 2.111446571336187, "learning_rate": 1.249693701582456e-06, "loss": 0.0381, "step": 220 }, { "epoch": 0.05028441410693971, "grad_norm": 1.8439980435313363, "learning_rate": 1.2496908981813458e-06, "loss": 0.0821, "step": 221 }, { "epoch": 0.050511945392491465, "grad_norm": 2.1292431200987165, "learning_rate": 1.2496880820126977e-06, "loss": 0.102, "step": 222 }, { "epoch": 0.05073947667804323, "grad_norm": 1.58572513687618, "learning_rate": 1.2496852530765695e-06, "loss": 0.0451, "step": 223 }, { "epoch": 0.050967007963595, "grad_norm": 3.028551047012946, "learning_rate": 1.2496824113730186e-06, "loss": 0.1259, "step": 224 }, { "epoch": 0.051194539249146756, "grad_norm": 1.2656190346220688, "learning_rate": 1.2496795569021033e-06, "loss": 0.0593, "step": 225 }, { "epoch": 0.05142207053469852, "grad_norm": 2.5365255203279444, "learning_rate": 1.2496766896638819e-06, "loss": 0.0754, "step": 226 }, { "epoch": 0.05164960182025029, "grad_norm": 2.246271311279298, "learning_rate": 1.249673809658413e-06, "loss": 0.0689, "step": 227 }, { "epoch": 0.05187713310580205, "grad_norm": 1.212864625758916, "learning_rate": 1.2496709168857555e-06, "loss": 0.0451, "step": 228 }, { "epoch": 0.05210466439135381, "grad_norm": 2.806330657303488, "learning_rate": 1.2496680113459683e-06, "loss": 0.1473, "step": 229 }, { "epoch": 0.05233219567690557, "grad_norm": 2.652722899111948, "learning_rate": 1.2496650930391113e-06, "loss": 0.1155, "step": 230 }, { "epoch": 0.05255972696245734, "grad_norm": 2.7863683658107696, "learning_rate": 1.2496621619652435e-06, "loss": 0.0939, "step": 231 }, { "epoch": 0.0527872582480091, "grad_norm": 2.0852803666211925, "learning_rate": 1.2496592181244253e-06, "loss": 0.0385, "step": 232 }, { "epoch": 0.05301478953356086, "grad_norm": 2.3109089889052274, "learning_rate": 1.249656261516717e-06, "loss": 0.0591, "step": 233 }, { "epoch": 0.05324232081911263, "grad_norm": 1.657658325406227, "learning_rate": 1.2496532921421781e-06, "loss": 0.0666, "step": 234 }, { "epoch": 0.053469852104664393, "grad_norm": 1.8341397873796956, "learning_rate": 1.2496503100008704e-06, "loss": 0.0893, "step": 235 }, { "epoch": 0.05369738339021615, "grad_norm": 1.3722825156667726, "learning_rate": 1.249647315092854e-06, "loss": 0.0445, "step": 236 }, { "epoch": 0.05392491467576792, "grad_norm": 2.4225397650308187, "learning_rate": 1.2496443074181905e-06, "loss": 0.0783, "step": 237 }, { "epoch": 0.054152445961319684, "grad_norm": 1.27450689566742, "learning_rate": 1.2496412869769415e-06, "loss": 0.0482, "step": 238 }, { "epoch": 0.05437997724687144, "grad_norm": 1.5788523588867425, "learning_rate": 1.2496382537691686e-06, "loss": 0.0559, "step": 239 }, { "epoch": 0.05460750853242321, "grad_norm": 2.455674592198026, "learning_rate": 1.2496352077949336e-06, "loss": 0.0686, "step": 240 }, { "epoch": 0.054835039817974975, "grad_norm": 0.9822051281919313, "learning_rate": 1.249632149054299e-06, "loss": 0.0333, "step": 241 }, { "epoch": 0.05506257110352673, "grad_norm": 1.4918328919143296, "learning_rate": 1.249629077547327e-06, "loss": 0.0564, "step": 242 }, { "epoch": 0.0552901023890785, "grad_norm": 2.2431233119775973, "learning_rate": 1.2496259932740813e-06, "loss": 0.0998, "step": 243 }, { "epoch": 0.055517633674630265, "grad_norm": 1.0547810468664673, "learning_rate": 1.2496228962346236e-06, "loss": 0.054, "step": 244 }, { "epoch": 0.055745164960182024, "grad_norm": 4.2608959679243785, "learning_rate": 1.249619786429018e-06, "loss": 0.1324, "step": 245 }, { "epoch": 0.05597269624573379, "grad_norm": 0.9597443879271953, "learning_rate": 1.2496166638573278e-06, "loss": 0.0545, "step": 246 }, { "epoch": 0.05620022753128555, "grad_norm": 1.0564009660406621, "learning_rate": 1.2496135285196172e-06, "loss": 0.0409, "step": 247 }, { "epoch": 0.056427758816837315, "grad_norm": 5.181656955922701, "learning_rate": 1.2496103804159497e-06, "loss": 0.0514, "step": 248 }, { "epoch": 0.05665529010238908, "grad_norm": 31.956127145863782, "learning_rate": 1.2496072195463904e-06, "loss": 0.5089, "step": 249 }, { "epoch": 0.05688282138794084, "grad_norm": 1.6118693718642072, "learning_rate": 1.249604045911003e-06, "loss": 0.0601, "step": 250 }, { "epoch": 0.057110352673492605, "grad_norm": 1.8753798893546312, "learning_rate": 1.249600859509853e-06, "loss": 0.0781, "step": 251 }, { "epoch": 0.05733788395904437, "grad_norm": 1.9173456172296954, "learning_rate": 1.2495976603430054e-06, "loss": 0.0998, "step": 252 }, { "epoch": 0.05756541524459613, "grad_norm": 1.7446295444585516, "learning_rate": 1.2495944484105254e-06, "loss": 0.0348, "step": 253 }, { "epoch": 0.057792946530147896, "grad_norm": 3.1221291176703816, "learning_rate": 1.2495912237124787e-06, "loss": 0.0467, "step": 254 }, { "epoch": 0.05802047781569966, "grad_norm": 3.8018086885799454, "learning_rate": 1.2495879862489312e-06, "loss": 0.0893, "step": 255 }, { "epoch": 0.05824800910125142, "grad_norm": 7.114525761671455, "learning_rate": 1.2495847360199495e-06, "loss": 0.0647, "step": 256 }, { "epoch": 0.058475540386803186, "grad_norm": 1.861881748798064, "learning_rate": 1.2495814730255993e-06, "loss": 0.0832, "step": 257 }, { "epoch": 0.05870307167235495, "grad_norm": 1.920791237896296, "learning_rate": 1.2495781972659479e-06, "loss": 0.0346, "step": 258 }, { "epoch": 0.05893060295790671, "grad_norm": 1.9123959618493658, "learning_rate": 1.2495749087410618e-06, "loss": 0.0924, "step": 259 }, { "epoch": 0.05915813424345848, "grad_norm": 4.29696428710133, "learning_rate": 1.2495716074510087e-06, "loss": 0.0503, "step": 260 }, { "epoch": 0.059385665529010236, "grad_norm": 1.518204196410266, "learning_rate": 1.2495682933958555e-06, "loss": 0.0516, "step": 261 }, { "epoch": 0.059613196814562, "grad_norm": 4.632688881661792, "learning_rate": 1.2495649665756705e-06, "loss": 0.1211, "step": 262 }, { "epoch": 0.05984072810011377, "grad_norm": 2.486723056340893, "learning_rate": 1.2495616269905212e-06, "loss": 0.0811, "step": 263 }, { "epoch": 0.060068259385665526, "grad_norm": 1.581969386319132, "learning_rate": 1.2495582746404762e-06, "loss": 0.0589, "step": 264 }, { "epoch": 0.06029579067121729, "grad_norm": 1.5286635653264549, "learning_rate": 1.249554909525604e-06, "loss": 0.0638, "step": 265 }, { "epoch": 0.06052332195676906, "grad_norm": 1.2992103388623448, "learning_rate": 1.249551531645973e-06, "loss": 0.0309, "step": 266 }, { "epoch": 0.06075085324232082, "grad_norm": 1.4096677819832404, "learning_rate": 1.2495481410016527e-06, "loss": 0.0779, "step": 267 }, { "epoch": 0.06097838452787258, "grad_norm": 2.5850002425545586, "learning_rate": 1.2495447375927122e-06, "loss": 0.0718, "step": 268 }, { "epoch": 0.06120591581342435, "grad_norm": 2.2171422082649155, "learning_rate": 1.2495413214192209e-06, "loss": 0.0761, "step": 269 }, { "epoch": 0.06143344709897611, "grad_norm": 1.8061938933671926, "learning_rate": 1.2495378924812486e-06, "loss": 0.068, "step": 270 }, { "epoch": 0.06166097838452787, "grad_norm": 1.805390591341637, "learning_rate": 1.2495344507788662e-06, "loss": 0.0589, "step": 271 }, { "epoch": 0.06188850967007964, "grad_norm": 28.567737212004268, "learning_rate": 1.249530996312143e-06, "loss": 0.2672, "step": 272 }, { "epoch": 0.0621160409556314, "grad_norm": 2.023457290937467, "learning_rate": 1.2495275290811499e-06, "loss": 0.0762, "step": 273 }, { "epoch": 0.062343572241183164, "grad_norm": 1.726293880974768, "learning_rate": 1.2495240490859581e-06, "loss": 0.1124, "step": 274 }, { "epoch": 0.06257110352673492, "grad_norm": 3.5654068745258094, "learning_rate": 1.2495205563266384e-06, "loss": 0.08, "step": 275 }, { "epoch": 0.06279863481228669, "grad_norm": 3.330534204865081, "learning_rate": 1.2495170508032624e-06, "loss": 0.1063, "step": 276 }, { "epoch": 0.06302616609783845, "grad_norm": 2.7347891521994265, "learning_rate": 1.2495135325159015e-06, "loss": 0.0544, "step": 277 }, { "epoch": 0.06325369738339022, "grad_norm": 1.6191657189271829, "learning_rate": 1.2495100014646277e-06, "loss": 0.0399, "step": 278 }, { "epoch": 0.06348122866894199, "grad_norm": 2.2760450280718163, "learning_rate": 1.2495064576495134e-06, "loss": 0.0842, "step": 279 }, { "epoch": 0.06370875995449374, "grad_norm": 1.26417795343513, "learning_rate": 1.2495029010706306e-06, "loss": 0.0396, "step": 280 }, { "epoch": 0.0639362912400455, "grad_norm": 2.452124440380249, "learning_rate": 1.2494993317280524e-06, "loss": 0.057, "step": 281 }, { "epoch": 0.06416382252559727, "grad_norm": 1.6768179224480646, "learning_rate": 1.2494957496218516e-06, "loss": 0.0778, "step": 282 }, { "epoch": 0.06439135381114904, "grad_norm": 1.148511080426964, "learning_rate": 1.2494921547521013e-06, "loss": 0.0593, "step": 283 }, { "epoch": 0.0646188850967008, "grad_norm": 2.6954847712989687, "learning_rate": 1.249488547118875e-06, "loss": 0.0581, "step": 284 }, { "epoch": 0.06484641638225255, "grad_norm": 1.7038701082720327, "learning_rate": 1.2494849267222466e-06, "loss": 0.0467, "step": 285 }, { "epoch": 0.06507394766780432, "grad_norm": 2.0205984871282654, "learning_rate": 1.24948129356229e-06, "loss": 0.08, "step": 286 }, { "epoch": 0.06530147895335608, "grad_norm": 2.4430734451001093, "learning_rate": 1.2494776476390793e-06, "loss": 0.0689, "step": 287 }, { "epoch": 0.06552901023890785, "grad_norm": 2.2074548221975223, "learning_rate": 1.2494739889526894e-06, "loss": 0.036, "step": 288 }, { "epoch": 0.06575654152445962, "grad_norm": 2.3839227269087115, "learning_rate": 1.2494703175031946e-06, "loss": 0.1017, "step": 289 }, { "epoch": 0.06598407281001138, "grad_norm": 1.297764082661735, "learning_rate": 1.2494666332906702e-06, "loss": 0.0428, "step": 290 }, { "epoch": 0.06621160409556313, "grad_norm": 2.29700118159146, "learning_rate": 1.2494629363151916e-06, "loss": 0.0564, "step": 291 }, { "epoch": 0.0664391353811149, "grad_norm": 10.343219429302799, "learning_rate": 1.2494592265768343e-06, "loss": 0.2299, "step": 292 }, { "epoch": 0.06666666666666667, "grad_norm": 1.7119116189327344, "learning_rate": 1.2494555040756737e-06, "loss": 0.0491, "step": 293 }, { "epoch": 0.06689419795221843, "grad_norm": 1.5996315416913807, "learning_rate": 1.2494517688117867e-06, "loss": 0.0569, "step": 294 }, { "epoch": 0.0671217292377702, "grad_norm": 2.2996053958249068, "learning_rate": 1.2494480207852489e-06, "loss": 0.0561, "step": 295 }, { "epoch": 0.06734926052332196, "grad_norm": 2.4074335741025346, "learning_rate": 1.249444259996137e-06, "loss": 0.0578, "step": 296 }, { "epoch": 0.06757679180887372, "grad_norm": 3.415732508941554, "learning_rate": 1.2494404864445284e-06, "loss": 0.0617, "step": 297 }, { "epoch": 0.06780432309442548, "grad_norm": 2.777678894153543, "learning_rate": 1.2494367001304996e-06, "loss": 0.081, "step": 298 }, { "epoch": 0.06803185437997725, "grad_norm": 1.6387141538383663, "learning_rate": 1.2494329010541284e-06, "loss": 0.0387, "step": 299 }, { "epoch": 0.06825938566552901, "grad_norm": 2.069756645203544, "learning_rate": 1.2494290892154922e-06, "loss": 0.0742, "step": 300 }, { "epoch": 0.06848691695108078, "grad_norm": 3.4307396136508332, "learning_rate": 1.2494252646146692e-06, "loss": 0.0957, "step": 301 }, { "epoch": 0.06871444823663253, "grad_norm": 2.0232162640823974, "learning_rate": 1.249421427251737e-06, "loss": 0.0522, "step": 302 }, { "epoch": 0.0689419795221843, "grad_norm": 2.146160892723777, "learning_rate": 1.2494175771267748e-06, "loss": 0.0896, "step": 303 }, { "epoch": 0.06916951080773606, "grad_norm": 2.940508414612204, "learning_rate": 1.2494137142398607e-06, "loss": 0.0776, "step": 304 }, { "epoch": 0.06939704209328783, "grad_norm": 4.306654470652337, "learning_rate": 1.249409838591074e-06, "loss": 0.112, "step": 305 }, { "epoch": 0.0696245733788396, "grad_norm": 2.698854439380438, "learning_rate": 1.2494059501804937e-06, "loss": 0.0922, "step": 306 }, { "epoch": 0.06985210466439136, "grad_norm": 2.684055328227073, "learning_rate": 1.249402049008199e-06, "loss": 0.0603, "step": 307 }, { "epoch": 0.07007963594994311, "grad_norm": 1.6527179950360487, "learning_rate": 1.2493981350742704e-06, "loss": 0.035, "step": 308 }, { "epoch": 0.07030716723549488, "grad_norm": 2.632569906843673, "learning_rate": 1.2493942083787872e-06, "loss": 0.0483, "step": 309 }, { "epoch": 0.07053469852104664, "grad_norm": 1.8419184740112247, "learning_rate": 1.2493902689218299e-06, "loss": 0.0592, "step": 310 }, { "epoch": 0.07076222980659841, "grad_norm": 4.098447223521742, "learning_rate": 1.249386316703479e-06, "loss": 0.0647, "step": 311 }, { "epoch": 0.07098976109215017, "grad_norm": 3.425692782797119, "learning_rate": 1.2493823517238154e-06, "loss": 0.0304, "step": 312 }, { "epoch": 0.07121729237770194, "grad_norm": 2.09845005259606, "learning_rate": 1.2493783739829202e-06, "loss": 0.0841, "step": 313 }, { "epoch": 0.07144482366325369, "grad_norm": 1.3953874008324176, "learning_rate": 1.2493743834808741e-06, "loss": 0.0572, "step": 314 }, { "epoch": 0.07167235494880546, "grad_norm": 2.623892824594455, "learning_rate": 1.2493703802177594e-06, "loss": 0.0625, "step": 315 }, { "epoch": 0.07189988623435722, "grad_norm": 1.2851580205849038, "learning_rate": 1.2493663641936576e-06, "loss": 0.0321, "step": 316 }, { "epoch": 0.07212741751990899, "grad_norm": 1.9343201552647253, "learning_rate": 1.2493623354086507e-06, "loss": 0.067, "step": 317 }, { "epoch": 0.07235494880546076, "grad_norm": 2.2726283028135286, "learning_rate": 1.2493582938628213e-06, "loss": 0.0797, "step": 318 }, { "epoch": 0.07258248009101251, "grad_norm": 1.669835013133446, "learning_rate": 1.2493542395562516e-06, "loss": 0.0534, "step": 319 }, { "epoch": 0.07281001137656427, "grad_norm": 1.684538229380955, "learning_rate": 1.2493501724890247e-06, "loss": 0.0728, "step": 320 }, { "epoch": 0.07303754266211604, "grad_norm": 1.9071354849813447, "learning_rate": 1.249346092661224e-06, "loss": 0.0678, "step": 321 }, { "epoch": 0.0732650739476678, "grad_norm": 1.7042139773464355, "learning_rate": 1.2493420000729322e-06, "loss": 0.0784, "step": 322 }, { "epoch": 0.07349260523321957, "grad_norm": 1.6735213517836012, "learning_rate": 1.2493378947242336e-06, "loss": 0.0776, "step": 323 }, { "epoch": 0.07372013651877134, "grad_norm": 1.5646112601537545, "learning_rate": 1.2493337766152119e-06, "loss": 0.0582, "step": 324 }, { "epoch": 0.07394766780432309, "grad_norm": 1.5543937889365869, "learning_rate": 1.249329645745951e-06, "loss": 0.0512, "step": 325 }, { "epoch": 0.07417519908987485, "grad_norm": 1.982236801977647, "learning_rate": 1.2493255021165357e-06, "loss": 0.0804, "step": 326 }, { "epoch": 0.07440273037542662, "grad_norm": 1.2394712998486772, "learning_rate": 1.2493213457270504e-06, "loss": 0.0534, "step": 327 }, { "epoch": 0.07463026166097839, "grad_norm": 1.7803933921864556, "learning_rate": 1.2493171765775804e-06, "loss": 0.0687, "step": 328 }, { "epoch": 0.07485779294653015, "grad_norm": 2.379962120888543, "learning_rate": 1.2493129946682107e-06, "loss": 0.071, "step": 329 }, { "epoch": 0.07508532423208192, "grad_norm": 1.589657975033299, "learning_rate": 1.2493087999990263e-06, "loss": 0.0739, "step": 330 }, { "epoch": 0.07531285551763367, "grad_norm": 3.043606210421758, "learning_rate": 1.249304592570114e-06, "loss": 0.1518, "step": 331 }, { "epoch": 0.07554038680318544, "grad_norm": 1.4392385212589442, "learning_rate": 1.2493003723815588e-06, "loss": 0.0556, "step": 332 }, { "epoch": 0.0757679180887372, "grad_norm": 2.0188482312629397, "learning_rate": 1.2492961394334474e-06, "loss": 0.0662, "step": 333 }, { "epoch": 0.07599544937428897, "grad_norm": 1.3600493699697815, "learning_rate": 1.2492918937258663e-06, "loss": 0.042, "step": 334 }, { "epoch": 0.07622298065984073, "grad_norm": 2.2314121339792714, "learning_rate": 1.2492876352589024e-06, "loss": 0.0751, "step": 335 }, { "epoch": 0.07645051194539249, "grad_norm": 1.4434673698644476, "learning_rate": 1.2492833640326424e-06, "loss": 0.0359, "step": 336 }, { "epoch": 0.07667804323094425, "grad_norm": 1.9053597077248743, "learning_rate": 1.2492790800471738e-06, "loss": 0.0881, "step": 337 }, { "epoch": 0.07690557451649602, "grad_norm": 0.9804503872776514, "learning_rate": 1.249274783302584e-06, "loss": 0.0332, "step": 338 }, { "epoch": 0.07713310580204778, "grad_norm": 1.439933416412126, "learning_rate": 1.249270473798961e-06, "loss": 0.0363, "step": 339 }, { "epoch": 0.07736063708759955, "grad_norm": 1.8018763642808848, "learning_rate": 1.249266151536393e-06, "loss": 0.0652, "step": 340 }, { "epoch": 0.07758816837315131, "grad_norm": 1.605885586174771, "learning_rate": 1.249261816514968e-06, "loss": 0.0483, "step": 341 }, { "epoch": 0.07781569965870307, "grad_norm": 1.5475812110829932, "learning_rate": 1.2492574687347747e-06, "loss": 0.0518, "step": 342 }, { "epoch": 0.07804323094425483, "grad_norm": 1.3861334147739204, "learning_rate": 1.249253108195902e-06, "loss": 0.0438, "step": 343 }, { "epoch": 0.0782707622298066, "grad_norm": 1.6280816638823692, "learning_rate": 1.249248734898439e-06, "loss": 0.0804, "step": 344 }, { "epoch": 0.07849829351535836, "grad_norm": 1.945038730906618, "learning_rate": 1.2492443488424753e-06, "loss": 0.08, "step": 345 }, { "epoch": 0.07872582480091013, "grad_norm": 1.163435954016696, "learning_rate": 1.2492399500281002e-06, "loss": 0.0404, "step": 346 }, { "epoch": 0.07895335608646188, "grad_norm": 2.0650618862775225, "learning_rate": 1.2492355384554039e-06, "loss": 0.0965, "step": 347 }, { "epoch": 0.07918088737201365, "grad_norm": 1.2688387295944672, "learning_rate": 1.2492311141244764e-06, "loss": 0.0387, "step": 348 }, { "epoch": 0.07940841865756541, "grad_norm": 2.2968428653497472, "learning_rate": 1.249226677035408e-06, "loss": 0.0569, "step": 349 }, { "epoch": 0.07963594994311718, "grad_norm": 1.3014564625890543, "learning_rate": 1.2492222271882896e-06, "loss": 0.0418, "step": 350 }, { "epoch": 0.07986348122866894, "grad_norm": 2.3510870610479433, "learning_rate": 1.2492177645832121e-06, "loss": 0.0932, "step": 351 }, { "epoch": 0.08009101251422071, "grad_norm": 1.4510684428455591, "learning_rate": 1.2492132892202668e-06, "loss": 0.0513, "step": 352 }, { "epoch": 0.08031854379977246, "grad_norm": 4.594413722551582, "learning_rate": 1.2492088010995449e-06, "loss": 0.1235, "step": 353 }, { "epoch": 0.08054607508532423, "grad_norm": 2.137036810353962, "learning_rate": 1.2492043002211385e-06, "loss": 0.1026, "step": 354 }, { "epoch": 0.080773606370876, "grad_norm": 0.7577275839475693, "learning_rate": 1.2491997865851392e-06, "loss": 0.0368, "step": 355 }, { "epoch": 0.08100113765642776, "grad_norm": 1.0494266975406659, "learning_rate": 1.2491952601916395e-06, "loss": 0.0385, "step": 356 }, { "epoch": 0.08122866894197953, "grad_norm": 1.0423156372262818, "learning_rate": 1.2491907210407319e-06, "loss": 0.0424, "step": 357 }, { "epoch": 0.08145620022753129, "grad_norm": 1.150214646729361, "learning_rate": 1.249186169132509e-06, "loss": 0.0694, "step": 358 }, { "epoch": 0.08168373151308304, "grad_norm": 2.382439951131263, "learning_rate": 1.2491816044670641e-06, "loss": 0.0367, "step": 359 }, { "epoch": 0.08191126279863481, "grad_norm": 1.7484749214098252, "learning_rate": 1.24917702704449e-06, "loss": 0.0677, "step": 360 }, { "epoch": 0.08213879408418658, "grad_norm": 2.1328792490346227, "learning_rate": 1.2491724368648808e-06, "loss": 0.049, "step": 361 }, { "epoch": 0.08236632536973834, "grad_norm": 0.6514880974265386, "learning_rate": 1.2491678339283303e-06, "loss": 0.0217, "step": 362 }, { "epoch": 0.08259385665529011, "grad_norm": 1.3802700887569803, "learning_rate": 1.249163218234932e-06, "loss": 0.0406, "step": 363 }, { "epoch": 0.08282138794084186, "grad_norm": 1.7918385557024892, "learning_rate": 1.249158589784781e-06, "loss": 0.064, "step": 364 }, { "epoch": 0.08304891922639362, "grad_norm": 1.2060165323705336, "learning_rate": 1.2491539485779713e-06, "loss": 0.0367, "step": 365 }, { "epoch": 0.08327645051194539, "grad_norm": 1.2166249313910695, "learning_rate": 1.2491492946145981e-06, "loss": 0.0512, "step": 366 }, { "epoch": 0.08350398179749716, "grad_norm": 1.3048815510766842, "learning_rate": 1.2491446278947563e-06, "loss": 0.0554, "step": 367 }, { "epoch": 0.08373151308304892, "grad_norm": 1.9459326713846232, "learning_rate": 1.2491399484185413e-06, "loss": 0.0488, "step": 368 }, { "epoch": 0.08395904436860069, "grad_norm": 1.7915232664153333, "learning_rate": 1.249135256186049e-06, "loss": 0.1229, "step": 369 }, { "epoch": 0.08418657565415244, "grad_norm": 0.9354087864668412, "learning_rate": 1.249130551197375e-06, "loss": 0.0383, "step": 370 }, { "epoch": 0.0844141069397042, "grad_norm": 2.07264306868735, "learning_rate": 1.2491258334526155e-06, "loss": 0.0794, "step": 371 }, { "epoch": 0.08464163822525597, "grad_norm": 1.4534854537948092, "learning_rate": 1.2491211029518672e-06, "loss": 0.0282, "step": 372 }, { "epoch": 0.08486916951080774, "grad_norm": 2.0044431124055984, "learning_rate": 1.2491163596952264e-06, "loss": 0.0709, "step": 373 }, { "epoch": 0.0850967007963595, "grad_norm": 1.6305074169029203, "learning_rate": 1.2491116036827902e-06, "loss": 0.0407, "step": 374 }, { "epoch": 0.08532423208191127, "grad_norm": 1.223898789967783, "learning_rate": 1.2491068349146559e-06, "loss": 0.0595, "step": 375 }, { "epoch": 0.08555176336746302, "grad_norm": 1.7895390232916124, "learning_rate": 1.249102053390921e-06, "loss": 0.1034, "step": 376 }, { "epoch": 0.08577929465301479, "grad_norm": 1.6725786460952021, "learning_rate": 1.249097259111683e-06, "loss": 0.0955, "step": 377 }, { "epoch": 0.08600682593856655, "grad_norm": 2.4309538605657735, "learning_rate": 1.24909245207704e-06, "loss": 0.0794, "step": 378 }, { "epoch": 0.08623435722411832, "grad_norm": 0.8982937912678952, "learning_rate": 1.2490876322870904e-06, "loss": 0.0302, "step": 379 }, { "epoch": 0.08646188850967008, "grad_norm": 3.0756164983832, "learning_rate": 1.2490827997419325e-06, "loss": 0.0549, "step": 380 }, { "epoch": 0.08668941979522184, "grad_norm": 3.676872613008804, "learning_rate": 1.249077954441665e-06, "loss": 0.141, "step": 381 }, { "epoch": 0.0869169510807736, "grad_norm": 1.4373261550715828, "learning_rate": 1.249073096386387e-06, "loss": 0.0392, "step": 382 }, { "epoch": 0.08714448236632537, "grad_norm": 3.1988616398871006, "learning_rate": 1.249068225576198e-06, "loss": 0.0915, "step": 383 }, { "epoch": 0.08737201365187713, "grad_norm": 1.5619645893707617, "learning_rate": 1.2490633420111974e-06, "loss": 0.0541, "step": 384 }, { "epoch": 0.0875995449374289, "grad_norm": 0.9211933924050371, "learning_rate": 1.249058445691485e-06, "loss": 0.0337, "step": 385 }, { "epoch": 0.08782707622298067, "grad_norm": 1.1767496728905957, "learning_rate": 1.2490535366171607e-06, "loss": 0.039, "step": 386 }, { "epoch": 0.08805460750853242, "grad_norm": 2.9534787325055274, "learning_rate": 1.249048614788325e-06, "loss": 0.1048, "step": 387 }, { "epoch": 0.08828213879408418, "grad_norm": 6.651807262283111, "learning_rate": 1.249043680205079e-06, "loss": 0.0494, "step": 388 }, { "epoch": 0.08850967007963595, "grad_norm": 2.5123377668968745, "learning_rate": 1.2490387328675226e-06, "loss": 0.1199, "step": 389 }, { "epoch": 0.08873720136518772, "grad_norm": 1.5502594476140923, "learning_rate": 1.2490337727757576e-06, "loss": 0.0503, "step": 390 }, { "epoch": 0.08896473265073948, "grad_norm": 5.640796726611394, "learning_rate": 1.249028799929885e-06, "loss": 0.06, "step": 391 }, { "epoch": 0.08919226393629125, "grad_norm": 1.2047358188375967, "learning_rate": 1.2490238143300066e-06, "loss": 0.0427, "step": 392 }, { "epoch": 0.089419795221843, "grad_norm": 1.5816796650719427, "learning_rate": 1.2490188159762243e-06, "loss": 0.0665, "step": 393 }, { "epoch": 0.08964732650739476, "grad_norm": 2.8164102954322505, "learning_rate": 1.2490138048686405e-06, "loss": 0.076, "step": 394 }, { "epoch": 0.08987485779294653, "grad_norm": 2.0775035442513596, "learning_rate": 1.249008781007357e-06, "loss": 0.0857, "step": 395 }, { "epoch": 0.0901023890784983, "grad_norm": 1.3816134442939907, "learning_rate": 1.2490037443924768e-06, "loss": 0.0647, "step": 396 }, { "epoch": 0.09032992036405006, "grad_norm": 5.3602328587776205, "learning_rate": 1.2489986950241032e-06, "loss": 0.0857, "step": 397 }, { "epoch": 0.09055745164960181, "grad_norm": 1.3668732893642688, "learning_rate": 1.2489936329023387e-06, "loss": 0.0513, "step": 398 }, { "epoch": 0.09078498293515358, "grad_norm": 1.3818985376090422, "learning_rate": 1.2489885580272874e-06, "loss": 0.0643, "step": 399 }, { "epoch": 0.09101251422070535, "grad_norm": 1.1645733063202421, "learning_rate": 1.2489834703990527e-06, "loss": 0.0564, "step": 400 }, { "epoch": 0.09124004550625711, "grad_norm": 2.579861013447331, "learning_rate": 1.2489783700177385e-06, "loss": 0.0844, "step": 401 }, { "epoch": 0.09146757679180888, "grad_norm": 1.7416779142182774, "learning_rate": 1.2489732568834492e-06, "loss": 0.0874, "step": 402 }, { "epoch": 0.09169510807736064, "grad_norm": 1.2036980088831546, "learning_rate": 1.2489681309962895e-06, "loss": 0.0605, "step": 403 }, { "epoch": 0.0919226393629124, "grad_norm": 2.823337671795372, "learning_rate": 1.2489629923563637e-06, "loss": 0.0563, "step": 404 }, { "epoch": 0.09215017064846416, "grad_norm": 3.5147572243595406, "learning_rate": 1.2489578409637774e-06, "loss": 0.0962, "step": 405 }, { "epoch": 0.09237770193401593, "grad_norm": 2.441595167267241, "learning_rate": 1.2489526768186352e-06, "loss": 0.0409, "step": 406 }, { "epoch": 0.09260523321956769, "grad_norm": 1.606289452778594, "learning_rate": 1.2489474999210434e-06, "loss": 0.083, "step": 407 }, { "epoch": 0.09283276450511946, "grad_norm": 2.0108948875054358, "learning_rate": 1.2489423102711068e-06, "loss": 0.0424, "step": 408 }, { "epoch": 0.09306029579067122, "grad_norm": 1.9517497199537932, "learning_rate": 1.2489371078689326e-06, "loss": 0.063, "step": 409 }, { "epoch": 0.09328782707622298, "grad_norm": 2.1984153851855903, "learning_rate": 1.2489318927146263e-06, "loss": 0.0607, "step": 410 }, { "epoch": 0.09351535836177474, "grad_norm": 0.9821028313036854, "learning_rate": 1.2489266648082951e-06, "loss": 0.0269, "step": 411 }, { "epoch": 0.09374288964732651, "grad_norm": 1.4642895316806703, "learning_rate": 1.2489214241500453e-06, "loss": 0.0773, "step": 412 }, { "epoch": 0.09397042093287827, "grad_norm": 2.6315717094149598, "learning_rate": 1.2489161707399843e-06, "loss": 0.1042, "step": 413 }, { "epoch": 0.09419795221843004, "grad_norm": 1.2730067445859696, "learning_rate": 1.2489109045782194e-06, "loss": 0.0397, "step": 414 }, { "epoch": 0.09442548350398179, "grad_norm": 1.0214233244231261, "learning_rate": 1.2489056256648582e-06, "loss": 0.0297, "step": 415 }, { "epoch": 0.09465301478953356, "grad_norm": 1.8895414872949885, "learning_rate": 1.2489003340000089e-06, "loss": 0.1027, "step": 416 }, { "epoch": 0.09488054607508532, "grad_norm": 1.2431995135273388, "learning_rate": 1.2488950295837792e-06, "loss": 0.0694, "step": 417 }, { "epoch": 0.09510807736063709, "grad_norm": 2.741996145452892, "learning_rate": 1.2488897124162777e-06, "loss": 0.0815, "step": 418 }, { "epoch": 0.09533560864618885, "grad_norm": 1.711178715312897, "learning_rate": 1.248884382497613e-06, "loss": 0.0955, "step": 419 }, { "epoch": 0.09556313993174062, "grad_norm": 2.609523312381175, "learning_rate": 1.2488790398278941e-06, "loss": 0.1047, "step": 420 }, { "epoch": 0.09579067121729237, "grad_norm": 1.53212810794613, "learning_rate": 1.2488736844072304e-06, "loss": 0.0585, "step": 421 }, { "epoch": 0.09601820250284414, "grad_norm": 1.4174025848206742, "learning_rate": 1.248868316235731e-06, "loss": 0.0387, "step": 422 }, { "epoch": 0.0962457337883959, "grad_norm": 2.1766082548840644, "learning_rate": 1.2488629353135059e-06, "loss": 0.0883, "step": 423 }, { "epoch": 0.09647326507394767, "grad_norm": 1.2878325712311849, "learning_rate": 1.2488575416406649e-06, "loss": 0.0469, "step": 424 }, { "epoch": 0.09670079635949944, "grad_norm": 2.1138235879975222, "learning_rate": 1.2488521352173183e-06, "loss": 0.0531, "step": 425 }, { "epoch": 0.09692832764505119, "grad_norm": 1.6090227620325084, "learning_rate": 1.2488467160435765e-06, "loss": 0.0532, "step": 426 }, { "epoch": 0.09715585893060295, "grad_norm": 1.5021035863134842, "learning_rate": 1.2488412841195505e-06, "loss": 0.0805, "step": 427 }, { "epoch": 0.09738339021615472, "grad_norm": 2.0944361790568515, "learning_rate": 1.2488358394453512e-06, "loss": 0.0864, "step": 428 }, { "epoch": 0.09761092150170649, "grad_norm": 1.3336921136142907, "learning_rate": 1.2488303820210897e-06, "loss": 0.0405, "step": 429 }, { "epoch": 0.09783845278725825, "grad_norm": 1.4602632552266945, "learning_rate": 1.2488249118468776e-06, "loss": 0.0319, "step": 430 }, { "epoch": 0.09806598407281002, "grad_norm": 1.1300406263272467, "learning_rate": 1.248819428922827e-06, "loss": 0.0328, "step": 431 }, { "epoch": 0.09829351535836177, "grad_norm": 3.2436410504809006, "learning_rate": 1.2488139332490495e-06, "loss": 0.1052, "step": 432 }, { "epoch": 0.09852104664391353, "grad_norm": 1.1456421976312734, "learning_rate": 1.248808424825658e-06, "loss": 0.0455, "step": 433 }, { "epoch": 0.0987485779294653, "grad_norm": 1.8967128040132528, "learning_rate": 1.2488029036527645e-06, "loss": 0.0521, "step": 434 }, { "epoch": 0.09897610921501707, "grad_norm": 2.22293609570488, "learning_rate": 1.2487973697304822e-06, "loss": 0.0401, "step": 435 }, { "epoch": 0.09920364050056883, "grad_norm": 1.3201279140220556, "learning_rate": 1.248791823058924e-06, "loss": 0.0656, "step": 436 }, { "epoch": 0.0994311717861206, "grad_norm": 1.8801560325756854, "learning_rate": 1.2487862636382034e-06, "loss": 0.036, "step": 437 }, { "epoch": 0.09965870307167235, "grad_norm": 0.9413516301280102, "learning_rate": 1.248780691468434e-06, "loss": 0.0369, "step": 438 }, { "epoch": 0.09988623435722412, "grad_norm": 1.6280926081257134, "learning_rate": 1.2487751065497296e-06, "loss": 0.0679, "step": 439 }, { "epoch": 0.10011376564277588, "grad_norm": 1.1483666492978277, "learning_rate": 1.2487695088822044e-06, "loss": 0.0476, "step": 440 }, { "epoch": 0.10034129692832765, "grad_norm": 1.091509019893692, "learning_rate": 1.2487638984659729e-06, "loss": 0.0267, "step": 441 }, { "epoch": 0.10056882821387941, "grad_norm": 1.2894939135732226, "learning_rate": 1.2487582753011496e-06, "loss": 0.0379, "step": 442 }, { "epoch": 0.10079635949943117, "grad_norm": 1.5539892877637305, "learning_rate": 1.2487526393878497e-06, "loss": 0.0717, "step": 443 }, { "epoch": 0.10102389078498293, "grad_norm": 1.0799436334247106, "learning_rate": 1.248746990726188e-06, "loss": 0.0494, "step": 444 }, { "epoch": 0.1012514220705347, "grad_norm": 1.0482657304812906, "learning_rate": 1.2487413293162803e-06, "loss": 0.0328, "step": 445 }, { "epoch": 0.10147895335608646, "grad_norm": 2.974661390553103, "learning_rate": 1.2487356551582421e-06, "loss": 0.1436, "step": 446 }, { "epoch": 0.10170648464163823, "grad_norm": 2.6800805691171843, "learning_rate": 1.2487299682521893e-06, "loss": 0.1383, "step": 447 }, { "epoch": 0.10193401592719, "grad_norm": 2.2359167995553233, "learning_rate": 1.2487242685982384e-06, "loss": 0.0485, "step": 448 }, { "epoch": 0.10216154721274175, "grad_norm": 2.818745463327436, "learning_rate": 1.2487185561965057e-06, "loss": 0.0446, "step": 449 }, { "epoch": 0.10238907849829351, "grad_norm": 0.6315900981706886, "learning_rate": 1.248712831047108e-06, "loss": 0.0186, "step": 450 }, { "epoch": 0.10261660978384528, "grad_norm": 1.9674239723403535, "learning_rate": 1.2487070931501624e-06, "loss": 0.0673, "step": 451 }, { "epoch": 0.10284414106939704, "grad_norm": 1.6885325551942292, "learning_rate": 1.2487013425057858e-06, "loss": 0.0767, "step": 452 }, { "epoch": 0.10307167235494881, "grad_norm": 1.891051810210748, "learning_rate": 1.2486955791140964e-06, "loss": 0.063, "step": 453 }, { "epoch": 0.10329920364050058, "grad_norm": 1.1912814980714257, "learning_rate": 1.2486898029752113e-06, "loss": 0.0493, "step": 454 }, { "epoch": 0.10352673492605233, "grad_norm": 0.8883039975316582, "learning_rate": 1.248684014089249e-06, "loss": 0.0277, "step": 455 }, { "epoch": 0.1037542662116041, "grad_norm": 1.306066876530539, "learning_rate": 1.2486782124563277e-06, "loss": 0.0544, "step": 456 }, { "epoch": 0.10398179749715586, "grad_norm": 1.5420420577450527, "learning_rate": 1.2486723980765659e-06, "loss": 0.077, "step": 457 }, { "epoch": 0.10420932878270762, "grad_norm": 2.0367683349755823, "learning_rate": 1.2486665709500826e-06, "loss": 0.0649, "step": 458 }, { "epoch": 0.10443686006825939, "grad_norm": 1.1542715096556455, "learning_rate": 1.2486607310769965e-06, "loss": 0.0586, "step": 459 }, { "epoch": 0.10466439135381114, "grad_norm": 2.061046992152943, "learning_rate": 1.2486548784574275e-06, "loss": 0.0491, "step": 460 }, { "epoch": 0.10489192263936291, "grad_norm": 1.0629825041281826, "learning_rate": 1.2486490130914948e-06, "loss": 0.0445, "step": 461 }, { "epoch": 0.10511945392491467, "grad_norm": 1.415699131985968, "learning_rate": 1.2486431349793185e-06, "loss": 0.0679, "step": 462 }, { "epoch": 0.10534698521046644, "grad_norm": 1.8548752695365796, "learning_rate": 1.2486372441210188e-06, "loss": 0.0514, "step": 463 }, { "epoch": 0.1055745164960182, "grad_norm": 1.1301759724239804, "learning_rate": 1.248631340516716e-06, "loss": 0.0381, "step": 464 }, { "epoch": 0.10580204778156997, "grad_norm": 1.408994037607189, "learning_rate": 1.2486254241665302e-06, "loss": 0.0692, "step": 465 }, { "epoch": 0.10602957906712172, "grad_norm": 1.6631340659469183, "learning_rate": 1.2486194950705831e-06, "loss": 0.0454, "step": 466 }, { "epoch": 0.10625711035267349, "grad_norm": 4.446207788916147, "learning_rate": 1.248613553228996e-06, "loss": 0.051, "step": 467 }, { "epoch": 0.10648464163822526, "grad_norm": 2.558672760104107, "learning_rate": 1.2486075986418896e-06, "loss": 0.0581, "step": 468 }, { "epoch": 0.10671217292377702, "grad_norm": 1.1293994671100194, "learning_rate": 1.248601631309386e-06, "loss": 0.0601, "step": 469 }, { "epoch": 0.10693970420932879, "grad_norm": 0.9858665143030374, "learning_rate": 1.2485956512316072e-06, "loss": 0.042, "step": 470 }, { "epoch": 0.10716723549488055, "grad_norm": 1.0238223731711866, "learning_rate": 1.2485896584086754e-06, "loss": 0.0419, "step": 471 }, { "epoch": 0.1073947667804323, "grad_norm": 1.4636163379221103, "learning_rate": 1.248583652840713e-06, "loss": 0.0763, "step": 472 }, { "epoch": 0.10762229806598407, "grad_norm": 2.3052142467408534, "learning_rate": 1.2485776345278427e-06, "loss": 0.0577, "step": 473 }, { "epoch": 0.10784982935153584, "grad_norm": 1.797428495175526, "learning_rate": 1.2485716034701876e-06, "loss": 0.0664, "step": 474 }, { "epoch": 0.1080773606370876, "grad_norm": 0.9502391190499007, "learning_rate": 1.2485655596678712e-06, "loss": 0.032, "step": 475 }, { "epoch": 0.10830489192263937, "grad_norm": 2.0597597691340033, "learning_rate": 1.2485595031210164e-06, "loss": 0.0693, "step": 476 }, { "epoch": 0.10853242320819112, "grad_norm": 1.6422248848311762, "learning_rate": 1.2485534338297475e-06, "loss": 0.036, "step": 477 }, { "epoch": 0.10875995449374289, "grad_norm": 1.0205877451233056, "learning_rate": 1.2485473517941884e-06, "loss": 0.036, "step": 478 }, { "epoch": 0.10898748577929465, "grad_norm": 1.352496943979393, "learning_rate": 1.2485412570144633e-06, "loss": 0.0619, "step": 479 }, { "epoch": 0.10921501706484642, "grad_norm": 1.1460532851585836, "learning_rate": 1.2485351494906969e-06, "loss": 0.0457, "step": 480 }, { "epoch": 0.10944254835039818, "grad_norm": 2.136834742910293, "learning_rate": 1.2485290292230142e-06, "loss": 0.1229, "step": 481 }, { "epoch": 0.10967007963594995, "grad_norm": 1.2780826832089733, "learning_rate": 1.24852289621154e-06, "loss": 0.0295, "step": 482 }, { "epoch": 0.1098976109215017, "grad_norm": 1.5165261909270789, "learning_rate": 1.2485167504563995e-06, "loss": 0.0605, "step": 483 }, { "epoch": 0.11012514220705347, "grad_norm": 2.5598525552258153, "learning_rate": 1.2485105919577187e-06, "loss": 0.1343, "step": 484 }, { "epoch": 0.11035267349260523, "grad_norm": 1.2948078456387253, "learning_rate": 1.2485044207156233e-06, "loss": 0.0462, "step": 485 }, { "epoch": 0.110580204778157, "grad_norm": 2.2282842690273674, "learning_rate": 1.2484982367302395e-06, "loss": 0.0906, "step": 486 }, { "epoch": 0.11080773606370876, "grad_norm": 1.58668388631447, "learning_rate": 1.2484920400016936e-06, "loss": 0.0754, "step": 487 }, { "epoch": 0.11103526734926053, "grad_norm": 1.8486367636195544, "learning_rate": 1.2484858305301122e-06, "loss": 0.0858, "step": 488 }, { "epoch": 0.11126279863481228, "grad_norm": 1.2490242519124817, "learning_rate": 1.2484796083156222e-06, "loss": 0.0412, "step": 489 }, { "epoch": 0.11149032992036405, "grad_norm": 2.4293013886013024, "learning_rate": 1.2484733733583511e-06, "loss": 0.0347, "step": 490 }, { "epoch": 0.11171786120591581, "grad_norm": 1.3895430378196065, "learning_rate": 1.248467125658426e-06, "loss": 0.0508, "step": 491 }, { "epoch": 0.11194539249146758, "grad_norm": 1.0989498931762587, "learning_rate": 1.2484608652159746e-06, "loss": 0.0376, "step": 492 }, { "epoch": 0.11217292377701935, "grad_norm": 1.2816896914965858, "learning_rate": 1.248454592031125e-06, "loss": 0.0325, "step": 493 }, { "epoch": 0.1124004550625711, "grad_norm": 1.3834104899544186, "learning_rate": 1.2484483061040054e-06, "loss": 0.0502, "step": 494 }, { "epoch": 0.11262798634812286, "grad_norm": 1.8438437167351667, "learning_rate": 1.2484420074347441e-06, "loss": 0.0675, "step": 495 }, { "epoch": 0.11285551763367463, "grad_norm": 1.3847331523036337, "learning_rate": 1.24843569602347e-06, "loss": 0.0708, "step": 496 }, { "epoch": 0.1130830489192264, "grad_norm": 1.4695794221134664, "learning_rate": 1.2484293718703119e-06, "loss": 0.0677, "step": 497 }, { "epoch": 0.11331058020477816, "grad_norm": 1.4197704649286171, "learning_rate": 1.2484230349753994e-06, "loss": 0.0275, "step": 498 }, { "epoch": 0.11353811149032993, "grad_norm": 1.9758484599559376, "learning_rate": 1.2484166853388617e-06, "loss": 0.0846, "step": 499 }, { "epoch": 0.11376564277588168, "grad_norm": 1.4480733215282273, "learning_rate": 1.2484103229608288e-06, "loss": 0.0302, "step": 500 }, { "epoch": 0.11399317406143344, "grad_norm": 1.6803783059061204, "learning_rate": 1.2484039478414305e-06, "loss": 0.033, "step": 501 }, { "epoch": 0.11422070534698521, "grad_norm": 1.5093539895040036, "learning_rate": 1.2483975599807972e-06, "loss": 0.0592, "step": 502 }, { "epoch": 0.11444823663253698, "grad_norm": 1.2912092446277637, "learning_rate": 1.2483911593790595e-06, "loss": 0.0341, "step": 503 }, { "epoch": 0.11467576791808874, "grad_norm": 4.053757173255741, "learning_rate": 1.2483847460363482e-06, "loss": 0.08, "step": 504 }, { "epoch": 0.1149032992036405, "grad_norm": 1.8561008975587792, "learning_rate": 1.2483783199527943e-06, "loss": 0.0417, "step": 505 }, { "epoch": 0.11513083048919226, "grad_norm": 1.7954101019883197, "learning_rate": 1.2483718811285296e-06, "loss": 0.098, "step": 506 }, { "epoch": 0.11535836177474403, "grad_norm": 0.90376517227131, "learning_rate": 1.2483654295636848e-06, "loss": 0.0204, "step": 507 }, { "epoch": 0.11558589306029579, "grad_norm": 1.0402238740609373, "learning_rate": 1.2483589652583924e-06, "loss": 0.0393, "step": 508 }, { "epoch": 0.11581342434584756, "grad_norm": 1.2739463431329932, "learning_rate": 1.2483524882127846e-06, "loss": 0.0361, "step": 509 }, { "epoch": 0.11604095563139932, "grad_norm": 1.8542368928683455, "learning_rate": 1.2483459984269933e-06, "loss": 0.062, "step": 510 }, { "epoch": 0.11626848691695107, "grad_norm": 1.4901585211244064, "learning_rate": 1.2483394959011514e-06, "loss": 0.0362, "step": 511 }, { "epoch": 0.11649601820250284, "grad_norm": 1.655677974364157, "learning_rate": 1.248332980635392e-06, "loss": 0.068, "step": 512 }, { "epoch": 0.1167235494880546, "grad_norm": 1.40135662829084, "learning_rate": 1.2483264526298478e-06, "loss": 0.0467, "step": 513 }, { "epoch": 0.11695108077360637, "grad_norm": 0.9801745008839565, "learning_rate": 1.2483199118846525e-06, "loss": 0.0547, "step": 514 }, { "epoch": 0.11717861205915814, "grad_norm": 0.7956319000441687, "learning_rate": 1.2483133583999399e-06, "loss": 0.0292, "step": 515 }, { "epoch": 0.1174061433447099, "grad_norm": 1.5822262084465146, "learning_rate": 1.2483067921758439e-06, "loss": 0.0442, "step": 516 }, { "epoch": 0.11763367463026166, "grad_norm": 1.788875375154368, "learning_rate": 1.2483002132124983e-06, "loss": 0.1027, "step": 517 }, { "epoch": 0.11786120591581342, "grad_norm": 1.12691319203208, "learning_rate": 1.2482936215100382e-06, "loss": 0.0392, "step": 518 }, { "epoch": 0.11808873720136519, "grad_norm": 1.4278197951300633, "learning_rate": 1.2482870170685978e-06, "loss": 0.0309, "step": 519 }, { "epoch": 0.11831626848691695, "grad_norm": 2.3408918893379176, "learning_rate": 1.2482803998883122e-06, "loss": 0.0554, "step": 520 }, { "epoch": 0.11854379977246872, "grad_norm": 1.308272654811059, "learning_rate": 1.2482737699693168e-06, "loss": 0.0386, "step": 521 }, { "epoch": 0.11877133105802047, "grad_norm": 1.4643677550086747, "learning_rate": 1.248267127311747e-06, "loss": 0.0517, "step": 522 }, { "epoch": 0.11899886234357224, "grad_norm": 1.9375531511223079, "learning_rate": 1.2482604719157386e-06, "loss": 0.0547, "step": 523 }, { "epoch": 0.119226393629124, "grad_norm": 1.4368931736724269, "learning_rate": 1.2482538037814277e-06, "loss": 0.0594, "step": 524 }, { "epoch": 0.11945392491467577, "grad_norm": 2.3983593700848864, "learning_rate": 1.2482471229089502e-06, "loss": 0.0479, "step": 525 }, { "epoch": 0.11968145620022753, "grad_norm": 1.5406589025382933, "learning_rate": 1.2482404292984431e-06, "loss": 0.0468, "step": 526 }, { "epoch": 0.1199089874857793, "grad_norm": 2.7569107151011254, "learning_rate": 1.248233722950043e-06, "loss": 0.1653, "step": 527 }, { "epoch": 0.12013651877133105, "grad_norm": 1.2498018017145271, "learning_rate": 1.2482270038638872e-06, "loss": 0.0376, "step": 528 }, { "epoch": 0.12036405005688282, "grad_norm": 1.663550002347151, "learning_rate": 1.2482202720401128e-06, "loss": 0.0336, "step": 529 }, { "epoch": 0.12059158134243458, "grad_norm": 1.302350240596974, "learning_rate": 1.248213527478857e-06, "loss": 0.058, "step": 530 }, { "epoch": 0.12081911262798635, "grad_norm": 1.6402862563370133, "learning_rate": 1.2482067701802583e-06, "loss": 0.0826, "step": 531 }, { "epoch": 0.12104664391353812, "grad_norm": 1.1268914835455415, "learning_rate": 1.2482000001444547e-06, "loss": 0.0524, "step": 532 }, { "epoch": 0.12127417519908988, "grad_norm": 1.877525140915591, "learning_rate": 1.2481932173715845e-06, "loss": 0.0539, "step": 533 }, { "epoch": 0.12150170648464163, "grad_norm": 1.928222388008525, "learning_rate": 1.2481864218617859e-06, "loss": 0.1134, "step": 534 }, { "epoch": 0.1217292377701934, "grad_norm": 1.2293579919391775, "learning_rate": 1.2481796136151984e-06, "loss": 0.0516, "step": 535 }, { "epoch": 0.12195676905574517, "grad_norm": 1.5890038701436435, "learning_rate": 1.2481727926319609e-06, "loss": 0.0922, "step": 536 }, { "epoch": 0.12218430034129693, "grad_norm": 1.850040007143475, "learning_rate": 1.2481659589122127e-06, "loss": 0.075, "step": 537 }, { "epoch": 0.1224118316268487, "grad_norm": 2.5725083852053747, "learning_rate": 1.2481591124560934e-06, "loss": 0.0891, "step": 538 }, { "epoch": 0.12263936291240045, "grad_norm": 1.768076465759339, "learning_rate": 1.2481522532637435e-06, "loss": 0.0533, "step": 539 }, { "epoch": 0.12286689419795221, "grad_norm": 0.9070123837857468, "learning_rate": 1.2481453813353026e-06, "loss": 0.0294, "step": 540 }, { "epoch": 0.12309442548350398, "grad_norm": 1.625916805043753, "learning_rate": 1.2481384966709116e-06, "loss": 0.0323, "step": 541 }, { "epoch": 0.12332195676905575, "grad_norm": 2.291411200127928, "learning_rate": 1.2481315992707104e-06, "loss": 0.0964, "step": 542 }, { "epoch": 0.12354948805460751, "grad_norm": 1.7452732363406978, "learning_rate": 1.248124689134841e-06, "loss": 0.0555, "step": 543 }, { "epoch": 0.12377701934015928, "grad_norm": 1.4048824029212816, "learning_rate": 1.2481177662634438e-06, "loss": 0.0551, "step": 544 }, { "epoch": 0.12400455062571103, "grad_norm": 6.9359786939510695, "learning_rate": 1.2481108306566609e-06, "loss": 0.0746, "step": 545 }, { "epoch": 0.1242320819112628, "grad_norm": 1.2916063155011999, "learning_rate": 1.2481038823146338e-06, "loss": 0.0245, "step": 546 }, { "epoch": 0.12445961319681456, "grad_norm": 1.9432907622995745, "learning_rate": 1.2480969212375043e-06, "loss": 0.1119, "step": 547 }, { "epoch": 0.12468714448236633, "grad_norm": 2.9617568197379347, "learning_rate": 1.2480899474254151e-06, "loss": 0.0878, "step": 548 }, { "epoch": 0.12491467576791809, "grad_norm": 0.9954987625211155, "learning_rate": 1.2480829608785085e-06, "loss": 0.0397, "step": 549 }, { "epoch": 0.12514220705346984, "grad_norm": 1.2186721476872615, "learning_rate": 1.2480759615969273e-06, "loss": 0.0462, "step": 550 }, { "epoch": 0.12536973833902162, "grad_norm": 1.1723077588981092, "learning_rate": 1.2480689495808144e-06, "loss": 0.0646, "step": 551 }, { "epoch": 0.12559726962457338, "grad_norm": 1.4338215761686757, "learning_rate": 1.2480619248303133e-06, "loss": 0.0806, "step": 552 }, { "epoch": 0.12582480091012513, "grad_norm": 1.841000108666981, "learning_rate": 1.2480548873455675e-06, "loss": 0.0827, "step": 553 }, { "epoch": 0.1260523321956769, "grad_norm": 2.5765107503856144, "learning_rate": 1.248047837126721e-06, "loss": 0.1326, "step": 554 }, { "epoch": 0.12627986348122866, "grad_norm": 1.3867433794433537, "learning_rate": 1.248040774173918e-06, "loss": 0.0588, "step": 555 }, { "epoch": 0.12650739476678044, "grad_norm": 1.3761837680071665, "learning_rate": 1.248033698487302e-06, "loss": 0.0495, "step": 556 }, { "epoch": 0.1267349260523322, "grad_norm": 1.1704965217659915, "learning_rate": 1.2480266100670189e-06, "loss": 0.0467, "step": 557 }, { "epoch": 0.12696245733788397, "grad_norm": 2.106768640375489, "learning_rate": 1.2480195089132125e-06, "loss": 0.0426, "step": 558 }, { "epoch": 0.12718998862343572, "grad_norm": 1.1575398708268563, "learning_rate": 1.2480123950260284e-06, "loss": 0.0413, "step": 559 }, { "epoch": 0.12741751990898748, "grad_norm": 9.831393771519632, "learning_rate": 1.248005268405612e-06, "loss": 0.0434, "step": 560 }, { "epoch": 0.12764505119453926, "grad_norm": 1.0820393760670333, "learning_rate": 1.2479981290521087e-06, "loss": 0.034, "step": 561 }, { "epoch": 0.127872582480091, "grad_norm": 2.206902342168789, "learning_rate": 1.2479909769656648e-06, "loss": 0.073, "step": 562 }, { "epoch": 0.1281001137656428, "grad_norm": 1.5027865833496339, "learning_rate": 1.2479838121464263e-06, "loss": 0.0744, "step": 563 }, { "epoch": 0.12832764505119454, "grad_norm": 2.001324966678472, "learning_rate": 1.2479766345945395e-06, "loss": 0.0457, "step": 564 }, { "epoch": 0.1285551763367463, "grad_norm": 0.9498156004808115, "learning_rate": 1.2479694443101513e-06, "loss": 0.0442, "step": 565 }, { "epoch": 0.12878270762229807, "grad_norm": 1.427402300347758, "learning_rate": 1.2479622412934087e-06, "loss": 0.0774, "step": 566 }, { "epoch": 0.12901023890784982, "grad_norm": 1.338474573791641, "learning_rate": 1.2479550255444586e-06, "loss": 0.0498, "step": 567 }, { "epoch": 0.1292377701934016, "grad_norm": 1.0332062154173083, "learning_rate": 1.2479477970634487e-06, "loss": 0.0575, "step": 568 }, { "epoch": 0.12946530147895335, "grad_norm": 2.342085432201913, "learning_rate": 1.2479405558505267e-06, "loss": 0.0776, "step": 569 }, { "epoch": 0.1296928327645051, "grad_norm": 1.7614392055133243, "learning_rate": 1.247933301905841e-06, "loss": 0.0626, "step": 570 }, { "epoch": 0.12992036405005689, "grad_norm": 1.401551595719724, "learning_rate": 1.2479260352295388e-06, "loss": 0.0354, "step": 571 }, { "epoch": 0.13014789533560864, "grad_norm": 1.4755845320165697, "learning_rate": 1.2479187558217697e-06, "loss": 0.0407, "step": 572 }, { "epoch": 0.13037542662116042, "grad_norm": 0.7585780841566192, "learning_rate": 1.247911463682682e-06, "loss": 0.0376, "step": 573 }, { "epoch": 0.13060295790671217, "grad_norm": 2.2124015942087496, "learning_rate": 1.2479041588124247e-06, "loss": 0.0537, "step": 574 }, { "epoch": 0.13083048919226395, "grad_norm": 1.0766166316852028, "learning_rate": 1.2478968412111471e-06, "loss": 0.0292, "step": 575 }, { "epoch": 0.1310580204778157, "grad_norm": 0.995096761634457, "learning_rate": 1.247889510878999e-06, "loss": 0.0523, "step": 576 }, { "epoch": 0.13128555176336745, "grad_norm": 1.6016154355497043, "learning_rate": 1.24788216781613e-06, "loss": 0.0634, "step": 577 }, { "epoch": 0.13151308304891923, "grad_norm": 1.1448425600800458, "learning_rate": 1.2478748120226902e-06, "loss": 0.0449, "step": 578 }, { "epoch": 0.13174061433447098, "grad_norm": 1.1092744505453933, "learning_rate": 1.2478674434988299e-06, "loss": 0.0377, "step": 579 }, { "epoch": 0.13196814562002276, "grad_norm": 0.8431904554396206, "learning_rate": 1.2478600622447001e-06, "loss": 0.0404, "step": 580 }, { "epoch": 0.13219567690557452, "grad_norm": 1.1168682097019669, "learning_rate": 1.2478526682604512e-06, "loss": 0.0324, "step": 581 }, { "epoch": 0.13242320819112627, "grad_norm": 1.3377796205096442, "learning_rate": 1.2478452615462345e-06, "loss": 0.0663, "step": 582 }, { "epoch": 0.13265073947667805, "grad_norm": 3.2228218888338667, "learning_rate": 1.247837842102201e-06, "loss": 0.1264, "step": 583 }, { "epoch": 0.1328782707622298, "grad_norm": 1.5298131963208237, "learning_rate": 1.2478304099285031e-06, "loss": 0.0525, "step": 584 }, { "epoch": 0.13310580204778158, "grad_norm": 1.1458963041110781, "learning_rate": 1.2478229650252921e-06, "loss": 0.0464, "step": 585 }, { "epoch": 0.13333333333333333, "grad_norm": 1.7278138919558592, "learning_rate": 1.2478155073927204e-06, "loss": 0.0556, "step": 586 }, { "epoch": 0.13356086461888508, "grad_norm": 1.2475569782092948, "learning_rate": 1.2478080370309404e-06, "loss": 0.0428, "step": 587 }, { "epoch": 0.13378839590443686, "grad_norm": 1.088342211454506, "learning_rate": 1.2478005539401046e-06, "loss": 0.0295, "step": 588 }, { "epoch": 0.13401592718998862, "grad_norm": 1.7587707539606798, "learning_rate": 1.2477930581203663e-06, "loss": 0.0442, "step": 589 }, { "epoch": 0.1342434584755404, "grad_norm": 1.1629708095404674, "learning_rate": 1.2477855495718782e-06, "loss": 0.0548, "step": 590 }, { "epoch": 0.13447098976109215, "grad_norm": 1.551063066789381, "learning_rate": 1.2477780282947942e-06, "loss": 0.037, "step": 591 }, { "epoch": 0.13469852104664393, "grad_norm": 1.0301289542876222, "learning_rate": 1.2477704942892677e-06, "loss": 0.0443, "step": 592 }, { "epoch": 0.13492605233219568, "grad_norm": 3.67842928304707, "learning_rate": 1.2477629475554532e-06, "loss": 0.0909, "step": 593 }, { "epoch": 0.13515358361774743, "grad_norm": 1.6590539267977245, "learning_rate": 1.2477553880935043e-06, "loss": 0.0523, "step": 594 }, { "epoch": 0.1353811149032992, "grad_norm": 1.9972940081293025, "learning_rate": 1.2477478159035758e-06, "loss": 0.0745, "step": 595 }, { "epoch": 0.13560864618885096, "grad_norm": 2.5034292233621693, "learning_rate": 1.2477402309858226e-06, "loss": 0.0731, "step": 596 }, { "epoch": 0.13583617747440274, "grad_norm": 1.7292171438267574, "learning_rate": 1.2477326333403995e-06, "loss": 0.0947, "step": 597 }, { "epoch": 0.1360637087599545, "grad_norm": 1.0893865478354072, "learning_rate": 1.2477250229674618e-06, "loss": 0.036, "step": 598 }, { "epoch": 0.13629124004550625, "grad_norm": 2.0220916101870854, "learning_rate": 1.2477173998671653e-06, "loss": 0.0695, "step": 599 }, { "epoch": 0.13651877133105803, "grad_norm": 1.6663966081636725, "learning_rate": 1.2477097640396655e-06, "loss": 0.0239, "step": 600 }, { "epoch": 0.13674630261660978, "grad_norm": 2.160820824239015, "learning_rate": 1.2477021154851185e-06, "loss": 0.041, "step": 601 }, { "epoch": 0.13697383390216156, "grad_norm": 1.7560963022061893, "learning_rate": 1.2476944542036806e-06, "loss": 0.0646, "step": 602 }, { "epoch": 0.1372013651877133, "grad_norm": 1.0045761668266382, "learning_rate": 1.2476867801955086e-06, "loss": 0.0319, "step": 603 }, { "epoch": 0.13742889647326506, "grad_norm": 1.1722883805586992, "learning_rate": 1.247679093460759e-06, "loss": 0.0421, "step": 604 }, { "epoch": 0.13765642775881684, "grad_norm": 1.5520795851774112, "learning_rate": 1.2476713939995895e-06, "loss": 0.0545, "step": 605 }, { "epoch": 0.1378839590443686, "grad_norm": 1.5341083321869782, "learning_rate": 1.2476636818121568e-06, "loss": 0.0587, "step": 606 }, { "epoch": 0.13811149032992037, "grad_norm": 1.427789001931161, "learning_rate": 1.247655956898619e-06, "loss": 0.0783, "step": 607 }, { "epoch": 0.13833902161547212, "grad_norm": 1.6612253896109521, "learning_rate": 1.2476482192591335e-06, "loss": 0.0454, "step": 608 }, { "epoch": 0.1385665529010239, "grad_norm": 4.06982296073183, "learning_rate": 1.247640468893859e-06, "loss": 0.0518, "step": 609 }, { "epoch": 0.13879408418657566, "grad_norm": 0.9984415328300282, "learning_rate": 1.2476327058029534e-06, "loss": 0.0286, "step": 610 }, { "epoch": 0.1390216154721274, "grad_norm": 1.5505955919902168, "learning_rate": 1.2476249299865757e-06, "loss": 0.0679, "step": 611 }, { "epoch": 0.1392491467576792, "grad_norm": 1.1777984076454868, "learning_rate": 1.2476171414448847e-06, "loss": 0.0333, "step": 612 }, { "epoch": 0.13947667804323094, "grad_norm": 1.5134654111327426, "learning_rate": 1.2476093401780397e-06, "loss": 0.0464, "step": 613 }, { "epoch": 0.13970420932878272, "grad_norm": 8.868555700718426, "learning_rate": 1.2476015261861998e-06, "loss": 0.0588, "step": 614 }, { "epoch": 0.13993174061433447, "grad_norm": 1.07185240181413, "learning_rate": 1.247593699469525e-06, "loss": 0.0508, "step": 615 }, { "epoch": 0.14015927189988622, "grad_norm": 1.0467231615661654, "learning_rate": 1.2475858600281754e-06, "loss": 0.0375, "step": 616 }, { "epoch": 0.140386803185438, "grad_norm": 1.6914112039280367, "learning_rate": 1.247578007862311e-06, "loss": 0.0385, "step": 617 }, { "epoch": 0.14061433447098975, "grad_norm": 1.6162335462442214, "learning_rate": 1.2475701429720923e-06, "loss": 0.0617, "step": 618 }, { "epoch": 0.14084186575654153, "grad_norm": 0.8727030068232594, "learning_rate": 1.24756226535768e-06, "loss": 0.0388, "step": 619 }, { "epoch": 0.1410693970420933, "grad_norm": 1.5566079211643449, "learning_rate": 1.2475543750192352e-06, "loss": 0.094, "step": 620 }, { "epoch": 0.14129692832764504, "grad_norm": 1.1473885930159387, "learning_rate": 1.2475464719569192e-06, "loss": 0.0563, "step": 621 }, { "epoch": 0.14152445961319682, "grad_norm": 1.111358042304426, "learning_rate": 1.2475385561708934e-06, "loss": 0.058, "step": 622 }, { "epoch": 0.14175199089874857, "grad_norm": 0.8961189135726134, "learning_rate": 1.2475306276613194e-06, "loss": 0.0393, "step": 623 }, { "epoch": 0.14197952218430035, "grad_norm": 3.442046327889576, "learning_rate": 1.2475226864283596e-06, "loss": 0.0713, "step": 624 }, { "epoch": 0.1422070534698521, "grad_norm": 4.150666880510046, "learning_rate": 1.2475147324721764e-06, "loss": 0.0442, "step": 625 }, { "epoch": 0.14243458475540388, "grad_norm": 0.9296406182877743, "learning_rate": 1.2475067657929319e-06, "loss": 0.0414, "step": 626 }, { "epoch": 0.14266211604095563, "grad_norm": 1.0829545070929647, "learning_rate": 1.2474987863907894e-06, "loss": 0.0451, "step": 627 }, { "epoch": 0.14288964732650739, "grad_norm": 1.8334147294224943, "learning_rate": 1.2474907942659116e-06, "loss": 0.0771, "step": 628 }, { "epoch": 0.14311717861205916, "grad_norm": 1.7815672754482244, "learning_rate": 1.247482789418462e-06, "loss": 0.0897, "step": 629 }, { "epoch": 0.14334470989761092, "grad_norm": 1.3733903816478978, "learning_rate": 1.2474747718486044e-06, "loss": 0.0358, "step": 630 }, { "epoch": 0.1435722411831627, "grad_norm": 2.28709865860884, "learning_rate": 1.2474667415565022e-06, "loss": 0.0652, "step": 631 }, { "epoch": 0.14379977246871445, "grad_norm": 1.231780324099585, "learning_rate": 1.24745869854232e-06, "loss": 0.0425, "step": 632 }, { "epoch": 0.1440273037542662, "grad_norm": 1.335370738840473, "learning_rate": 1.2474506428062219e-06, "loss": 0.0422, "step": 633 }, { "epoch": 0.14425483503981798, "grad_norm": 2.966338619611093, "learning_rate": 1.2474425743483726e-06, "loss": 0.1214, "step": 634 }, { "epoch": 0.14448236632536973, "grad_norm": 0.8989763932220884, "learning_rate": 1.2474344931689371e-06, "loss": 0.0341, "step": 635 }, { "epoch": 0.1447098976109215, "grad_norm": 0.937533756927628, "learning_rate": 1.2474263992680805e-06, "loss": 0.0394, "step": 636 }, { "epoch": 0.14493742889647326, "grad_norm": 1.8912948524392619, "learning_rate": 1.247418292645968e-06, "loss": 0.058, "step": 637 }, { "epoch": 0.14516496018202502, "grad_norm": 2.6567132856925877, "learning_rate": 1.2474101733027659e-06, "loss": 0.1061, "step": 638 }, { "epoch": 0.1453924914675768, "grad_norm": 1.0544164694645533, "learning_rate": 1.2474020412386395e-06, "loss": 0.0327, "step": 639 }, { "epoch": 0.14562002275312855, "grad_norm": 0.8862493084977525, "learning_rate": 1.2473938964537551e-06, "loss": 0.0337, "step": 640 }, { "epoch": 0.14584755403868033, "grad_norm": 1.1660786205758522, "learning_rate": 1.2473857389482797e-06, "loss": 0.0379, "step": 641 }, { "epoch": 0.14607508532423208, "grad_norm": 1.275063352619887, "learning_rate": 1.2473775687223794e-06, "loss": 0.0677, "step": 642 }, { "epoch": 0.14630261660978386, "grad_norm": 1.033368353907448, "learning_rate": 1.2473693857762215e-06, "loss": 0.0499, "step": 643 }, { "epoch": 0.1465301478953356, "grad_norm": 0.8176955754401027, "learning_rate": 1.247361190109973e-06, "loss": 0.028, "step": 644 }, { "epoch": 0.14675767918088736, "grad_norm": 1.5868301680057004, "learning_rate": 1.2473529817238016e-06, "loss": 0.0758, "step": 645 }, { "epoch": 0.14698521046643914, "grad_norm": 1.1116607935229232, "learning_rate": 1.2473447606178754e-06, "loss": 0.0464, "step": 646 }, { "epoch": 0.1472127417519909, "grad_norm": 1.3737040045295303, "learning_rate": 1.2473365267923617e-06, "loss": 0.038, "step": 647 }, { "epoch": 0.14744027303754267, "grad_norm": 1.6851756106039961, "learning_rate": 1.2473282802474293e-06, "loss": 0.0521, "step": 648 }, { "epoch": 0.14766780432309443, "grad_norm": 1.067298154630741, "learning_rate": 1.2473200209832465e-06, "loss": 0.0339, "step": 649 }, { "epoch": 0.14789533560864618, "grad_norm": 3.389712283735002, "learning_rate": 1.2473117489999823e-06, "loss": 0.1032, "step": 650 }, { "epoch": 0.14812286689419796, "grad_norm": 1.6568457682006943, "learning_rate": 1.2473034642978057e-06, "loss": 0.0619, "step": 651 }, { "epoch": 0.1483503981797497, "grad_norm": 1.490856549646787, "learning_rate": 1.247295166876886e-06, "loss": 0.0516, "step": 652 }, { "epoch": 0.1485779294653015, "grad_norm": 1.1851614081448008, "learning_rate": 1.2472868567373924e-06, "loss": 0.0431, "step": 653 }, { "epoch": 0.14880546075085324, "grad_norm": 1.644166757760147, "learning_rate": 1.2472785338794953e-06, "loss": 0.082, "step": 654 }, { "epoch": 0.149032992036405, "grad_norm": 1.556466054889254, "learning_rate": 1.247270198303365e-06, "loss": 0.0573, "step": 655 }, { "epoch": 0.14926052332195677, "grad_norm": 2.233896039541245, "learning_rate": 1.247261850009171e-06, "loss": 0.1073, "step": 656 }, { "epoch": 0.14948805460750852, "grad_norm": 1.5970780316095814, "learning_rate": 1.2472534889970848e-06, "loss": 0.0741, "step": 657 }, { "epoch": 0.1497155858930603, "grad_norm": 1.2400037573314944, "learning_rate": 1.2472451152672766e-06, "loss": 0.0483, "step": 658 }, { "epoch": 0.14994311717861206, "grad_norm": 0.7241628580276863, "learning_rate": 1.2472367288199177e-06, "loss": 0.0294, "step": 659 }, { "epoch": 0.15017064846416384, "grad_norm": 1.7234352219783362, "learning_rate": 1.2472283296551798e-06, "loss": 0.0462, "step": 660 }, { "epoch": 0.1503981797497156, "grad_norm": 1.4734355231360154, "learning_rate": 1.2472199177732346e-06, "loss": 0.0582, "step": 661 }, { "epoch": 0.15062571103526734, "grad_norm": 2.1378423903625317, "learning_rate": 1.2472114931742537e-06, "loss": 0.0584, "step": 662 }, { "epoch": 0.15085324232081912, "grad_norm": 10.24253063417928, "learning_rate": 1.2472030558584093e-06, "loss": 0.0477, "step": 663 }, { "epoch": 0.15108077360637087, "grad_norm": 1.8691143785999473, "learning_rate": 1.2471946058258742e-06, "loss": 0.0739, "step": 664 }, { "epoch": 0.15130830489192265, "grad_norm": 2.241077277544209, "learning_rate": 1.2471861430768205e-06, "loss": 0.0672, "step": 665 }, { "epoch": 0.1515358361774744, "grad_norm": 2.2720454211658385, "learning_rate": 1.2471776676114217e-06, "loss": 0.0744, "step": 666 }, { "epoch": 0.15176336746302616, "grad_norm": 1.5599655593025843, "learning_rate": 1.2471691794298508e-06, "loss": 0.0708, "step": 667 }, { "epoch": 0.15199089874857794, "grad_norm": 1.2105729421418516, "learning_rate": 1.2471606785322814e-06, "loss": 0.0408, "step": 668 }, { "epoch": 0.1522184300341297, "grad_norm": 1.2672422777930856, "learning_rate": 1.247152164918887e-06, "loss": 0.0406, "step": 669 }, { "epoch": 0.15244596131968147, "grad_norm": 1.4429003913192175, "learning_rate": 1.247143638589842e-06, "loss": 0.0537, "step": 670 }, { "epoch": 0.15267349260523322, "grad_norm": 2.4501715529962667, "learning_rate": 1.2471350995453203e-06, "loss": 0.0658, "step": 671 }, { "epoch": 0.15290102389078497, "grad_norm": 1.48398143402129, "learning_rate": 1.2471265477854966e-06, "loss": 0.0556, "step": 672 }, { "epoch": 0.15312855517633675, "grad_norm": 2.1744278885096278, "learning_rate": 1.2471179833105454e-06, "loss": 0.0404, "step": 673 }, { "epoch": 0.1533560864618885, "grad_norm": 6.901371774679421, "learning_rate": 1.2471094061206422e-06, "loss": 0.0869, "step": 674 }, { "epoch": 0.15358361774744028, "grad_norm": 1.1601432497447364, "learning_rate": 1.247100816215962e-06, "loss": 0.0379, "step": 675 }, { "epoch": 0.15381114903299203, "grad_norm": 1.2820824697846318, "learning_rate": 1.2470922135966806e-06, "loss": 0.032, "step": 676 }, { "epoch": 0.1540386803185438, "grad_norm": 1.2705594517037477, "learning_rate": 1.2470835982629736e-06, "loss": 0.0494, "step": 677 }, { "epoch": 0.15426621160409557, "grad_norm": 1.5982674187017603, "learning_rate": 1.247074970215017e-06, "loss": 0.0495, "step": 678 }, { "epoch": 0.15449374288964732, "grad_norm": 1.7109186334283122, "learning_rate": 1.2470663294529873e-06, "loss": 0.0414, "step": 679 }, { "epoch": 0.1547212741751991, "grad_norm": 1.0715248913545208, "learning_rate": 1.2470576759770612e-06, "loss": 0.0325, "step": 680 }, { "epoch": 0.15494880546075085, "grad_norm": 1.824156765255701, "learning_rate": 1.2470490097874155e-06, "loss": 0.0466, "step": 681 }, { "epoch": 0.15517633674630263, "grad_norm": 2.4373458213618897, "learning_rate": 1.247040330884227e-06, "loss": 0.0539, "step": 682 }, { "epoch": 0.15540386803185438, "grad_norm": 1.0065301428430162, "learning_rate": 1.2470316392676738e-06, "loss": 0.0525, "step": 683 }, { "epoch": 0.15563139931740613, "grad_norm": 1.5815427583099921, "learning_rate": 1.2470229349379326e-06, "loss": 0.0519, "step": 684 }, { "epoch": 0.1558589306029579, "grad_norm": 1.9652039642446397, "learning_rate": 1.2470142178951822e-06, "loss": 0.0768, "step": 685 }, { "epoch": 0.15608646188850966, "grad_norm": 1.6149386041596687, "learning_rate": 1.2470054881396002e-06, "loss": 0.0832, "step": 686 }, { "epoch": 0.15631399317406144, "grad_norm": 1.1987854832655134, "learning_rate": 1.246996745671365e-06, "loss": 0.0659, "step": 687 }, { "epoch": 0.1565415244596132, "grad_norm": 1.3921581311290316, "learning_rate": 1.2469879904906556e-06, "loss": 0.0477, "step": 688 }, { "epoch": 0.15676905574516495, "grad_norm": 1.5291818763479217, "learning_rate": 1.2469792225976507e-06, "loss": 0.0597, "step": 689 }, { "epoch": 0.15699658703071673, "grad_norm": 1.1978579028524097, "learning_rate": 1.2469704419925296e-06, "loss": 0.0551, "step": 690 }, { "epoch": 0.15722411831626848, "grad_norm": 2.2562422781402827, "learning_rate": 1.246961648675472e-06, "loss": 0.0709, "step": 691 }, { "epoch": 0.15745164960182026, "grad_norm": 1.947406282057691, "learning_rate": 1.246952842646657e-06, "loss": 0.115, "step": 692 }, { "epoch": 0.157679180887372, "grad_norm": 1.2450058586298152, "learning_rate": 1.2469440239062653e-06, "loss": 0.0499, "step": 693 }, { "epoch": 0.15790671217292376, "grad_norm": 1.8197388877884058, "learning_rate": 1.2469351924544766e-06, "loss": 0.0927, "step": 694 }, { "epoch": 0.15813424345847554, "grad_norm": 1.904635209687238, "learning_rate": 1.2469263482914716e-06, "loss": 0.0814, "step": 695 }, { "epoch": 0.1583617747440273, "grad_norm": 1.4011214554570595, "learning_rate": 1.246917491417431e-06, "loss": 0.0585, "step": 696 }, { "epoch": 0.15858930602957907, "grad_norm": 1.9445852493644915, "learning_rate": 1.246908621832536e-06, "loss": 0.1005, "step": 697 }, { "epoch": 0.15881683731513083, "grad_norm": 1.363848979307397, "learning_rate": 1.2468997395369677e-06, "loss": 0.0717, "step": 698 }, { "epoch": 0.1590443686006826, "grad_norm": 1.3903385726286603, "learning_rate": 1.2468908445309077e-06, "loss": 0.0557, "step": 699 }, { "epoch": 0.15927189988623436, "grad_norm": 0.8725919788890233, "learning_rate": 1.2468819368145376e-06, "loss": 0.0365, "step": 700 }, { "epoch": 0.1594994311717861, "grad_norm": 1.33354116221911, "learning_rate": 1.2468730163880398e-06, "loss": 0.0486, "step": 701 }, { "epoch": 0.1597269624573379, "grad_norm": 4.544549924474667, "learning_rate": 1.2468640832515962e-06, "loss": 0.0302, "step": 702 }, { "epoch": 0.15995449374288964, "grad_norm": 2.9489517825532308, "learning_rate": 1.24685513740539e-06, "loss": 0.0904, "step": 703 }, { "epoch": 0.16018202502844142, "grad_norm": 1.3714756073146785, "learning_rate": 1.2468461788496036e-06, "loss": 0.056, "step": 704 }, { "epoch": 0.16040955631399317, "grad_norm": 1.9329458786445137, "learning_rate": 1.24683720758442e-06, "loss": 0.0848, "step": 705 }, { "epoch": 0.16063708759954493, "grad_norm": 1.0521231330897036, "learning_rate": 1.2468282236100226e-06, "loss": 0.0438, "step": 706 }, { "epoch": 0.1608646188850967, "grad_norm": 1.7554793724577804, "learning_rate": 1.2468192269265955e-06, "loss": 0.1102, "step": 707 }, { "epoch": 0.16109215017064846, "grad_norm": 1.0071708604696206, "learning_rate": 1.246810217534322e-06, "loss": 0.0266, "step": 708 }, { "epoch": 0.16131968145620024, "grad_norm": 1.3334260212411804, "learning_rate": 1.2468011954333864e-06, "loss": 0.0609, "step": 709 }, { "epoch": 0.161547212741752, "grad_norm": 1.1880594144178185, "learning_rate": 1.2467921606239734e-06, "loss": 0.0465, "step": 710 }, { "epoch": 0.16177474402730374, "grad_norm": 2.7036981936596542, "learning_rate": 1.2467831131062672e-06, "loss": 0.0516, "step": 711 }, { "epoch": 0.16200227531285552, "grad_norm": 1.416736687413605, "learning_rate": 1.2467740528804528e-06, "loss": 0.0552, "step": 712 }, { "epoch": 0.16222980659840727, "grad_norm": 1.3251728729578673, "learning_rate": 1.2467649799467156e-06, "loss": 0.0615, "step": 713 }, { "epoch": 0.16245733788395905, "grad_norm": 0.9167577669577527, "learning_rate": 1.246755894305241e-06, "loss": 0.0312, "step": 714 }, { "epoch": 0.1626848691695108, "grad_norm": 1.4735705097484915, "learning_rate": 1.2467467959562143e-06, "loss": 0.0676, "step": 715 }, { "epoch": 0.16291240045506258, "grad_norm": 1.6890201461846701, "learning_rate": 1.2467376848998221e-06, "loss": 0.0597, "step": 716 }, { "epoch": 0.16313993174061434, "grad_norm": 1.4659558773380117, "learning_rate": 1.2467285611362501e-06, "loss": 0.052, "step": 717 }, { "epoch": 0.1633674630261661, "grad_norm": 1.7717923581360278, "learning_rate": 1.2467194246656851e-06, "loss": 0.0563, "step": 718 }, { "epoch": 0.16359499431171787, "grad_norm": 1.0756915461750918, "learning_rate": 1.2467102754883136e-06, "loss": 0.0411, "step": 719 }, { "epoch": 0.16382252559726962, "grad_norm": 1.2683417009383617, "learning_rate": 1.2467011136043228e-06, "loss": 0.0454, "step": 720 }, { "epoch": 0.1640500568828214, "grad_norm": 1.5509079166930662, "learning_rate": 1.2466919390138995e-06, "loss": 0.0745, "step": 721 }, { "epoch": 0.16427758816837315, "grad_norm": 1.799801190377342, "learning_rate": 1.246682751717232e-06, "loss": 0.0531, "step": 722 }, { "epoch": 0.1645051194539249, "grad_norm": 1.734480174185366, "learning_rate": 1.2466735517145074e-06, "loss": 0.0792, "step": 723 }, { "epoch": 0.16473265073947668, "grad_norm": 1.961760175196997, "learning_rate": 1.2466643390059138e-06, "loss": 0.0685, "step": 724 }, { "epoch": 0.16496018202502843, "grad_norm": 0.8466307246278059, "learning_rate": 1.2466551135916398e-06, "loss": 0.0229, "step": 725 }, { "epoch": 0.16518771331058021, "grad_norm": 1.2173112499342529, "learning_rate": 1.2466458754718737e-06, "loss": 0.0632, "step": 726 }, { "epoch": 0.16541524459613197, "grad_norm": 2.194872247572006, "learning_rate": 1.2466366246468045e-06, "loss": 0.0432, "step": 727 }, { "epoch": 0.16564277588168372, "grad_norm": 1.4416201742124337, "learning_rate": 1.246627361116621e-06, "loss": 0.0619, "step": 728 }, { "epoch": 0.1658703071672355, "grad_norm": 1.8390552268184857, "learning_rate": 1.246618084881513e-06, "loss": 0.1148, "step": 729 }, { "epoch": 0.16609783845278725, "grad_norm": 1.7117932742809556, "learning_rate": 1.2466087959416695e-06, "loss": 0.0354, "step": 730 }, { "epoch": 0.16632536973833903, "grad_norm": 2.1473525159969644, "learning_rate": 1.2465994942972805e-06, "loss": 0.0479, "step": 731 }, { "epoch": 0.16655290102389078, "grad_norm": 2.0228728542376846, "learning_rate": 1.2465901799485366e-06, "loss": 0.0819, "step": 732 }, { "epoch": 0.16678043230944256, "grad_norm": 0.7672679660904389, "learning_rate": 1.2465808528956277e-06, "loss": 0.0251, "step": 733 }, { "epoch": 0.1670079635949943, "grad_norm": 6.58955540161242, "learning_rate": 1.2465715131387446e-06, "loss": 0.0608, "step": 734 }, { "epoch": 0.16723549488054607, "grad_norm": 2.0572059988722873, "learning_rate": 1.2465621606780778e-06, "loss": 0.0841, "step": 735 }, { "epoch": 0.16746302616609784, "grad_norm": 1.628470993180389, "learning_rate": 1.2465527955138191e-06, "loss": 0.059, "step": 736 }, { "epoch": 0.1676905574516496, "grad_norm": 1.0886679327758462, "learning_rate": 1.2465434176461596e-06, "loss": 0.0419, "step": 737 }, { "epoch": 0.16791808873720138, "grad_norm": 1.4286021962604534, "learning_rate": 1.2465340270752908e-06, "loss": 0.0481, "step": 738 }, { "epoch": 0.16814562002275313, "grad_norm": 0.8663035560041373, "learning_rate": 1.2465246238014047e-06, "loss": 0.0295, "step": 739 }, { "epoch": 0.16837315130830488, "grad_norm": 1.8501418707953075, "learning_rate": 1.2465152078246936e-06, "loss": 0.0829, "step": 740 }, { "epoch": 0.16860068259385666, "grad_norm": 1.311824892930283, "learning_rate": 1.24650577914535e-06, "loss": 0.0418, "step": 741 }, { "epoch": 0.1688282138794084, "grad_norm": 1.249901682454073, "learning_rate": 1.2464963377635667e-06, "loss": 0.0479, "step": 742 }, { "epoch": 0.1690557451649602, "grad_norm": 1.9982982994055205, "learning_rate": 1.246486883679536e-06, "loss": 0.112, "step": 743 }, { "epoch": 0.16928327645051194, "grad_norm": 1.8891194873050854, "learning_rate": 1.246477416893452e-06, "loss": 0.0489, "step": 744 }, { "epoch": 0.1695108077360637, "grad_norm": 1.3265078136556134, "learning_rate": 1.2464679374055074e-06, "loss": 0.0492, "step": 745 }, { "epoch": 0.16973833902161548, "grad_norm": 1.1620433482725554, "learning_rate": 1.2464584452158968e-06, "loss": 0.0217, "step": 746 }, { "epoch": 0.16996587030716723, "grad_norm": 1.8320124858819533, "learning_rate": 1.2464489403248133e-06, "loss": 0.057, "step": 747 }, { "epoch": 0.170193401592719, "grad_norm": 0.8996339413245623, "learning_rate": 1.246439422732452e-06, "loss": 0.0308, "step": 748 }, { "epoch": 0.17042093287827076, "grad_norm": 1.1199200581464024, "learning_rate": 1.2464298924390066e-06, "loss": 0.0449, "step": 749 }, { "epoch": 0.17064846416382254, "grad_norm": 1.6610024470498899, "learning_rate": 1.2464203494446725e-06, "loss": 0.0667, "step": 750 }, { "epoch": 0.1708759954493743, "grad_norm": 1.1007524784666796, "learning_rate": 1.2464107937496444e-06, "loss": 0.0341, "step": 751 }, { "epoch": 0.17110352673492604, "grad_norm": 1.0836506765705751, "learning_rate": 1.246401225354118e-06, "loss": 0.0397, "step": 752 }, { "epoch": 0.17133105802047782, "grad_norm": 1.320442517854178, "learning_rate": 1.2463916442582883e-06, "loss": 0.0694, "step": 753 }, { "epoch": 0.17155858930602957, "grad_norm": 1.0431861088552385, "learning_rate": 1.2463820504623516e-06, "loss": 0.0285, "step": 754 }, { "epoch": 0.17178612059158135, "grad_norm": 0.9128127625945931, "learning_rate": 1.246372443966504e-06, "loss": 0.0394, "step": 755 }, { "epoch": 0.1720136518771331, "grad_norm": 1.3898389984950463, "learning_rate": 1.246362824770941e-06, "loss": 0.0451, "step": 756 }, { "epoch": 0.17224118316268486, "grad_norm": 0.9752697300280603, "learning_rate": 1.2463531928758605e-06, "loss": 0.0398, "step": 757 }, { "epoch": 0.17246871444823664, "grad_norm": 1.8211753063899252, "learning_rate": 1.2463435482814585e-06, "loss": 0.0947, "step": 758 }, { "epoch": 0.1726962457337884, "grad_norm": 1.5787941280691695, "learning_rate": 1.246333890987932e-06, "loss": 0.1047, "step": 759 }, { "epoch": 0.17292377701934017, "grad_norm": 1.8356072487697, "learning_rate": 1.246324220995479e-06, "loss": 0.0729, "step": 760 }, { "epoch": 0.17315130830489192, "grad_norm": 4.476837441740864, "learning_rate": 1.2463145383042966e-06, "loss": 0.0686, "step": 761 }, { "epoch": 0.17337883959044367, "grad_norm": 252.52198381660855, "learning_rate": 1.2463048429145832e-06, "loss": 1.0972, "step": 762 }, { "epoch": 0.17360637087599545, "grad_norm": 1.6008327751394016, "learning_rate": 1.2462951348265364e-06, "loss": 0.067, "step": 763 }, { "epoch": 0.1738339021615472, "grad_norm": 1.087447761528399, "learning_rate": 1.2462854140403553e-06, "loss": 0.0433, "step": 764 }, { "epoch": 0.17406143344709898, "grad_norm": 2.5989190208674384, "learning_rate": 1.2462756805562378e-06, "loss": 0.1183, "step": 765 }, { "epoch": 0.17428896473265074, "grad_norm": 1.0290974538586322, "learning_rate": 1.2462659343743832e-06, "loss": 0.048, "step": 766 }, { "epoch": 0.17451649601820252, "grad_norm": 1.2617664816926566, "learning_rate": 1.2462561754949908e-06, "loss": 0.0498, "step": 767 }, { "epoch": 0.17474402730375427, "grad_norm": 1.6415043644592435, "learning_rate": 1.2462464039182598e-06, "loss": 0.062, "step": 768 }, { "epoch": 0.17497155858930602, "grad_norm": 1.6673873794852758, "learning_rate": 1.2462366196443903e-06, "loss": 0.0656, "step": 769 }, { "epoch": 0.1751990898748578, "grad_norm": 3.3205898186422718, "learning_rate": 1.246226822673582e-06, "loss": 0.1071, "step": 770 }, { "epoch": 0.17542662116040955, "grad_norm": 1.280610915532866, "learning_rate": 1.2462170130060351e-06, "loss": 0.0318, "step": 771 }, { "epoch": 0.17565415244596133, "grad_norm": 1.9843858659809077, "learning_rate": 1.24620719064195e-06, "loss": 0.0829, "step": 772 }, { "epoch": 0.17588168373151308, "grad_norm": 2.184973202731946, "learning_rate": 1.246197355581528e-06, "loss": 0.0998, "step": 773 }, { "epoch": 0.17610921501706484, "grad_norm": 1.361748313569626, "learning_rate": 1.2461875078249694e-06, "loss": 0.0459, "step": 774 }, { "epoch": 0.17633674630261661, "grad_norm": 1.5219919911848705, "learning_rate": 1.246177647372476e-06, "loss": 0.0539, "step": 775 }, { "epoch": 0.17656427758816837, "grad_norm": 49.0672993271897, "learning_rate": 1.246167774224249e-06, "loss": 0.5261, "step": 776 }, { "epoch": 0.17679180887372015, "grad_norm": 1.5576377429332795, "learning_rate": 1.2461578883804903e-06, "loss": 0.036, "step": 777 }, { "epoch": 0.1770193401592719, "grad_norm": 0.869488380021361, "learning_rate": 1.246147989841402e-06, "loss": 0.0252, "step": 778 }, { "epoch": 0.17724687144482365, "grad_norm": 1.2894143452262044, "learning_rate": 1.2461380786071863e-06, "loss": 0.0401, "step": 779 }, { "epoch": 0.17747440273037543, "grad_norm": 1.4297168158807256, "learning_rate": 1.246128154678046e-06, "loss": 0.0586, "step": 780 }, { "epoch": 0.17770193401592718, "grad_norm": 4.251777831616221, "learning_rate": 1.2461182180541835e-06, "loss": 0.0484, "step": 781 }, { "epoch": 0.17792946530147896, "grad_norm": 1.1899715722609847, "learning_rate": 1.2461082687358022e-06, "loss": 0.0391, "step": 782 }, { "epoch": 0.1781569965870307, "grad_norm": 1.0511863654570817, "learning_rate": 1.2460983067231055e-06, "loss": 0.0424, "step": 783 }, { "epoch": 0.1783845278725825, "grad_norm": 1.6108410955721733, "learning_rate": 1.246088332016297e-06, "loss": 0.0453, "step": 784 }, { "epoch": 0.17861205915813425, "grad_norm": 1.014732446092101, "learning_rate": 1.2460783446155802e-06, "loss": 0.0398, "step": 785 }, { "epoch": 0.178839590443686, "grad_norm": 3.365599666409623, "learning_rate": 1.2460683445211596e-06, "loss": 0.1579, "step": 786 }, { "epoch": 0.17906712172923778, "grad_norm": 2.0790980638653984, "learning_rate": 1.2460583317332395e-06, "loss": 0.1013, "step": 787 }, { "epoch": 0.17929465301478953, "grad_norm": 1.0292372040908986, "learning_rate": 1.2460483062520246e-06, "loss": 0.0399, "step": 788 }, { "epoch": 0.1795221843003413, "grad_norm": 1.056684060389556, "learning_rate": 1.2460382680777196e-06, "loss": 0.0342, "step": 789 }, { "epoch": 0.17974971558589306, "grad_norm": 1.3734567420277208, "learning_rate": 1.2460282172105298e-06, "loss": 0.0673, "step": 790 }, { "epoch": 0.1799772468714448, "grad_norm": 1.519969265957327, "learning_rate": 1.2460181536506608e-06, "loss": 0.0775, "step": 791 }, { "epoch": 0.1802047781569966, "grad_norm": 1.514321767455213, "learning_rate": 1.2460080773983177e-06, "loss": 0.0659, "step": 792 }, { "epoch": 0.18043230944254834, "grad_norm": 1.0361490916169322, "learning_rate": 1.2459979884537072e-06, "loss": 0.0424, "step": 793 }, { "epoch": 0.18065984072810012, "grad_norm": 1.3102760225972407, "learning_rate": 1.2459878868170348e-06, "loss": 0.0476, "step": 794 }, { "epoch": 0.18088737201365188, "grad_norm": 1.4645693998145335, "learning_rate": 1.2459777724885075e-06, "loss": 0.0522, "step": 795 }, { "epoch": 0.18111490329920363, "grad_norm": 1.6043429433204082, "learning_rate": 1.2459676454683318e-06, "loss": 0.0415, "step": 796 }, { "epoch": 0.1813424345847554, "grad_norm": 2.3566952879735963, "learning_rate": 1.2459575057567144e-06, "loss": 0.0916, "step": 797 }, { "epoch": 0.18156996587030716, "grad_norm": 1.5769029444288707, "learning_rate": 1.245947353353863e-06, "loss": 0.0596, "step": 798 }, { "epoch": 0.18179749715585894, "grad_norm": 2.293268133767521, "learning_rate": 1.245937188259985e-06, "loss": 0.1087, "step": 799 }, { "epoch": 0.1820250284414107, "grad_norm": 2.0227046414956096, "learning_rate": 1.245927010475288e-06, "loss": 0.0986, "step": 800 }, { "epoch": 0.18225255972696247, "grad_norm": 1.4895840436039498, "learning_rate": 1.24591681999998e-06, "loss": 0.0633, "step": 801 }, { "epoch": 0.18248009101251422, "grad_norm": 1.9671715135309942, "learning_rate": 1.2459066168342693e-06, "loss": 0.0551, "step": 802 }, { "epoch": 0.18270762229806597, "grad_norm": 2.1525267147108322, "learning_rate": 1.2458964009783646e-06, "loss": 0.0809, "step": 803 }, { "epoch": 0.18293515358361775, "grad_norm": 1.2864874343026942, "learning_rate": 1.2458861724324745e-06, "loss": 0.064, "step": 804 }, { "epoch": 0.1831626848691695, "grad_norm": 3.1883103987924444, "learning_rate": 1.2458759311968084e-06, "loss": 0.1303, "step": 805 }, { "epoch": 0.1833902161547213, "grad_norm": 1.0904025457159896, "learning_rate": 1.245865677271575e-06, "loss": 0.0419, "step": 806 }, { "epoch": 0.18361774744027304, "grad_norm": 1.4716173202867617, "learning_rate": 1.2458554106569844e-06, "loss": 0.072, "step": 807 }, { "epoch": 0.1838452787258248, "grad_norm": 0.9197168745315846, "learning_rate": 1.2458451313532463e-06, "loss": 0.04, "step": 808 }, { "epoch": 0.18407281001137657, "grad_norm": 1.2386933488526581, "learning_rate": 1.2458348393605708e-06, "loss": 0.0383, "step": 809 }, { "epoch": 0.18430034129692832, "grad_norm": 1.2129381734644349, "learning_rate": 1.2458245346791678e-06, "loss": 0.0486, "step": 810 }, { "epoch": 0.1845278725824801, "grad_norm": 1.0772770126401692, "learning_rate": 1.2458142173092486e-06, "loss": 0.0654, "step": 811 }, { "epoch": 0.18475540386803185, "grad_norm": 5.609205856069292, "learning_rate": 1.2458038872510237e-06, "loss": 0.0565, "step": 812 }, { "epoch": 0.1849829351535836, "grad_norm": 1.1271779547323408, "learning_rate": 1.2457935445047042e-06, "loss": 0.0414, "step": 813 }, { "epoch": 0.18521046643913538, "grad_norm": 1.1585514417402933, "learning_rate": 1.2457831890705018e-06, "loss": 0.0646, "step": 814 }, { "epoch": 0.18543799772468714, "grad_norm": 1.6262463401225817, "learning_rate": 1.2457728209486279e-06, "loss": 0.0555, "step": 815 }, { "epoch": 0.18566552901023892, "grad_norm": 1.6971282783367077, "learning_rate": 1.2457624401392943e-06, "loss": 0.0803, "step": 816 }, { "epoch": 0.18589306029579067, "grad_norm": 1.5890865721783058, "learning_rate": 1.2457520466427135e-06, "loss": 0.0637, "step": 817 }, { "epoch": 0.18612059158134245, "grad_norm": 1.663587359707044, "learning_rate": 1.2457416404590974e-06, "loss": 0.063, "step": 818 }, { "epoch": 0.1863481228668942, "grad_norm": 1.4589989522369327, "learning_rate": 1.2457312215886592e-06, "loss": 0.0665, "step": 819 }, { "epoch": 0.18657565415244595, "grad_norm": 1.2689131726245948, "learning_rate": 1.2457207900316115e-06, "loss": 0.0653, "step": 820 }, { "epoch": 0.18680318543799773, "grad_norm": 1.2763445646366045, "learning_rate": 1.245710345788168e-06, "loss": 0.0691, "step": 821 }, { "epoch": 0.18703071672354948, "grad_norm": 2.1044109818141523, "learning_rate": 1.2456998888585414e-06, "loss": 0.0334, "step": 822 }, { "epoch": 0.18725824800910126, "grad_norm": 1.1582960528680695, "learning_rate": 1.245689419242946e-06, "loss": 0.0513, "step": 823 }, { "epoch": 0.18748577929465302, "grad_norm": 1.4301422945996285, "learning_rate": 1.2456789369415955e-06, "loss": 0.0632, "step": 824 }, { "epoch": 0.18771331058020477, "grad_norm": 1.5961327622673143, "learning_rate": 1.2456684419547044e-06, "loss": 0.0915, "step": 825 }, { "epoch": 0.18794084186575655, "grad_norm": 1.156039499623056, "learning_rate": 1.245657934282487e-06, "loss": 0.0406, "step": 826 }, { "epoch": 0.1881683731513083, "grad_norm": 0.7964273734238357, "learning_rate": 1.245647413925158e-06, "loss": 0.0339, "step": 827 }, { "epoch": 0.18839590443686008, "grad_norm": 1.3110827687978466, "learning_rate": 1.2456368808829327e-06, "loss": 0.0315, "step": 828 }, { "epoch": 0.18862343572241183, "grad_norm": 2.167581797519473, "learning_rate": 1.2456263351560261e-06, "loss": 0.0944, "step": 829 }, { "epoch": 0.18885096700796358, "grad_norm": 1.7008701552637757, "learning_rate": 1.2456157767446538e-06, "loss": 0.0675, "step": 830 }, { "epoch": 0.18907849829351536, "grad_norm": 0.9862167570499358, "learning_rate": 1.245605205649032e-06, "loss": 0.0283, "step": 831 }, { "epoch": 0.18930602957906711, "grad_norm": 1.4504322451994889, "learning_rate": 1.245594621869376e-06, "loss": 0.0855, "step": 832 }, { "epoch": 0.1895335608646189, "grad_norm": 0.8637440563578112, "learning_rate": 1.2455840254059026e-06, "loss": 0.0457, "step": 833 }, { "epoch": 0.18976109215017065, "grad_norm": 1.365450351316701, "learning_rate": 1.2455734162588282e-06, "loss": 0.0523, "step": 834 }, { "epoch": 0.1899886234357224, "grad_norm": 1.9203688742265796, "learning_rate": 1.2455627944283697e-06, "loss": 0.0421, "step": 835 }, { "epoch": 0.19021615472127418, "grad_norm": 1.947700834519252, "learning_rate": 1.245552159914744e-06, "loss": 0.0631, "step": 836 }, { "epoch": 0.19044368600682593, "grad_norm": 1.274745482531776, "learning_rate": 1.245541512718169e-06, "loss": 0.0408, "step": 837 }, { "epoch": 0.1906712172923777, "grad_norm": 1.2840926844609297, "learning_rate": 1.245530852838862e-06, "loss": 0.0435, "step": 838 }, { "epoch": 0.19089874857792946, "grad_norm": 1.5640003481699476, "learning_rate": 1.2455201802770405e-06, "loss": 0.0616, "step": 839 }, { "epoch": 0.19112627986348124, "grad_norm": 1.2705630922274525, "learning_rate": 1.245509495032923e-06, "loss": 0.0604, "step": 840 }, { "epoch": 0.191353811149033, "grad_norm": 1.2721765422307298, "learning_rate": 1.2454987971067278e-06, "loss": 0.0431, "step": 841 }, { "epoch": 0.19158134243458474, "grad_norm": 1.7662664353990207, "learning_rate": 1.2454880864986737e-06, "loss": 0.0881, "step": 842 }, { "epoch": 0.19180887372013652, "grad_norm": 0.8548688062918027, "learning_rate": 1.2454773632089795e-06, "loss": 0.0219, "step": 843 }, { "epoch": 0.19203640500568828, "grad_norm": 1.1833145714689994, "learning_rate": 1.2454666272378644e-06, "loss": 0.0502, "step": 844 }, { "epoch": 0.19226393629124006, "grad_norm": 2.15061289434072, "learning_rate": 1.2454558785855475e-06, "loss": 0.0687, "step": 845 }, { "epoch": 0.1924914675767918, "grad_norm": 1.0201954402445583, "learning_rate": 1.245445117252249e-06, "loss": 0.0377, "step": 846 }, { "epoch": 0.19271899886234356, "grad_norm": 2.256502679796923, "learning_rate": 1.2454343432381886e-06, "loss": 0.1344, "step": 847 }, { "epoch": 0.19294653014789534, "grad_norm": 0.9607197396814332, "learning_rate": 1.2454235565435862e-06, "loss": 0.0293, "step": 848 }, { "epoch": 0.1931740614334471, "grad_norm": 1.1041321579517591, "learning_rate": 1.2454127571686629e-06, "loss": 0.0588, "step": 849 }, { "epoch": 0.19340159271899887, "grad_norm": 1.551888854493328, "learning_rate": 1.245401945113639e-06, "loss": 0.0884, "step": 850 }, { "epoch": 0.19362912400455062, "grad_norm": 1.2207595212151812, "learning_rate": 1.2453911203787355e-06, "loss": 0.0495, "step": 851 }, { "epoch": 0.19385665529010238, "grad_norm": 3.151128795398426, "learning_rate": 1.2453802829641736e-06, "loss": 0.0959, "step": 852 }, { "epoch": 0.19408418657565416, "grad_norm": 1.0306148148350838, "learning_rate": 1.2453694328701752e-06, "loss": 0.0362, "step": 853 }, { "epoch": 0.1943117178612059, "grad_norm": 1.4686426621438413, "learning_rate": 1.2453585700969614e-06, "loss": 0.0519, "step": 854 }, { "epoch": 0.1945392491467577, "grad_norm": 4.418162916580233, "learning_rate": 1.2453476946447547e-06, "loss": 0.0718, "step": 855 }, { "epoch": 0.19476678043230944, "grad_norm": 1.2510611267805927, "learning_rate": 1.2453368065137772e-06, "loss": 0.0471, "step": 856 }, { "epoch": 0.19499431171786122, "grad_norm": 3.8279701209646566, "learning_rate": 1.2453259057042514e-06, "loss": 0.0962, "step": 857 }, { "epoch": 0.19522184300341297, "grad_norm": 0.9857769216686043, "learning_rate": 1.2453149922164003e-06, "loss": 0.0361, "step": 858 }, { "epoch": 0.19544937428896472, "grad_norm": 1.2075815978254054, "learning_rate": 1.2453040660504468e-06, "loss": 0.0361, "step": 859 }, { "epoch": 0.1956769055745165, "grad_norm": 1.5382334773010704, "learning_rate": 1.2452931272066141e-06, "loss": 0.0489, "step": 860 }, { "epoch": 0.19590443686006825, "grad_norm": 1.2911029078532041, "learning_rate": 1.245282175685126e-06, "loss": 0.0743, "step": 861 }, { "epoch": 0.19613196814562003, "grad_norm": 2.432722962095318, "learning_rate": 1.2452712114862063e-06, "loss": 0.106, "step": 862 }, { "epoch": 0.19635949943117179, "grad_norm": 1.3235947439225328, "learning_rate": 1.245260234610079e-06, "loss": 0.045, "step": 863 }, { "epoch": 0.19658703071672354, "grad_norm": 1.1324676509010458, "learning_rate": 1.2452492450569682e-06, "loss": 0.0554, "step": 864 }, { "epoch": 0.19681456200227532, "grad_norm": 2.2307637995876863, "learning_rate": 1.245238242827099e-06, "loss": 0.0832, "step": 865 }, { "epoch": 0.19704209328782707, "grad_norm": 1.4079128192407386, "learning_rate": 1.245227227920696e-06, "loss": 0.079, "step": 866 }, { "epoch": 0.19726962457337885, "grad_norm": 1.9175023199810017, "learning_rate": 1.2452162003379842e-06, "loss": 0.0917, "step": 867 }, { "epoch": 0.1974971558589306, "grad_norm": 1.731489599286668, "learning_rate": 1.2452051600791891e-06, "loss": 0.0571, "step": 868 }, { "epoch": 0.19772468714448235, "grad_norm": 1.3722463786220256, "learning_rate": 1.2451941071445367e-06, "loss": 0.039, "step": 869 }, { "epoch": 0.19795221843003413, "grad_norm": 1.287569176842529, "learning_rate": 1.2451830415342524e-06, "loss": 0.0427, "step": 870 }, { "epoch": 0.19817974971558588, "grad_norm": 2.2505234649573795, "learning_rate": 1.2451719632485627e-06, "loss": 0.0606, "step": 871 }, { "epoch": 0.19840728100113766, "grad_norm": 1.3085408197305148, "learning_rate": 1.2451608722876938e-06, "loss": 0.0659, "step": 872 }, { "epoch": 0.19863481228668942, "grad_norm": 2.3075742049076085, "learning_rate": 1.2451497686518722e-06, "loss": 0.0762, "step": 873 }, { "epoch": 0.1988623435722412, "grad_norm": 1.1650796320462304, "learning_rate": 1.2451386523413252e-06, "loss": 0.0559, "step": 874 }, { "epoch": 0.19908987485779295, "grad_norm": 1.199672804247241, "learning_rate": 1.24512752335628e-06, "loss": 0.0417, "step": 875 }, { "epoch": 0.1993174061433447, "grad_norm": 1.4954309542074338, "learning_rate": 1.2451163816969639e-06, "loss": 0.0841, "step": 876 }, { "epoch": 0.19954493742889648, "grad_norm": 1.4893257668939828, "learning_rate": 1.2451052273636045e-06, "loss": 0.0639, "step": 877 }, { "epoch": 0.19977246871444823, "grad_norm": 1.1681659159861986, "learning_rate": 1.24509406035643e-06, "loss": 0.0487, "step": 878 }, { "epoch": 0.2, "grad_norm": 1.337002748039853, "learning_rate": 1.2450828806756685e-06, "loss": 0.0408, "step": 879 }, { "epoch": 0.20022753128555176, "grad_norm": 1.238199679938109, "learning_rate": 1.245071688321549e-06, "loss": 0.0452, "step": 880 }, { "epoch": 0.20045506257110352, "grad_norm": 1.4016981334625263, "learning_rate": 1.2450604832942991e-06, "loss": 0.0462, "step": 881 }, { "epoch": 0.2006825938566553, "grad_norm": 1.4932886959066234, "learning_rate": 1.245049265594149e-06, "loss": 0.0735, "step": 882 }, { "epoch": 0.20091012514220705, "grad_norm": 1.2357598702477623, "learning_rate": 1.2450380352213271e-06, "loss": 0.0504, "step": 883 }, { "epoch": 0.20113765642775883, "grad_norm": 2.015377601632808, "learning_rate": 1.2450267921760636e-06, "loss": 0.0523, "step": 884 }, { "epoch": 0.20136518771331058, "grad_norm": 1.8296148009850803, "learning_rate": 1.2450155364585878e-06, "loss": 0.0554, "step": 885 }, { "epoch": 0.20159271899886233, "grad_norm": 1.9872439975590221, "learning_rate": 1.2450042680691301e-06, "loss": 0.0737, "step": 886 }, { "epoch": 0.2018202502844141, "grad_norm": 0.832951332510299, "learning_rate": 1.2449929870079206e-06, "loss": 0.0457, "step": 887 }, { "epoch": 0.20204778156996586, "grad_norm": 0.7896517265085776, "learning_rate": 1.24498169327519e-06, "loss": 0.0261, "step": 888 }, { "epoch": 0.20227531285551764, "grad_norm": 1.3980548207185421, "learning_rate": 1.2449703868711688e-06, "loss": 0.061, "step": 889 }, { "epoch": 0.2025028441410694, "grad_norm": 1.3109982570334282, "learning_rate": 1.2449590677960886e-06, "loss": 0.0525, "step": 890 }, { "epoch": 0.20273037542662117, "grad_norm": 0.8611577720266457, "learning_rate": 1.2449477360501802e-06, "loss": 0.0297, "step": 891 }, { "epoch": 0.20295790671217293, "grad_norm": 2.2537154917621267, "learning_rate": 1.2449363916336756e-06, "loss": 0.0658, "step": 892 }, { "epoch": 0.20318543799772468, "grad_norm": 2.7860866459618823, "learning_rate": 1.2449250345468065e-06, "loss": 0.0853, "step": 893 }, { "epoch": 0.20341296928327646, "grad_norm": 2.063772825536578, "learning_rate": 1.244913664789805e-06, "loss": 0.0413, "step": 894 }, { "epoch": 0.2036405005688282, "grad_norm": 1.3514226592309118, "learning_rate": 1.2449022823629036e-06, "loss": 0.0445, "step": 895 }, { "epoch": 0.20386803185438, "grad_norm": 1.2525160508368907, "learning_rate": 1.2448908872663347e-06, "loss": 0.0337, "step": 896 }, { "epoch": 0.20409556313993174, "grad_norm": 0.9460587754259561, "learning_rate": 1.2448794795003313e-06, "loss": 0.0391, "step": 897 }, { "epoch": 0.2043230944254835, "grad_norm": 1.146769061895453, "learning_rate": 1.2448680590651269e-06, "loss": 0.0618, "step": 898 }, { "epoch": 0.20455062571103527, "grad_norm": 1.277169361575558, "learning_rate": 1.2448566259609543e-06, "loss": 0.0479, "step": 899 }, { "epoch": 0.20477815699658702, "grad_norm": 1.2913475907417995, "learning_rate": 1.2448451801880476e-06, "loss": 0.0352, "step": 900 }, { "epoch": 0.2050056882821388, "grad_norm": 1.614343484279519, "learning_rate": 1.2448337217466404e-06, "loss": 0.0672, "step": 901 }, { "epoch": 0.20523321956769056, "grad_norm": 1.1122189034817742, "learning_rate": 1.2448222506369675e-06, "loss": 0.0385, "step": 902 }, { "epoch": 0.2054607508532423, "grad_norm": 1.5480813920575183, "learning_rate": 1.2448107668592626e-06, "loss": 0.0696, "step": 903 }, { "epoch": 0.2056882821387941, "grad_norm": 2.133323122403055, "learning_rate": 1.244799270413761e-06, "loss": 0.0726, "step": 904 }, { "epoch": 0.20591581342434584, "grad_norm": 1.9375085657983182, "learning_rate": 1.2447877613006972e-06, "loss": 0.0762, "step": 905 }, { "epoch": 0.20614334470989762, "grad_norm": 2.138121754084785, "learning_rate": 1.244776239520307e-06, "loss": 0.1251, "step": 906 }, { "epoch": 0.20637087599544937, "grad_norm": 69.30343641276347, "learning_rate": 1.244764705072825e-06, "loss": 1.5633, "step": 907 }, { "epoch": 0.20659840728100115, "grad_norm": 0.9136079997375776, "learning_rate": 1.2447531579584878e-06, "loss": 0.0359, "step": 908 }, { "epoch": 0.2068259385665529, "grad_norm": 2.258737842455169, "learning_rate": 1.2447415981775312e-06, "loss": 0.0815, "step": 909 }, { "epoch": 0.20705346985210465, "grad_norm": 1.1302894088592583, "learning_rate": 1.2447300257301912e-06, "loss": 0.0377, "step": 910 }, { "epoch": 0.20728100113765643, "grad_norm": 1.4921976481695052, "learning_rate": 1.2447184406167045e-06, "loss": 0.0867, "step": 911 }, { "epoch": 0.2075085324232082, "grad_norm": 2.2983270765657546, "learning_rate": 1.2447068428373077e-06, "loss": 0.0726, "step": 912 }, { "epoch": 0.20773606370875997, "grad_norm": 1.9446016021679782, "learning_rate": 1.244695232392238e-06, "loss": 0.064, "step": 913 }, { "epoch": 0.20796359499431172, "grad_norm": 0.9872094991366439, "learning_rate": 1.2446836092817328e-06, "loss": 0.0432, "step": 914 }, { "epoch": 0.20819112627986347, "grad_norm": 6.934093481791269, "learning_rate": 1.2446719735060293e-06, "loss": 0.0945, "step": 915 }, { "epoch": 0.20841865756541525, "grad_norm": 1.360488111746127, "learning_rate": 1.2446603250653658e-06, "loss": 0.0603, "step": 916 }, { "epoch": 0.208646188850967, "grad_norm": 1.3310461950551042, "learning_rate": 1.24464866395998e-06, "loss": 0.0365, "step": 917 }, { "epoch": 0.20887372013651878, "grad_norm": 0.9182393241316875, "learning_rate": 1.2446369901901102e-06, "loss": 0.0559, "step": 918 }, { "epoch": 0.20910125142207053, "grad_norm": 1.4023619755477905, "learning_rate": 1.2446253037559952e-06, "loss": 0.0457, "step": 919 }, { "epoch": 0.20932878270762229, "grad_norm": 1.1256168684655514, "learning_rate": 1.2446136046578739e-06, "loss": 0.0455, "step": 920 }, { "epoch": 0.20955631399317406, "grad_norm": 1.9433578174394295, "learning_rate": 1.2446018928959853e-06, "loss": 0.0962, "step": 921 }, { "epoch": 0.20978384527872582, "grad_norm": 1.5957841959716605, "learning_rate": 1.2445901684705685e-06, "loss": 0.0408, "step": 922 }, { "epoch": 0.2100113765642776, "grad_norm": 1.075018126077203, "learning_rate": 1.2445784313818638e-06, "loss": 0.0378, "step": 923 }, { "epoch": 0.21023890784982935, "grad_norm": 1.5370330862679797, "learning_rate": 1.2445666816301102e-06, "loss": 0.0578, "step": 924 }, { "epoch": 0.21046643913538113, "grad_norm": 1.4943129389397118, "learning_rate": 1.2445549192155487e-06, "loss": 0.0875, "step": 925 }, { "epoch": 0.21069397042093288, "grad_norm": 1.1898394401405206, "learning_rate": 1.244543144138419e-06, "loss": 0.0486, "step": 926 }, { "epoch": 0.21092150170648463, "grad_norm": 1.6954763665720234, "learning_rate": 1.2445313563989624e-06, "loss": 0.0641, "step": 927 }, { "epoch": 0.2111490329920364, "grad_norm": 1.0426224473753456, "learning_rate": 1.2445195559974194e-06, "loss": 0.0404, "step": 928 }, { "epoch": 0.21137656427758816, "grad_norm": 1.6481528761515403, "learning_rate": 1.244507742934031e-06, "loss": 0.0839, "step": 929 }, { "epoch": 0.21160409556313994, "grad_norm": 0.9619424275618367, "learning_rate": 1.2444959172090393e-06, "loss": 0.0273, "step": 930 }, { "epoch": 0.2118316268486917, "grad_norm": 3.69544520271876, "learning_rate": 1.2444840788226854e-06, "loss": 0.1652, "step": 931 }, { "epoch": 0.21205915813424345, "grad_norm": 1.804306156177365, "learning_rate": 1.2444722277752114e-06, "loss": 0.1027, "step": 932 }, { "epoch": 0.21228668941979523, "grad_norm": 1.768050037917839, "learning_rate": 1.2444603640668596e-06, "loss": 0.095, "step": 933 }, { "epoch": 0.21251422070534698, "grad_norm": 1.1285321294388682, "learning_rate": 1.2444484876978725e-06, "loss": 0.0465, "step": 934 }, { "epoch": 0.21274175199089876, "grad_norm": 1.7249462106069577, "learning_rate": 1.2444365986684929e-06, "loss": 0.0842, "step": 935 }, { "epoch": 0.2129692832764505, "grad_norm": 1.2306924671997328, "learning_rate": 1.2444246969789633e-06, "loss": 0.0447, "step": 936 }, { "epoch": 0.21319681456200226, "grad_norm": 1.1194256262692541, "learning_rate": 1.2444127826295277e-06, "loss": 0.0387, "step": 937 }, { "epoch": 0.21342434584755404, "grad_norm": 0.9357846968568098, "learning_rate": 1.244400855620429e-06, "loss": 0.0311, "step": 938 }, { "epoch": 0.2136518771331058, "grad_norm": 0.9399277842933605, "learning_rate": 1.2443889159519113e-06, "loss": 0.0408, "step": 939 }, { "epoch": 0.21387940841865757, "grad_norm": 1.038736805523899, "learning_rate": 1.2443769636242185e-06, "loss": 0.0573, "step": 940 }, { "epoch": 0.21410693970420933, "grad_norm": 1.179286632772412, "learning_rate": 1.244364998637595e-06, "loss": 0.0578, "step": 941 }, { "epoch": 0.2143344709897611, "grad_norm": 3.1404649465639594, "learning_rate": 1.2443530209922848e-06, "loss": 0.0549, "step": 942 }, { "epoch": 0.21456200227531286, "grad_norm": 1.4304654332903906, "learning_rate": 1.2443410306885337e-06, "loss": 0.0408, "step": 943 }, { "epoch": 0.2147895335608646, "grad_norm": 1.3926671923537388, "learning_rate": 1.244329027726586e-06, "loss": 0.0675, "step": 944 }, { "epoch": 0.2150170648464164, "grad_norm": 0.8473568816417418, "learning_rate": 1.2443170121066872e-06, "loss": 0.0388, "step": 945 }, { "epoch": 0.21524459613196814, "grad_norm": 1.7456532933762987, "learning_rate": 1.2443049838290827e-06, "loss": 0.0655, "step": 946 }, { "epoch": 0.21547212741751992, "grad_norm": 5.065824955296096, "learning_rate": 1.2442929428940186e-06, "loss": 0.196, "step": 947 }, { "epoch": 0.21569965870307167, "grad_norm": 0.9216519176270381, "learning_rate": 1.2442808893017414e-06, "loss": 0.0376, "step": 948 }, { "epoch": 0.21592718998862342, "grad_norm": 1.111383779669666, "learning_rate": 1.2442688230524965e-06, "loss": 0.0403, "step": 949 }, { "epoch": 0.2161547212741752, "grad_norm": 1.557530476173142, "learning_rate": 1.244256744146531e-06, "loss": 0.0365, "step": 950 }, { "epoch": 0.21638225255972696, "grad_norm": 1.1787378400008193, "learning_rate": 1.244244652584092e-06, "loss": 0.0507, "step": 951 }, { "epoch": 0.21660978384527874, "grad_norm": 0.9890913425273592, "learning_rate": 1.2442325483654263e-06, "loss": 0.0533, "step": 952 }, { "epoch": 0.2168373151308305, "grad_norm": 2.1206498633906294, "learning_rate": 1.2442204314907812e-06, "loss": 0.0794, "step": 953 }, { "epoch": 0.21706484641638224, "grad_norm": 1.3153818338446894, "learning_rate": 1.2442083019604047e-06, "loss": 0.0706, "step": 954 }, { "epoch": 0.21729237770193402, "grad_norm": 1.2497584639698338, "learning_rate": 1.2441961597745447e-06, "loss": 0.0474, "step": 955 }, { "epoch": 0.21751990898748577, "grad_norm": 1.0089839713441826, "learning_rate": 1.244184004933449e-06, "loss": 0.0354, "step": 956 }, { "epoch": 0.21774744027303755, "grad_norm": 1.967078988299315, "learning_rate": 1.2441718374373662e-06, "loss": 0.0371, "step": 957 }, { "epoch": 0.2179749715585893, "grad_norm": 1.1714442804547354, "learning_rate": 1.244159657286545e-06, "loss": 0.0452, "step": 958 }, { "epoch": 0.21820250284414108, "grad_norm": 0.940255872741034, "learning_rate": 1.2441474644812345e-06, "loss": 0.0363, "step": 959 }, { "epoch": 0.21843003412969283, "grad_norm": 1.100910872233546, "learning_rate": 1.2441352590216836e-06, "loss": 0.0357, "step": 960 }, { "epoch": 0.2186575654152446, "grad_norm": 1.9969980893987056, "learning_rate": 1.244123040908142e-06, "loss": 0.1572, "step": 961 }, { "epoch": 0.21888509670079637, "grad_norm": 1.5961804013592509, "learning_rate": 1.2441108101408592e-06, "loss": 0.0425, "step": 962 }, { "epoch": 0.21911262798634812, "grad_norm": 2.1162445389078024, "learning_rate": 1.2440985667200853e-06, "loss": 0.0517, "step": 963 }, { "epoch": 0.2193401592718999, "grad_norm": 1.064967765846801, "learning_rate": 1.2440863106460705e-06, "loss": 0.023, "step": 964 }, { "epoch": 0.21956769055745165, "grad_norm": 2.08098035050502, "learning_rate": 1.2440740419190655e-06, "loss": 0.0796, "step": 965 }, { "epoch": 0.2197952218430034, "grad_norm": 2.4340967444170136, "learning_rate": 1.2440617605393208e-06, "loss": 0.0673, "step": 966 }, { "epoch": 0.22002275312855518, "grad_norm": 2.139025801414634, "learning_rate": 1.2440494665070874e-06, "loss": 0.102, "step": 967 }, { "epoch": 0.22025028441410693, "grad_norm": 1.3877680351054698, "learning_rate": 1.2440371598226165e-06, "loss": 0.0588, "step": 968 }, { "epoch": 0.2204778156996587, "grad_norm": 1.3210352246509975, "learning_rate": 1.2440248404861598e-06, "loss": 0.0528, "step": 969 }, { "epoch": 0.22070534698521047, "grad_norm": 1.4713104262343806, "learning_rate": 1.2440125084979693e-06, "loss": 0.0468, "step": 970 }, { "epoch": 0.22093287827076222, "grad_norm": 1.4854620779239043, "learning_rate": 1.2440001638582965e-06, "loss": 0.0494, "step": 971 }, { "epoch": 0.221160409556314, "grad_norm": 1.1733972765082468, "learning_rate": 1.2439878065673944e-06, "loss": 0.0517, "step": 972 }, { "epoch": 0.22138794084186575, "grad_norm": 2.818817007281193, "learning_rate": 1.2439754366255149e-06, "loss": 0.0332, "step": 973 }, { "epoch": 0.22161547212741753, "grad_norm": 1.3519809642179033, "learning_rate": 1.2439630540329111e-06, "loss": 0.0714, "step": 974 }, { "epoch": 0.22184300341296928, "grad_norm": 1.2366129488699829, "learning_rate": 1.2439506587898358e-06, "loss": 0.0307, "step": 975 }, { "epoch": 0.22207053469852106, "grad_norm": 1.2264022197392555, "learning_rate": 1.243938250896543e-06, "loss": 0.0505, "step": 976 }, { "epoch": 0.2222980659840728, "grad_norm": 1.7725437520836973, "learning_rate": 1.2439258303532858e-06, "loss": 0.0551, "step": 977 }, { "epoch": 0.22252559726962456, "grad_norm": 0.9663835721436027, "learning_rate": 1.243913397160318e-06, "loss": 0.0375, "step": 978 }, { "epoch": 0.22275312855517634, "grad_norm": 0.8743576163827176, "learning_rate": 1.2439009513178938e-06, "loss": 0.0245, "step": 979 }, { "epoch": 0.2229806598407281, "grad_norm": 1.054127962193629, "learning_rate": 1.2438884928262678e-06, "loss": 0.0323, "step": 980 }, { "epoch": 0.22320819112627988, "grad_norm": 0.9874001278398739, "learning_rate": 1.2438760216856944e-06, "loss": 0.0435, "step": 981 }, { "epoch": 0.22343572241183163, "grad_norm": 1.3544331214991197, "learning_rate": 1.2438635378964284e-06, "loss": 0.0707, "step": 982 }, { "epoch": 0.22366325369738338, "grad_norm": 1.3272223315121154, "learning_rate": 1.2438510414587251e-06, "loss": 0.0553, "step": 983 }, { "epoch": 0.22389078498293516, "grad_norm": 1.4246609306991453, "learning_rate": 1.24383853237284e-06, "loss": 0.0616, "step": 984 }, { "epoch": 0.2241183162684869, "grad_norm": 1.3509367739810239, "learning_rate": 1.2438260106390285e-06, "loss": 0.054, "step": 985 }, { "epoch": 0.2243458475540387, "grad_norm": 2.743016610014617, "learning_rate": 1.2438134762575467e-06, "loss": 0.1654, "step": 986 }, { "epoch": 0.22457337883959044, "grad_norm": 1.1104234557718136, "learning_rate": 1.243800929228651e-06, "loss": 0.0438, "step": 987 }, { "epoch": 0.2248009101251422, "grad_norm": 1.5813628573176925, "learning_rate": 1.2437883695525974e-06, "loss": 0.0577, "step": 988 }, { "epoch": 0.22502844141069397, "grad_norm": 0.9637284966057095, "learning_rate": 1.2437757972296427e-06, "loss": 0.0441, "step": 989 }, { "epoch": 0.22525597269624573, "grad_norm": 1.2736418998037702, "learning_rate": 1.2437632122600442e-06, "loss": 0.0627, "step": 990 }, { "epoch": 0.2254835039817975, "grad_norm": 1.704338839028225, "learning_rate": 1.2437506146440587e-06, "loss": 0.0794, "step": 991 }, { "epoch": 0.22571103526734926, "grad_norm": 1.3083723593285776, "learning_rate": 1.243738004381944e-06, "loss": 0.0367, "step": 992 }, { "epoch": 0.225938566552901, "grad_norm": 1.2372256320814667, "learning_rate": 1.2437253814739572e-06, "loss": 0.0478, "step": 993 }, { "epoch": 0.2261660978384528, "grad_norm": 2.064272199741316, "learning_rate": 1.2437127459203572e-06, "loss": 0.0673, "step": 994 }, { "epoch": 0.22639362912400454, "grad_norm": 0.9919600393162777, "learning_rate": 1.2437000977214015e-06, "loss": 0.031, "step": 995 }, { "epoch": 0.22662116040955632, "grad_norm": 1.6080832002027023, "learning_rate": 1.243687436877349e-06, "loss": 0.0841, "step": 996 }, { "epoch": 0.22684869169510807, "grad_norm": 1.3930180180919087, "learning_rate": 1.2436747633884583e-06, "loss": 0.0616, "step": 997 }, { "epoch": 0.22707622298065985, "grad_norm": 4.70664655158806, "learning_rate": 1.2436620772549885e-06, "loss": 0.1541, "step": 998 }, { "epoch": 0.2273037542662116, "grad_norm": 2.3548939922944556, "learning_rate": 1.243649378477199e-06, "loss": 0.1161, "step": 999 }, { "epoch": 0.22753128555176336, "grad_norm": 1.9186203633212395, "learning_rate": 1.2436366670553491e-06, "loss": 0.0725, "step": 1000 }, { "epoch": 0.22775881683731514, "grad_norm": 1.726012700048525, "learning_rate": 1.2436239429896988e-06, "loss": 0.1038, "step": 1001 }, { "epoch": 0.2279863481228669, "grad_norm": 1.7981785549763478, "learning_rate": 1.2436112062805081e-06, "loss": 0.0485, "step": 1002 }, { "epoch": 0.22821387940841867, "grad_norm": 1.935791028636196, "learning_rate": 1.2435984569280372e-06, "loss": 0.0773, "step": 1003 }, { "epoch": 0.22844141069397042, "grad_norm": 1.4700273599627696, "learning_rate": 1.2435856949325467e-06, "loss": 0.0584, "step": 1004 }, { "epoch": 0.22866894197952217, "grad_norm": 4.712742088498197, "learning_rate": 1.2435729202942972e-06, "loss": 0.073, "step": 1005 }, { "epoch": 0.22889647326507395, "grad_norm": 1.3146906101281273, "learning_rate": 1.2435601330135506e-06, "loss": 0.0357, "step": 1006 }, { "epoch": 0.2291240045506257, "grad_norm": 2.3674281228532066, "learning_rate": 1.2435473330905674e-06, "loss": 0.0701, "step": 1007 }, { "epoch": 0.22935153583617748, "grad_norm": 1.2459812264205032, "learning_rate": 1.2435345205256097e-06, "loss": 0.0375, "step": 1008 }, { "epoch": 0.22957906712172924, "grad_norm": 1.4505448834484096, "learning_rate": 1.243521695318939e-06, "loss": 0.057, "step": 1009 }, { "epoch": 0.229806598407281, "grad_norm": 1.014039827241364, "learning_rate": 1.2435088574708178e-06, "loss": 0.027, "step": 1010 }, { "epoch": 0.23003412969283277, "grad_norm": 2.0117138797089766, "learning_rate": 1.2434960069815083e-06, "loss": 0.0583, "step": 1011 }, { "epoch": 0.23026166097838452, "grad_norm": 2.319983736807929, "learning_rate": 1.243483143851273e-06, "loss": 0.0687, "step": 1012 }, { "epoch": 0.2304891922639363, "grad_norm": 1.5313544208925962, "learning_rate": 1.2434702680803751e-06, "loss": 0.0531, "step": 1013 }, { "epoch": 0.23071672354948805, "grad_norm": 2.4005097874561616, "learning_rate": 1.2434573796690774e-06, "loss": 0.0929, "step": 1014 }, { "epoch": 0.23094425483503983, "grad_norm": 1.4786754339201256, "learning_rate": 1.2434444786176435e-06, "loss": 0.072, "step": 1015 }, { "epoch": 0.23117178612059158, "grad_norm": 1.0190695534603456, "learning_rate": 1.2434315649263372e-06, "loss": 0.0336, "step": 1016 }, { "epoch": 0.23139931740614333, "grad_norm": 0.85632474720689, "learning_rate": 1.2434186385954225e-06, "loss": 0.035, "step": 1017 }, { "epoch": 0.23162684869169511, "grad_norm": 1.2607686804487748, "learning_rate": 1.243405699625163e-06, "loss": 0.0442, "step": 1018 }, { "epoch": 0.23185437997724687, "grad_norm": 1.2717127046145194, "learning_rate": 1.243392748015824e-06, "loss": 0.0377, "step": 1019 }, { "epoch": 0.23208191126279865, "grad_norm": 0.9597540032341592, "learning_rate": 1.2433797837676694e-06, "loss": 0.0434, "step": 1020 }, { "epoch": 0.2323094425483504, "grad_norm": 0.9627749018367474, "learning_rate": 1.2433668068809648e-06, "loss": 0.0303, "step": 1021 }, { "epoch": 0.23253697383390215, "grad_norm": 1.2513646734831103, "learning_rate": 1.243353817355975e-06, "loss": 0.039, "step": 1022 }, { "epoch": 0.23276450511945393, "grad_norm": 1.2104231875454026, "learning_rate": 1.2433408151929655e-06, "loss": 0.0361, "step": 1023 }, { "epoch": 0.23299203640500568, "grad_norm": 1.0202244699658645, "learning_rate": 1.2433278003922026e-06, "loss": 0.0351, "step": 1024 }, { "epoch": 0.23321956769055746, "grad_norm": 1.4263990244044498, "learning_rate": 1.2433147729539514e-06, "loss": 0.0815, "step": 1025 }, { "epoch": 0.2334470989761092, "grad_norm": 1.5336205920808743, "learning_rate": 1.2433017328784788e-06, "loss": 0.0637, "step": 1026 }, { "epoch": 0.23367463026166096, "grad_norm": 1.4842765531420945, "learning_rate": 1.2432886801660513e-06, "loss": 0.0619, "step": 1027 }, { "epoch": 0.23390216154721274, "grad_norm": 0.9615217436557268, "learning_rate": 1.2432756148169354e-06, "loss": 0.0424, "step": 1028 }, { "epoch": 0.2341296928327645, "grad_norm": 2.5880116161383273, "learning_rate": 1.2432625368313983e-06, "loss": 0.0925, "step": 1029 }, { "epoch": 0.23435722411831628, "grad_norm": 1.1806940004387316, "learning_rate": 1.2432494462097072e-06, "loss": 0.0547, "step": 1030 }, { "epoch": 0.23458475540386803, "grad_norm": 0.9929303526421415, "learning_rate": 1.2432363429521295e-06, "loss": 0.0529, "step": 1031 }, { "epoch": 0.2348122866894198, "grad_norm": 0.9571908888077277, "learning_rate": 1.2432232270589335e-06, "loss": 0.0334, "step": 1032 }, { "epoch": 0.23503981797497156, "grad_norm": 2.067393897730002, "learning_rate": 1.2432100985303868e-06, "loss": 0.106, "step": 1033 }, { "epoch": 0.2352673492605233, "grad_norm": 0.8691321322200917, "learning_rate": 1.243196957366758e-06, "loss": 0.0346, "step": 1034 }, { "epoch": 0.2354948805460751, "grad_norm": 1.5305165337075897, "learning_rate": 1.2431838035683155e-06, "loss": 0.0848, "step": 1035 }, { "epoch": 0.23572241183162684, "grad_norm": 1.3223616364406545, "learning_rate": 1.2431706371353282e-06, "loss": 0.0687, "step": 1036 }, { "epoch": 0.23594994311717862, "grad_norm": 1.928604930493819, "learning_rate": 1.2431574580680653e-06, "loss": 0.0649, "step": 1037 }, { "epoch": 0.23617747440273038, "grad_norm": 1.2898613078943457, "learning_rate": 1.2431442663667958e-06, "loss": 0.0542, "step": 1038 }, { "epoch": 0.23640500568828213, "grad_norm": 1.0765527225532854, "learning_rate": 1.2431310620317898e-06, "loss": 0.0601, "step": 1039 }, { "epoch": 0.2366325369738339, "grad_norm": 0.7732520151584088, "learning_rate": 1.2431178450633168e-06, "loss": 0.0285, "step": 1040 }, { "epoch": 0.23686006825938566, "grad_norm": 1.1685154356326117, "learning_rate": 1.2431046154616473e-06, "loss": 0.0356, "step": 1041 }, { "epoch": 0.23708759954493744, "grad_norm": 1.3543617648083628, "learning_rate": 1.2430913732270512e-06, "loss": 0.0446, "step": 1042 }, { "epoch": 0.2373151308304892, "grad_norm": 1.707962300050113, "learning_rate": 1.2430781183597995e-06, "loss": 0.0555, "step": 1043 }, { "epoch": 0.23754266211604094, "grad_norm": 1.2841428640766417, "learning_rate": 1.243064850860163e-06, "loss": 0.0447, "step": 1044 }, { "epoch": 0.23777019340159272, "grad_norm": 6.261159556049287, "learning_rate": 1.243051570728413e-06, "loss": 0.0749, "step": 1045 }, { "epoch": 0.23799772468714447, "grad_norm": 1.1364071139867686, "learning_rate": 1.2430382779648208e-06, "loss": 0.0326, "step": 1046 }, { "epoch": 0.23822525597269625, "grad_norm": 1.484581864381779, "learning_rate": 1.243024972569658e-06, "loss": 0.0645, "step": 1047 }, { "epoch": 0.238452787258248, "grad_norm": 1.1545473703015605, "learning_rate": 1.2430116545431966e-06, "loss": 0.0481, "step": 1048 }, { "epoch": 0.23868031854379979, "grad_norm": 0.8983805133665866, "learning_rate": 1.2429983238857088e-06, "loss": 0.0369, "step": 1049 }, { "epoch": 0.23890784982935154, "grad_norm": 1.068742935236811, "learning_rate": 1.2429849805974673e-06, "loss": 0.039, "step": 1050 }, { "epoch": 0.2391353811149033, "grad_norm": 1.4873654976644912, "learning_rate": 1.2429716246787444e-06, "loss": 0.0312, "step": 1051 }, { "epoch": 0.23936291240045507, "grad_norm": 0.9449501930869969, "learning_rate": 1.242958256129813e-06, "loss": 0.0567, "step": 1052 }, { "epoch": 0.23959044368600682, "grad_norm": 1.6243209526441913, "learning_rate": 1.242944874950947e-06, "loss": 0.0647, "step": 1053 }, { "epoch": 0.2398179749715586, "grad_norm": 1.4569612537340462, "learning_rate": 1.2429314811424192e-06, "loss": 0.0758, "step": 1054 }, { "epoch": 0.24004550625711035, "grad_norm": 1.5825027859681509, "learning_rate": 1.242918074704504e-06, "loss": 0.0621, "step": 1055 }, { "epoch": 0.2402730375426621, "grad_norm": 1.0511003317201362, "learning_rate": 1.2429046556374747e-06, "loss": 0.037, "step": 1056 }, { "epoch": 0.24050056882821388, "grad_norm": 1.5928024296302492, "learning_rate": 1.2428912239416057e-06, "loss": 0.0453, "step": 1057 }, { "epoch": 0.24072810011376564, "grad_norm": 1.1044477930026537, "learning_rate": 1.242877779617172e-06, "loss": 0.0419, "step": 1058 }, { "epoch": 0.24095563139931742, "grad_norm": 1.3035104956572268, "learning_rate": 1.242864322664448e-06, "loss": 0.0531, "step": 1059 }, { "epoch": 0.24118316268486917, "grad_norm": 1.4034057924806689, "learning_rate": 1.2428508530837088e-06, "loss": 0.0753, "step": 1060 }, { "epoch": 0.24141069397042092, "grad_norm": 45.147488919338755, "learning_rate": 1.2428373708752298e-06, "loss": 0.4166, "step": 1061 }, { "epoch": 0.2416382252559727, "grad_norm": 1.804049071201822, "learning_rate": 1.2428238760392862e-06, "loss": 0.0881, "step": 1062 }, { "epoch": 0.24186575654152445, "grad_norm": 1.2662139418382417, "learning_rate": 1.2428103685761543e-06, "loss": 0.0592, "step": 1063 }, { "epoch": 0.24209328782707623, "grad_norm": 2.633674196903193, "learning_rate": 1.2427968484861097e-06, "loss": 0.1104, "step": 1064 }, { "epoch": 0.24232081911262798, "grad_norm": 1.437345835314508, "learning_rate": 1.2427833157694292e-06, "loss": 0.059, "step": 1065 }, { "epoch": 0.24254835039817976, "grad_norm": 1.4759939750585893, "learning_rate": 1.2427697704263892e-06, "loss": 0.0564, "step": 1066 }, { "epoch": 0.24277588168373151, "grad_norm": 1.4003983063384298, "learning_rate": 1.2427562124572663e-06, "loss": 0.0756, "step": 1067 }, { "epoch": 0.24300341296928327, "grad_norm": 1.2893015359567934, "learning_rate": 1.2427426418623377e-06, "loss": 0.0581, "step": 1068 }, { "epoch": 0.24323094425483505, "grad_norm": 0.9083897855186556, "learning_rate": 1.242729058641881e-06, "loss": 0.0353, "step": 1069 }, { "epoch": 0.2434584755403868, "grad_norm": 2.339042375493057, "learning_rate": 1.2427154627961737e-06, "loss": 0.1366, "step": 1070 }, { "epoch": 0.24368600682593858, "grad_norm": 2.1438385712601593, "learning_rate": 1.2427018543254935e-06, "loss": 0.082, "step": 1071 }, { "epoch": 0.24391353811149033, "grad_norm": 1.627313372349946, "learning_rate": 1.2426882332301187e-06, "loss": 0.076, "step": 1072 }, { "epoch": 0.24414106939704208, "grad_norm": 0.7791355914048339, "learning_rate": 1.2426745995103277e-06, "loss": 0.0309, "step": 1073 }, { "epoch": 0.24436860068259386, "grad_norm": 1.1486991259203154, "learning_rate": 1.242660953166399e-06, "loss": 0.0509, "step": 1074 }, { "epoch": 0.2445961319681456, "grad_norm": 1.7757375833942548, "learning_rate": 1.2426472941986117e-06, "loss": 0.0731, "step": 1075 }, { "epoch": 0.2448236632536974, "grad_norm": 1.4454374080664514, "learning_rate": 1.2426336226072449e-06, "loss": 0.0868, "step": 1076 }, { "epoch": 0.24505119453924915, "grad_norm": 1.0698078663784565, "learning_rate": 1.242619938392578e-06, "loss": 0.037, "step": 1077 }, { "epoch": 0.2452787258248009, "grad_norm": 1.4863388002827098, "learning_rate": 1.2426062415548907e-06, "loss": 0.0677, "step": 1078 }, { "epoch": 0.24550625711035268, "grad_norm": 0.8732937705027402, "learning_rate": 1.2425925320944628e-06, "loss": 0.0293, "step": 1079 }, { "epoch": 0.24573378839590443, "grad_norm": 1.2595288446902135, "learning_rate": 1.2425788100115747e-06, "loss": 0.056, "step": 1080 }, { "epoch": 0.2459613196814562, "grad_norm": 1.5998408970282165, "learning_rate": 1.2425650753065065e-06, "loss": 0.0764, "step": 1081 }, { "epoch": 0.24618885096700796, "grad_norm": 1.8027463727170587, "learning_rate": 1.2425513279795395e-06, "loss": 0.0615, "step": 1082 }, { "epoch": 0.24641638225255974, "grad_norm": 1.5654018901455387, "learning_rate": 1.2425375680309543e-06, "loss": 0.0661, "step": 1083 }, { "epoch": 0.2466439135381115, "grad_norm": 1.0676862677438599, "learning_rate": 1.2425237954610322e-06, "loss": 0.0469, "step": 1084 }, { "epoch": 0.24687144482366324, "grad_norm": 1.097337057406586, "learning_rate": 1.2425100102700547e-06, "loss": 0.0457, "step": 1085 }, { "epoch": 0.24709897610921502, "grad_norm": 1.7432427011380645, "learning_rate": 1.2424962124583033e-06, "loss": 0.0566, "step": 1086 }, { "epoch": 0.24732650739476678, "grad_norm": 1.2401111182610864, "learning_rate": 1.2424824020260603e-06, "loss": 0.0471, "step": 1087 }, { "epoch": 0.24755403868031856, "grad_norm": 1.6086762249686064, "learning_rate": 1.2424685789736077e-06, "loss": 0.0712, "step": 1088 }, { "epoch": 0.2477815699658703, "grad_norm": 2.223604715087674, "learning_rate": 1.2424547433012284e-06, "loss": 0.059, "step": 1089 }, { "epoch": 0.24800910125142206, "grad_norm": 1.763749275351197, "learning_rate": 1.2424408950092049e-06, "loss": 0.0768, "step": 1090 }, { "epoch": 0.24823663253697384, "grad_norm": 1.7828992657348108, "learning_rate": 1.2424270340978204e-06, "loss": 0.0794, "step": 1091 }, { "epoch": 0.2484641638225256, "grad_norm": 1.5001450767436246, "learning_rate": 1.2424131605673582e-06, "loss": 0.0693, "step": 1092 }, { "epoch": 0.24869169510807737, "grad_norm": 1.2301421279887537, "learning_rate": 1.2423992744181015e-06, "loss": 0.0512, "step": 1093 }, { "epoch": 0.24891922639362912, "grad_norm": 1.2844864578504145, "learning_rate": 1.2423853756503343e-06, "loss": 0.0447, "step": 1094 }, { "epoch": 0.24914675767918087, "grad_norm": 1.7196177827780137, "learning_rate": 1.2423714642643408e-06, "loss": 0.1182, "step": 1095 }, { "epoch": 0.24937428896473265, "grad_norm": 1.46657616590301, "learning_rate": 1.2423575402604051e-06, "loss": 0.0656, "step": 1096 }, { "epoch": 0.2496018202502844, "grad_norm": 1.326342830248352, "learning_rate": 1.2423436036388122e-06, "loss": 0.0544, "step": 1097 }, { "epoch": 0.24982935153583619, "grad_norm": 1.952008099521945, "learning_rate": 1.2423296543998465e-06, "loss": 0.078, "step": 1098 }, { "epoch": 0.25005688282138794, "grad_norm": 2.219926087555926, "learning_rate": 1.2423156925437932e-06, "loss": 0.0976, "step": 1099 }, { "epoch": 0.2502844141069397, "grad_norm": 1.4678969382750013, "learning_rate": 1.2423017180709376e-06, "loss": 0.0727, "step": 1100 }, { "epoch": 0.25051194539249144, "grad_norm": 1.9083840830954104, "learning_rate": 1.2422877309815656e-06, "loss": 0.0647, "step": 1101 }, { "epoch": 0.25073947667804325, "grad_norm": 1.3418972776139215, "learning_rate": 1.242273731275963e-06, "loss": 0.0616, "step": 1102 }, { "epoch": 0.250967007963595, "grad_norm": 1.0799686257073313, "learning_rate": 1.2422597189544155e-06, "loss": 0.0306, "step": 1103 }, { "epoch": 0.25119453924914675, "grad_norm": 1.4786340245230432, "learning_rate": 1.2422456940172101e-06, "loss": 0.0752, "step": 1104 }, { "epoch": 0.2514220705346985, "grad_norm": 1.3574468048142418, "learning_rate": 1.2422316564646331e-06, "loss": 0.0469, "step": 1105 }, { "epoch": 0.25164960182025026, "grad_norm": 1.0996737224918256, "learning_rate": 1.2422176062969713e-06, "loss": 0.06, "step": 1106 }, { "epoch": 0.25187713310580206, "grad_norm": 1.2079586241887055, "learning_rate": 1.2422035435145121e-06, "loss": 0.0417, "step": 1107 }, { "epoch": 0.2521046643913538, "grad_norm": 1.8484139155779635, "learning_rate": 1.2421894681175428e-06, "loss": 0.1201, "step": 1108 }, { "epoch": 0.25233219567690557, "grad_norm": 2.2964405416217977, "learning_rate": 1.2421753801063511e-06, "loss": 0.0661, "step": 1109 }, { "epoch": 0.2525597269624573, "grad_norm": 1.233798632970271, "learning_rate": 1.2421612794812248e-06, "loss": 0.0533, "step": 1110 }, { "epoch": 0.25278725824800913, "grad_norm": 0.8794754874897922, "learning_rate": 1.2421471662424525e-06, "loss": 0.0374, "step": 1111 }, { "epoch": 0.2530147895335609, "grad_norm": 1.1655828487104456, "learning_rate": 1.2421330403903222e-06, "loss": 0.0417, "step": 1112 }, { "epoch": 0.25324232081911263, "grad_norm": 1.978604856786128, "learning_rate": 1.2421189019251228e-06, "loss": 0.0613, "step": 1113 }, { "epoch": 0.2534698521046644, "grad_norm": 2.776603540550928, "learning_rate": 1.2421047508471433e-06, "loss": 0.0945, "step": 1114 }, { "epoch": 0.25369738339021614, "grad_norm": 1.4281147024591225, "learning_rate": 1.242090587156673e-06, "loss": 0.0634, "step": 1115 }, { "epoch": 0.25392491467576794, "grad_norm": 1.9687323288611094, "learning_rate": 1.242076410854001e-06, "loss": 0.0637, "step": 1116 }, { "epoch": 0.2541524459613197, "grad_norm": 0.9824968425418638, "learning_rate": 1.2420622219394174e-06, "loss": 0.0361, "step": 1117 }, { "epoch": 0.25437997724687145, "grad_norm": 1.1748756467082628, "learning_rate": 1.2420480204132117e-06, "loss": 0.0438, "step": 1118 }, { "epoch": 0.2546075085324232, "grad_norm": 1.1687970818584577, "learning_rate": 1.242033806275675e-06, "loss": 0.0517, "step": 1119 }, { "epoch": 0.25483503981797495, "grad_norm": 0.7243821856689469, "learning_rate": 1.2420195795270973e-06, "loss": 0.0341, "step": 1120 }, { "epoch": 0.25506257110352676, "grad_norm": 1.5334379290667297, "learning_rate": 1.2420053401677693e-06, "loss": 0.0834, "step": 1121 }, { "epoch": 0.2552901023890785, "grad_norm": 1.1475477659525273, "learning_rate": 1.241991088197982e-06, "loss": 0.0409, "step": 1122 }, { "epoch": 0.25551763367463026, "grad_norm": 1.0609745683681193, "learning_rate": 1.241976823618027e-06, "loss": 0.0521, "step": 1123 }, { "epoch": 0.255745164960182, "grad_norm": 2.0039923086463602, "learning_rate": 1.241962546428196e-06, "loss": 0.0912, "step": 1124 }, { "epoch": 0.25597269624573377, "grad_norm": 2.1894810646011846, "learning_rate": 1.24194825662878e-06, "loss": 0.0785, "step": 1125 }, { "epoch": 0.2562002275312856, "grad_norm": 1.4851813238564915, "learning_rate": 1.2419339542200715e-06, "loss": 0.0688, "step": 1126 }, { "epoch": 0.2564277588168373, "grad_norm": 1.2728572943407013, "learning_rate": 1.241919639202363e-06, "loss": 0.0794, "step": 1127 }, { "epoch": 0.2566552901023891, "grad_norm": 1.8082441837013492, "learning_rate": 1.2419053115759468e-06, "loss": 0.0677, "step": 1128 }, { "epoch": 0.25688282138794083, "grad_norm": 2.0609613364729267, "learning_rate": 1.2418909713411161e-06, "loss": 0.08, "step": 1129 }, { "epoch": 0.2571103526734926, "grad_norm": 1.533485951338072, "learning_rate": 1.2418766184981634e-06, "loss": 0.0606, "step": 1130 }, { "epoch": 0.2573378839590444, "grad_norm": 1.4347136829017464, "learning_rate": 1.2418622530473825e-06, "loss": 0.0825, "step": 1131 }, { "epoch": 0.25756541524459614, "grad_norm": 0.8595888516934124, "learning_rate": 1.2418478749890672e-06, "loss": 0.0351, "step": 1132 }, { "epoch": 0.2577929465301479, "grad_norm": 0.8988536535150867, "learning_rate": 1.2418334843235105e-06, "loss": 0.0465, "step": 1133 }, { "epoch": 0.25802047781569964, "grad_norm": 0.8536639563076862, "learning_rate": 1.2418190810510075e-06, "loss": 0.0258, "step": 1134 }, { "epoch": 0.2582480091012514, "grad_norm": 1.8139538056092865, "learning_rate": 1.2418046651718518e-06, "loss": 0.0545, "step": 1135 }, { "epoch": 0.2584755403868032, "grad_norm": 1.0709084305362149, "learning_rate": 1.2417902366863386e-06, "loss": 0.0488, "step": 1136 }, { "epoch": 0.25870307167235496, "grad_norm": 1.5965977494143468, "learning_rate": 1.2417757955947623e-06, "loss": 0.0549, "step": 1137 }, { "epoch": 0.2589306029579067, "grad_norm": 1.258938457199683, "learning_rate": 1.2417613418974187e-06, "loss": 0.0422, "step": 1138 }, { "epoch": 0.25915813424345846, "grad_norm": 0.9954800667569594, "learning_rate": 1.2417468755946025e-06, "loss": 0.0297, "step": 1139 }, { "epoch": 0.2593856655290102, "grad_norm": 1.410994342027921, "learning_rate": 1.2417323966866097e-06, "loss": 0.054, "step": 1140 }, { "epoch": 0.259613196814562, "grad_norm": 1.5107757792275114, "learning_rate": 1.2417179051737364e-06, "loss": 0.0589, "step": 1141 }, { "epoch": 0.25984072810011377, "grad_norm": 1.6398671824805653, "learning_rate": 1.2417034010562784e-06, "loss": 0.0666, "step": 1142 }, { "epoch": 0.2600682593856655, "grad_norm": 1.3520000022781709, "learning_rate": 1.2416888843345323e-06, "loss": 0.0416, "step": 1143 }, { "epoch": 0.2602957906712173, "grad_norm": 2.915495796493339, "learning_rate": 1.2416743550087951e-06, "loss": 0.1129, "step": 1144 }, { "epoch": 0.2605233219567691, "grad_norm": 1.2846258547817568, "learning_rate": 1.241659813079363e-06, "loss": 0.0579, "step": 1145 }, { "epoch": 0.26075085324232083, "grad_norm": 2.7427597460101936, "learning_rate": 1.2416452585465342e-06, "loss": 0.0702, "step": 1146 }, { "epoch": 0.2609783845278726, "grad_norm": 0.5884709313504546, "learning_rate": 1.2416306914106053e-06, "loss": 0.0237, "step": 1147 }, { "epoch": 0.26120591581342434, "grad_norm": 1.2939407540414039, "learning_rate": 1.2416161116718744e-06, "loss": 0.0583, "step": 1148 }, { "epoch": 0.2614334470989761, "grad_norm": 1.96584405463017, "learning_rate": 1.2416015193306397e-06, "loss": 0.0597, "step": 1149 }, { "epoch": 0.2616609783845279, "grad_norm": 1.367578620212531, "learning_rate": 1.241586914387199e-06, "loss": 0.0525, "step": 1150 }, { "epoch": 0.26188850967007965, "grad_norm": 1.5579301663665894, "learning_rate": 1.2415722968418508e-06, "loss": 0.0701, "step": 1151 }, { "epoch": 0.2621160409556314, "grad_norm": 2.113746394897606, "learning_rate": 1.2415576666948945e-06, "loss": 0.1158, "step": 1152 }, { "epoch": 0.26234357224118315, "grad_norm": 1.450032906329583, "learning_rate": 1.2415430239466283e-06, "loss": 0.0602, "step": 1153 }, { "epoch": 0.2625711035267349, "grad_norm": 1.3272413443266664, "learning_rate": 1.241528368597352e-06, "loss": 0.0523, "step": 1154 }, { "epoch": 0.2627986348122867, "grad_norm": 1.179039651442814, "learning_rate": 1.2415137006473649e-06, "loss": 0.0647, "step": 1155 }, { "epoch": 0.26302616609783847, "grad_norm": 1.145520426253407, "learning_rate": 1.241499020096967e-06, "loss": 0.0477, "step": 1156 }, { "epoch": 0.2632536973833902, "grad_norm": 1.328947872893636, "learning_rate": 1.2414843269464579e-06, "loss": 0.0452, "step": 1157 }, { "epoch": 0.26348122866894197, "grad_norm": 1.8843518385578133, "learning_rate": 1.2414696211961386e-06, "loss": 0.0603, "step": 1158 }, { "epoch": 0.2637087599544937, "grad_norm": 2.1467164256006943, "learning_rate": 1.2414549028463087e-06, "loss": 0.0908, "step": 1159 }, { "epoch": 0.26393629124004553, "grad_norm": 1.1918005997211691, "learning_rate": 1.2414401718972703e-06, "loss": 0.0576, "step": 1160 }, { "epoch": 0.2641638225255973, "grad_norm": 1.2786644068555166, "learning_rate": 1.2414254283493232e-06, "loss": 0.0416, "step": 1161 }, { "epoch": 0.26439135381114903, "grad_norm": 0.9452023930772737, "learning_rate": 1.2414106722027694e-06, "loss": 0.0537, "step": 1162 }, { "epoch": 0.2646188850967008, "grad_norm": 1.1756293508640332, "learning_rate": 1.2413959034579104e-06, "loss": 0.0511, "step": 1163 }, { "epoch": 0.26484641638225254, "grad_norm": 1.0472201568449635, "learning_rate": 1.2413811221150478e-06, "loss": 0.0442, "step": 1164 }, { "epoch": 0.26507394766780434, "grad_norm": 1.916314085543847, "learning_rate": 1.2413663281744843e-06, "loss": 0.0768, "step": 1165 }, { "epoch": 0.2653014789533561, "grad_norm": 1.0894771604167501, "learning_rate": 1.2413515216365216e-06, "loss": 0.0602, "step": 1166 }, { "epoch": 0.26552901023890785, "grad_norm": 1.00975683219884, "learning_rate": 1.2413367025014628e-06, "loss": 0.0432, "step": 1167 }, { "epoch": 0.2657565415244596, "grad_norm": 2.1765273948785913, "learning_rate": 1.2413218707696103e-06, "loss": 0.1186, "step": 1168 }, { "epoch": 0.26598407281001135, "grad_norm": 1.5268905455044306, "learning_rate": 1.2413070264412677e-06, "loss": 0.0671, "step": 1169 }, { "epoch": 0.26621160409556316, "grad_norm": 0.8198195830517153, "learning_rate": 1.2412921695167381e-06, "loss": 0.0303, "step": 1170 }, { "epoch": 0.2664391353811149, "grad_norm": 0.916743439302086, "learning_rate": 1.2412772999963253e-06, "loss": 0.0519, "step": 1171 }, { "epoch": 0.26666666666666666, "grad_norm": 0.9592599841051566, "learning_rate": 1.2412624178803332e-06, "loss": 0.0465, "step": 1172 }, { "epoch": 0.2668941979522184, "grad_norm": 1.2610976325050403, "learning_rate": 1.2412475231690656e-06, "loss": 0.0345, "step": 1173 }, { "epoch": 0.26712172923777017, "grad_norm": 1.5016024974886808, "learning_rate": 1.2412326158628275e-06, "loss": 0.0714, "step": 1174 }, { "epoch": 0.267349260523322, "grad_norm": 1.4314762978749895, "learning_rate": 1.2412176959619232e-06, "loss": 0.0591, "step": 1175 }, { "epoch": 0.2675767918088737, "grad_norm": 2.2495091900368562, "learning_rate": 1.2412027634666578e-06, "loss": 0.1059, "step": 1176 }, { "epoch": 0.2678043230944255, "grad_norm": 1.706282565066305, "learning_rate": 1.2411878183773366e-06, "loss": 0.0665, "step": 1177 }, { "epoch": 0.26803185437997723, "grad_norm": 0.7041103017092823, "learning_rate": 1.2411728606942647e-06, "loss": 0.0218, "step": 1178 }, { "epoch": 0.26825938566552904, "grad_norm": 0.9725397335622218, "learning_rate": 1.241157890417748e-06, "loss": 0.0452, "step": 1179 }, { "epoch": 0.2684869169510808, "grad_norm": 1.9030708757385315, "learning_rate": 1.2411429075480923e-06, "loss": 0.0827, "step": 1180 }, { "epoch": 0.26871444823663254, "grad_norm": 1.1973351649414545, "learning_rate": 1.2411279120856042e-06, "loss": 0.0476, "step": 1181 }, { "epoch": 0.2689419795221843, "grad_norm": 1.1061135820635588, "learning_rate": 1.24111290403059e-06, "loss": 0.0387, "step": 1182 }, { "epoch": 0.26916951080773605, "grad_norm": 1.1523226678516665, "learning_rate": 1.2410978833833564e-06, "loss": 0.0614, "step": 1183 }, { "epoch": 0.26939704209328785, "grad_norm": 1.0514874828378626, "learning_rate": 1.2410828501442104e-06, "loss": 0.0439, "step": 1184 }, { "epoch": 0.2696245733788396, "grad_norm": 1.5362061949507542, "learning_rate": 1.2410678043134591e-06, "loss": 0.0804, "step": 1185 }, { "epoch": 0.26985210466439136, "grad_norm": 1.8173832515444213, "learning_rate": 1.2410527458914103e-06, "loss": 0.063, "step": 1186 }, { "epoch": 0.2700796359499431, "grad_norm": 1.556320593990832, "learning_rate": 1.2410376748783714e-06, "loss": 0.0795, "step": 1187 }, { "epoch": 0.27030716723549486, "grad_norm": 1.321348596107485, "learning_rate": 1.241022591274651e-06, "loss": 0.0802, "step": 1188 }, { "epoch": 0.27053469852104667, "grad_norm": 1.0652846616165823, "learning_rate": 1.2410074950805567e-06, "loss": 0.036, "step": 1189 }, { "epoch": 0.2707622298065984, "grad_norm": 1.2080698971206665, "learning_rate": 1.2409923862963973e-06, "loss": 0.0488, "step": 1190 }, { "epoch": 0.27098976109215017, "grad_norm": 1.0230330657283313, "learning_rate": 1.240977264922482e-06, "loss": 0.0408, "step": 1191 }, { "epoch": 0.2712172923777019, "grad_norm": 1.243454259162499, "learning_rate": 1.2409621309591195e-06, "loss": 0.0359, "step": 1192 }, { "epoch": 0.2714448236632537, "grad_norm": 1.3001395180964366, "learning_rate": 1.2409469844066188e-06, "loss": 0.0454, "step": 1193 }, { "epoch": 0.2716723549488055, "grad_norm": 1.4147947246201094, "learning_rate": 1.2409318252652899e-06, "loss": 0.0673, "step": 1194 }, { "epoch": 0.27189988623435724, "grad_norm": 2.0607810868090977, "learning_rate": 1.2409166535354428e-06, "loss": 0.1099, "step": 1195 }, { "epoch": 0.272127417519909, "grad_norm": 0.8856660147431201, "learning_rate": 1.2409014692173872e-06, "loss": 0.037, "step": 1196 }, { "epoch": 0.27235494880546074, "grad_norm": 0.8588236308520142, "learning_rate": 1.240886272311433e-06, "loss": 0.0317, "step": 1197 }, { "epoch": 0.2725824800910125, "grad_norm": 1.2966972282013072, "learning_rate": 1.240871062817892e-06, "loss": 0.0519, "step": 1198 }, { "epoch": 0.2728100113765643, "grad_norm": 1.0141950682821237, "learning_rate": 1.240855840737074e-06, "loss": 0.0362, "step": 1199 }, { "epoch": 0.27303754266211605, "grad_norm": 1.1327532340524933, "learning_rate": 1.2408406060692909e-06, "loss": 0.0411, "step": 1200 }, { "epoch": 0.2732650739476678, "grad_norm": 38.94813584323084, "learning_rate": 1.2408253588148532e-06, "loss": 0.4954, "step": 1201 }, { "epoch": 0.27349260523321955, "grad_norm": 1.3419172753176944, "learning_rate": 1.2408100989740735e-06, "loss": 0.0629, "step": 1202 }, { "epoch": 0.2737201365187713, "grad_norm": 1.164418605632623, "learning_rate": 1.2407948265472628e-06, "loss": 0.0327, "step": 1203 }, { "epoch": 0.2739476678043231, "grad_norm": 1.2988536193125668, "learning_rate": 1.2407795415347336e-06, "loss": 0.0567, "step": 1204 }, { "epoch": 0.27417519908987487, "grad_norm": 1.361893473954927, "learning_rate": 1.2407642439367986e-06, "loss": 0.0438, "step": 1205 }, { "epoch": 0.2744027303754266, "grad_norm": 1.0391897172547626, "learning_rate": 1.24074893375377e-06, "loss": 0.0557, "step": 1206 }, { "epoch": 0.27463026166097837, "grad_norm": 0.9962942412430917, "learning_rate": 1.2407336109859607e-06, "loss": 0.0371, "step": 1207 }, { "epoch": 0.2748577929465301, "grad_norm": 1.215727870574286, "learning_rate": 1.2407182756336844e-06, "loss": 0.0437, "step": 1208 }, { "epoch": 0.27508532423208193, "grad_norm": 1.8508042963184421, "learning_rate": 1.240702927697254e-06, "loss": 0.0626, "step": 1209 }, { "epoch": 0.2753128555176337, "grad_norm": 1.8022236633570414, "learning_rate": 1.2406875671769837e-06, "loss": 0.0584, "step": 1210 }, { "epoch": 0.27554038680318543, "grad_norm": 1.606815955580643, "learning_rate": 1.2406721940731866e-06, "loss": 0.0599, "step": 1211 }, { "epoch": 0.2757679180887372, "grad_norm": 1.0652074757978982, "learning_rate": 1.2406568083861776e-06, "loss": 0.0531, "step": 1212 }, { "epoch": 0.27599544937428894, "grad_norm": 1.4187270182072722, "learning_rate": 1.2406414101162708e-06, "loss": 0.0528, "step": 1213 }, { "epoch": 0.27622298065984074, "grad_norm": 1.0888371326459914, "learning_rate": 1.2406259992637815e-06, "loss": 0.0552, "step": 1214 }, { "epoch": 0.2764505119453925, "grad_norm": 2.1125996368288553, "learning_rate": 1.240610575829024e-06, "loss": 0.0735, "step": 1215 }, { "epoch": 0.27667804323094425, "grad_norm": 1.288876395538611, "learning_rate": 1.2405951398123136e-06, "loss": 0.0553, "step": 1216 }, { "epoch": 0.276905574516496, "grad_norm": 1.711303610363005, "learning_rate": 1.2405796912139662e-06, "loss": 0.0673, "step": 1217 }, { "epoch": 0.2771331058020478, "grad_norm": 1.3330966398511392, "learning_rate": 1.240564230034297e-06, "loss": 0.0347, "step": 1218 }, { "epoch": 0.27736063708759956, "grad_norm": 0.9030680288819154, "learning_rate": 1.2405487562736226e-06, "loss": 0.0365, "step": 1219 }, { "epoch": 0.2775881683731513, "grad_norm": 1.20973479021967, "learning_rate": 1.240533269932259e-06, "loss": 0.0647, "step": 1220 }, { "epoch": 0.27781569965870306, "grad_norm": 1.2330537480211692, "learning_rate": 1.2405177710105223e-06, "loss": 0.0415, "step": 1221 }, { "epoch": 0.2780432309442548, "grad_norm": 1.4447131783576561, "learning_rate": 1.24050225950873e-06, "loss": 0.04, "step": 1222 }, { "epoch": 0.2782707622298066, "grad_norm": 1.5348321099487485, "learning_rate": 1.2404867354271984e-06, "loss": 0.0699, "step": 1223 }, { "epoch": 0.2784982935153584, "grad_norm": 1.103797642514096, "learning_rate": 1.2404711987662452e-06, "loss": 0.0501, "step": 1224 }, { "epoch": 0.2787258248009101, "grad_norm": 1.8543277255058668, "learning_rate": 1.240455649526188e-06, "loss": 0.0779, "step": 1225 }, { "epoch": 0.2789533560864619, "grad_norm": 1.461515751555683, "learning_rate": 1.2404400877073446e-06, "loss": 0.0589, "step": 1226 }, { "epoch": 0.27918088737201363, "grad_norm": 1.7282210636065953, "learning_rate": 1.2404245133100328e-06, "loss": 0.0616, "step": 1227 }, { "epoch": 0.27940841865756544, "grad_norm": 0.9274643279367238, "learning_rate": 1.240408926334571e-06, "loss": 0.0259, "step": 1228 }, { "epoch": 0.2796359499431172, "grad_norm": 1.033501452595113, "learning_rate": 1.240393326781278e-06, "loss": 0.0388, "step": 1229 }, { "epoch": 0.27986348122866894, "grad_norm": 1.2277681625095047, "learning_rate": 1.2403777146504722e-06, "loss": 0.0417, "step": 1230 }, { "epoch": 0.2800910125142207, "grad_norm": 1.6056103902463323, "learning_rate": 1.240362089942473e-06, "loss": 0.0869, "step": 1231 }, { "epoch": 0.28031854379977245, "grad_norm": 0.9379340266438139, "learning_rate": 1.2403464526575997e-06, "loss": 0.0288, "step": 1232 }, { "epoch": 0.28054607508532425, "grad_norm": 1.791400262370738, "learning_rate": 1.240330802796172e-06, "loss": 0.0494, "step": 1233 }, { "epoch": 0.280773606370876, "grad_norm": 1.5180897393064074, "learning_rate": 1.2403151403585093e-06, "loss": 0.0555, "step": 1234 }, { "epoch": 0.28100113765642776, "grad_norm": 1.3796072286824306, "learning_rate": 1.240299465344932e-06, "loss": 0.0405, "step": 1235 }, { "epoch": 0.2812286689419795, "grad_norm": 0.968124324669557, "learning_rate": 1.2402837777557608e-06, "loss": 0.0462, "step": 1236 }, { "epoch": 0.28145620022753126, "grad_norm": 2.8732703815868157, "learning_rate": 1.240268077591316e-06, "loss": 0.1432, "step": 1237 }, { "epoch": 0.28168373151308307, "grad_norm": 1.7096825525845536, "learning_rate": 1.2402523648519184e-06, "loss": 0.0709, "step": 1238 }, { "epoch": 0.2819112627986348, "grad_norm": 1.4537193421874715, "learning_rate": 1.2402366395378892e-06, "loss": 0.0477, "step": 1239 }, { "epoch": 0.2821387940841866, "grad_norm": 1.0525576505096776, "learning_rate": 1.24022090164955e-06, "loss": 0.0381, "step": 1240 }, { "epoch": 0.2823663253697383, "grad_norm": 2.0691434586350135, "learning_rate": 1.240205151187222e-06, "loss": 0.0749, "step": 1241 }, { "epoch": 0.2825938566552901, "grad_norm": 1.4855492490109166, "learning_rate": 1.2401893881512278e-06, "loss": 0.0486, "step": 1242 }, { "epoch": 0.2828213879408419, "grad_norm": 2.3507672202858427, "learning_rate": 1.240173612541889e-06, "loss": 0.1187, "step": 1243 }, { "epoch": 0.28304891922639364, "grad_norm": 1.4271939145537942, "learning_rate": 1.2401578243595281e-06, "loss": 0.0459, "step": 1244 }, { "epoch": 0.2832764505119454, "grad_norm": 0.7153929012800043, "learning_rate": 1.2401420236044678e-06, "loss": 0.0203, "step": 1245 }, { "epoch": 0.28350398179749714, "grad_norm": 1.8926578555618896, "learning_rate": 1.2401262102770314e-06, "loss": 0.0939, "step": 1246 }, { "epoch": 0.2837315130830489, "grad_norm": 1.5820745105272767, "learning_rate": 1.2401103843775416e-06, "loss": 0.0885, "step": 1247 }, { "epoch": 0.2839590443686007, "grad_norm": 1.3120392754023362, "learning_rate": 1.240094545906322e-06, "loss": 0.0629, "step": 1248 }, { "epoch": 0.28418657565415245, "grad_norm": 1.2780609773747653, "learning_rate": 1.2400786948636966e-06, "loss": 0.0669, "step": 1249 }, { "epoch": 0.2844141069397042, "grad_norm": 1.4394753485796945, "learning_rate": 1.2400628312499892e-06, "loss": 0.0457, "step": 1250 }, { "epoch": 0.28464163822525596, "grad_norm": 1.4902299382519306, "learning_rate": 1.2400469550655239e-06, "loss": 0.0788, "step": 1251 }, { "epoch": 0.28486916951080776, "grad_norm": 2.1373698726270614, "learning_rate": 1.240031066310625e-06, "loss": 0.0709, "step": 1252 }, { "epoch": 0.2850967007963595, "grad_norm": 2.6364110769644467, "learning_rate": 1.2400151649856176e-06, "loss": 0.1474, "step": 1253 }, { "epoch": 0.28532423208191127, "grad_norm": 1.54489334981108, "learning_rate": 1.2399992510908266e-06, "loss": 0.0681, "step": 1254 }, { "epoch": 0.285551763367463, "grad_norm": 1.2035560270921895, "learning_rate": 1.2399833246265772e-06, "loss": 0.0624, "step": 1255 }, { "epoch": 0.28577929465301477, "grad_norm": 2.3200664566107427, "learning_rate": 1.2399673855931951e-06, "loss": 0.0948, "step": 1256 }, { "epoch": 0.2860068259385666, "grad_norm": 0.911546303051303, "learning_rate": 1.2399514339910058e-06, "loss": 0.0371, "step": 1257 }, { "epoch": 0.28623435722411833, "grad_norm": 1.1459525470306917, "learning_rate": 1.2399354698203353e-06, "loss": 0.063, "step": 1258 }, { "epoch": 0.2864618885096701, "grad_norm": 5.90970905115882, "learning_rate": 1.2399194930815103e-06, "loss": 0.2573, "step": 1259 }, { "epoch": 0.28668941979522183, "grad_norm": 1.6075560430361318, "learning_rate": 1.2399035037748567e-06, "loss": 0.0712, "step": 1260 }, { "epoch": 0.2869169510807736, "grad_norm": 1.059504335911741, "learning_rate": 1.2398875019007017e-06, "loss": 0.0336, "step": 1261 }, { "epoch": 0.2871444823663254, "grad_norm": 1.0386465638630533, "learning_rate": 1.2398714874593722e-06, "loss": 0.028, "step": 1262 }, { "epoch": 0.28737201365187715, "grad_norm": 1.2034325585223464, "learning_rate": 1.2398554604511958e-06, "loss": 0.0426, "step": 1263 }, { "epoch": 0.2875995449374289, "grad_norm": 1.130614264631673, "learning_rate": 1.2398394208764998e-06, "loss": 0.0449, "step": 1264 }, { "epoch": 0.28782707622298065, "grad_norm": 1.8759349581624183, "learning_rate": 1.239823368735612e-06, "loss": 0.0738, "step": 1265 }, { "epoch": 0.2880546075085324, "grad_norm": 1.534877012833876, "learning_rate": 1.2398073040288605e-06, "loss": 0.0565, "step": 1266 }, { "epoch": 0.2882821387940842, "grad_norm": 1.1199756497261975, "learning_rate": 1.2397912267565738e-06, "loss": 0.0434, "step": 1267 }, { "epoch": 0.28850967007963596, "grad_norm": 1.2767125637243415, "learning_rate": 1.2397751369190803e-06, "loss": 0.053, "step": 1268 }, { "epoch": 0.2887372013651877, "grad_norm": 1.6866930161803586, "learning_rate": 1.2397590345167088e-06, "loss": 0.0977, "step": 1269 }, { "epoch": 0.28896473265073946, "grad_norm": 1.2795958593835728, "learning_rate": 1.2397429195497887e-06, "loss": 0.0435, "step": 1270 }, { "epoch": 0.2891922639362912, "grad_norm": 1.2653384358613777, "learning_rate": 1.239726792018649e-06, "loss": 0.0366, "step": 1271 }, { "epoch": 0.289419795221843, "grad_norm": 1.175013233181025, "learning_rate": 1.2397106519236197e-06, "loss": 0.0589, "step": 1272 }, { "epoch": 0.2896473265073948, "grad_norm": 0.8118499461560013, "learning_rate": 1.2396944992650303e-06, "loss": 0.0341, "step": 1273 }, { "epoch": 0.2898748577929465, "grad_norm": 1.7245088180198505, "learning_rate": 1.2396783340432111e-06, "loss": 0.0712, "step": 1274 }, { "epoch": 0.2901023890784983, "grad_norm": 1.453392643848313, "learning_rate": 1.2396621562584925e-06, "loss": 0.0552, "step": 1275 }, { "epoch": 0.29032992036405003, "grad_norm": 0.9686308053813132, "learning_rate": 1.2396459659112052e-06, "loss": 0.0298, "step": 1276 }, { "epoch": 0.29055745164960184, "grad_norm": 1.2190161105816313, "learning_rate": 1.23962976300168e-06, "loss": 0.0657, "step": 1277 }, { "epoch": 0.2907849829351536, "grad_norm": 1.4400910874681545, "learning_rate": 1.2396135475302478e-06, "loss": 0.0689, "step": 1278 }, { "epoch": 0.29101251422070534, "grad_norm": 1.6072456813020977, "learning_rate": 1.2395973194972406e-06, "loss": 0.0519, "step": 1279 }, { "epoch": 0.2912400455062571, "grad_norm": 0.9134133498246579, "learning_rate": 1.2395810789029898e-06, "loss": 0.0305, "step": 1280 }, { "epoch": 0.29146757679180885, "grad_norm": 1.4368452567354464, "learning_rate": 1.2395648257478271e-06, "loss": 0.0842, "step": 1281 }, { "epoch": 0.29169510807736065, "grad_norm": 1.2627048123993603, "learning_rate": 1.239548560032085e-06, "loss": 0.0556, "step": 1282 }, { "epoch": 0.2919226393629124, "grad_norm": 1.8157780106000843, "learning_rate": 1.239532281756096e-06, "loss": 0.0812, "step": 1283 }, { "epoch": 0.29215017064846416, "grad_norm": 1.7031472200076767, "learning_rate": 1.2395159909201924e-06, "loss": 0.1025, "step": 1284 }, { "epoch": 0.2923777019340159, "grad_norm": 1.4512078193177647, "learning_rate": 1.2394996875247075e-06, "loss": 0.0878, "step": 1285 }, { "epoch": 0.2926052332195677, "grad_norm": 1.1558527774450407, "learning_rate": 1.2394833715699743e-06, "loss": 0.0517, "step": 1286 }, { "epoch": 0.29283276450511947, "grad_norm": 1.1961425724018928, "learning_rate": 1.2394670430563264e-06, "loss": 0.0822, "step": 1287 }, { "epoch": 0.2930602957906712, "grad_norm": 1.1257477613693543, "learning_rate": 1.2394507019840974e-06, "loss": 0.0433, "step": 1288 }, { "epoch": 0.293287827076223, "grad_norm": 1.7490981885592791, "learning_rate": 1.2394343483536215e-06, "loss": 0.0731, "step": 1289 }, { "epoch": 0.2935153583617747, "grad_norm": 0.7736925600695324, "learning_rate": 1.2394179821652326e-06, "loss": 0.048, "step": 1290 }, { "epoch": 0.29374288964732653, "grad_norm": 1.2742121789528693, "learning_rate": 1.2394016034192654e-06, "loss": 0.0523, "step": 1291 }, { "epoch": 0.2939704209328783, "grad_norm": 1.4246615440711465, "learning_rate": 1.2393852121160548e-06, "loss": 0.0813, "step": 1292 }, { "epoch": 0.29419795221843004, "grad_norm": 1.656527882332843, "learning_rate": 1.2393688082559357e-06, "loss": 0.0699, "step": 1293 }, { "epoch": 0.2944254835039818, "grad_norm": 1.2504233198660668, "learning_rate": 1.2393523918392433e-06, "loss": 0.0457, "step": 1294 }, { "epoch": 0.29465301478953354, "grad_norm": 2.1835557352018458, "learning_rate": 1.2393359628663133e-06, "loss": 0.1184, "step": 1295 }, { "epoch": 0.29488054607508535, "grad_norm": 1.1347886652336971, "learning_rate": 1.239319521337481e-06, "loss": 0.0655, "step": 1296 }, { "epoch": 0.2951080773606371, "grad_norm": 1.1763338033692377, "learning_rate": 1.2393030672530828e-06, "loss": 0.0568, "step": 1297 }, { "epoch": 0.29533560864618885, "grad_norm": 1.3990303438853744, "learning_rate": 1.239286600613455e-06, "loss": 0.041, "step": 1298 }, { "epoch": 0.2955631399317406, "grad_norm": 0.9401022615309382, "learning_rate": 1.2392701214189343e-06, "loss": 0.0349, "step": 1299 }, { "epoch": 0.29579067121729236, "grad_norm": 2.3948134916737516, "learning_rate": 1.2392536296698571e-06, "loss": 0.1472, "step": 1300 }, { "epoch": 0.29601820250284416, "grad_norm": 1.320538326644519, "learning_rate": 1.2392371253665605e-06, "loss": 0.0767, "step": 1301 }, { "epoch": 0.2962457337883959, "grad_norm": 1.354776649937033, "learning_rate": 1.2392206085093823e-06, "loss": 0.0584, "step": 1302 }, { "epoch": 0.29647326507394767, "grad_norm": 1.4302761520764427, "learning_rate": 1.2392040790986594e-06, "loss": 0.0823, "step": 1303 }, { "epoch": 0.2967007963594994, "grad_norm": 1.4047156580871927, "learning_rate": 1.2391875371347303e-06, "loss": 0.0549, "step": 1304 }, { "epoch": 0.29692832764505117, "grad_norm": 0.7970562047989252, "learning_rate": 1.2391709826179327e-06, "loss": 0.0336, "step": 1305 }, { "epoch": 0.297155858930603, "grad_norm": 1.3727411496357969, "learning_rate": 1.239154415548605e-06, "loss": 0.0392, "step": 1306 }, { "epoch": 0.29738339021615473, "grad_norm": 0.9054264955476324, "learning_rate": 1.2391378359270859e-06, "loss": 0.0389, "step": 1307 }, { "epoch": 0.2976109215017065, "grad_norm": 1.814490926829392, "learning_rate": 1.2391212437537138e-06, "loss": 0.072, "step": 1308 }, { "epoch": 0.29783845278725823, "grad_norm": 1.7618287946971354, "learning_rate": 1.2391046390288287e-06, "loss": 0.058, "step": 1309 }, { "epoch": 0.29806598407281, "grad_norm": 1.5269715641275399, "learning_rate": 1.2390880217527692e-06, "loss": 0.0656, "step": 1310 }, { "epoch": 0.2982935153583618, "grad_norm": 1.262642874805734, "learning_rate": 1.2390713919258752e-06, "loss": 0.0532, "step": 1311 }, { "epoch": 0.29852104664391355, "grad_norm": 1.9617860637916495, "learning_rate": 1.2390547495484866e-06, "loss": 0.1163, "step": 1312 }, { "epoch": 0.2987485779294653, "grad_norm": 1.288057411790914, "learning_rate": 1.2390380946209436e-06, "loss": 0.0508, "step": 1313 }, { "epoch": 0.29897610921501705, "grad_norm": 1.1943768298905832, "learning_rate": 1.2390214271435863e-06, "loss": 0.0388, "step": 1314 }, { "epoch": 0.2992036405005688, "grad_norm": 1.291453023419144, "learning_rate": 1.2390047471167557e-06, "loss": 0.0459, "step": 1315 }, { "epoch": 0.2994311717861206, "grad_norm": 1.014250520116699, "learning_rate": 1.2389880545407926e-06, "loss": 0.0415, "step": 1316 }, { "epoch": 0.29965870307167236, "grad_norm": 0.8604020785970597, "learning_rate": 1.2389713494160379e-06, "loss": 0.0256, "step": 1317 }, { "epoch": 0.2998862343572241, "grad_norm": 1.5717854535146285, "learning_rate": 1.2389546317428335e-06, "loss": 0.0659, "step": 1318 }, { "epoch": 0.30011376564277586, "grad_norm": 1.1712963748867264, "learning_rate": 1.2389379015215208e-06, "loss": 0.0576, "step": 1319 }, { "epoch": 0.3003412969283277, "grad_norm": 1.9673409422404795, "learning_rate": 1.2389211587524416e-06, "loss": 0.0724, "step": 1320 }, { "epoch": 0.3005688282138794, "grad_norm": 1.7254424987946275, "learning_rate": 1.2389044034359383e-06, "loss": 0.0667, "step": 1321 }, { "epoch": 0.3007963594994312, "grad_norm": 1.7304834437682375, "learning_rate": 1.2388876355723533e-06, "loss": 0.0669, "step": 1322 }, { "epoch": 0.30102389078498293, "grad_norm": 1.5970096808225174, "learning_rate": 1.2388708551620295e-06, "loss": 0.0606, "step": 1323 }, { "epoch": 0.3012514220705347, "grad_norm": 1.4122576326239549, "learning_rate": 1.2388540622053095e-06, "loss": 0.0521, "step": 1324 }, { "epoch": 0.3014789533560865, "grad_norm": 0.955203381580325, "learning_rate": 1.2388372567025367e-06, "loss": 0.0338, "step": 1325 }, { "epoch": 0.30170648464163824, "grad_norm": 1.8026618764076134, "learning_rate": 1.2388204386540546e-06, "loss": 0.0667, "step": 1326 }, { "epoch": 0.30193401592719, "grad_norm": 1.4712519094439878, "learning_rate": 1.238803608060207e-06, "loss": 0.0573, "step": 1327 }, { "epoch": 0.30216154721274174, "grad_norm": 1.7736677880474818, "learning_rate": 1.2387867649213376e-06, "loss": 0.066, "step": 1328 }, { "epoch": 0.3023890784982935, "grad_norm": 2.3477972521623465, "learning_rate": 1.2387699092377908e-06, "loss": 0.1076, "step": 1329 }, { "epoch": 0.3026166097838453, "grad_norm": 1.1616522138628205, "learning_rate": 1.2387530410099113e-06, "loss": 0.0439, "step": 1330 }, { "epoch": 0.30284414106939705, "grad_norm": 1.4363882653748854, "learning_rate": 1.2387361602380436e-06, "loss": 0.0571, "step": 1331 }, { "epoch": 0.3030716723549488, "grad_norm": 0.7445259748127024, "learning_rate": 1.238719266922533e-06, "loss": 0.0254, "step": 1332 }, { "epoch": 0.30329920364050056, "grad_norm": 1.5848968542889414, "learning_rate": 1.238702361063724e-06, "loss": 0.0656, "step": 1333 }, { "epoch": 0.3035267349260523, "grad_norm": 6.520543814327786, "learning_rate": 1.2386854426619633e-06, "loss": 0.168, "step": 1334 }, { "epoch": 0.3037542662116041, "grad_norm": 1.1879669128675703, "learning_rate": 1.2386685117175956e-06, "loss": 0.0414, "step": 1335 }, { "epoch": 0.30398179749715587, "grad_norm": 1.627644346798232, "learning_rate": 1.2386515682309676e-06, "loss": 0.0524, "step": 1336 }, { "epoch": 0.3042093287827076, "grad_norm": 1.5535932914971018, "learning_rate": 1.2386346122024253e-06, "loss": 0.0783, "step": 1337 }, { "epoch": 0.3044368600682594, "grad_norm": 1.9629237703618092, "learning_rate": 1.2386176436323154e-06, "loss": 0.0682, "step": 1338 }, { "epoch": 0.3046643913538111, "grad_norm": 1.1645585598673585, "learning_rate": 1.2386006625209847e-06, "loss": 0.0459, "step": 1339 }, { "epoch": 0.30489192263936293, "grad_norm": 1.7630197143088098, "learning_rate": 1.2385836688687802e-06, "loss": 0.0934, "step": 1340 }, { "epoch": 0.3051194539249147, "grad_norm": 1.6191880044317468, "learning_rate": 1.2385666626760493e-06, "loss": 0.0633, "step": 1341 }, { "epoch": 0.30534698521046644, "grad_norm": 1.8913527150034817, "learning_rate": 1.2385496439431395e-06, "loss": 0.0914, "step": 1342 }, { "epoch": 0.3055745164960182, "grad_norm": 1.445418394402971, "learning_rate": 1.2385326126703986e-06, "loss": 0.0687, "step": 1343 }, { "epoch": 0.30580204778156994, "grad_norm": 1.8288680303956055, "learning_rate": 1.2385155688581746e-06, "loss": 0.0739, "step": 1344 }, { "epoch": 0.30602957906712175, "grad_norm": 1.0257171242110983, "learning_rate": 1.238498512506816e-06, "loss": 0.0384, "step": 1345 }, { "epoch": 0.3062571103526735, "grad_norm": 1.8747685549411612, "learning_rate": 1.2384814436166715e-06, "loss": 0.0818, "step": 1346 }, { "epoch": 0.30648464163822525, "grad_norm": 1.2409585891810266, "learning_rate": 1.2384643621880898e-06, "loss": 0.0559, "step": 1347 }, { "epoch": 0.306712172923777, "grad_norm": 1.5549364252372342, "learning_rate": 1.2384472682214201e-06, "loss": 0.0633, "step": 1348 }, { "epoch": 0.30693970420932876, "grad_norm": 2.521195686268785, "learning_rate": 1.2384301617170116e-06, "loss": 0.076, "step": 1349 }, { "epoch": 0.30716723549488056, "grad_norm": 1.5977696260072916, "learning_rate": 1.2384130426752142e-06, "loss": 0.0849, "step": 1350 }, { "epoch": 0.3073947667804323, "grad_norm": 1.8196313465168323, "learning_rate": 1.2383959110963775e-06, "loss": 0.0585, "step": 1351 }, { "epoch": 0.30762229806598407, "grad_norm": 1.349543502368748, "learning_rate": 1.2383787669808518e-06, "loss": 0.0627, "step": 1352 }, { "epoch": 0.3078498293515358, "grad_norm": 1.6887455424782774, "learning_rate": 1.2383616103289871e-06, "loss": 0.0497, "step": 1353 }, { "epoch": 0.3080773606370876, "grad_norm": 0.6215713527290292, "learning_rate": 1.238344441141135e-06, "loss": 0.0178, "step": 1354 }, { "epoch": 0.3083048919226394, "grad_norm": 1.640454410435492, "learning_rate": 1.2383272594176454e-06, "loss": 0.0638, "step": 1355 }, { "epoch": 0.30853242320819113, "grad_norm": 2.278012967969371, "learning_rate": 1.23831006515887e-06, "loss": 0.0815, "step": 1356 }, { "epoch": 0.3087599544937429, "grad_norm": 1.272401712072303, "learning_rate": 1.2382928583651601e-06, "loss": 0.0654, "step": 1357 }, { "epoch": 0.30898748577929463, "grad_norm": 2.586692416689192, "learning_rate": 1.2382756390368674e-06, "loss": 0.0821, "step": 1358 }, { "epoch": 0.30921501706484644, "grad_norm": 1.0904063045445367, "learning_rate": 1.2382584071743438e-06, "loss": 0.0461, "step": 1359 }, { "epoch": 0.3094425483503982, "grad_norm": 1.8779937542492704, "learning_rate": 1.2382411627779414e-06, "loss": 0.0883, "step": 1360 }, { "epoch": 0.30967007963594995, "grad_norm": 1.9814293020102243, "learning_rate": 1.2382239058480128e-06, "loss": 0.0778, "step": 1361 }, { "epoch": 0.3098976109215017, "grad_norm": 1.4818784709766222, "learning_rate": 1.2382066363849106e-06, "loss": 0.0476, "step": 1362 }, { "epoch": 0.31012514220705345, "grad_norm": 1.5363329257464977, "learning_rate": 1.2381893543889878e-06, "loss": 0.0539, "step": 1363 }, { "epoch": 0.31035267349260526, "grad_norm": 1.58062550907191, "learning_rate": 1.2381720598605976e-06, "loss": 0.0761, "step": 1364 }, { "epoch": 0.310580204778157, "grad_norm": 1.358541490432856, "learning_rate": 1.2381547528000934e-06, "loss": 0.0923, "step": 1365 }, { "epoch": 0.31080773606370876, "grad_norm": 1.0458300282796855, "learning_rate": 1.238137433207829e-06, "loss": 0.0377, "step": 1366 }, { "epoch": 0.3110352673492605, "grad_norm": 1.183742190968498, "learning_rate": 1.2381201010841585e-06, "loss": 0.0402, "step": 1367 }, { "epoch": 0.31126279863481227, "grad_norm": 1.2571454436583878, "learning_rate": 1.2381027564294359e-06, "loss": 0.062, "step": 1368 }, { "epoch": 0.3114903299203641, "grad_norm": 1.503878059552754, "learning_rate": 1.238085399244016e-06, "loss": 0.0751, "step": 1369 }, { "epoch": 0.3117178612059158, "grad_norm": 2.1756129961580317, "learning_rate": 1.2380680295282532e-06, "loss": 0.099, "step": 1370 }, { "epoch": 0.3119453924914676, "grad_norm": 1.478282276295607, "learning_rate": 1.2380506472825025e-06, "loss": 0.0451, "step": 1371 }, { "epoch": 0.31217292377701933, "grad_norm": 1.210809744948102, "learning_rate": 1.2380332525071194e-06, "loss": 0.0371, "step": 1372 }, { "epoch": 0.3124004550625711, "grad_norm": 1.4368943652552293, "learning_rate": 1.2380158452024595e-06, "loss": 0.0407, "step": 1373 }, { "epoch": 0.3126279863481229, "grad_norm": 0.7640327894504174, "learning_rate": 1.2379984253688783e-06, "loss": 0.0337, "step": 1374 }, { "epoch": 0.31285551763367464, "grad_norm": 1.8295475899815037, "learning_rate": 1.237980993006732e-06, "loss": 0.1147, "step": 1375 }, { "epoch": 0.3130830489192264, "grad_norm": 1.4807139063710786, "learning_rate": 1.2379635481163768e-06, "loss": 0.0743, "step": 1376 }, { "epoch": 0.31331058020477814, "grad_norm": 2.448286916518216, "learning_rate": 1.2379460906981692e-06, "loss": 0.0534, "step": 1377 }, { "epoch": 0.3135381114903299, "grad_norm": 0.9023973811346913, "learning_rate": 1.237928620752466e-06, "loss": 0.052, "step": 1378 }, { "epoch": 0.3137656427758817, "grad_norm": 2.013947590986497, "learning_rate": 1.2379111382796246e-06, "loss": 0.0575, "step": 1379 }, { "epoch": 0.31399317406143346, "grad_norm": 1.6032640725703056, "learning_rate": 1.2378936432800017e-06, "loss": 0.0706, "step": 1380 }, { "epoch": 0.3142207053469852, "grad_norm": 1.239992146480516, "learning_rate": 1.2378761357539554e-06, "loss": 0.0569, "step": 1381 }, { "epoch": 0.31444823663253696, "grad_norm": 1.0642429061701875, "learning_rate": 1.2378586157018434e-06, "loss": 0.0381, "step": 1382 }, { "epoch": 0.3146757679180887, "grad_norm": 1.1087768689334148, "learning_rate": 1.2378410831240235e-06, "loss": 0.0558, "step": 1383 }, { "epoch": 0.3149032992036405, "grad_norm": 1.9572648591918616, "learning_rate": 1.2378235380208542e-06, "loss": 0.0738, "step": 1384 }, { "epoch": 0.31513083048919227, "grad_norm": 0.9929386411259282, "learning_rate": 1.2378059803926941e-06, "loss": 0.0394, "step": 1385 }, { "epoch": 0.315358361774744, "grad_norm": 1.202982993512815, "learning_rate": 1.2377884102399023e-06, "loss": 0.0461, "step": 1386 }, { "epoch": 0.3155858930602958, "grad_norm": 1.1752667673190271, "learning_rate": 1.2377708275628375e-06, "loss": 0.0447, "step": 1387 }, { "epoch": 0.3158134243458475, "grad_norm": 1.5891864361864994, "learning_rate": 1.2377532323618593e-06, "loss": 0.0456, "step": 1388 }, { "epoch": 0.31604095563139933, "grad_norm": 1.6341111744531334, "learning_rate": 1.2377356246373271e-06, "loss": 0.069, "step": 1389 }, { "epoch": 0.3162684869169511, "grad_norm": 1.440731938439493, "learning_rate": 1.2377180043896012e-06, "loss": 0.0595, "step": 1390 }, { "epoch": 0.31649601820250284, "grad_norm": 0.9756352806439903, "learning_rate": 1.2377003716190411e-06, "loss": 0.0469, "step": 1391 }, { "epoch": 0.3167235494880546, "grad_norm": 1.4676165718616923, "learning_rate": 1.2376827263260078e-06, "loss": 0.0516, "step": 1392 }, { "epoch": 0.3169510807736064, "grad_norm": 1.5939577885228815, "learning_rate": 1.2376650685108612e-06, "loss": 0.0503, "step": 1393 }, { "epoch": 0.31717861205915815, "grad_norm": 2.1816410059989098, "learning_rate": 1.2376473981739632e-06, "loss": 0.0929, "step": 1394 }, { "epoch": 0.3174061433447099, "grad_norm": 2.718683093963833, "learning_rate": 1.237629715315674e-06, "loss": 0.1129, "step": 1395 }, { "epoch": 0.31763367463026165, "grad_norm": 0.9979838242263193, "learning_rate": 1.2376120199363554e-06, "loss": 0.0536, "step": 1396 }, { "epoch": 0.3178612059158134, "grad_norm": 1.9720298867063506, "learning_rate": 1.2375943120363692e-06, "loss": 0.0969, "step": 1397 }, { "epoch": 0.3180887372013652, "grad_norm": 0.6502262811463818, "learning_rate": 1.2375765916160773e-06, "loss": 0.0287, "step": 1398 }, { "epoch": 0.31831626848691696, "grad_norm": 1.3837265125988647, "learning_rate": 1.2375588586758415e-06, "loss": 0.0441, "step": 1399 }, { "epoch": 0.3185437997724687, "grad_norm": 1.593794924686227, "learning_rate": 1.2375411132160245e-06, "loss": 0.0595, "step": 1400 }, { "epoch": 0.31877133105802047, "grad_norm": 1.776611213440541, "learning_rate": 1.2375233552369892e-06, "loss": 0.0694, "step": 1401 }, { "epoch": 0.3189988623435722, "grad_norm": 1.2580919074923334, "learning_rate": 1.237505584739098e-06, "loss": 0.0491, "step": 1402 }, { "epoch": 0.31922639362912403, "grad_norm": 0.994481303076797, "learning_rate": 1.2374878017227147e-06, "loss": 0.0291, "step": 1403 }, { "epoch": 0.3194539249146758, "grad_norm": 0.7265755658718445, "learning_rate": 1.237470006188202e-06, "loss": 0.027, "step": 1404 }, { "epoch": 0.31968145620022753, "grad_norm": 0.9576296817818531, "learning_rate": 1.2374521981359245e-06, "loss": 0.0423, "step": 1405 }, { "epoch": 0.3199089874857793, "grad_norm": 1.7776464530772098, "learning_rate": 1.2374343775662456e-06, "loss": 0.0764, "step": 1406 }, { "epoch": 0.32013651877133104, "grad_norm": 1.2848189857025718, "learning_rate": 1.2374165444795296e-06, "loss": 0.0398, "step": 1407 }, { "epoch": 0.32036405005688284, "grad_norm": 2.0114145603579625, "learning_rate": 1.237398698876141e-06, "loss": 0.0934, "step": 1408 }, { "epoch": 0.3205915813424346, "grad_norm": 1.3984905897111508, "learning_rate": 1.2373808407564446e-06, "loss": 0.0513, "step": 1409 }, { "epoch": 0.32081911262798635, "grad_norm": 0.9464490409610342, "learning_rate": 1.2373629701208053e-06, "loss": 0.0369, "step": 1410 }, { "epoch": 0.3210466439135381, "grad_norm": 1.4072284744296988, "learning_rate": 1.2373450869695883e-06, "loss": 0.0479, "step": 1411 }, { "epoch": 0.32127417519908985, "grad_norm": 1.5797071346301892, "learning_rate": 1.2373271913031593e-06, "loss": 0.0581, "step": 1412 }, { "epoch": 0.32150170648464166, "grad_norm": 1.5741999856445812, "learning_rate": 1.237309283121884e-06, "loss": 0.0695, "step": 1413 }, { "epoch": 0.3217292377701934, "grad_norm": 1.004256336444922, "learning_rate": 1.237291362426128e-06, "loss": 0.0584, "step": 1414 }, { "epoch": 0.32195676905574516, "grad_norm": 1.2610248823720653, "learning_rate": 1.2372734292162584e-06, "loss": 0.0508, "step": 1415 }, { "epoch": 0.3221843003412969, "grad_norm": 1.138966176373832, "learning_rate": 1.237255483492641e-06, "loss": 0.0346, "step": 1416 }, { "epoch": 0.32241183162684867, "grad_norm": 1.279726266655994, "learning_rate": 1.2372375252556429e-06, "loss": 0.0526, "step": 1417 }, { "epoch": 0.3226393629124005, "grad_norm": 2.259244357087649, "learning_rate": 1.2372195545056308e-06, "loss": 0.0737, "step": 1418 }, { "epoch": 0.3228668941979522, "grad_norm": 1.260233063486729, "learning_rate": 1.2372015712429725e-06, "loss": 0.0439, "step": 1419 }, { "epoch": 0.323094425483504, "grad_norm": 1.180315594146752, "learning_rate": 1.237183575468035e-06, "loss": 0.0506, "step": 1420 }, { "epoch": 0.32332195676905573, "grad_norm": 0.9777167019905004, "learning_rate": 1.2371655671811866e-06, "loss": 0.0308, "step": 1421 }, { "epoch": 0.3235494880546075, "grad_norm": 0.8734921365662196, "learning_rate": 1.237147546382795e-06, "loss": 0.0281, "step": 1422 }, { "epoch": 0.3237770193401593, "grad_norm": 1.1112810388031804, "learning_rate": 1.237129513073229e-06, "loss": 0.04, "step": 1423 }, { "epoch": 0.32400455062571104, "grad_norm": 1.156890481720808, "learning_rate": 1.2371114672528565e-06, "loss": 0.0593, "step": 1424 }, { "epoch": 0.3242320819112628, "grad_norm": 0.8688160126362955, "learning_rate": 1.2370934089220466e-06, "loss": 0.0239, "step": 1425 }, { "epoch": 0.32445961319681454, "grad_norm": 1.1430764409107144, "learning_rate": 1.2370753380811685e-06, "loss": 0.0343, "step": 1426 }, { "epoch": 0.32468714448236635, "grad_norm": 0.94277640364479, "learning_rate": 1.2370572547305915e-06, "loss": 0.0254, "step": 1427 }, { "epoch": 0.3249146757679181, "grad_norm": 0.8543512868236353, "learning_rate": 1.237039158870685e-06, "loss": 0.0359, "step": 1428 }, { "epoch": 0.32514220705346986, "grad_norm": 0.9268593896369735, "learning_rate": 1.237021050501819e-06, "loss": 0.0462, "step": 1429 }, { "epoch": 0.3253697383390216, "grad_norm": 1.0492131527271786, "learning_rate": 1.2370029296243638e-06, "loss": 0.0483, "step": 1430 }, { "epoch": 0.32559726962457336, "grad_norm": 1.2265145141334126, "learning_rate": 1.2369847962386893e-06, "loss": 0.0605, "step": 1431 }, { "epoch": 0.32582480091012517, "grad_norm": 1.4614421557097579, "learning_rate": 1.2369666503451665e-06, "loss": 0.0792, "step": 1432 }, { "epoch": 0.3260523321956769, "grad_norm": 1.3551154178225278, "learning_rate": 1.2369484919441662e-06, "loss": 0.0504, "step": 1433 }, { "epoch": 0.32627986348122867, "grad_norm": 2.070524215459429, "learning_rate": 1.2369303210360592e-06, "loss": 0.0813, "step": 1434 }, { "epoch": 0.3265073947667804, "grad_norm": 2.674887998532007, "learning_rate": 1.2369121376212174e-06, "loss": 0.1356, "step": 1435 }, { "epoch": 0.3267349260523322, "grad_norm": 2.0059123123271543, "learning_rate": 1.236893941700012e-06, "loss": 0.0815, "step": 1436 }, { "epoch": 0.326962457337884, "grad_norm": 1.614422308046532, "learning_rate": 1.236875733272815e-06, "loss": 0.0632, "step": 1437 }, { "epoch": 0.32718998862343573, "grad_norm": 1.0273415965735717, "learning_rate": 1.236857512339999e-06, "loss": 0.0636, "step": 1438 }, { "epoch": 0.3274175199089875, "grad_norm": 2.063298633572998, "learning_rate": 1.2368392789019356e-06, "loss": 0.1028, "step": 1439 }, { "epoch": 0.32764505119453924, "grad_norm": 1.0762629811135433, "learning_rate": 1.2368210329589982e-06, "loss": 0.0287, "step": 1440 }, { "epoch": 0.327872582480091, "grad_norm": 1.5998871926979066, "learning_rate": 1.236802774511559e-06, "loss": 0.0862, "step": 1441 }, { "epoch": 0.3281001137656428, "grad_norm": 1.4906993606341739, "learning_rate": 1.2367845035599919e-06, "loss": 0.094, "step": 1442 }, { "epoch": 0.32832764505119455, "grad_norm": 0.8173626977187779, "learning_rate": 1.2367662201046698e-06, "loss": 0.0348, "step": 1443 }, { "epoch": 0.3285551763367463, "grad_norm": 1.260244704516005, "learning_rate": 1.2367479241459666e-06, "loss": 0.0388, "step": 1444 }, { "epoch": 0.32878270762229805, "grad_norm": 1.2263377322773583, "learning_rate": 1.2367296156842562e-06, "loss": 0.0514, "step": 1445 }, { "epoch": 0.3290102389078498, "grad_norm": 1.1202578543878008, "learning_rate": 1.2367112947199128e-06, "loss": 0.0332, "step": 1446 }, { "epoch": 0.3292377701934016, "grad_norm": 1.3736971576588957, "learning_rate": 1.2366929612533109e-06, "loss": 0.0539, "step": 1447 }, { "epoch": 0.32946530147895337, "grad_norm": 1.2893217491287035, "learning_rate": 1.2366746152848249e-06, "loss": 0.058, "step": 1448 }, { "epoch": 0.3296928327645051, "grad_norm": 1.4381578160793065, "learning_rate": 1.2366562568148301e-06, "loss": 0.0521, "step": 1449 }, { "epoch": 0.32992036405005687, "grad_norm": 1.6844461549267769, "learning_rate": 1.2366378858437016e-06, "loss": 0.1036, "step": 1450 }, { "epoch": 0.3301478953356086, "grad_norm": 1.397177590337732, "learning_rate": 1.2366195023718152e-06, "loss": 0.0534, "step": 1451 }, { "epoch": 0.33037542662116043, "grad_norm": 1.0456464972065773, "learning_rate": 1.2366011063995458e-06, "loss": 0.0558, "step": 1452 }, { "epoch": 0.3306029579067122, "grad_norm": 1.9835006920707845, "learning_rate": 1.2365826979272702e-06, "loss": 0.1249, "step": 1453 }, { "epoch": 0.33083048919226393, "grad_norm": 2.6314184120557376, "learning_rate": 1.2365642769553644e-06, "loss": 0.1484, "step": 1454 }, { "epoch": 0.3310580204778157, "grad_norm": 1.4200073439085166, "learning_rate": 1.2365458434842046e-06, "loss": 0.0483, "step": 1455 }, { "epoch": 0.33128555176336744, "grad_norm": 1.1143001688120713, "learning_rate": 1.2365273975141675e-06, "loss": 0.0369, "step": 1456 }, { "epoch": 0.33151308304891924, "grad_norm": 1.0377938140431064, "learning_rate": 1.2365089390456308e-06, "loss": 0.0518, "step": 1457 }, { "epoch": 0.331740614334471, "grad_norm": 1.1599848352897109, "learning_rate": 1.236490468078971e-06, "loss": 0.0432, "step": 1458 }, { "epoch": 0.33196814562002275, "grad_norm": 1.5939156162998587, "learning_rate": 1.2364719846145662e-06, "loss": 0.0479, "step": 1459 }, { "epoch": 0.3321956769055745, "grad_norm": 1.0154765270633663, "learning_rate": 1.2364534886527937e-06, "loss": 0.0431, "step": 1460 }, { "epoch": 0.3324232081911263, "grad_norm": 1.8584242211532112, "learning_rate": 1.2364349801940317e-06, "loss": 0.1002, "step": 1461 }, { "epoch": 0.33265073947667806, "grad_norm": 1.3694201211070989, "learning_rate": 1.2364164592386588e-06, "loss": 0.0733, "step": 1462 }, { "epoch": 0.3328782707622298, "grad_norm": 1.9119781832863982, "learning_rate": 1.2363979257870528e-06, "loss": 0.0696, "step": 1463 }, { "epoch": 0.33310580204778156, "grad_norm": 1.9001104021651412, "learning_rate": 1.2363793798395932e-06, "loss": 0.0572, "step": 1464 }, { "epoch": 0.3333333333333333, "grad_norm": 2.498897834454089, "learning_rate": 1.2363608213966588e-06, "loss": 0.0848, "step": 1465 }, { "epoch": 0.3335608646188851, "grad_norm": 1.1620781742745308, "learning_rate": 1.2363422504586286e-06, "loss": 0.0504, "step": 1466 }, { "epoch": 0.3337883959044369, "grad_norm": 1.2505263772670705, "learning_rate": 1.2363236670258827e-06, "loss": 0.05, "step": 1467 }, { "epoch": 0.3340159271899886, "grad_norm": 1.4633039559699816, "learning_rate": 1.2363050710988003e-06, "loss": 0.079, "step": 1468 }, { "epoch": 0.3342434584755404, "grad_norm": 1.2463657051053827, "learning_rate": 1.236286462677762e-06, "loss": 0.0418, "step": 1469 }, { "epoch": 0.33447098976109213, "grad_norm": 1.683583709704665, "learning_rate": 1.236267841763148e-06, "loss": 0.0812, "step": 1470 }, { "epoch": 0.33469852104664394, "grad_norm": 0.7484223072827635, "learning_rate": 1.2362492083553387e-06, "loss": 0.0356, "step": 1471 }, { "epoch": 0.3349260523321957, "grad_norm": 1.639323852335928, "learning_rate": 1.236230562454715e-06, "loss": 0.0401, "step": 1472 }, { "epoch": 0.33515358361774744, "grad_norm": 1.2064100614992055, "learning_rate": 1.2362119040616582e-06, "loss": 0.0677, "step": 1473 }, { "epoch": 0.3353811149032992, "grad_norm": 1.107381501336236, "learning_rate": 1.2361932331765492e-06, "loss": 0.0404, "step": 1474 }, { "epoch": 0.33560864618885095, "grad_norm": 1.4911394528443533, "learning_rate": 1.2361745497997702e-06, "loss": 0.0623, "step": 1475 }, { "epoch": 0.33583617747440275, "grad_norm": 1.2262949506307204, "learning_rate": 1.2361558539317023e-06, "loss": 0.0651, "step": 1476 }, { "epoch": 0.3360637087599545, "grad_norm": 0.9519435590816607, "learning_rate": 1.2361371455727284e-06, "loss": 0.0404, "step": 1477 }, { "epoch": 0.33629124004550626, "grad_norm": 1.5689690961881004, "learning_rate": 1.2361184247232302e-06, "loss": 0.087, "step": 1478 }, { "epoch": 0.336518771331058, "grad_norm": 5.906098295875717, "learning_rate": 1.2360996913835907e-06, "loss": 0.236, "step": 1479 }, { "epoch": 0.33674630261660976, "grad_norm": 0.8654279575626805, "learning_rate": 1.2360809455541928e-06, "loss": 0.0549, "step": 1480 }, { "epoch": 0.33697383390216157, "grad_norm": 1.776832317476583, "learning_rate": 1.2360621872354195e-06, "loss": 0.0453, "step": 1481 }, { "epoch": 0.3372013651877133, "grad_norm": 1.347252653711477, "learning_rate": 1.236043416427654e-06, "loss": 0.0697, "step": 1482 }, { "epoch": 0.33742889647326507, "grad_norm": 1.2864149376371685, "learning_rate": 1.2360246331312804e-06, "loss": 0.0381, "step": 1483 }, { "epoch": 0.3376564277588168, "grad_norm": 1.6142130648155326, "learning_rate": 1.2360058373466821e-06, "loss": 0.0477, "step": 1484 }, { "epoch": 0.3378839590443686, "grad_norm": 2.106670043543402, "learning_rate": 1.2359870290742437e-06, "loss": 0.097, "step": 1485 }, { "epoch": 0.3381114903299204, "grad_norm": 1.2681226361832132, "learning_rate": 1.2359682083143494e-06, "loss": 0.0682, "step": 1486 }, { "epoch": 0.33833902161547214, "grad_norm": 0.8649596148543436, "learning_rate": 1.2359493750673835e-06, "loss": 0.0302, "step": 1487 }, { "epoch": 0.3385665529010239, "grad_norm": 1.3533089587285108, "learning_rate": 1.2359305293337316e-06, "loss": 0.0526, "step": 1488 }, { "epoch": 0.33879408418657564, "grad_norm": 0.7446203717116848, "learning_rate": 1.2359116711137785e-06, "loss": 0.0247, "step": 1489 }, { "epoch": 0.3390216154721274, "grad_norm": 1.1703255026021544, "learning_rate": 1.2358928004079095e-06, "loss": 0.0469, "step": 1490 }, { "epoch": 0.3392491467576792, "grad_norm": 2.2956044408157554, "learning_rate": 1.2358739172165108e-06, "loss": 0.1262, "step": 1491 }, { "epoch": 0.33947667804323095, "grad_norm": 1.425210539470094, "learning_rate": 1.235855021539968e-06, "loss": 0.0539, "step": 1492 }, { "epoch": 0.3397042093287827, "grad_norm": 1.2958224989383627, "learning_rate": 1.2358361133786668e-06, "loss": 0.0417, "step": 1493 }, { "epoch": 0.33993174061433445, "grad_norm": 0.8383879482270241, "learning_rate": 1.2358171927329946e-06, "loss": 0.0411, "step": 1494 }, { "epoch": 0.34015927189988626, "grad_norm": 1.0816879292496617, "learning_rate": 1.2357982596033374e-06, "loss": 0.0321, "step": 1495 }, { "epoch": 0.340386803185438, "grad_norm": 1.2228392090454494, "learning_rate": 1.2357793139900823e-06, "loss": 0.076, "step": 1496 }, { "epoch": 0.34061433447098977, "grad_norm": 0.9376305546071186, "learning_rate": 1.2357603558936168e-06, "loss": 0.0294, "step": 1497 }, { "epoch": 0.3408418657565415, "grad_norm": 2.032138065689827, "learning_rate": 1.2357413853143282e-06, "loss": 0.1073, "step": 1498 }, { "epoch": 0.34106939704209327, "grad_norm": 1.203066065214765, "learning_rate": 1.2357224022526041e-06, "loss": 0.0585, "step": 1499 }, { "epoch": 0.3412969283276451, "grad_norm": 1.5240232506976108, "learning_rate": 1.2357034067088327e-06, "loss": 0.0566, "step": 1500 }, { "epoch": 0.34152445961319683, "grad_norm": 1.4069261769751387, "learning_rate": 1.235684398683402e-06, "loss": 0.0911, "step": 1501 }, { "epoch": 0.3417519908987486, "grad_norm": 1.2794137407821935, "learning_rate": 1.2356653781767009e-06, "loss": 0.0387, "step": 1502 }, { "epoch": 0.34197952218430033, "grad_norm": 1.7747949717550477, "learning_rate": 1.2356463451891174e-06, "loss": 0.0928, "step": 1503 }, { "epoch": 0.3422070534698521, "grad_norm": 1.0081856733209773, "learning_rate": 1.2356272997210414e-06, "loss": 0.0289, "step": 1504 }, { "epoch": 0.3424345847554039, "grad_norm": 1.8655840562261654, "learning_rate": 1.2356082417728612e-06, "loss": 0.0837, "step": 1505 }, { "epoch": 0.34266211604095564, "grad_norm": 0.9436733972164556, "learning_rate": 1.2355891713449672e-06, "loss": 0.0406, "step": 1506 }, { "epoch": 0.3428896473265074, "grad_norm": 1.2922255822019764, "learning_rate": 1.2355700884377485e-06, "loss": 0.0363, "step": 1507 }, { "epoch": 0.34311717861205915, "grad_norm": 1.3543650706739145, "learning_rate": 1.2355509930515958e-06, "loss": 0.0697, "step": 1508 }, { "epoch": 0.3433447098976109, "grad_norm": 1.3352155504337506, "learning_rate": 1.2355318851868987e-06, "loss": 0.0723, "step": 1509 }, { "epoch": 0.3435722411831627, "grad_norm": 0.8330395399945464, "learning_rate": 1.235512764844048e-06, "loss": 0.0427, "step": 1510 }, { "epoch": 0.34379977246871446, "grad_norm": 0.7599055129001592, "learning_rate": 1.2354936320234345e-06, "loss": 0.0275, "step": 1511 }, { "epoch": 0.3440273037542662, "grad_norm": 1.107814773629098, "learning_rate": 1.2354744867254493e-06, "loss": 0.0519, "step": 1512 }, { "epoch": 0.34425483503981796, "grad_norm": 0.9874953818051542, "learning_rate": 1.2354553289504836e-06, "loss": 0.0276, "step": 1513 }, { "epoch": 0.3444823663253697, "grad_norm": 1.4195908498124084, "learning_rate": 1.2354361586989287e-06, "loss": 0.0658, "step": 1514 }, { "epoch": 0.3447098976109215, "grad_norm": 1.1536105740853742, "learning_rate": 1.235416975971177e-06, "loss": 0.0693, "step": 1515 }, { "epoch": 0.3449374288964733, "grad_norm": 1.1208196458039765, "learning_rate": 1.2353977807676205e-06, "loss": 0.0451, "step": 1516 }, { "epoch": 0.345164960182025, "grad_norm": 1.6664132616304859, "learning_rate": 1.2353785730886506e-06, "loss": 0.0659, "step": 1517 }, { "epoch": 0.3453924914675768, "grad_norm": 1.6331880132019159, "learning_rate": 1.235359352934661e-06, "loss": 0.0518, "step": 1518 }, { "epoch": 0.34562002275312853, "grad_norm": 1.9450367879491788, "learning_rate": 1.235340120306044e-06, "loss": 0.108, "step": 1519 }, { "epoch": 0.34584755403868034, "grad_norm": 1.4426224264383893, "learning_rate": 1.2353208752031925e-06, "loss": 0.0598, "step": 1520 }, { "epoch": 0.3460750853242321, "grad_norm": 1.2111901419847948, "learning_rate": 1.2353016176265002e-06, "loss": 0.0304, "step": 1521 }, { "epoch": 0.34630261660978384, "grad_norm": 0.841669446586875, "learning_rate": 1.2352823475763603e-06, "loss": 0.029, "step": 1522 }, { "epoch": 0.3465301478953356, "grad_norm": 1.6324873257088213, "learning_rate": 1.2352630650531672e-06, "loss": 0.0737, "step": 1523 }, { "epoch": 0.34675767918088735, "grad_norm": 1.5631645650045773, "learning_rate": 1.2352437700573147e-06, "loss": 0.0555, "step": 1524 }, { "epoch": 0.34698521046643915, "grad_norm": 1.3689087939433322, "learning_rate": 1.235224462589197e-06, "loss": 0.0699, "step": 1525 }, { "epoch": 0.3472127417519909, "grad_norm": 1.729486346174357, "learning_rate": 1.2352051426492089e-06, "loss": 0.0795, "step": 1526 }, { "epoch": 0.34744027303754266, "grad_norm": 1.2401649575621747, "learning_rate": 1.2351858102377455e-06, "loss": 0.0459, "step": 1527 }, { "epoch": 0.3476678043230944, "grad_norm": 1.1736397484657297, "learning_rate": 1.2351664653552012e-06, "loss": 0.0374, "step": 1528 }, { "epoch": 0.34789533560864616, "grad_norm": 0.8775289996354035, "learning_rate": 1.235147108001972e-06, "loss": 0.0415, "step": 1529 }, { "epoch": 0.34812286689419797, "grad_norm": 1.5150051901632806, "learning_rate": 1.2351277381784532e-06, "loss": 0.0499, "step": 1530 }, { "epoch": 0.3483503981797497, "grad_norm": 0.7367153174028898, "learning_rate": 1.235108355885041e-06, "loss": 0.0405, "step": 1531 }, { "epoch": 0.3485779294653015, "grad_norm": 1.0973156282921959, "learning_rate": 1.2350889611221315e-06, "loss": 0.0593, "step": 1532 }, { "epoch": 0.3488054607508532, "grad_norm": 1.2029539420964899, "learning_rate": 1.2350695538901207e-06, "loss": 0.052, "step": 1533 }, { "epoch": 0.34903299203640503, "grad_norm": 1.2254789545948275, "learning_rate": 1.2350501341894055e-06, "loss": 0.0442, "step": 1534 }, { "epoch": 0.3492605233219568, "grad_norm": 1.173455391781939, "learning_rate": 1.235030702020383e-06, "loss": 0.0627, "step": 1535 }, { "epoch": 0.34948805460750854, "grad_norm": 1.1576335152673878, "learning_rate": 1.23501125738345e-06, "loss": 0.0467, "step": 1536 }, { "epoch": 0.3497155858930603, "grad_norm": 1.0478326366866813, "learning_rate": 1.2349918002790043e-06, "loss": 0.0409, "step": 1537 }, { "epoch": 0.34994311717861204, "grad_norm": 0.7625837183082101, "learning_rate": 1.2349723307074432e-06, "loss": 0.037, "step": 1538 }, { "epoch": 0.35017064846416385, "grad_norm": 1.2469855224758288, "learning_rate": 1.2349528486691648e-06, "loss": 0.0668, "step": 1539 }, { "epoch": 0.3503981797497156, "grad_norm": 1.0826914171234396, "learning_rate": 1.2349333541645672e-06, "loss": 0.0443, "step": 1540 }, { "epoch": 0.35062571103526735, "grad_norm": 1.195949344430507, "learning_rate": 1.2349138471940489e-06, "loss": 0.0362, "step": 1541 }, { "epoch": 0.3508532423208191, "grad_norm": 1.2921680529996942, "learning_rate": 1.2348943277580086e-06, "loss": 0.0534, "step": 1542 }, { "epoch": 0.35108077360637086, "grad_norm": 0.713008372789458, "learning_rate": 1.2348747958568452e-06, "loss": 0.0211, "step": 1543 }, { "epoch": 0.35130830489192266, "grad_norm": 1.5116593555386681, "learning_rate": 1.2348552514909579e-06, "loss": 0.0712, "step": 1544 }, { "epoch": 0.3515358361774744, "grad_norm": 1.0248863840699733, "learning_rate": 1.2348356946607462e-06, "loss": 0.0305, "step": 1545 }, { "epoch": 0.35176336746302617, "grad_norm": 1.1409421918168505, "learning_rate": 1.2348161253666096e-06, "loss": 0.0529, "step": 1546 }, { "epoch": 0.3519908987485779, "grad_norm": 1.768834998548112, "learning_rate": 1.2347965436089484e-06, "loss": 0.0888, "step": 1547 }, { "epoch": 0.35221843003412967, "grad_norm": 1.4622739241779608, "learning_rate": 1.2347769493881625e-06, "loss": 0.0466, "step": 1548 }, { "epoch": 0.3524459613196815, "grad_norm": 0.6584121088377994, "learning_rate": 1.2347573427046527e-06, "loss": 0.0348, "step": 1549 }, { "epoch": 0.35267349260523323, "grad_norm": 2.2141006455976604, "learning_rate": 1.2347377235588193e-06, "loss": 0.1474, "step": 1550 }, { "epoch": 0.352901023890785, "grad_norm": 1.19254071976006, "learning_rate": 1.2347180919510637e-06, "loss": 0.0511, "step": 1551 }, { "epoch": 0.35312855517633673, "grad_norm": 1.4469931390196393, "learning_rate": 1.234698447881787e-06, "loss": 0.0457, "step": 1552 }, { "epoch": 0.3533560864618885, "grad_norm": 1.0953678059195961, "learning_rate": 1.2346787913513904e-06, "loss": 0.0351, "step": 1553 }, { "epoch": 0.3535836177474403, "grad_norm": 1.2220799955288675, "learning_rate": 1.234659122360276e-06, "loss": 0.0605, "step": 1554 }, { "epoch": 0.35381114903299204, "grad_norm": 0.7125746349183502, "learning_rate": 1.2346394409088457e-06, "loss": 0.029, "step": 1555 }, { "epoch": 0.3540386803185438, "grad_norm": 0.7700035617297257, "learning_rate": 1.2346197469975016e-06, "loss": 0.0399, "step": 1556 }, { "epoch": 0.35426621160409555, "grad_norm": 1.4287678945741027, "learning_rate": 1.2346000406266466e-06, "loss": 0.0849, "step": 1557 }, { "epoch": 0.3544937428896473, "grad_norm": 1.4371209706481878, "learning_rate": 1.2345803217966829e-06, "loss": 0.0497, "step": 1558 }, { "epoch": 0.3547212741751991, "grad_norm": 1.5348631747891068, "learning_rate": 1.2345605905080141e-06, "loss": 0.0625, "step": 1559 }, { "epoch": 0.35494880546075086, "grad_norm": 1.8970470757470723, "learning_rate": 1.234540846761043e-06, "loss": 0.0613, "step": 1560 }, { "epoch": 0.3551763367463026, "grad_norm": 2.2382508192831416, "learning_rate": 1.2345210905561733e-06, "loss": 0.0889, "step": 1561 }, { "epoch": 0.35540386803185436, "grad_norm": 0.9465351044370347, "learning_rate": 1.2345013218938089e-06, "loss": 0.0478, "step": 1562 }, { "epoch": 0.3556313993174061, "grad_norm": 0.7802531004508686, "learning_rate": 1.2344815407743537e-06, "loss": 0.0265, "step": 1563 }, { "epoch": 0.3558589306029579, "grad_norm": 1.3777470813147898, "learning_rate": 1.2344617471982119e-06, "loss": 0.0595, "step": 1564 }, { "epoch": 0.3560864618885097, "grad_norm": 1.9091835939621151, "learning_rate": 1.2344419411657883e-06, "loss": 0.0652, "step": 1565 }, { "epoch": 0.3563139931740614, "grad_norm": 1.8891000138650136, "learning_rate": 1.2344221226774874e-06, "loss": 0.0894, "step": 1566 }, { "epoch": 0.3565415244596132, "grad_norm": 2.3333751351582968, "learning_rate": 1.2344022917337147e-06, "loss": 0.1068, "step": 1567 }, { "epoch": 0.356769055745165, "grad_norm": 1.5543425365273886, "learning_rate": 1.234382448334875e-06, "loss": 0.0549, "step": 1568 }, { "epoch": 0.35699658703071674, "grad_norm": 0.7049933258516752, "learning_rate": 1.2343625924813741e-06, "loss": 0.024, "step": 1569 }, { "epoch": 0.3572241183162685, "grad_norm": 1.7675084632445353, "learning_rate": 1.234342724173618e-06, "loss": 0.0485, "step": 1570 }, { "epoch": 0.35745164960182024, "grad_norm": 1.3808807084264496, "learning_rate": 1.2343228434120124e-06, "loss": 0.067, "step": 1571 }, { "epoch": 0.357679180887372, "grad_norm": 1.2453962587688918, "learning_rate": 1.2343029501969638e-06, "loss": 0.0524, "step": 1572 }, { "epoch": 0.3579067121729238, "grad_norm": 1.343656065138551, "learning_rate": 1.2342830445288788e-06, "loss": 0.0699, "step": 1573 }, { "epoch": 0.35813424345847555, "grad_norm": 1.0013673586639926, "learning_rate": 1.2342631264081643e-06, "loss": 0.051, "step": 1574 }, { "epoch": 0.3583617747440273, "grad_norm": 1.1273372730897988, "learning_rate": 1.234243195835227e-06, "loss": 0.0564, "step": 1575 }, { "epoch": 0.35858930602957906, "grad_norm": 1.3457068038564708, "learning_rate": 1.234223252810475e-06, "loss": 0.0445, "step": 1576 }, { "epoch": 0.3588168373151308, "grad_norm": 1.1036016217002649, "learning_rate": 1.2342032973343152e-06, "loss": 0.0494, "step": 1577 }, { "epoch": 0.3590443686006826, "grad_norm": 1.1088066035106612, "learning_rate": 1.2341833294071558e-06, "loss": 0.0483, "step": 1578 }, { "epoch": 0.35927189988623437, "grad_norm": 1.81529439176539, "learning_rate": 1.2341633490294046e-06, "loss": 0.1067, "step": 1579 }, { "epoch": 0.3594994311717861, "grad_norm": 1.7525830328724732, "learning_rate": 1.2341433562014705e-06, "loss": 0.0977, "step": 1580 }, { "epoch": 0.3597269624573379, "grad_norm": 1.1410352301000704, "learning_rate": 1.2341233509237616e-06, "loss": 0.0383, "step": 1581 }, { "epoch": 0.3599544937428896, "grad_norm": 1.3291484690769528, "learning_rate": 1.234103333196687e-06, "loss": 0.0414, "step": 1582 }, { "epoch": 0.36018202502844143, "grad_norm": 1.8297830185646766, "learning_rate": 1.2340833030206558e-06, "loss": 0.0578, "step": 1583 }, { "epoch": 0.3604095563139932, "grad_norm": 1.4265200616837415, "learning_rate": 1.2340632603960774e-06, "loss": 0.0533, "step": 1584 }, { "epoch": 0.36063708759954494, "grad_norm": 1.5565200581725265, "learning_rate": 1.2340432053233615e-06, "loss": 0.0558, "step": 1585 }, { "epoch": 0.3608646188850967, "grad_norm": 0.8252310606061587, "learning_rate": 1.2340231378029177e-06, "loss": 0.0415, "step": 1586 }, { "epoch": 0.36109215017064844, "grad_norm": 1.1625630250416708, "learning_rate": 1.2340030578351564e-06, "loss": 0.0536, "step": 1587 }, { "epoch": 0.36131968145620025, "grad_norm": 1.4833717042031473, "learning_rate": 1.2339829654204878e-06, "loss": 0.054, "step": 1588 }, { "epoch": 0.361547212741752, "grad_norm": 1.2867709045464473, "learning_rate": 1.2339628605593229e-06, "loss": 0.0486, "step": 1589 }, { "epoch": 0.36177474402730375, "grad_norm": 1.7822365018575843, "learning_rate": 1.2339427432520722e-06, "loss": 0.0538, "step": 1590 }, { "epoch": 0.3620022753128555, "grad_norm": 1.7621725916894804, "learning_rate": 1.2339226134991471e-06, "loss": 0.0742, "step": 1591 }, { "epoch": 0.36222980659840726, "grad_norm": 2.2648964065061223, "learning_rate": 1.2339024713009592e-06, "loss": 0.0702, "step": 1592 }, { "epoch": 0.36245733788395906, "grad_norm": 1.3626520454398885, "learning_rate": 1.2338823166579197e-06, "loss": 0.0896, "step": 1593 }, { "epoch": 0.3626848691695108, "grad_norm": 1.3432892967430223, "learning_rate": 1.2338621495704409e-06, "loss": 0.0442, "step": 1594 }, { "epoch": 0.36291240045506257, "grad_norm": 1.920266034051075, "learning_rate": 1.2338419700389349e-06, "loss": 0.0949, "step": 1595 }, { "epoch": 0.3631399317406143, "grad_norm": 1.556412496157174, "learning_rate": 1.2338217780638137e-06, "loss": 0.0496, "step": 1596 }, { "epoch": 0.36336746302616607, "grad_norm": 2.160817778943729, "learning_rate": 1.2338015736454908e-06, "loss": 0.1306, "step": 1597 }, { "epoch": 0.3635949943117179, "grad_norm": 1.3086877468472204, "learning_rate": 1.2337813567843784e-06, "loss": 0.0441, "step": 1598 }, { "epoch": 0.36382252559726963, "grad_norm": 1.1991943484955294, "learning_rate": 1.2337611274808901e-06, "loss": 0.0684, "step": 1599 }, { "epoch": 0.3640500568828214, "grad_norm": 1.223483082152576, "learning_rate": 1.2337408857354394e-06, "loss": 0.0464, "step": 1600 }, { "epoch": 0.36427758816837313, "grad_norm": 0.8679079039579355, "learning_rate": 1.2337206315484396e-06, "loss": 0.036, "step": 1601 }, { "epoch": 0.36450511945392494, "grad_norm": 1.1879547293476054, "learning_rate": 1.2337003649203049e-06, "loss": 0.0481, "step": 1602 }, { "epoch": 0.3647326507394767, "grad_norm": 1.3048080561875424, "learning_rate": 1.2336800858514498e-06, "loss": 0.057, "step": 1603 }, { "epoch": 0.36496018202502845, "grad_norm": 1.3761316306984896, "learning_rate": 1.2336597943422883e-06, "loss": 0.0806, "step": 1604 }, { "epoch": 0.3651877133105802, "grad_norm": 1.3617767409383836, "learning_rate": 1.2336394903932353e-06, "loss": 0.0636, "step": 1605 }, { "epoch": 0.36541524459613195, "grad_norm": 0.8022215628059131, "learning_rate": 1.233619174004706e-06, "loss": 0.0419, "step": 1606 }, { "epoch": 0.36564277588168376, "grad_norm": 1.0273812494717272, "learning_rate": 1.233598845177115e-06, "loss": 0.052, "step": 1607 }, { "epoch": 0.3658703071672355, "grad_norm": 1.262245877196861, "learning_rate": 1.2335785039108787e-06, "loss": 0.0494, "step": 1608 }, { "epoch": 0.36609783845278726, "grad_norm": 1.32897975384147, "learning_rate": 1.233558150206412e-06, "loss": 0.069, "step": 1609 }, { "epoch": 0.366325369738339, "grad_norm": 1.2103326320958596, "learning_rate": 1.2335377840641314e-06, "loss": 0.0388, "step": 1610 }, { "epoch": 0.36655290102389076, "grad_norm": 1.6475678381055654, "learning_rate": 1.233517405484453e-06, "loss": 0.0584, "step": 1611 }, { "epoch": 0.3667804323094426, "grad_norm": 0.8570759088496013, "learning_rate": 1.2334970144677929e-06, "loss": 0.0268, "step": 1612 }, { "epoch": 0.3670079635949943, "grad_norm": 1.2115212249577887, "learning_rate": 1.2334766110145684e-06, "loss": 0.0492, "step": 1613 }, { "epoch": 0.3672354948805461, "grad_norm": 1.6505586244119963, "learning_rate": 1.2334561951251967e-06, "loss": 0.0696, "step": 1614 }, { "epoch": 0.36746302616609783, "grad_norm": 1.0033818903908105, "learning_rate": 1.2334357668000943e-06, "loss": 0.0485, "step": 1615 }, { "epoch": 0.3676905574516496, "grad_norm": 0.8359701120507522, "learning_rate": 1.2334153260396795e-06, "loss": 0.0428, "step": 1616 }, { "epoch": 0.3679180887372014, "grad_norm": 1.3307257141956392, "learning_rate": 1.2333948728443692e-06, "loss": 0.0609, "step": 1617 }, { "epoch": 0.36814562002275314, "grad_norm": 0.9161807224363929, "learning_rate": 1.2333744072145824e-06, "loss": 0.0357, "step": 1618 }, { "epoch": 0.3683731513083049, "grad_norm": 1.5055305786265103, "learning_rate": 1.2333539291507365e-06, "loss": 0.1025, "step": 1619 }, { "epoch": 0.36860068259385664, "grad_norm": 1.936271932194028, "learning_rate": 1.2333334386532507e-06, "loss": 0.073, "step": 1620 }, { "epoch": 0.3688282138794084, "grad_norm": 0.9636982724990819, "learning_rate": 1.2333129357225434e-06, "loss": 0.0366, "step": 1621 }, { "epoch": 0.3690557451649602, "grad_norm": 1.4864238412613773, "learning_rate": 1.2332924203590341e-06, "loss": 0.0579, "step": 1622 }, { "epoch": 0.36928327645051195, "grad_norm": 1.1141779480557945, "learning_rate": 1.2332718925631414e-06, "loss": 0.0626, "step": 1623 }, { "epoch": 0.3695108077360637, "grad_norm": 1.4180072004359519, "learning_rate": 1.2332513523352853e-06, "loss": 0.0583, "step": 1624 }, { "epoch": 0.36973833902161546, "grad_norm": 1.3854251776786142, "learning_rate": 1.2332307996758854e-06, "loss": 0.0499, "step": 1625 }, { "epoch": 0.3699658703071672, "grad_norm": 2.0139670039119766, "learning_rate": 1.233210234585362e-06, "loss": 0.083, "step": 1626 }, { "epoch": 0.370193401592719, "grad_norm": 1.951694104084049, "learning_rate": 1.2331896570641354e-06, "loss": 0.08, "step": 1627 }, { "epoch": 0.37042093287827077, "grad_norm": 1.2223571341809563, "learning_rate": 1.233169067112626e-06, "loss": 0.0515, "step": 1628 }, { "epoch": 0.3706484641638225, "grad_norm": 1.845536817940592, "learning_rate": 1.2331484647312545e-06, "loss": 0.1018, "step": 1629 }, { "epoch": 0.3708759954493743, "grad_norm": 1.2829067703722639, "learning_rate": 1.2331278499204423e-06, "loss": 0.0577, "step": 1630 }, { "epoch": 0.371103526734926, "grad_norm": 1.5585929030011767, "learning_rate": 1.2331072226806107e-06, "loss": 0.0466, "step": 1631 }, { "epoch": 0.37133105802047783, "grad_norm": 1.6427062066371947, "learning_rate": 1.233086583012181e-06, "loss": 0.0855, "step": 1632 }, { "epoch": 0.3715585893060296, "grad_norm": 1.1255582962725759, "learning_rate": 1.233065930915575e-06, "loss": 0.0514, "step": 1633 }, { "epoch": 0.37178612059158134, "grad_norm": 1.1524468415244595, "learning_rate": 1.2330452663912155e-06, "loss": 0.0489, "step": 1634 }, { "epoch": 0.3720136518771331, "grad_norm": 3.45030750590545, "learning_rate": 1.233024589439524e-06, "loss": 0.0984, "step": 1635 }, { "epoch": 0.3722411831626849, "grad_norm": 1.3099017670753825, "learning_rate": 1.2330039000609233e-06, "loss": 0.0402, "step": 1636 }, { "epoch": 0.37246871444823665, "grad_norm": 1.9185688290740377, "learning_rate": 1.2329831982558365e-06, "loss": 0.0612, "step": 1637 }, { "epoch": 0.3726962457337884, "grad_norm": 1.0515979623712708, "learning_rate": 1.2329624840246867e-06, "loss": 0.0341, "step": 1638 }, { "epoch": 0.37292377701934015, "grad_norm": 1.0104966308830603, "learning_rate": 1.2329417573678974e-06, "loss": 0.0363, "step": 1639 }, { "epoch": 0.3731513083048919, "grad_norm": 0.8767867018961214, "learning_rate": 1.2329210182858915e-06, "loss": 0.0358, "step": 1640 }, { "epoch": 0.3733788395904437, "grad_norm": 1.2786801769613858, "learning_rate": 1.2329002667790937e-06, "loss": 0.0727, "step": 1641 }, { "epoch": 0.37360637087599546, "grad_norm": 1.0430455326740677, "learning_rate": 1.2328795028479275e-06, "loss": 0.0293, "step": 1642 }, { "epoch": 0.3738339021615472, "grad_norm": 1.3214304805194543, "learning_rate": 1.2328587264928176e-06, "loss": 0.0487, "step": 1643 }, { "epoch": 0.37406143344709897, "grad_norm": 2.17352926519868, "learning_rate": 1.2328379377141885e-06, "loss": 0.1179, "step": 1644 }, { "epoch": 0.3742889647326507, "grad_norm": 1.6384210150692329, "learning_rate": 1.2328171365124655e-06, "loss": 0.0356, "step": 1645 }, { "epoch": 0.3745164960182025, "grad_norm": 1.0746654458597755, "learning_rate": 1.2327963228880733e-06, "loss": 0.0348, "step": 1646 }, { "epoch": 0.3747440273037543, "grad_norm": 1.1554129673310896, "learning_rate": 1.2327754968414372e-06, "loss": 0.0559, "step": 1647 }, { "epoch": 0.37497155858930603, "grad_norm": 1.381713868304537, "learning_rate": 1.232754658372983e-06, "loss": 0.0429, "step": 1648 }, { "epoch": 0.3751990898748578, "grad_norm": 1.0461255300086107, "learning_rate": 1.2327338074831366e-06, "loss": 0.0498, "step": 1649 }, { "epoch": 0.37542662116040953, "grad_norm": 0.9696935937818977, "learning_rate": 1.2327129441723242e-06, "loss": 0.0282, "step": 1650 }, { "epoch": 0.37565415244596134, "grad_norm": 2.070008088112287, "learning_rate": 1.2326920684409724e-06, "loss": 0.0936, "step": 1651 }, { "epoch": 0.3758816837315131, "grad_norm": 1.9843357113833955, "learning_rate": 1.2326711802895077e-06, "loss": 0.068, "step": 1652 }, { "epoch": 0.37610921501706485, "grad_norm": 1.3824487035448465, "learning_rate": 1.2326502797183568e-06, "loss": 0.0609, "step": 1653 }, { "epoch": 0.3763367463026166, "grad_norm": 1.4651223595589276, "learning_rate": 1.2326293667279472e-06, "loss": 0.0448, "step": 1654 }, { "epoch": 0.37656427758816835, "grad_norm": 0.9774603013378036, "learning_rate": 1.232608441318706e-06, "loss": 0.0282, "step": 1655 }, { "epoch": 0.37679180887372016, "grad_norm": 1.1003503115295237, "learning_rate": 1.232587503491061e-06, "loss": 0.0447, "step": 1656 }, { "epoch": 0.3770193401592719, "grad_norm": 1.8648740699841149, "learning_rate": 1.2325665532454403e-06, "loss": 0.0879, "step": 1657 }, { "epoch": 0.37724687144482366, "grad_norm": 1.4508296738594828, "learning_rate": 1.2325455905822719e-06, "loss": 0.048, "step": 1658 }, { "epoch": 0.3774744027303754, "grad_norm": 1.5446190575894356, "learning_rate": 1.2325246155019844e-06, "loss": 0.0713, "step": 1659 }, { "epoch": 0.37770193401592717, "grad_norm": 0.657557765969774, "learning_rate": 1.2325036280050063e-06, "loss": 0.0252, "step": 1660 }, { "epoch": 0.377929465301479, "grad_norm": 0.7023883517908608, "learning_rate": 1.2324826280917664e-06, "loss": 0.0222, "step": 1661 }, { "epoch": 0.3781569965870307, "grad_norm": 1.2534079220919763, "learning_rate": 1.2324616157626943e-06, "loss": 0.0467, "step": 1662 }, { "epoch": 0.3783845278725825, "grad_norm": 1.4009224674260483, "learning_rate": 1.2324405910182195e-06, "loss": 0.0796, "step": 1663 }, { "epoch": 0.37861205915813423, "grad_norm": 2.444160860575137, "learning_rate": 1.2324195538587713e-06, "loss": 0.0772, "step": 1664 }, { "epoch": 0.378839590443686, "grad_norm": 1.1670387195512752, "learning_rate": 1.23239850428478e-06, "loss": 0.0648, "step": 1665 }, { "epoch": 0.3790671217292378, "grad_norm": 1.1973819971714186, "learning_rate": 1.2323774422966756e-06, "loss": 0.0466, "step": 1666 }, { "epoch": 0.37929465301478954, "grad_norm": 2.3889623346599484, "learning_rate": 1.2323563678948885e-06, "loss": 0.0995, "step": 1667 }, { "epoch": 0.3795221843003413, "grad_norm": 1.0719186425987624, "learning_rate": 1.2323352810798498e-06, "loss": 0.0434, "step": 1668 }, { "epoch": 0.37974971558589304, "grad_norm": 1.5135458293019917, "learning_rate": 1.23231418185199e-06, "loss": 0.1, "step": 1669 }, { "epoch": 0.3799772468714448, "grad_norm": 1.6715828908867492, "learning_rate": 1.2322930702117406e-06, "loss": 0.0654, "step": 1670 }, { "epoch": 0.3802047781569966, "grad_norm": 1.1117468704392939, "learning_rate": 1.232271946159533e-06, "loss": 0.0391, "step": 1671 }, { "epoch": 0.38043230944254836, "grad_norm": 1.7072937667796886, "learning_rate": 1.2322508096957992e-06, "loss": 0.0597, "step": 1672 }, { "epoch": 0.3806598407281001, "grad_norm": 2.695721182769726, "learning_rate": 1.2322296608209709e-06, "loss": 0.099, "step": 1673 }, { "epoch": 0.38088737201365186, "grad_norm": 2.29172385709693, "learning_rate": 1.2322084995354805e-06, "loss": 0.0939, "step": 1674 }, { "epoch": 0.38111490329920367, "grad_norm": 1.1798892776179337, "learning_rate": 1.2321873258397602e-06, "loss": 0.0544, "step": 1675 }, { "epoch": 0.3813424345847554, "grad_norm": 1.4030630144768808, "learning_rate": 1.232166139734243e-06, "loss": 0.0533, "step": 1676 }, { "epoch": 0.38156996587030717, "grad_norm": 1.2796792180280614, "learning_rate": 1.2321449412193622e-06, "loss": 0.0607, "step": 1677 }, { "epoch": 0.3817974971558589, "grad_norm": 1.3330548972713963, "learning_rate": 1.2321237302955505e-06, "loss": 0.0474, "step": 1678 }, { "epoch": 0.3820250284414107, "grad_norm": 1.0575562905738431, "learning_rate": 1.2321025069632416e-06, "loss": 0.0495, "step": 1679 }, { "epoch": 0.3822525597269625, "grad_norm": 1.1768156879508973, "learning_rate": 1.2320812712228694e-06, "loss": 0.0451, "step": 1680 }, { "epoch": 0.38248009101251423, "grad_norm": 0.7767797500459915, "learning_rate": 1.2320600230748677e-06, "loss": 0.0347, "step": 1681 }, { "epoch": 0.382707622298066, "grad_norm": 1.0249897852947059, "learning_rate": 1.232038762519671e-06, "loss": 0.0337, "step": 1682 }, { "epoch": 0.38293515358361774, "grad_norm": 1.6718567212147661, "learning_rate": 1.2320174895577138e-06, "loss": 0.1132, "step": 1683 }, { "epoch": 0.3831626848691695, "grad_norm": 0.994384807721514, "learning_rate": 1.2319962041894307e-06, "loss": 0.0362, "step": 1684 }, { "epoch": 0.3833902161547213, "grad_norm": 2.303334030981133, "learning_rate": 1.2319749064152569e-06, "loss": 0.1277, "step": 1685 }, { "epoch": 0.38361774744027305, "grad_norm": 1.609337504264491, "learning_rate": 1.2319535962356277e-06, "loss": 0.0575, "step": 1686 }, { "epoch": 0.3838452787258248, "grad_norm": 1.2011777956026055, "learning_rate": 1.2319322736509784e-06, "loss": 0.0552, "step": 1687 }, { "epoch": 0.38407281001137655, "grad_norm": 1.7322047315095004, "learning_rate": 1.2319109386617452e-06, "loss": 0.0753, "step": 1688 }, { "epoch": 0.3843003412969283, "grad_norm": 1.4045767611712419, "learning_rate": 1.2318895912683638e-06, "loss": 0.0726, "step": 1689 }, { "epoch": 0.3845278725824801, "grad_norm": 0.8734396508754521, "learning_rate": 1.2318682314712706e-06, "loss": 0.029, "step": 1690 }, { "epoch": 0.38475540386803186, "grad_norm": 3.125984298027021, "learning_rate": 1.2318468592709022e-06, "loss": 0.0794, "step": 1691 }, { "epoch": 0.3849829351535836, "grad_norm": 0.9959306519128199, "learning_rate": 1.2318254746676954e-06, "loss": 0.0504, "step": 1692 }, { "epoch": 0.38521046643913537, "grad_norm": 1.0411287511380087, "learning_rate": 1.2318040776620872e-06, "loss": 0.0423, "step": 1693 }, { "epoch": 0.3854379977246871, "grad_norm": 1.299794772066647, "learning_rate": 1.231782668254515e-06, "loss": 0.0633, "step": 1694 }, { "epoch": 0.3856655290102389, "grad_norm": 1.5302626083298043, "learning_rate": 1.2317612464454161e-06, "loss": 0.0581, "step": 1695 }, { "epoch": 0.3858930602957907, "grad_norm": 1.3051944839085843, "learning_rate": 1.2317398122352289e-06, "loss": 0.0524, "step": 1696 }, { "epoch": 0.38612059158134243, "grad_norm": 2.5811565293035166, "learning_rate": 1.2317183656243912e-06, "loss": 0.1304, "step": 1697 }, { "epoch": 0.3863481228668942, "grad_norm": 0.7679176052329142, "learning_rate": 1.231696906613341e-06, "loss": 0.0311, "step": 1698 }, { "epoch": 0.38657565415244594, "grad_norm": 1.2458183734125623, "learning_rate": 1.2316754352025173e-06, "loss": 0.0601, "step": 1699 }, { "epoch": 0.38680318543799774, "grad_norm": 1.4002525637777081, "learning_rate": 1.2316539513923585e-06, "loss": 0.052, "step": 1700 }, { "epoch": 0.3870307167235495, "grad_norm": 1.40244177955916, "learning_rate": 1.2316324551833042e-06, "loss": 0.0798, "step": 1701 }, { "epoch": 0.38725824800910125, "grad_norm": 0.636865674778868, "learning_rate": 1.2316109465757934e-06, "loss": 0.0236, "step": 1702 }, { "epoch": 0.387485779294653, "grad_norm": 0.911548468016125, "learning_rate": 1.231589425570266e-06, "loss": 0.048, "step": 1703 }, { "epoch": 0.38771331058020475, "grad_norm": 0.916105640198358, "learning_rate": 1.2315678921671615e-06, "loss": 0.0559, "step": 1704 }, { "epoch": 0.38794084186575656, "grad_norm": 1.1465421388050945, "learning_rate": 1.2315463463669202e-06, "loss": 0.0561, "step": 1705 }, { "epoch": 0.3881683731513083, "grad_norm": 1.441271671684643, "learning_rate": 1.2315247881699825e-06, "loss": 0.0507, "step": 1706 }, { "epoch": 0.38839590443686006, "grad_norm": 1.5270606085562182, "learning_rate": 1.2315032175767887e-06, "loss": 0.049, "step": 1707 }, { "epoch": 0.3886234357224118, "grad_norm": 1.5250540029402313, "learning_rate": 1.23148163458778e-06, "loss": 0.0597, "step": 1708 }, { "epoch": 0.3888509670079636, "grad_norm": 1.4207097878273431, "learning_rate": 1.2314600392033974e-06, "loss": 0.0676, "step": 1709 }, { "epoch": 0.3890784982935154, "grad_norm": 1.5896950089365356, "learning_rate": 1.2314384314240824e-06, "loss": 0.0617, "step": 1710 }, { "epoch": 0.3893060295790671, "grad_norm": 2.036043003063475, "learning_rate": 1.2314168112502765e-06, "loss": 0.1324, "step": 1711 }, { "epoch": 0.3895335608646189, "grad_norm": 1.1640099693184898, "learning_rate": 1.2313951786824213e-06, "loss": 0.0634, "step": 1712 }, { "epoch": 0.38976109215017063, "grad_norm": 1.4865385157328637, "learning_rate": 1.2313735337209593e-06, "loss": 0.0584, "step": 1713 }, { "epoch": 0.38998862343572244, "grad_norm": 1.252450965977985, "learning_rate": 1.231351876366333e-06, "loss": 0.0368, "step": 1714 }, { "epoch": 0.3902161547212742, "grad_norm": 1.4371658281059463, "learning_rate": 1.2313302066189846e-06, "loss": 0.0557, "step": 1715 }, { "epoch": 0.39044368600682594, "grad_norm": 1.579902726372128, "learning_rate": 1.2313085244793573e-06, "loss": 0.063, "step": 1716 }, { "epoch": 0.3906712172923777, "grad_norm": 2.3512912102608277, "learning_rate": 1.2312868299478944e-06, "loss": 0.0774, "step": 1717 }, { "epoch": 0.39089874857792944, "grad_norm": 1.2308435632473214, "learning_rate": 1.2312651230250387e-06, "loss": 0.0585, "step": 1718 }, { "epoch": 0.39112627986348125, "grad_norm": 1.0173462925036059, "learning_rate": 1.2312434037112345e-06, "loss": 0.0306, "step": 1719 }, { "epoch": 0.391353811149033, "grad_norm": 1.3611629215931131, "learning_rate": 1.2312216720069251e-06, "loss": 0.0822, "step": 1720 }, { "epoch": 0.39158134243458476, "grad_norm": 2.0854012174946135, "learning_rate": 1.2311999279125552e-06, "loss": 0.0877, "step": 1721 }, { "epoch": 0.3918088737201365, "grad_norm": 0.9865613588959612, "learning_rate": 1.2311781714285689e-06, "loss": 0.0557, "step": 1722 }, { "epoch": 0.39203640500568826, "grad_norm": 1.4304461856241661, "learning_rate": 1.231156402555411e-06, "loss": 0.0586, "step": 1723 }, { "epoch": 0.39226393629124007, "grad_norm": 1.0337361927182938, "learning_rate": 1.2311346212935262e-06, "loss": 0.0433, "step": 1724 }, { "epoch": 0.3924914675767918, "grad_norm": 0.7476757498111144, "learning_rate": 1.23111282764336e-06, "loss": 0.0303, "step": 1725 }, { "epoch": 0.39271899886234357, "grad_norm": 1.1404795389862592, "learning_rate": 1.2310910216053576e-06, "loss": 0.0518, "step": 1726 }, { "epoch": 0.3929465301478953, "grad_norm": 1.6483693490580467, "learning_rate": 1.2310692031799646e-06, "loss": 0.0754, "step": 1727 }, { "epoch": 0.3931740614334471, "grad_norm": 1.0959398843175312, "learning_rate": 1.2310473723676272e-06, "loss": 0.0444, "step": 1728 }, { "epoch": 0.3934015927189989, "grad_norm": 4.125333379715717, "learning_rate": 1.2310255291687913e-06, "loss": 0.0891, "step": 1729 }, { "epoch": 0.39362912400455063, "grad_norm": 1.4792691340855277, "learning_rate": 1.2310036735839037e-06, "loss": 0.0414, "step": 1730 }, { "epoch": 0.3938566552901024, "grad_norm": 1.4532148859913319, "learning_rate": 1.2309818056134108e-06, "loss": 0.0631, "step": 1731 }, { "epoch": 0.39408418657565414, "grad_norm": 0.7598305700170358, "learning_rate": 1.2309599252577593e-06, "loss": 0.026, "step": 1732 }, { "epoch": 0.3943117178612059, "grad_norm": 1.3606922738087333, "learning_rate": 1.230938032517397e-06, "loss": 0.0845, "step": 1733 }, { "epoch": 0.3945392491467577, "grad_norm": 0.9458332790402086, "learning_rate": 1.2309161273927708e-06, "loss": 0.0405, "step": 1734 }, { "epoch": 0.39476678043230945, "grad_norm": 1.338928802144235, "learning_rate": 1.2308942098843289e-06, "loss": 0.0506, "step": 1735 }, { "epoch": 0.3949943117178612, "grad_norm": 0.9875822099203543, "learning_rate": 1.2308722799925188e-06, "loss": 0.0474, "step": 1736 }, { "epoch": 0.39522184300341295, "grad_norm": 1.144643764479203, "learning_rate": 1.2308503377177887e-06, "loss": 0.0482, "step": 1737 }, { "epoch": 0.3954493742889647, "grad_norm": 1.2433153307693834, "learning_rate": 1.2308283830605877e-06, "loss": 0.0382, "step": 1738 }, { "epoch": 0.3956769055745165, "grad_norm": 0.8868244265843614, "learning_rate": 1.230806416021364e-06, "loss": 0.0312, "step": 1739 }, { "epoch": 0.39590443686006827, "grad_norm": 1.2733746862834583, "learning_rate": 1.2307844366005665e-06, "loss": 0.0558, "step": 1740 }, { "epoch": 0.39613196814562, "grad_norm": 1.5929660791826068, "learning_rate": 1.2307624447986446e-06, "loss": 0.0939, "step": 1741 }, { "epoch": 0.39635949943117177, "grad_norm": 1.6706588043136787, "learning_rate": 1.2307404406160476e-06, "loss": 0.0856, "step": 1742 }, { "epoch": 0.3965870307167236, "grad_norm": 1.4925539377680157, "learning_rate": 1.2307184240532255e-06, "loss": 0.051, "step": 1743 }, { "epoch": 0.39681456200227533, "grad_norm": 0.8314484619002069, "learning_rate": 1.2306963951106283e-06, "loss": 0.0398, "step": 1744 }, { "epoch": 0.3970420932878271, "grad_norm": 1.8438134884760746, "learning_rate": 1.2306743537887058e-06, "loss": 0.1539, "step": 1745 }, { "epoch": 0.39726962457337883, "grad_norm": 1.1012953571620363, "learning_rate": 1.2306523000879086e-06, "loss": 0.0395, "step": 1746 }, { "epoch": 0.3974971558589306, "grad_norm": 1.2928925052093003, "learning_rate": 1.230630234008688e-06, "loss": 0.0807, "step": 1747 }, { "epoch": 0.3977246871444824, "grad_norm": 0.9015308252174471, "learning_rate": 1.2306081555514942e-06, "loss": 0.0286, "step": 1748 }, { "epoch": 0.39795221843003414, "grad_norm": 1.4467952739416505, "learning_rate": 1.2305860647167792e-06, "loss": 0.0557, "step": 1749 }, { "epoch": 0.3981797497155859, "grad_norm": 0.8808019982734657, "learning_rate": 1.2305639615049938e-06, "loss": 0.033, "step": 1750 }, { "epoch": 0.39840728100113765, "grad_norm": 1.3896950910533592, "learning_rate": 1.2305418459165902e-06, "loss": 0.0425, "step": 1751 }, { "epoch": 0.3986348122866894, "grad_norm": 1.0876862572580541, "learning_rate": 1.2305197179520203e-06, "loss": 0.0529, "step": 1752 }, { "epoch": 0.3988623435722412, "grad_norm": 1.3270997207033957, "learning_rate": 1.2304975776117362e-06, "loss": 0.0539, "step": 1753 }, { "epoch": 0.39908987485779296, "grad_norm": 0.7035727027322921, "learning_rate": 1.2304754248961906e-06, "loss": 0.0319, "step": 1754 }, { "epoch": 0.3993174061433447, "grad_norm": 1.2785559710612535, "learning_rate": 1.2304532598058363e-06, "loss": 0.0481, "step": 1755 }, { "epoch": 0.39954493742889646, "grad_norm": 1.0072143636243023, "learning_rate": 1.230431082341126e-06, "loss": 0.0481, "step": 1756 }, { "epoch": 0.3997724687144482, "grad_norm": 1.063335290490128, "learning_rate": 1.2304088925025133e-06, "loss": 0.0405, "step": 1757 }, { "epoch": 0.4, "grad_norm": 1.3383471149297288, "learning_rate": 1.2303866902904515e-06, "loss": 0.0551, "step": 1758 }, { "epoch": 0.4002275312855518, "grad_norm": 1.3624741606263786, "learning_rate": 1.2303644757053945e-06, "loss": 0.0608, "step": 1759 }, { "epoch": 0.4004550625711035, "grad_norm": 1.5467274319242612, "learning_rate": 1.2303422487477965e-06, "loss": 0.0824, "step": 1760 }, { "epoch": 0.4006825938566553, "grad_norm": 0.9214849308730754, "learning_rate": 1.230320009418111e-06, "loss": 0.0367, "step": 1761 }, { "epoch": 0.40091012514220703, "grad_norm": 1.1758563975131424, "learning_rate": 1.2302977577167937e-06, "loss": 0.0549, "step": 1762 }, { "epoch": 0.40113765642775884, "grad_norm": 1.2826994675732137, "learning_rate": 1.2302754936442986e-06, "loss": 0.0591, "step": 1763 }, { "epoch": 0.4013651877133106, "grad_norm": 1.1321972166341843, "learning_rate": 1.2302532172010809e-06, "loss": 0.0309, "step": 1764 }, { "epoch": 0.40159271899886234, "grad_norm": 0.8928863966424516, "learning_rate": 1.2302309283875958e-06, "loss": 0.0443, "step": 1765 }, { "epoch": 0.4018202502844141, "grad_norm": 0.9140748312352776, "learning_rate": 1.230208627204299e-06, "loss": 0.0462, "step": 1766 }, { "epoch": 0.40204778156996585, "grad_norm": 1.365652466902771, "learning_rate": 1.2301863136516463e-06, "loss": 0.0575, "step": 1767 }, { "epoch": 0.40227531285551765, "grad_norm": 1.8435555715075644, "learning_rate": 1.2301639877300937e-06, "loss": 0.0835, "step": 1768 }, { "epoch": 0.4025028441410694, "grad_norm": 1.0698988090252757, "learning_rate": 1.2301416494400974e-06, "loss": 0.039, "step": 1769 }, { "epoch": 0.40273037542662116, "grad_norm": 1.5392783242527026, "learning_rate": 1.2301192987821142e-06, "loss": 0.0635, "step": 1770 }, { "epoch": 0.4029579067121729, "grad_norm": 1.992163233200094, "learning_rate": 1.2300969357566008e-06, "loss": 0.0687, "step": 1771 }, { "epoch": 0.40318543799772466, "grad_norm": 1.1143248581214018, "learning_rate": 1.230074560364014e-06, "loss": 0.0431, "step": 1772 }, { "epoch": 0.40341296928327647, "grad_norm": 0.9736384244561508, "learning_rate": 1.2300521726048114e-06, "loss": 0.0502, "step": 1773 }, { "epoch": 0.4036405005688282, "grad_norm": 1.6202139485972007, "learning_rate": 1.2300297724794506e-06, "loss": 0.0501, "step": 1774 }, { "epoch": 0.40386803185437997, "grad_norm": 2.6531740859909854, "learning_rate": 1.2300073599883892e-06, "loss": 0.0674, "step": 1775 }, { "epoch": 0.4040955631399317, "grad_norm": 1.4336293766578745, "learning_rate": 1.2299849351320854e-06, "loss": 0.0513, "step": 1776 }, { "epoch": 0.40432309442548353, "grad_norm": 0.9141317140330615, "learning_rate": 1.2299624979109976e-06, "loss": 0.0407, "step": 1777 }, { "epoch": 0.4045506257110353, "grad_norm": 1.1533277774875146, "learning_rate": 1.229940048325584e-06, "loss": 0.0561, "step": 1778 }, { "epoch": 0.40477815699658704, "grad_norm": 1.502695522459793, "learning_rate": 1.229917586376304e-06, "loss": 0.045, "step": 1779 }, { "epoch": 0.4050056882821388, "grad_norm": 1.8452181062352009, "learning_rate": 1.2298951120636163e-06, "loss": 0.0805, "step": 1780 }, { "epoch": 0.40523321956769054, "grad_norm": 1.5002423894959154, "learning_rate": 1.2298726253879802e-06, "loss": 0.0599, "step": 1781 }, { "epoch": 0.40546075085324235, "grad_norm": 1.988959282415028, "learning_rate": 1.2298501263498557e-06, "loss": 0.0951, "step": 1782 }, { "epoch": 0.4056882821387941, "grad_norm": 1.0188710069870275, "learning_rate": 1.229827614949702e-06, "loss": 0.0451, "step": 1783 }, { "epoch": 0.40591581342434585, "grad_norm": 1.44475701906824, "learning_rate": 1.22980509118798e-06, "loss": 0.0707, "step": 1784 }, { "epoch": 0.4061433447098976, "grad_norm": 1.5656238194585963, "learning_rate": 1.2297825550651491e-06, "loss": 0.0658, "step": 1785 }, { "epoch": 0.40637087599544935, "grad_norm": 1.1011276149008262, "learning_rate": 1.2297600065816707e-06, "loss": 0.0786, "step": 1786 }, { "epoch": 0.40659840728100116, "grad_norm": 1.5148687422019658, "learning_rate": 1.229737445738005e-06, "loss": 0.0519, "step": 1787 }, { "epoch": 0.4068259385665529, "grad_norm": 2.402810934984467, "learning_rate": 1.2297148725346137e-06, "loss": 0.0844, "step": 1788 }, { "epoch": 0.40705346985210467, "grad_norm": 0.9099961278233849, "learning_rate": 1.229692286971958e-06, "loss": 0.0249, "step": 1789 }, { "epoch": 0.4072810011376564, "grad_norm": 1.4154667950181927, "learning_rate": 1.2296696890504992e-06, "loss": 0.0473, "step": 1790 }, { "epoch": 0.40750853242320817, "grad_norm": 0.6978007467353329, "learning_rate": 1.2296470787706993e-06, "loss": 0.0294, "step": 1791 }, { "epoch": 0.40773606370876, "grad_norm": 0.7284846144551249, "learning_rate": 1.2296244561330206e-06, "loss": 0.0294, "step": 1792 }, { "epoch": 0.40796359499431173, "grad_norm": 1.3976750286698911, "learning_rate": 1.2296018211379253e-06, "loss": 0.0719, "step": 1793 }, { "epoch": 0.4081911262798635, "grad_norm": 1.7561963591958132, "learning_rate": 1.229579173785876e-06, "loss": 0.0929, "step": 1794 }, { "epoch": 0.40841865756541523, "grad_norm": 1.2130144152724294, "learning_rate": 1.2295565140773357e-06, "loss": 0.055, "step": 1795 }, { "epoch": 0.408646188850967, "grad_norm": 1.5931552656845787, "learning_rate": 1.2295338420127673e-06, "loss": 0.0996, "step": 1796 }, { "epoch": 0.4088737201365188, "grad_norm": 2.213301169642664, "learning_rate": 1.2295111575926344e-06, "loss": 0.0757, "step": 1797 }, { "epoch": 0.40910125142207054, "grad_norm": 0.7230584272794064, "learning_rate": 1.2294884608174007e-06, "loss": 0.0268, "step": 1798 }, { "epoch": 0.4093287827076223, "grad_norm": 1.8735567431020124, "learning_rate": 1.2294657516875297e-06, "loss": 0.0777, "step": 1799 }, { "epoch": 0.40955631399317405, "grad_norm": 1.4508553485648286, "learning_rate": 1.229443030203486e-06, "loss": 0.0592, "step": 1800 }, { "epoch": 0.4097838452787258, "grad_norm": 1.649254441958079, "learning_rate": 1.2294202963657335e-06, "loss": 0.0663, "step": 1801 }, { "epoch": 0.4100113765642776, "grad_norm": 1.2184400652023568, "learning_rate": 1.2293975501747372e-06, "loss": 0.058, "step": 1802 }, { "epoch": 0.41023890784982936, "grad_norm": 1.3977455445674583, "learning_rate": 1.229374791630962e-06, "loss": 0.0513, "step": 1803 }, { "epoch": 0.4104664391353811, "grad_norm": 1.7032814483710292, "learning_rate": 1.2293520207348727e-06, "loss": 0.0526, "step": 1804 }, { "epoch": 0.41069397042093286, "grad_norm": 1.506685007842939, "learning_rate": 1.229329237486935e-06, "loss": 0.0584, "step": 1805 }, { "epoch": 0.4109215017064846, "grad_norm": 1.1882085263706217, "learning_rate": 1.2293064418876145e-06, "loss": 0.0379, "step": 1806 }, { "epoch": 0.4111490329920364, "grad_norm": 0.8676322681005716, "learning_rate": 1.2292836339373771e-06, "loss": 0.0423, "step": 1807 }, { "epoch": 0.4113765642775882, "grad_norm": 1.0357464791970643, "learning_rate": 1.2292608136366887e-06, "loss": 0.0384, "step": 1808 }, { "epoch": 0.4116040955631399, "grad_norm": 1.2686531396882506, "learning_rate": 1.2292379809860162e-06, "loss": 0.048, "step": 1809 }, { "epoch": 0.4118316268486917, "grad_norm": 0.6203768192444805, "learning_rate": 1.2292151359858258e-06, "loss": 0.028, "step": 1810 }, { "epoch": 0.4120591581342435, "grad_norm": 0.9010901499957009, "learning_rate": 1.2291922786365846e-06, "loss": 0.0341, "step": 1811 }, { "epoch": 0.41228668941979524, "grad_norm": 1.1445391336093391, "learning_rate": 1.2291694089387599e-06, "loss": 0.0479, "step": 1812 }, { "epoch": 0.412514220705347, "grad_norm": 1.6521327765404437, "learning_rate": 1.2291465268928187e-06, "loss": 0.1017, "step": 1813 }, { "epoch": 0.41274175199089874, "grad_norm": 1.79397743679785, "learning_rate": 1.2291236324992291e-06, "loss": 0.1033, "step": 1814 }, { "epoch": 0.4129692832764505, "grad_norm": 1.5152525260856162, "learning_rate": 1.229100725758459e-06, "loss": 0.0685, "step": 1815 }, { "epoch": 0.4131968145620023, "grad_norm": 1.483120669394339, "learning_rate": 1.2290778066709763e-06, "loss": 0.0487, "step": 1816 }, { "epoch": 0.41342434584755405, "grad_norm": 0.9540882649733521, "learning_rate": 1.2290548752372494e-06, "loss": 0.0311, "step": 1817 }, { "epoch": 0.4136518771331058, "grad_norm": 0.7907197059168389, "learning_rate": 1.2290319314577473e-06, "loss": 0.0319, "step": 1818 }, { "epoch": 0.41387940841865756, "grad_norm": 0.9184118117485006, "learning_rate": 1.2290089753329386e-06, "loss": 0.0364, "step": 1819 }, { "epoch": 0.4141069397042093, "grad_norm": 0.7846265232624993, "learning_rate": 1.2289860068632929e-06, "loss": 0.0294, "step": 1820 }, { "epoch": 0.4143344709897611, "grad_norm": 1.507108248570942, "learning_rate": 1.228963026049279e-06, "loss": 0.0664, "step": 1821 }, { "epoch": 0.41456200227531287, "grad_norm": 1.9973321813531546, "learning_rate": 1.228940032891367e-06, "loss": 0.0638, "step": 1822 }, { "epoch": 0.4147895335608646, "grad_norm": 1.5615035868464353, "learning_rate": 1.2289170273900272e-06, "loss": 0.0643, "step": 1823 }, { "epoch": 0.4150170648464164, "grad_norm": 0.9932945336330554, "learning_rate": 1.228894009545729e-06, "loss": 0.0645, "step": 1824 }, { "epoch": 0.4152445961319681, "grad_norm": 1.5587019691492865, "learning_rate": 1.2288709793589434e-06, "loss": 0.1044, "step": 1825 }, { "epoch": 0.41547212741751993, "grad_norm": 1.786679686535424, "learning_rate": 1.2288479368301408e-06, "loss": 0.0724, "step": 1826 }, { "epoch": 0.4156996587030717, "grad_norm": 1.690300262458873, "learning_rate": 1.2288248819597922e-06, "loss": 0.091, "step": 1827 }, { "epoch": 0.41592718998862344, "grad_norm": 0.7331004990082142, "learning_rate": 1.228801814748369e-06, "loss": 0.0219, "step": 1828 }, { "epoch": 0.4161547212741752, "grad_norm": 1.3161127926479836, "learning_rate": 1.2287787351963427e-06, "loss": 0.0549, "step": 1829 }, { "epoch": 0.41638225255972694, "grad_norm": 2.165472731866584, "learning_rate": 1.2287556433041845e-06, "loss": 0.0976, "step": 1830 }, { "epoch": 0.41660978384527875, "grad_norm": 1.5021088638901345, "learning_rate": 1.2287325390723669e-06, "loss": 0.0676, "step": 1831 }, { "epoch": 0.4168373151308305, "grad_norm": 1.776482868261773, "learning_rate": 1.2287094225013618e-06, "loss": 0.0955, "step": 1832 }, { "epoch": 0.41706484641638225, "grad_norm": 1.4781242043850178, "learning_rate": 1.2286862935916416e-06, "loss": 0.087, "step": 1833 }, { "epoch": 0.417292377701934, "grad_norm": 0.5015421036696045, "learning_rate": 1.2286631523436793e-06, "loss": 0.0127, "step": 1834 }, { "epoch": 0.41751990898748575, "grad_norm": 1.0956161872763561, "learning_rate": 1.2286399987579478e-06, "loss": 0.0569, "step": 1835 }, { "epoch": 0.41774744027303756, "grad_norm": 0.9950125745841572, "learning_rate": 1.2286168328349202e-06, "loss": 0.0464, "step": 1836 }, { "epoch": 0.4179749715585893, "grad_norm": 0.9975401241840096, "learning_rate": 1.2285936545750698e-06, "loss": 0.0484, "step": 1837 }, { "epoch": 0.41820250284414107, "grad_norm": 1.1811400302907533, "learning_rate": 1.2285704639788707e-06, "loss": 0.0543, "step": 1838 }, { "epoch": 0.4184300341296928, "grad_norm": 1.3042909512695973, "learning_rate": 1.2285472610467969e-06, "loss": 0.0426, "step": 1839 }, { "epoch": 0.41865756541524457, "grad_norm": 0.9151776818093462, "learning_rate": 1.228524045779322e-06, "loss": 0.0403, "step": 1840 }, { "epoch": 0.4188850967007964, "grad_norm": 1.6251189485123572, "learning_rate": 1.2285008181769212e-06, "loss": 0.0503, "step": 1841 }, { "epoch": 0.41911262798634813, "grad_norm": 1.3392895278247294, "learning_rate": 1.228477578240069e-06, "loss": 0.0489, "step": 1842 }, { "epoch": 0.4193401592718999, "grad_norm": 1.2464077367956892, "learning_rate": 1.22845432596924e-06, "loss": 0.0405, "step": 1843 }, { "epoch": 0.41956769055745163, "grad_norm": 1.1623606536272444, "learning_rate": 1.22843106136491e-06, "loss": 0.0358, "step": 1844 }, { "epoch": 0.4197952218430034, "grad_norm": 1.0651018669316255, "learning_rate": 1.2284077844275543e-06, "loss": 0.0416, "step": 1845 }, { "epoch": 0.4200227531285552, "grad_norm": 1.84496334967716, "learning_rate": 1.2283844951576484e-06, "loss": 0.0468, "step": 1846 }, { "epoch": 0.42025028441410694, "grad_norm": 1.0014097401871376, "learning_rate": 1.2283611935556686e-06, "loss": 0.0334, "step": 1847 }, { "epoch": 0.4204778156996587, "grad_norm": 1.2875305555648238, "learning_rate": 1.2283378796220909e-06, "loss": 0.0568, "step": 1848 }, { "epoch": 0.42070534698521045, "grad_norm": 2.1705410042003095, "learning_rate": 1.228314553357392e-06, "loss": 0.0814, "step": 1849 }, { "epoch": 0.42093287827076226, "grad_norm": 0.9864984577581978, "learning_rate": 1.2282912147620483e-06, "loss": 0.035, "step": 1850 }, { "epoch": 0.421160409556314, "grad_norm": 1.0361264682055424, "learning_rate": 1.2282678638365373e-06, "loss": 0.0555, "step": 1851 }, { "epoch": 0.42138794084186576, "grad_norm": 1.2255325101947971, "learning_rate": 1.2282445005813359e-06, "loss": 0.0487, "step": 1852 }, { "epoch": 0.4216154721274175, "grad_norm": 1.21533819095114, "learning_rate": 1.2282211249969217e-06, "loss": 0.0401, "step": 1853 }, { "epoch": 0.42184300341296926, "grad_norm": 1.3664419425404826, "learning_rate": 1.2281977370837725e-06, "loss": 0.0615, "step": 1854 }, { "epoch": 0.42207053469852107, "grad_norm": 1.0812048471076985, "learning_rate": 1.2281743368423662e-06, "loss": 0.0346, "step": 1855 }, { "epoch": 0.4222980659840728, "grad_norm": 1.60512477075616, "learning_rate": 1.2281509242731813e-06, "loss": 0.0945, "step": 1856 }, { "epoch": 0.4225255972696246, "grad_norm": 1.6388845900863238, "learning_rate": 1.228127499376696e-06, "loss": 0.0818, "step": 1857 }, { "epoch": 0.4227531285551763, "grad_norm": 0.9782543566946358, "learning_rate": 1.228104062153389e-06, "loss": 0.0417, "step": 1858 }, { "epoch": 0.4229806598407281, "grad_norm": 0.9911432963854596, "learning_rate": 1.2280806126037396e-06, "loss": 0.0652, "step": 1859 }, { "epoch": 0.4232081911262799, "grad_norm": 1.6481089463408192, "learning_rate": 1.2280571507282272e-06, "loss": 0.104, "step": 1860 }, { "epoch": 0.42343572241183164, "grad_norm": 1.1739506923046776, "learning_rate": 1.2280336765273309e-06, "loss": 0.0499, "step": 1861 }, { "epoch": 0.4236632536973834, "grad_norm": 1.0261276957174543, "learning_rate": 1.2280101900015306e-06, "loss": 0.0467, "step": 1862 }, { "epoch": 0.42389078498293514, "grad_norm": 1.4414669057942235, "learning_rate": 1.2279866911513064e-06, "loss": 0.0559, "step": 1863 }, { "epoch": 0.4241183162684869, "grad_norm": 1.4975361597068904, "learning_rate": 1.2279631799771386e-06, "loss": 0.0816, "step": 1864 }, { "epoch": 0.4243458475540387, "grad_norm": 1.9040003169842432, "learning_rate": 1.2279396564795077e-06, "loss": 0.0914, "step": 1865 }, { "epoch": 0.42457337883959045, "grad_norm": 1.3040862552521917, "learning_rate": 1.2279161206588944e-06, "loss": 0.0402, "step": 1866 }, { "epoch": 0.4248009101251422, "grad_norm": 1.4037836584117365, "learning_rate": 1.2278925725157798e-06, "loss": 0.0583, "step": 1867 }, { "epoch": 0.42502844141069396, "grad_norm": 0.9243184821654356, "learning_rate": 1.2278690120506451e-06, "loss": 0.0461, "step": 1868 }, { "epoch": 0.4252559726962457, "grad_norm": 0.9991936622504879, "learning_rate": 1.2278454392639722e-06, "loss": 0.0342, "step": 1869 }, { "epoch": 0.4254835039817975, "grad_norm": 0.8487853121482433, "learning_rate": 1.2278218541562422e-06, "loss": 0.0301, "step": 1870 }, { "epoch": 0.42571103526734927, "grad_norm": 1.6907022090274095, "learning_rate": 1.2277982567279377e-06, "loss": 0.0786, "step": 1871 }, { "epoch": 0.425938566552901, "grad_norm": 1.7052726832363685, "learning_rate": 1.2277746469795407e-06, "loss": 0.0737, "step": 1872 }, { "epoch": 0.4261660978384528, "grad_norm": 1.3329829339197778, "learning_rate": 1.227751024911534e-06, "loss": 0.055, "step": 1873 }, { "epoch": 0.4263936291240045, "grad_norm": 1.3273033221545125, "learning_rate": 1.2277273905244002e-06, "loss": 0.0484, "step": 1874 }, { "epoch": 0.42662116040955633, "grad_norm": 1.1982373731004552, "learning_rate": 1.2277037438186224e-06, "loss": 0.0466, "step": 1875 }, { "epoch": 0.4268486916951081, "grad_norm": 1.0093718042807764, "learning_rate": 1.2276800847946839e-06, "loss": 0.0363, "step": 1876 }, { "epoch": 0.42707622298065984, "grad_norm": 0.9811547399478636, "learning_rate": 1.227656413453068e-06, "loss": 0.0425, "step": 1877 }, { "epoch": 0.4273037542662116, "grad_norm": 1.146528135934977, "learning_rate": 1.227632729794259e-06, "loss": 0.0485, "step": 1878 }, { "epoch": 0.42753128555176334, "grad_norm": 1.5824604547576673, "learning_rate": 1.2276090338187403e-06, "loss": 0.0487, "step": 1879 }, { "epoch": 0.42775881683731515, "grad_norm": 1.3160117730674203, "learning_rate": 1.2275853255269967e-06, "loss": 0.0446, "step": 1880 }, { "epoch": 0.4279863481228669, "grad_norm": 1.3027261584989582, "learning_rate": 1.2275616049195129e-06, "loss": 0.0395, "step": 1881 }, { "epoch": 0.42821387940841865, "grad_norm": 1.0814544003095643, "learning_rate": 1.2275378719967733e-06, "loss": 0.0292, "step": 1882 }, { "epoch": 0.4284414106939704, "grad_norm": 0.7587179405824312, "learning_rate": 1.227514126759263e-06, "loss": 0.0256, "step": 1883 }, { "epoch": 0.4286689419795222, "grad_norm": 1.4079924288332035, "learning_rate": 1.2274903692074674e-06, "loss": 0.0636, "step": 1884 }, { "epoch": 0.42889647326507396, "grad_norm": 1.024771832790498, "learning_rate": 1.2274665993418722e-06, "loss": 0.0399, "step": 1885 }, { "epoch": 0.4291240045506257, "grad_norm": 1.1395572191018248, "learning_rate": 1.227442817162963e-06, "loss": 0.0427, "step": 1886 }, { "epoch": 0.42935153583617747, "grad_norm": 1.9966203221580272, "learning_rate": 1.227419022671226e-06, "loss": 0.1059, "step": 1887 }, { "epoch": 0.4295790671217292, "grad_norm": 1.6299484076057391, "learning_rate": 1.2273952158671472e-06, "loss": 0.0633, "step": 1888 }, { "epoch": 0.429806598407281, "grad_norm": 1.2903207410298125, "learning_rate": 1.2273713967512137e-06, "loss": 0.0618, "step": 1889 }, { "epoch": 0.4300341296928328, "grad_norm": 1.3026692339522694, "learning_rate": 1.227347565323912e-06, "loss": 0.0795, "step": 1890 }, { "epoch": 0.43026166097838453, "grad_norm": 1.175449820116291, "learning_rate": 1.2273237215857293e-06, "loss": 0.0401, "step": 1891 }, { "epoch": 0.4304891922639363, "grad_norm": 1.6959835304167243, "learning_rate": 1.2272998655371526e-06, "loss": 0.0825, "step": 1892 }, { "epoch": 0.43071672354948803, "grad_norm": 1.4197022325382747, "learning_rate": 1.22727599717867e-06, "loss": 0.0576, "step": 1893 }, { "epoch": 0.43094425483503984, "grad_norm": 1.2471339436500026, "learning_rate": 1.2272521165107687e-06, "loss": 0.0722, "step": 1894 }, { "epoch": 0.4311717861205916, "grad_norm": 1.3643717577874817, "learning_rate": 1.2272282235339372e-06, "loss": 0.0449, "step": 1895 }, { "epoch": 0.43139931740614335, "grad_norm": 1.1744612621898987, "learning_rate": 1.2272043182486638e-06, "loss": 0.0416, "step": 1896 }, { "epoch": 0.4316268486916951, "grad_norm": 1.0384312678976468, "learning_rate": 1.227180400655437e-06, "loss": 0.0399, "step": 1897 }, { "epoch": 0.43185437997724685, "grad_norm": 1.7243528985527803, "learning_rate": 1.2271564707547457e-06, "loss": 0.1495, "step": 1898 }, { "epoch": 0.43208191126279866, "grad_norm": 0.9548507541370914, "learning_rate": 1.227132528547079e-06, "loss": 0.0415, "step": 1899 }, { "epoch": 0.4323094425483504, "grad_norm": 1.3617841660517656, "learning_rate": 1.2271085740329261e-06, "loss": 0.0607, "step": 1900 }, { "epoch": 0.43253697383390216, "grad_norm": 1.6402508487098346, "learning_rate": 1.2270846072127764e-06, "loss": 0.064, "step": 1901 }, { "epoch": 0.4327645051194539, "grad_norm": 1.131250294762535, "learning_rate": 1.2270606280871205e-06, "loss": 0.034, "step": 1902 }, { "epoch": 0.43299203640500566, "grad_norm": 1.2541444770869459, "learning_rate": 1.2270366366564476e-06, "loss": 0.0354, "step": 1903 }, { "epoch": 0.43321956769055747, "grad_norm": 1.1469941807844757, "learning_rate": 1.2270126329212486e-06, "loss": 0.0568, "step": 1904 }, { "epoch": 0.4334470989761092, "grad_norm": 0.9467488862348428, "learning_rate": 1.2269886168820138e-06, "loss": 0.0433, "step": 1905 }, { "epoch": 0.433674630261661, "grad_norm": 2.421156992660593, "learning_rate": 1.2269645885392342e-06, "loss": 0.1575, "step": 1906 }, { "epoch": 0.43390216154721273, "grad_norm": 1.2853239102420944, "learning_rate": 1.226940547893401e-06, "loss": 0.0523, "step": 1907 }, { "epoch": 0.4341296928327645, "grad_norm": 0.7677705648665133, "learning_rate": 1.2269164949450052e-06, "loss": 0.0367, "step": 1908 }, { "epoch": 0.4343572241183163, "grad_norm": 1.7484475434950735, "learning_rate": 1.2268924296945387e-06, "loss": 0.0551, "step": 1909 }, { "epoch": 0.43458475540386804, "grad_norm": 1.8872851215095336, "learning_rate": 1.2268683521424932e-06, "loss": 0.0713, "step": 1910 }, { "epoch": 0.4348122866894198, "grad_norm": 1.2082493686491087, "learning_rate": 1.226844262289361e-06, "loss": 0.0411, "step": 1911 }, { "epoch": 0.43503981797497154, "grad_norm": 1.7963587387869338, "learning_rate": 1.2268201601356342e-06, "loss": 0.0829, "step": 1912 }, { "epoch": 0.4352673492605233, "grad_norm": 1.2079928907070248, "learning_rate": 1.2267960456818054e-06, "loss": 0.0399, "step": 1913 }, { "epoch": 0.4354948805460751, "grad_norm": 1.3116442013593013, "learning_rate": 1.2267719189283676e-06, "loss": 0.0483, "step": 1914 }, { "epoch": 0.43572241183162685, "grad_norm": 1.3635629491104682, "learning_rate": 1.2267477798758141e-06, "loss": 0.0415, "step": 1915 }, { "epoch": 0.4359499431171786, "grad_norm": 1.1777595750610328, "learning_rate": 1.2267236285246376e-06, "loss": 0.0635, "step": 1916 }, { "epoch": 0.43617747440273036, "grad_norm": 1.54425482806484, "learning_rate": 1.2266994648753325e-06, "loss": 0.0713, "step": 1917 }, { "epoch": 0.43640500568828217, "grad_norm": 1.3128341631768956, "learning_rate": 1.2266752889283923e-06, "loss": 0.064, "step": 1918 }, { "epoch": 0.4366325369738339, "grad_norm": 1.3460506007652075, "learning_rate": 1.226651100684311e-06, "loss": 0.0489, "step": 1919 }, { "epoch": 0.43686006825938567, "grad_norm": 1.3320121603668689, "learning_rate": 1.2266269001435829e-06, "loss": 0.0756, "step": 1920 }, { "epoch": 0.4370875995449374, "grad_norm": 1.1661019863589135, "learning_rate": 1.226602687306703e-06, "loss": 0.0313, "step": 1921 }, { "epoch": 0.4373151308304892, "grad_norm": 1.320896853626411, "learning_rate": 1.226578462174166e-06, "loss": 0.0421, "step": 1922 }, { "epoch": 0.437542662116041, "grad_norm": 2.159753678644095, "learning_rate": 1.2265542247464668e-06, "loss": 0.1307, "step": 1923 }, { "epoch": 0.43777019340159273, "grad_norm": 1.5563003879211055, "learning_rate": 1.226529975024101e-06, "loss": 0.0593, "step": 1924 }, { "epoch": 0.4379977246871445, "grad_norm": 0.9689169184561489, "learning_rate": 1.2265057130075641e-06, "loss": 0.0552, "step": 1925 }, { "epoch": 0.43822525597269624, "grad_norm": 1.4053723286715865, "learning_rate": 1.2264814386973523e-06, "loss": 0.0761, "step": 1926 }, { "epoch": 0.438452787258248, "grad_norm": 1.373860475632686, "learning_rate": 1.2264571520939612e-06, "loss": 0.061, "step": 1927 }, { "epoch": 0.4386803185437998, "grad_norm": 1.5229851486555135, "learning_rate": 1.2264328531978875e-06, "loss": 0.0871, "step": 1928 }, { "epoch": 0.43890784982935155, "grad_norm": 0.9510671111937553, "learning_rate": 1.2264085420096277e-06, "loss": 0.0372, "step": 1929 }, { "epoch": 0.4391353811149033, "grad_norm": 1.411692319953813, "learning_rate": 1.226384218529679e-06, "loss": 0.0744, "step": 1930 }, { "epoch": 0.43936291240045505, "grad_norm": 0.9987666361393835, "learning_rate": 1.2263598827585379e-06, "loss": 0.0392, "step": 1931 }, { "epoch": 0.4395904436860068, "grad_norm": 0.8661437238334854, "learning_rate": 1.2263355346967023e-06, "loss": 0.0418, "step": 1932 }, { "epoch": 0.4398179749715586, "grad_norm": 1.2249390316316808, "learning_rate": 1.2263111743446697e-06, "loss": 0.0375, "step": 1933 }, { "epoch": 0.44004550625711036, "grad_norm": 1.5944859887412242, "learning_rate": 1.2262868017029377e-06, "loss": 0.062, "step": 1934 }, { "epoch": 0.4402730375426621, "grad_norm": 1.3941560967832323, "learning_rate": 1.226262416772005e-06, "loss": 0.065, "step": 1935 }, { "epoch": 0.44050056882821387, "grad_norm": 0.9120544787723815, "learning_rate": 1.2262380195523696e-06, "loss": 0.0392, "step": 1936 }, { "epoch": 0.4407281001137656, "grad_norm": 1.764018460971706, "learning_rate": 1.2262136100445303e-06, "loss": 0.0805, "step": 1937 }, { "epoch": 0.4409556313993174, "grad_norm": 1.1144365245430385, "learning_rate": 1.2261891882489855e-06, "loss": 0.0394, "step": 1938 }, { "epoch": 0.4411831626848692, "grad_norm": 1.1821432018479405, "learning_rate": 1.226164754166235e-06, "loss": 0.0411, "step": 1939 }, { "epoch": 0.44141069397042093, "grad_norm": 1.2454202923866486, "learning_rate": 1.2261403077967778e-06, "loss": 0.0441, "step": 1940 }, { "epoch": 0.4416382252559727, "grad_norm": 1.271626432566145, "learning_rate": 1.2261158491411136e-06, "loss": 0.0452, "step": 1941 }, { "epoch": 0.44186575654152443, "grad_norm": 1.0596255221554163, "learning_rate": 1.2260913781997425e-06, "loss": 0.0394, "step": 1942 }, { "epoch": 0.44209328782707624, "grad_norm": 1.4836582697063838, "learning_rate": 1.2260668949731644e-06, "loss": 0.0579, "step": 1943 }, { "epoch": 0.442320819112628, "grad_norm": 1.687109016999551, "learning_rate": 1.2260423994618798e-06, "loss": 0.0544, "step": 1944 }, { "epoch": 0.44254835039817975, "grad_norm": 1.2086563543761475, "learning_rate": 1.2260178916663892e-06, "loss": 0.0351, "step": 1945 }, { "epoch": 0.4427758816837315, "grad_norm": 0.9870058509060623, "learning_rate": 1.2259933715871935e-06, "loss": 0.0458, "step": 1946 }, { "epoch": 0.44300341296928325, "grad_norm": 1.2117861018085792, "learning_rate": 1.2259688392247942e-06, "loss": 0.0466, "step": 1947 }, { "epoch": 0.44323094425483506, "grad_norm": 1.4026535262541224, "learning_rate": 1.2259442945796926e-06, "loss": 0.0695, "step": 1948 }, { "epoch": 0.4434584755403868, "grad_norm": 0.7731288867011977, "learning_rate": 1.2259197376523898e-06, "loss": 0.033, "step": 1949 }, { "epoch": 0.44368600682593856, "grad_norm": 0.7657804826696037, "learning_rate": 1.2258951684433883e-06, "loss": 0.041, "step": 1950 }, { "epoch": 0.4439135381114903, "grad_norm": 1.8552107522089532, "learning_rate": 1.2258705869531901e-06, "loss": 0.0944, "step": 1951 }, { "epoch": 0.4441410693970421, "grad_norm": 1.111099312238907, "learning_rate": 1.2258459931822974e-06, "loss": 0.0651, "step": 1952 }, { "epoch": 0.4443686006825939, "grad_norm": 1.6141386501644925, "learning_rate": 1.225821387131213e-06, "loss": 0.0574, "step": 1953 }, { "epoch": 0.4445961319681456, "grad_norm": 0.9741551342534741, "learning_rate": 1.22579676880044e-06, "loss": 0.0398, "step": 1954 }, { "epoch": 0.4448236632536974, "grad_norm": 1.0138692072583615, "learning_rate": 1.2257721381904811e-06, "loss": 0.0547, "step": 1955 }, { "epoch": 0.44505119453924913, "grad_norm": 1.4460053506809138, "learning_rate": 1.22574749530184e-06, "loss": 0.0624, "step": 1956 }, { "epoch": 0.44527872582480094, "grad_norm": 1.9350781623487736, "learning_rate": 1.2257228401350205e-06, "loss": 0.0694, "step": 1957 }, { "epoch": 0.4455062571103527, "grad_norm": 0.8261029534789511, "learning_rate": 1.2256981726905262e-06, "loss": 0.0295, "step": 1958 }, { "epoch": 0.44573378839590444, "grad_norm": 1.6085761442666993, "learning_rate": 1.2256734929688612e-06, "loss": 0.0699, "step": 1959 }, { "epoch": 0.4459613196814562, "grad_norm": 1.2323407982068377, "learning_rate": 1.2256488009705303e-06, "loss": 0.0409, "step": 1960 }, { "epoch": 0.44618885096700794, "grad_norm": 1.6360239341263807, "learning_rate": 1.225624096696038e-06, "loss": 0.0645, "step": 1961 }, { "epoch": 0.44641638225255975, "grad_norm": 0.9576742303137104, "learning_rate": 1.225599380145889e-06, "loss": 0.0531, "step": 1962 }, { "epoch": 0.4466439135381115, "grad_norm": 1.545322326325835, "learning_rate": 1.2255746513205889e-06, "loss": 0.058, "step": 1963 }, { "epoch": 0.44687144482366326, "grad_norm": 0.9718630260311564, "learning_rate": 1.2255499102206423e-06, "loss": 0.0303, "step": 1964 }, { "epoch": 0.447098976109215, "grad_norm": 1.992196356794709, "learning_rate": 1.2255251568465558e-06, "loss": 0.0711, "step": 1965 }, { "epoch": 0.44732650739476676, "grad_norm": 1.758374736199823, "learning_rate": 1.2255003911988348e-06, "loss": 0.097, "step": 1966 }, { "epoch": 0.44755403868031857, "grad_norm": 1.1999149710277988, "learning_rate": 1.2254756132779855e-06, "loss": 0.0762, "step": 1967 }, { "epoch": 0.4477815699658703, "grad_norm": 0.750734148431945, "learning_rate": 1.2254508230845144e-06, "loss": 0.0366, "step": 1968 }, { "epoch": 0.44800910125142207, "grad_norm": 0.9217944145406265, "learning_rate": 1.2254260206189283e-06, "loss": 0.0388, "step": 1969 }, { "epoch": 0.4482366325369738, "grad_norm": 1.0142165579258442, "learning_rate": 1.2254012058817337e-06, "loss": 0.0582, "step": 1970 }, { "epoch": 0.4484641638225256, "grad_norm": 1.7763236230644281, "learning_rate": 1.2253763788734384e-06, "loss": 0.049, "step": 1971 }, { "epoch": 0.4486916951080774, "grad_norm": 1.1246346527787483, "learning_rate": 1.2253515395945492e-06, "loss": 0.0605, "step": 1972 }, { "epoch": 0.44891922639362913, "grad_norm": 1.0749214639270723, "learning_rate": 1.2253266880455742e-06, "loss": 0.0617, "step": 1973 }, { "epoch": 0.4491467576791809, "grad_norm": 0.7801246159886271, "learning_rate": 1.225301824227021e-06, "loss": 0.028, "step": 1974 }, { "epoch": 0.44937428896473264, "grad_norm": 1.3195521199092928, "learning_rate": 1.2252769481393979e-06, "loss": 0.0471, "step": 1975 }, { "epoch": 0.4496018202502844, "grad_norm": 1.5596892760854768, "learning_rate": 1.2252520597832132e-06, "loss": 0.0337, "step": 1976 }, { "epoch": 0.4498293515358362, "grad_norm": 0.8505107972505791, "learning_rate": 1.2252271591589759e-06, "loss": 0.0398, "step": 1977 }, { "epoch": 0.45005688282138795, "grad_norm": 1.1325746921296718, "learning_rate": 1.2252022462671947e-06, "loss": 0.0486, "step": 1978 }, { "epoch": 0.4502844141069397, "grad_norm": 1.813164319739497, "learning_rate": 1.2251773211083789e-06, "loss": 0.0991, "step": 1979 }, { "epoch": 0.45051194539249145, "grad_norm": 2.0540653923112164, "learning_rate": 1.225152383683038e-06, "loss": 0.0985, "step": 1980 }, { "epoch": 0.4507394766780432, "grad_norm": 1.3064957694509771, "learning_rate": 1.225127433991681e-06, "loss": 0.0408, "step": 1981 }, { "epoch": 0.450967007963595, "grad_norm": 1.055797400460298, "learning_rate": 1.2251024720348186e-06, "loss": 0.0427, "step": 1982 }, { "epoch": 0.45119453924914676, "grad_norm": 1.1174321415520143, "learning_rate": 1.2250774978129606e-06, "loss": 0.0436, "step": 1983 }, { "epoch": 0.4514220705346985, "grad_norm": 1.7220669335096757, "learning_rate": 1.2250525113266175e-06, "loss": 0.0855, "step": 1984 }, { "epoch": 0.45164960182025027, "grad_norm": 1.061543218680689, "learning_rate": 1.2250275125763002e-06, "loss": 0.0386, "step": 1985 }, { "epoch": 0.451877133105802, "grad_norm": 1.4190135452138255, "learning_rate": 1.2250025015625194e-06, "loss": 0.0616, "step": 1986 }, { "epoch": 0.4521046643913538, "grad_norm": 1.8336951622768547, "learning_rate": 1.2249774782857863e-06, "loss": 0.0527, "step": 1987 }, { "epoch": 0.4523321956769056, "grad_norm": 1.3808921921483752, "learning_rate": 1.2249524427466123e-06, "loss": 0.0449, "step": 1988 }, { "epoch": 0.45255972696245733, "grad_norm": 1.0413790190683898, "learning_rate": 1.2249273949455092e-06, "loss": 0.0571, "step": 1989 }, { "epoch": 0.4527872582480091, "grad_norm": 1.1506775133869898, "learning_rate": 1.224902334882989e-06, "loss": 0.0362, "step": 1990 }, { "epoch": 0.4530147895335609, "grad_norm": 0.5659500114505296, "learning_rate": 1.2248772625595633e-06, "loss": 0.0243, "step": 1991 }, { "epoch": 0.45324232081911264, "grad_norm": 1.8858126631285193, "learning_rate": 1.2248521779757455e-06, "loss": 0.0921, "step": 1992 }, { "epoch": 0.4534698521046644, "grad_norm": 1.4534701341984988, "learning_rate": 1.2248270811320473e-06, "loss": 0.0626, "step": 1993 }, { "epoch": 0.45369738339021615, "grad_norm": 0.957481343542944, "learning_rate": 1.2248019720289826e-06, "loss": 0.0273, "step": 1994 }, { "epoch": 0.4539249146757679, "grad_norm": 0.8791382523836305, "learning_rate": 1.2247768506670639e-06, "loss": 0.0589, "step": 1995 }, { "epoch": 0.4541524459613197, "grad_norm": 2.2598713011024794, "learning_rate": 1.2247517170468045e-06, "loss": 0.0993, "step": 1996 }, { "epoch": 0.45437997724687146, "grad_norm": 0.9870730424185967, "learning_rate": 1.2247265711687186e-06, "loss": 0.0355, "step": 1997 }, { "epoch": 0.4546075085324232, "grad_norm": 1.1900169451289013, "learning_rate": 1.2247014130333198e-06, "loss": 0.0525, "step": 1998 }, { "epoch": 0.45483503981797496, "grad_norm": 1.384236012560106, "learning_rate": 1.2246762426411227e-06, "loss": 0.0653, "step": 1999 }, { "epoch": 0.4550625711035267, "grad_norm": 1.3459272018496387, "learning_rate": 1.2246510599926412e-06, "loss": 0.0587, "step": 2000 }, { "epoch": 0.4552901023890785, "grad_norm": 1.4831181130397366, "learning_rate": 1.2246258650883904e-06, "loss": 0.0632, "step": 2001 }, { "epoch": 0.4555176336746303, "grad_norm": 1.4710214394162384, "learning_rate": 1.224600657928885e-06, "loss": 0.0707, "step": 2002 }, { "epoch": 0.455745164960182, "grad_norm": 1.6318800586995355, "learning_rate": 1.2245754385146402e-06, "loss": 0.0737, "step": 2003 }, { "epoch": 0.4559726962457338, "grad_norm": 1.3377789919521947, "learning_rate": 1.2245502068461718e-06, "loss": 0.0549, "step": 2004 }, { "epoch": 0.45620022753128553, "grad_norm": 1.5464491108432568, "learning_rate": 1.224524962923995e-06, "loss": 0.0414, "step": 2005 }, { "epoch": 0.45642775881683734, "grad_norm": 1.7217302295238814, "learning_rate": 1.2244997067486258e-06, "loss": 0.1082, "step": 2006 }, { "epoch": 0.4566552901023891, "grad_norm": 1.3057544355327637, "learning_rate": 1.2244744383205807e-06, "loss": 0.0553, "step": 2007 }, { "epoch": 0.45688282138794084, "grad_norm": 1.4993635003547536, "learning_rate": 1.224449157640376e-06, "loss": 0.0694, "step": 2008 }, { "epoch": 0.4571103526734926, "grad_norm": 1.4893330797563609, "learning_rate": 1.2244238647085283e-06, "loss": 0.0715, "step": 2009 }, { "epoch": 0.45733788395904434, "grad_norm": 1.8914472286199435, "learning_rate": 1.2243985595255546e-06, "loss": 0.0577, "step": 2010 }, { "epoch": 0.45756541524459615, "grad_norm": 1.8242839299021114, "learning_rate": 1.224373242091972e-06, "loss": 0.0565, "step": 2011 }, { "epoch": 0.4577929465301479, "grad_norm": 1.0856942810712096, "learning_rate": 1.224347912408298e-06, "loss": 0.0357, "step": 2012 }, { "epoch": 0.45802047781569966, "grad_norm": 1.3194141628678389, "learning_rate": 1.2243225704750506e-06, "loss": 0.0449, "step": 2013 }, { "epoch": 0.4582480091012514, "grad_norm": 1.860571028270787, "learning_rate": 1.2242972162927472e-06, "loss": 0.0855, "step": 2014 }, { "epoch": 0.45847554038680316, "grad_norm": 1.0986966810344647, "learning_rate": 1.2242718498619063e-06, "loss": 0.048, "step": 2015 }, { "epoch": 0.45870307167235497, "grad_norm": 1.286949991423601, "learning_rate": 1.2242464711830463e-06, "loss": 0.0648, "step": 2016 }, { "epoch": 0.4589306029579067, "grad_norm": 1.7867407908291044, "learning_rate": 1.224221080256686e-06, "loss": 0.0656, "step": 2017 }, { "epoch": 0.45915813424345847, "grad_norm": 0.9964611889527885, "learning_rate": 1.2241956770833444e-06, "loss": 0.0287, "step": 2018 }, { "epoch": 0.4593856655290102, "grad_norm": 0.758262966491813, "learning_rate": 1.2241702616635403e-06, "loss": 0.0266, "step": 2019 }, { "epoch": 0.459613196814562, "grad_norm": 2.5974139992788707, "learning_rate": 1.2241448339977931e-06, "loss": 0.111, "step": 2020 }, { "epoch": 0.4598407281001138, "grad_norm": 0.771625496801363, "learning_rate": 1.2241193940866234e-06, "loss": 0.0291, "step": 2021 }, { "epoch": 0.46006825938566553, "grad_norm": 1.04981191475765, "learning_rate": 1.22409394193055e-06, "loss": 0.0445, "step": 2022 }, { "epoch": 0.4602957906712173, "grad_norm": 1.1765006507267777, "learning_rate": 1.224068477530094e-06, "loss": 0.0534, "step": 2023 }, { "epoch": 0.46052332195676904, "grad_norm": 1.7004930482631033, "learning_rate": 1.2240430008857751e-06, "loss": 0.1093, "step": 2024 }, { "epoch": 0.46075085324232085, "grad_norm": 0.9880326133501596, "learning_rate": 1.2240175119981146e-06, "loss": 0.0596, "step": 2025 }, { "epoch": 0.4609783845278726, "grad_norm": 0.9331212134306713, "learning_rate": 1.223992010867633e-06, "loss": 0.0364, "step": 2026 }, { "epoch": 0.46120591581342435, "grad_norm": 0.592705357456653, "learning_rate": 1.223966497494852e-06, "loss": 0.0267, "step": 2027 }, { "epoch": 0.4614334470989761, "grad_norm": 2.103103329890297, "learning_rate": 1.2239409718802927e-06, "loss": 0.0789, "step": 2028 }, { "epoch": 0.46166097838452785, "grad_norm": 1.2774828791183843, "learning_rate": 1.2239154340244766e-06, "loss": 0.0617, "step": 2029 }, { "epoch": 0.46188850967007966, "grad_norm": 1.9492863350229845, "learning_rate": 1.2238898839279262e-06, "loss": 0.0715, "step": 2030 }, { "epoch": 0.4621160409556314, "grad_norm": 1.7906002686883862, "learning_rate": 1.223864321591163e-06, "loss": 0.0795, "step": 2031 }, { "epoch": 0.46234357224118316, "grad_norm": 1.0120261476046106, "learning_rate": 1.2238387470147103e-06, "loss": 0.0397, "step": 2032 }, { "epoch": 0.4625711035267349, "grad_norm": 1.4202832673434427, "learning_rate": 1.22381316019909e-06, "loss": 0.0551, "step": 2033 }, { "epoch": 0.46279863481228667, "grad_norm": 0.9745272894465992, "learning_rate": 1.2237875611448254e-06, "loss": 0.0634, "step": 2034 }, { "epoch": 0.4630261660978385, "grad_norm": 0.7432221539823165, "learning_rate": 1.2237619498524397e-06, "loss": 0.0218, "step": 2035 }, { "epoch": 0.46325369738339023, "grad_norm": 1.9819427368647708, "learning_rate": 1.2237363263224564e-06, "loss": 0.1136, "step": 2036 }, { "epoch": 0.463481228668942, "grad_norm": 1.538163547331391, "learning_rate": 1.2237106905553991e-06, "loss": 0.0581, "step": 2037 }, { "epoch": 0.46370875995449373, "grad_norm": 1.1781797084882926, "learning_rate": 1.2236850425517918e-06, "loss": 0.0593, "step": 2038 }, { "epoch": 0.4639362912400455, "grad_norm": 1.7417190267667673, "learning_rate": 1.2236593823121586e-06, "loss": 0.092, "step": 2039 }, { "epoch": 0.4641638225255973, "grad_norm": 1.2090125054365826, "learning_rate": 1.223633709837024e-06, "loss": 0.0238, "step": 2040 }, { "epoch": 0.46439135381114904, "grad_norm": 0.8736586233913058, "learning_rate": 1.2236080251269129e-06, "loss": 0.0436, "step": 2041 }, { "epoch": 0.4646188850967008, "grad_norm": 1.4877382030386759, "learning_rate": 1.2235823281823498e-06, "loss": 0.0673, "step": 2042 }, { "epoch": 0.46484641638225255, "grad_norm": 1.8784786331322982, "learning_rate": 1.2235566190038602e-06, "loss": 0.0872, "step": 2043 }, { "epoch": 0.4650739476678043, "grad_norm": 0.9749064913400318, "learning_rate": 1.2235308975919697e-06, "loss": 0.0408, "step": 2044 }, { "epoch": 0.4653014789533561, "grad_norm": 1.6562176997205134, "learning_rate": 1.2235051639472037e-06, "loss": 0.0593, "step": 2045 }, { "epoch": 0.46552901023890786, "grad_norm": 7.178097985744176, "learning_rate": 1.2234794180700883e-06, "loss": 0.2677, "step": 2046 }, { "epoch": 0.4657565415244596, "grad_norm": 1.2664824384389577, "learning_rate": 1.2234536599611496e-06, "loss": 0.0578, "step": 2047 }, { "epoch": 0.46598407281001136, "grad_norm": 1.0979349025374328, "learning_rate": 1.2234278896209142e-06, "loss": 0.053, "step": 2048 }, { "epoch": 0.4662116040955631, "grad_norm": 0.6490727646887132, "learning_rate": 1.2234021070499086e-06, "loss": 0.0262, "step": 2049 }, { "epoch": 0.4664391353811149, "grad_norm": 0.9140641916499196, "learning_rate": 1.22337631224866e-06, "loss": 0.0325, "step": 2050 }, { "epoch": 0.4666666666666667, "grad_norm": 1.0654195689636294, "learning_rate": 1.2233505052176952e-06, "loss": 0.0539, "step": 2051 }, { "epoch": 0.4668941979522184, "grad_norm": 1.2246413980257993, "learning_rate": 1.223324685957542e-06, "loss": 0.0539, "step": 2052 }, { "epoch": 0.4671217292377702, "grad_norm": 0.9496001567387945, "learning_rate": 1.2232988544687282e-06, "loss": 0.0523, "step": 2053 }, { "epoch": 0.46734926052332193, "grad_norm": 1.1280442595857205, "learning_rate": 1.2232730107517813e-06, "loss": 0.0483, "step": 2054 }, { "epoch": 0.46757679180887374, "grad_norm": 0.939380252104929, "learning_rate": 1.22324715480723e-06, "loss": 0.032, "step": 2055 }, { "epoch": 0.4678043230944255, "grad_norm": 1.56220155639698, "learning_rate": 1.2232212866356022e-06, "loss": 0.0744, "step": 2056 }, { "epoch": 0.46803185437997724, "grad_norm": 0.8923779970732735, "learning_rate": 1.2231954062374272e-06, "loss": 0.0278, "step": 2057 }, { "epoch": 0.468259385665529, "grad_norm": 1.366515353592018, "learning_rate": 1.2231695136132333e-06, "loss": 0.0699, "step": 2058 }, { "epoch": 0.4684869169510808, "grad_norm": 2.0148188928409616, "learning_rate": 1.2231436087635504e-06, "loss": 0.0599, "step": 2059 }, { "epoch": 0.46871444823663255, "grad_norm": 0.8270174628565943, "learning_rate": 1.2231176916889072e-06, "loss": 0.027, "step": 2060 }, { "epoch": 0.4689419795221843, "grad_norm": 1.2532661876313687, "learning_rate": 1.223091762389834e-06, "loss": 0.0684, "step": 2061 }, { "epoch": 0.46916951080773606, "grad_norm": 0.961835609964009, "learning_rate": 1.2230658208668606e-06, "loss": 0.0419, "step": 2062 }, { "epoch": 0.4693970420932878, "grad_norm": 1.2485009940950207, "learning_rate": 1.223039867120517e-06, "loss": 0.06, "step": 2063 }, { "epoch": 0.4696245733788396, "grad_norm": 1.0550492918928736, "learning_rate": 1.2230139011513338e-06, "loss": 0.0325, "step": 2064 }, { "epoch": 0.46985210466439137, "grad_norm": 1.0002830612932734, "learning_rate": 1.2229879229598416e-06, "loss": 0.0309, "step": 2065 }, { "epoch": 0.4700796359499431, "grad_norm": 1.0559040750849908, "learning_rate": 1.2229619325465717e-06, "loss": 0.044, "step": 2066 }, { "epoch": 0.47030716723549487, "grad_norm": 1.8211747050798512, "learning_rate": 1.2229359299120548e-06, "loss": 0.0544, "step": 2067 }, { "epoch": 0.4705346985210466, "grad_norm": 1.1244511125230865, "learning_rate": 1.2229099150568224e-06, "loss": 0.0458, "step": 2068 }, { "epoch": 0.47076222980659843, "grad_norm": 1.2446047468564831, "learning_rate": 1.2228838879814065e-06, "loss": 0.0454, "step": 2069 }, { "epoch": 0.4709897610921502, "grad_norm": 2.7103883547474132, "learning_rate": 1.222857848686339e-06, "loss": 0.1164, "step": 2070 }, { "epoch": 0.47121729237770194, "grad_norm": 1.0755709452712496, "learning_rate": 1.2228317971721518e-06, "loss": 0.0497, "step": 2071 }, { "epoch": 0.4714448236632537, "grad_norm": 1.3822921197656612, "learning_rate": 1.2228057334393777e-06, "loss": 0.0606, "step": 2072 }, { "epoch": 0.47167235494880544, "grad_norm": 1.436834207352602, "learning_rate": 1.2227796574885493e-06, "loss": 0.0759, "step": 2073 }, { "epoch": 0.47189988623435725, "grad_norm": 0.8895801225433585, "learning_rate": 1.2227535693201994e-06, "loss": 0.0278, "step": 2074 }, { "epoch": 0.472127417519909, "grad_norm": 0.9871504029067106, "learning_rate": 1.2227274689348612e-06, "loss": 0.0339, "step": 2075 }, { "epoch": 0.47235494880546075, "grad_norm": 2.4304810090092217, "learning_rate": 1.2227013563330684e-06, "loss": 0.1278, "step": 2076 }, { "epoch": 0.4725824800910125, "grad_norm": 1.3611006420880847, "learning_rate": 1.2226752315153543e-06, "loss": 0.056, "step": 2077 }, { "epoch": 0.47281001137656425, "grad_norm": 1.2835019677874184, "learning_rate": 1.2226490944822533e-06, "loss": 0.0495, "step": 2078 }, { "epoch": 0.47303754266211606, "grad_norm": 0.9616764487213456, "learning_rate": 1.2226229452342991e-06, "loss": 0.0346, "step": 2079 }, { "epoch": 0.4732650739476678, "grad_norm": 1.0696378165797904, "learning_rate": 1.2225967837720265e-06, "loss": 0.0408, "step": 2080 }, { "epoch": 0.47349260523321957, "grad_norm": 1.40355131716461, "learning_rate": 1.2225706100959702e-06, "loss": 0.0567, "step": 2081 }, { "epoch": 0.4737201365187713, "grad_norm": 1.228435046083943, "learning_rate": 1.2225444242066648e-06, "loss": 0.0483, "step": 2082 }, { "epoch": 0.47394766780432307, "grad_norm": 2.414708407757396, "learning_rate": 1.2225182261046458e-06, "loss": 0.0817, "step": 2083 }, { "epoch": 0.4741751990898749, "grad_norm": 1.3036262799520595, "learning_rate": 1.2224920157904487e-06, "loss": 0.0552, "step": 2084 }, { "epoch": 0.47440273037542663, "grad_norm": 1.4961223747917856, "learning_rate": 1.222465793264609e-06, "loss": 0.0653, "step": 2085 }, { "epoch": 0.4746302616609784, "grad_norm": 0.9962870632246056, "learning_rate": 1.2224395585276626e-06, "loss": 0.059, "step": 2086 }, { "epoch": 0.47485779294653013, "grad_norm": 1.1085210761396838, "learning_rate": 1.2224133115801458e-06, "loss": 0.0415, "step": 2087 }, { "epoch": 0.4750853242320819, "grad_norm": 2.147238444020511, "learning_rate": 1.222387052422595e-06, "loss": 0.0832, "step": 2088 }, { "epoch": 0.4753128555176337, "grad_norm": 1.1789213043757707, "learning_rate": 1.222360781055547e-06, "loss": 0.0439, "step": 2089 }, { "epoch": 0.47554038680318544, "grad_norm": 1.0136823057916216, "learning_rate": 1.2223344974795386e-06, "loss": 0.0491, "step": 2090 }, { "epoch": 0.4757679180887372, "grad_norm": 1.0762034069369268, "learning_rate": 1.2223082016951071e-06, "loss": 0.0414, "step": 2091 }, { "epoch": 0.47599544937428895, "grad_norm": 0.7861883267498344, "learning_rate": 1.2222818937027898e-06, "loss": 0.0225, "step": 2092 }, { "epoch": 0.47622298065984076, "grad_norm": 1.4307219009209873, "learning_rate": 1.2222555735031245e-06, "loss": 0.0518, "step": 2093 }, { "epoch": 0.4764505119453925, "grad_norm": 1.7521480356699026, "learning_rate": 1.222229241096649e-06, "loss": 0.0686, "step": 2094 }, { "epoch": 0.47667804323094426, "grad_norm": 1.084257413613486, "learning_rate": 1.2222028964839017e-06, "loss": 0.0623, "step": 2095 }, { "epoch": 0.476905574516496, "grad_norm": 0.9853812502387866, "learning_rate": 1.2221765396654209e-06, "loss": 0.0322, "step": 2096 }, { "epoch": 0.47713310580204776, "grad_norm": 1.593782379839956, "learning_rate": 1.2221501706417454e-06, "loss": 0.0776, "step": 2097 }, { "epoch": 0.47736063708759957, "grad_norm": 1.2316203058040796, "learning_rate": 1.2221237894134138e-06, "loss": 0.0415, "step": 2098 }, { "epoch": 0.4775881683731513, "grad_norm": 1.7492243550591817, "learning_rate": 1.222097395980966e-06, "loss": 0.0863, "step": 2099 }, { "epoch": 0.4778156996587031, "grad_norm": 1.5123822677786471, "learning_rate": 1.2220709903449403e-06, "loss": 0.0839, "step": 2100 }, { "epoch": 0.4780432309442548, "grad_norm": 0.6925370489166974, "learning_rate": 1.2220445725058775e-06, "loss": 0.036, "step": 2101 }, { "epoch": 0.4782707622298066, "grad_norm": 1.416495751104088, "learning_rate": 1.222018142464317e-06, "loss": 0.0745, "step": 2102 }, { "epoch": 0.4784982935153584, "grad_norm": 1.3844263789079834, "learning_rate": 1.221991700220799e-06, "loss": 0.0635, "step": 2103 }, { "epoch": 0.47872582480091014, "grad_norm": 1.293007537973823, "learning_rate": 1.221965245775864e-06, "loss": 0.0489, "step": 2104 }, { "epoch": 0.4789533560864619, "grad_norm": 1.4567681046992622, "learning_rate": 1.2219387791300527e-06, "loss": 0.0645, "step": 2105 }, { "epoch": 0.47918088737201364, "grad_norm": 0.9331875777291676, "learning_rate": 1.2219123002839058e-06, "loss": 0.0374, "step": 2106 }, { "epoch": 0.4794084186575654, "grad_norm": 1.0880231881224058, "learning_rate": 1.2218858092379648e-06, "loss": 0.0495, "step": 2107 }, { "epoch": 0.4796359499431172, "grad_norm": 2.087839894098663, "learning_rate": 1.221859305992771e-06, "loss": 0.0775, "step": 2108 }, { "epoch": 0.47986348122866895, "grad_norm": 2.0258749667718408, "learning_rate": 1.2218327905488662e-06, "loss": 0.0928, "step": 2109 }, { "epoch": 0.4800910125142207, "grad_norm": 1.077278940685259, "learning_rate": 1.221806262906792e-06, "loss": 0.0395, "step": 2110 }, { "epoch": 0.48031854379977246, "grad_norm": 1.3800617021949446, "learning_rate": 1.221779723067091e-06, "loss": 0.0547, "step": 2111 }, { "epoch": 0.4805460750853242, "grad_norm": 1.1882918051107954, "learning_rate": 1.2217531710303053e-06, "loss": 0.0646, "step": 2112 }, { "epoch": 0.480773606370876, "grad_norm": 1.648037642948223, "learning_rate": 1.2217266067969778e-06, "loss": 0.0883, "step": 2113 }, { "epoch": 0.48100113765642777, "grad_norm": 1.1114503968376748, "learning_rate": 1.221700030367651e-06, "loss": 0.0532, "step": 2114 }, { "epoch": 0.4812286689419795, "grad_norm": 2.318260685332167, "learning_rate": 1.2216734417428686e-06, "loss": 0.1156, "step": 2115 }, { "epoch": 0.4814562002275313, "grad_norm": 0.943158220881948, "learning_rate": 1.2216468409231738e-06, "loss": 0.0478, "step": 2116 }, { "epoch": 0.481683731513083, "grad_norm": 1.3059340047526014, "learning_rate": 1.2216202279091104e-06, "loss": 0.0474, "step": 2117 }, { "epoch": 0.48191126279863483, "grad_norm": 1.5108825640696393, "learning_rate": 1.2215936027012221e-06, "loss": 0.0528, "step": 2118 }, { "epoch": 0.4821387940841866, "grad_norm": 1.6389721165810114, "learning_rate": 1.2215669653000532e-06, "loss": 0.0636, "step": 2119 }, { "epoch": 0.48236632536973834, "grad_norm": 0.8263598171620422, "learning_rate": 1.2215403157061478e-06, "loss": 0.0288, "step": 2120 }, { "epoch": 0.4825938566552901, "grad_norm": 1.9615999942786497, "learning_rate": 1.2215136539200512e-06, "loss": 0.0735, "step": 2121 }, { "epoch": 0.48282138794084184, "grad_norm": 1.052892038422689, "learning_rate": 1.2214869799423078e-06, "loss": 0.0375, "step": 2122 }, { "epoch": 0.48304891922639365, "grad_norm": 1.0184495668702673, "learning_rate": 1.2214602937734632e-06, "loss": 0.0457, "step": 2123 }, { "epoch": 0.4832764505119454, "grad_norm": 2.1042829626272868, "learning_rate": 1.2214335954140624e-06, "loss": 0.1008, "step": 2124 }, { "epoch": 0.48350398179749715, "grad_norm": 1.2825506743830564, "learning_rate": 1.221406884864651e-06, "loss": 0.0542, "step": 2125 }, { "epoch": 0.4837315130830489, "grad_norm": 1.2404980369925334, "learning_rate": 1.2213801621257754e-06, "loss": 0.076, "step": 2126 }, { "epoch": 0.48395904436860065, "grad_norm": 1.6535875011294203, "learning_rate": 1.2213534271979815e-06, "loss": 0.0826, "step": 2127 }, { "epoch": 0.48418657565415246, "grad_norm": 1.5742759100449686, "learning_rate": 1.2213266800818158e-06, "loss": 0.0722, "step": 2128 }, { "epoch": 0.4844141069397042, "grad_norm": 1.4590518459696438, "learning_rate": 1.2212999207778246e-06, "loss": 0.0626, "step": 2129 }, { "epoch": 0.48464163822525597, "grad_norm": 1.032592661119547, "learning_rate": 1.2212731492865552e-06, "loss": 0.0352, "step": 2130 }, { "epoch": 0.4848691695108077, "grad_norm": 1.943068406348077, "learning_rate": 1.2212463656085548e-06, "loss": 0.0878, "step": 2131 }, { "epoch": 0.4850967007963595, "grad_norm": 1.4222918495364802, "learning_rate": 1.2212195697443704e-06, "loss": 0.0662, "step": 2132 }, { "epoch": 0.4853242320819113, "grad_norm": 1.0835698936451348, "learning_rate": 1.2211927616945502e-06, "loss": 0.0315, "step": 2133 }, { "epoch": 0.48555176336746303, "grad_norm": 1.4804001036145082, "learning_rate": 1.2211659414596417e-06, "loss": 0.057, "step": 2134 }, { "epoch": 0.4857792946530148, "grad_norm": 1.0372510496516572, "learning_rate": 1.2211391090401931e-06, "loss": 0.0347, "step": 2135 }, { "epoch": 0.48600682593856653, "grad_norm": 1.4539414447995862, "learning_rate": 1.2211122644367531e-06, "loss": 0.0546, "step": 2136 }, { "epoch": 0.48623435722411834, "grad_norm": 0.8272464584595134, "learning_rate": 1.2210854076498699e-06, "loss": 0.0291, "step": 2137 }, { "epoch": 0.4864618885096701, "grad_norm": 1.082723119747633, "learning_rate": 1.2210585386800927e-06, "loss": 0.0631, "step": 2138 }, { "epoch": 0.48668941979522184, "grad_norm": 1.7613862158027351, "learning_rate": 1.2210316575279707e-06, "loss": 0.0516, "step": 2139 }, { "epoch": 0.4869169510807736, "grad_norm": 1.05018126679848, "learning_rate": 1.221004764194053e-06, "loss": 0.0503, "step": 2140 }, { "epoch": 0.48714448236632535, "grad_norm": 1.4765291215619578, "learning_rate": 1.22097785867889e-06, "loss": 0.0462, "step": 2141 }, { "epoch": 0.48737201365187716, "grad_norm": 1.6033668171289224, "learning_rate": 1.2209509409830304e-06, "loss": 0.0656, "step": 2142 }, { "epoch": 0.4875995449374289, "grad_norm": 1.2469591088986771, "learning_rate": 1.2209240111070254e-06, "loss": 0.0644, "step": 2143 }, { "epoch": 0.48782707622298066, "grad_norm": 1.676030146319255, "learning_rate": 1.2208970690514247e-06, "loss": 0.0502, "step": 2144 }, { "epoch": 0.4880546075085324, "grad_norm": 0.89826172703067, "learning_rate": 1.2208701148167795e-06, "loss": 0.0378, "step": 2145 }, { "epoch": 0.48828213879408416, "grad_norm": 1.2953659435102682, "learning_rate": 1.2208431484036405e-06, "loss": 0.0532, "step": 2146 }, { "epoch": 0.48850967007963597, "grad_norm": 1.6937135620824826, "learning_rate": 1.2208161698125584e-06, "loss": 0.0777, "step": 2147 }, { "epoch": 0.4887372013651877, "grad_norm": 1.4516409315976513, "learning_rate": 1.2207891790440852e-06, "loss": 0.0532, "step": 2148 }, { "epoch": 0.4889647326507395, "grad_norm": 1.466966804713568, "learning_rate": 1.2207621760987723e-06, "loss": 0.0542, "step": 2149 }, { "epoch": 0.4891922639362912, "grad_norm": 1.5403030963325346, "learning_rate": 1.2207351609771718e-06, "loss": 0.0699, "step": 2150 }, { "epoch": 0.489419795221843, "grad_norm": 1.1658099032169384, "learning_rate": 1.2207081336798352e-06, "loss": 0.0747, "step": 2151 }, { "epoch": 0.4896473265073948, "grad_norm": 1.272828829650441, "learning_rate": 1.2206810942073158e-06, "loss": 0.0339, "step": 2152 }, { "epoch": 0.48987485779294654, "grad_norm": 0.8638651055871553, "learning_rate": 1.2206540425601653e-06, "loss": 0.0258, "step": 2153 }, { "epoch": 0.4901023890784983, "grad_norm": 1.1406798846203596, "learning_rate": 1.2206269787389374e-06, "loss": 0.0434, "step": 2154 }, { "epoch": 0.49032992036405004, "grad_norm": 1.3546994084602113, "learning_rate": 1.220599902744185e-06, "loss": 0.0505, "step": 2155 }, { "epoch": 0.4905574516496018, "grad_norm": 0.8028716165840231, "learning_rate": 1.220572814576461e-06, "loss": 0.0252, "step": 2156 }, { "epoch": 0.4907849829351536, "grad_norm": 1.5525391069467074, "learning_rate": 1.2205457142363197e-06, "loss": 0.0671, "step": 2157 }, { "epoch": 0.49101251422070535, "grad_norm": 1.5565330239281656, "learning_rate": 1.2205186017243146e-06, "loss": 0.0485, "step": 2158 }, { "epoch": 0.4912400455062571, "grad_norm": 0.9990569085398061, "learning_rate": 1.2204914770409999e-06, "loss": 0.0379, "step": 2159 }, { "epoch": 0.49146757679180886, "grad_norm": 1.3885909890564816, "learning_rate": 1.22046434018693e-06, "loss": 0.0815, "step": 2160 }, { "epoch": 0.4916951080773606, "grad_norm": 1.0037322866680125, "learning_rate": 1.2204371911626597e-06, "loss": 0.0409, "step": 2161 }, { "epoch": 0.4919226393629124, "grad_norm": 2.005545489895357, "learning_rate": 1.2204100299687436e-06, "loss": 0.1019, "step": 2162 }, { "epoch": 0.49215017064846417, "grad_norm": 1.5901419467739697, "learning_rate": 1.2203828566057368e-06, "loss": 0.0497, "step": 2163 }, { "epoch": 0.4923777019340159, "grad_norm": 1.0024861467422042, "learning_rate": 1.2203556710741948e-06, "loss": 0.0537, "step": 2164 }, { "epoch": 0.4926052332195677, "grad_norm": 1.1764359755022478, "learning_rate": 1.2203284733746734e-06, "loss": 0.0609, "step": 2165 }, { "epoch": 0.4928327645051195, "grad_norm": 1.4780687059085984, "learning_rate": 1.2203012635077283e-06, "loss": 0.0467, "step": 2166 }, { "epoch": 0.49306029579067123, "grad_norm": 1.0636152584968832, "learning_rate": 1.2202740414739153e-06, "loss": 0.0334, "step": 2167 }, { "epoch": 0.493287827076223, "grad_norm": 0.9777542514618388, "learning_rate": 1.2202468072737914e-06, "loss": 0.0299, "step": 2168 }, { "epoch": 0.49351535836177474, "grad_norm": 1.099998345093073, "learning_rate": 1.2202195609079128e-06, "loss": 0.0349, "step": 2169 }, { "epoch": 0.4937428896473265, "grad_norm": 0.9946626000618751, "learning_rate": 1.2201923023768365e-06, "loss": 0.0439, "step": 2170 }, { "epoch": 0.4939704209328783, "grad_norm": 2.1311963123579707, "learning_rate": 1.2201650316811194e-06, "loss": 0.0871, "step": 2171 }, { "epoch": 0.49419795221843005, "grad_norm": 1.492842268786691, "learning_rate": 1.2201377488213195e-06, "loss": 0.0851, "step": 2172 }, { "epoch": 0.4944254835039818, "grad_norm": 2.5237867181473876, "learning_rate": 1.2201104537979934e-06, "loss": 0.0637, "step": 2173 }, { "epoch": 0.49465301478953355, "grad_norm": 2.556432817981331, "learning_rate": 1.2200831466117e-06, "loss": 0.1105, "step": 2174 }, { "epoch": 0.4948805460750853, "grad_norm": 0.8981926808894833, "learning_rate": 1.2200558272629967e-06, "loss": 0.03, "step": 2175 }, { "epoch": 0.4951080773606371, "grad_norm": 1.6324971068278198, "learning_rate": 1.2200284957524421e-06, "loss": 0.063, "step": 2176 }, { "epoch": 0.49533560864618886, "grad_norm": 1.0341597824022535, "learning_rate": 1.2200011520805947e-06, "loss": 0.0533, "step": 2177 }, { "epoch": 0.4955631399317406, "grad_norm": 0.8107094209291485, "learning_rate": 1.2199737962480136e-06, "loss": 0.0427, "step": 2178 }, { "epoch": 0.49579067121729237, "grad_norm": 1.106894286453914, "learning_rate": 1.2199464282552576e-06, "loss": 0.0594, "step": 2179 }, { "epoch": 0.4960182025028441, "grad_norm": 1.7291263581636043, "learning_rate": 1.2199190481028864e-06, "loss": 0.0675, "step": 2180 }, { "epoch": 0.4962457337883959, "grad_norm": 0.9412668127265857, "learning_rate": 1.2198916557914592e-06, "loss": 0.0364, "step": 2181 }, { "epoch": 0.4964732650739477, "grad_norm": 1.4114602905566138, "learning_rate": 1.2198642513215362e-06, "loss": 0.0652, "step": 2182 }, { "epoch": 0.49670079635949943, "grad_norm": 0.609976074205654, "learning_rate": 1.2198368346936774e-06, "loss": 0.03, "step": 2183 }, { "epoch": 0.4969283276450512, "grad_norm": 1.2570918433725233, "learning_rate": 1.219809405908443e-06, "loss": 0.0479, "step": 2184 }, { "epoch": 0.49715585893060293, "grad_norm": 1.0852743744371969, "learning_rate": 1.2197819649663934e-06, "loss": 0.0383, "step": 2185 }, { "epoch": 0.49738339021615474, "grad_norm": 0.8202048600920889, "learning_rate": 1.21975451186809e-06, "loss": 0.0242, "step": 2186 }, { "epoch": 0.4976109215017065, "grad_norm": 1.180630635637642, "learning_rate": 1.2197270466140936e-06, "loss": 0.0597, "step": 2187 }, { "epoch": 0.49783845278725825, "grad_norm": 0.5499273259791628, "learning_rate": 1.2196995692049656e-06, "loss": 0.0194, "step": 2188 }, { "epoch": 0.49806598407281, "grad_norm": 1.3699181548074078, "learning_rate": 1.2196720796412675e-06, "loss": 0.0729, "step": 2189 }, { "epoch": 0.49829351535836175, "grad_norm": 1.6813076102860163, "learning_rate": 1.219644577923561e-06, "loss": 0.0576, "step": 2190 }, { "epoch": 0.49852104664391356, "grad_norm": 1.3365991781516084, "learning_rate": 1.2196170640524087e-06, "loss": 0.0682, "step": 2191 }, { "epoch": 0.4987485779294653, "grad_norm": 1.5433585155887166, "learning_rate": 1.2195895380283725e-06, "loss": 0.0712, "step": 2192 }, { "epoch": 0.49897610921501706, "grad_norm": 0.7540413090381725, "learning_rate": 1.219561999852015e-06, "loss": 0.0358, "step": 2193 }, { "epoch": 0.4992036405005688, "grad_norm": 1.0443802296277052, "learning_rate": 1.2195344495238993e-06, "loss": 0.0443, "step": 2194 }, { "epoch": 0.49943117178612056, "grad_norm": 1.0620744241395403, "learning_rate": 1.2195068870445878e-06, "loss": 0.0317, "step": 2195 }, { "epoch": 0.49965870307167237, "grad_norm": 0.8699498301198716, "learning_rate": 1.2194793124146447e-06, "loss": 0.0291, "step": 2196 }, { "epoch": 0.4998862343572241, "grad_norm": 1.4610167793964797, "learning_rate": 1.2194517256346333e-06, "loss": 0.0701, "step": 2197 }, { "epoch": 0.5001137656427759, "grad_norm": 1.2434868855241086, "learning_rate": 1.2194241267051172e-06, "loss": 0.0454, "step": 2198 }, { "epoch": 0.5003412969283276, "grad_norm": 1.4551693858285264, "learning_rate": 1.2193965156266605e-06, "loss": 0.0547, "step": 2199 }, { "epoch": 0.5005688282138794, "grad_norm": 1.9356847413883649, "learning_rate": 1.2193688923998277e-06, "loss": 0.0644, "step": 2200 }, { "epoch": 0.5007963594994311, "grad_norm": 1.1503513652252215, "learning_rate": 1.219341257025183e-06, "loss": 0.0474, "step": 2201 }, { "epoch": 0.5010238907849829, "grad_norm": 1.5368491876593886, "learning_rate": 1.2193136095032918e-06, "loss": 0.0573, "step": 2202 }, { "epoch": 0.5012514220705347, "grad_norm": 1.7879629590621755, "learning_rate": 1.2192859498347189e-06, "loss": 0.0917, "step": 2203 }, { "epoch": 0.5014789533560865, "grad_norm": 1.5178773819159281, "learning_rate": 1.2192582780200293e-06, "loss": 0.0622, "step": 2204 }, { "epoch": 0.5017064846416383, "grad_norm": 1.3331971965539129, "learning_rate": 1.219230594059789e-06, "loss": 0.0649, "step": 2205 }, { "epoch": 0.50193401592719, "grad_norm": 2.165221254848427, "learning_rate": 1.2192028979545637e-06, "loss": 0.1342, "step": 2206 }, { "epoch": 0.5021615472127418, "grad_norm": 1.743824072315693, "learning_rate": 1.2191751897049192e-06, "loss": 0.0792, "step": 2207 }, { "epoch": 0.5023890784982935, "grad_norm": 1.3495321864370744, "learning_rate": 1.2191474693114223e-06, "loss": 0.0553, "step": 2208 }, { "epoch": 0.5026166097838453, "grad_norm": 1.2047719291625048, "learning_rate": 1.2191197367746389e-06, "loss": 0.0493, "step": 2209 }, { "epoch": 0.502844141069397, "grad_norm": 0.7568498406042676, "learning_rate": 1.2190919920951363e-06, "loss": 0.0425, "step": 2210 }, { "epoch": 0.5030716723549488, "grad_norm": 1.0905266622637186, "learning_rate": 1.2190642352734814e-06, "loss": 0.0392, "step": 2211 }, { "epoch": 0.5032992036405005, "grad_norm": 1.0772245840535826, "learning_rate": 1.2190364663102417e-06, "loss": 0.0602, "step": 2212 }, { "epoch": 0.5035267349260524, "grad_norm": 1.2112393942159567, "learning_rate": 1.2190086852059844e-06, "loss": 0.0561, "step": 2213 }, { "epoch": 0.5037542662116041, "grad_norm": 1.164381821277193, "learning_rate": 1.2189808919612773e-06, "loss": 0.0567, "step": 2214 }, { "epoch": 0.5039817974971559, "grad_norm": 1.980841011359919, "learning_rate": 1.2189530865766888e-06, "loss": 0.1002, "step": 2215 }, { "epoch": 0.5042093287827076, "grad_norm": 1.0195390074518678, "learning_rate": 1.218925269052787e-06, "loss": 0.0378, "step": 2216 }, { "epoch": 0.5044368600682594, "grad_norm": 1.58039989452312, "learning_rate": 1.2188974393901404e-06, "loss": 0.079, "step": 2217 }, { "epoch": 0.5046643913538111, "grad_norm": 1.7632794902075288, "learning_rate": 1.2188695975893177e-06, "loss": 0.0704, "step": 2218 }, { "epoch": 0.5048919226393629, "grad_norm": 1.8641645104616156, "learning_rate": 1.218841743650888e-06, "loss": 0.0767, "step": 2219 }, { "epoch": 0.5051194539249146, "grad_norm": 0.7736340615610193, "learning_rate": 1.2188138775754207e-06, "loss": 0.0255, "step": 2220 }, { "epoch": 0.5053469852104664, "grad_norm": 1.706014884062171, "learning_rate": 1.2187859993634854e-06, "loss": 0.0842, "step": 2221 }, { "epoch": 0.5055745164960183, "grad_norm": 1.0275306984300476, "learning_rate": 1.2187581090156518e-06, "loss": 0.0372, "step": 2222 }, { "epoch": 0.50580204778157, "grad_norm": 1.560530367319729, "learning_rate": 1.2187302065324896e-06, "loss": 0.0957, "step": 2223 }, { "epoch": 0.5060295790671218, "grad_norm": 1.1142447765551218, "learning_rate": 1.2187022919145695e-06, "loss": 0.0372, "step": 2224 }, { "epoch": 0.5062571103526735, "grad_norm": 2.0014178600777157, "learning_rate": 1.2186743651624617e-06, "loss": 0.1222, "step": 2225 }, { "epoch": 0.5064846416382253, "grad_norm": 0.6371915222683989, "learning_rate": 1.2186464262767372e-06, "loss": 0.0303, "step": 2226 }, { "epoch": 0.506712172923777, "grad_norm": 1.1677216592762003, "learning_rate": 1.2186184752579671e-06, "loss": 0.0375, "step": 2227 }, { "epoch": 0.5069397042093288, "grad_norm": 1.0363110917575087, "learning_rate": 1.2185905121067223e-06, "loss": 0.0465, "step": 2228 }, { "epoch": 0.5071672354948805, "grad_norm": 0.7880205913361013, "learning_rate": 1.218562536823575e-06, "loss": 0.0461, "step": 2229 }, { "epoch": 0.5073947667804323, "grad_norm": 1.5494942731094494, "learning_rate": 1.2185345494090959e-06, "loss": 0.0633, "step": 2230 }, { "epoch": 0.507622298065984, "grad_norm": 1.24395335626353, "learning_rate": 1.218506549863858e-06, "loss": 0.037, "step": 2231 }, { "epoch": 0.5078498293515359, "grad_norm": 0.8497919211723142, "learning_rate": 1.2184785381884332e-06, "loss": 0.0281, "step": 2232 }, { "epoch": 0.5080773606370876, "grad_norm": 1.6659795749856976, "learning_rate": 1.218450514383394e-06, "loss": 0.0684, "step": 2233 }, { "epoch": 0.5083048919226394, "grad_norm": 1.2993624404216275, "learning_rate": 1.2184224784493127e-06, "loss": 0.0737, "step": 2234 }, { "epoch": 0.5085324232081911, "grad_norm": 2.2363218045516287, "learning_rate": 1.218394430386763e-06, "loss": 0.1023, "step": 2235 }, { "epoch": 0.5087599544937429, "grad_norm": 1.9367152712705378, "learning_rate": 1.2183663701963181e-06, "loss": 0.0801, "step": 2236 }, { "epoch": 0.5089874857792946, "grad_norm": 1.683178965938102, "learning_rate": 1.218338297878551e-06, "loss": 0.0511, "step": 2237 }, { "epoch": 0.5092150170648464, "grad_norm": 1.2570305483870696, "learning_rate": 1.2183102134340361e-06, "loss": 0.0531, "step": 2238 }, { "epoch": 0.5094425483503981, "grad_norm": 1.3387251635869548, "learning_rate": 1.2182821168633466e-06, "loss": 0.0488, "step": 2239 }, { "epoch": 0.5096700796359499, "grad_norm": 0.9109698538663267, "learning_rate": 1.2182540081670574e-06, "loss": 0.0297, "step": 2240 }, { "epoch": 0.5098976109215017, "grad_norm": 2.0458637944350917, "learning_rate": 1.2182258873457428e-06, "loss": 0.0932, "step": 2241 }, { "epoch": 0.5101251422070535, "grad_norm": 1.546517047083001, "learning_rate": 1.2181977543999776e-06, "loss": 0.0831, "step": 2242 }, { "epoch": 0.5103526734926053, "grad_norm": 0.5799180817857912, "learning_rate": 1.2181696093303363e-06, "loss": 0.0246, "step": 2243 }, { "epoch": 0.510580204778157, "grad_norm": 1.5842605922722366, "learning_rate": 1.218141452137395e-06, "loss": 0.1109, "step": 2244 }, { "epoch": 0.5108077360637088, "grad_norm": 1.1518936456507234, "learning_rate": 1.2181132828217284e-06, "loss": 0.0392, "step": 2245 }, { "epoch": 0.5110352673492605, "grad_norm": 1.5601510600984896, "learning_rate": 1.2180851013839127e-06, "loss": 0.0516, "step": 2246 }, { "epoch": 0.5112627986348123, "grad_norm": 1.1933267652037376, "learning_rate": 1.2180569078245236e-06, "loss": 0.0768, "step": 2247 }, { "epoch": 0.511490329920364, "grad_norm": 1.0276357760825077, "learning_rate": 1.2180287021441372e-06, "loss": 0.0392, "step": 2248 }, { "epoch": 0.5117178612059158, "grad_norm": 1.4568173829581255, "learning_rate": 1.2180004843433305e-06, "loss": 0.0711, "step": 2249 }, { "epoch": 0.5119453924914675, "grad_norm": 1.6296361312434235, "learning_rate": 1.21797225442268e-06, "loss": 0.061, "step": 2250 }, { "epoch": 0.5121729237770194, "grad_norm": 1.4099321985545326, "learning_rate": 1.2179440123827625e-06, "loss": 0.0562, "step": 2251 }, { "epoch": 0.5124004550625711, "grad_norm": 1.0111117892815282, "learning_rate": 1.2179157582241554e-06, "loss": 0.0425, "step": 2252 }, { "epoch": 0.5126279863481229, "grad_norm": 1.100003009354464, "learning_rate": 1.2178874919474359e-06, "loss": 0.059, "step": 2253 }, { "epoch": 0.5128555176336747, "grad_norm": 1.325363901130602, "learning_rate": 1.217859213553182e-06, "loss": 0.0711, "step": 2254 }, { "epoch": 0.5130830489192264, "grad_norm": 0.846300456406513, "learning_rate": 1.2178309230419714e-06, "loss": 0.0271, "step": 2255 }, { "epoch": 0.5133105802047782, "grad_norm": 1.2410062157733406, "learning_rate": 1.2178026204143827e-06, "loss": 0.0556, "step": 2256 }, { "epoch": 0.5135381114903299, "grad_norm": 1.6420974990981174, "learning_rate": 1.217774305670994e-06, "loss": 0.0712, "step": 2257 }, { "epoch": 0.5137656427758817, "grad_norm": 1.2130253789178054, "learning_rate": 1.2177459788123841e-06, "loss": 0.045, "step": 2258 }, { "epoch": 0.5139931740614334, "grad_norm": 1.5302885687128625, "learning_rate": 1.217717639839132e-06, "loss": 0.0788, "step": 2259 }, { "epoch": 0.5142207053469852, "grad_norm": 0.9160947105126066, "learning_rate": 1.2176892887518166e-06, "loss": 0.0381, "step": 2260 }, { "epoch": 0.514448236632537, "grad_norm": 0.9460716271280594, "learning_rate": 1.2176609255510176e-06, "loss": 0.0453, "step": 2261 }, { "epoch": 0.5146757679180888, "grad_norm": 0.6452347430540285, "learning_rate": 1.217632550237315e-06, "loss": 0.0225, "step": 2262 }, { "epoch": 0.5149032992036405, "grad_norm": 1.1495647599088414, "learning_rate": 1.2176041628112884e-06, "loss": 0.0506, "step": 2263 }, { "epoch": 0.5151308304891923, "grad_norm": 1.4009996525001283, "learning_rate": 1.217575763273518e-06, "loss": 0.0667, "step": 2264 }, { "epoch": 0.515358361774744, "grad_norm": 0.9674758245027556, "learning_rate": 1.217547351624584e-06, "loss": 0.0383, "step": 2265 }, { "epoch": 0.5155858930602958, "grad_norm": 1.6049309598180266, "learning_rate": 1.2175189278650677e-06, "loss": 0.0685, "step": 2266 }, { "epoch": 0.5158134243458475, "grad_norm": 1.047951830927466, "learning_rate": 1.2174904919955493e-06, "loss": 0.031, "step": 2267 }, { "epoch": 0.5160409556313993, "grad_norm": 1.0060223411128129, "learning_rate": 1.2174620440166105e-06, "loss": 0.0442, "step": 2268 }, { "epoch": 0.516268486916951, "grad_norm": 1.2945214325099708, "learning_rate": 1.2174335839288326e-06, "loss": 0.0558, "step": 2269 }, { "epoch": 0.5164960182025028, "grad_norm": 1.2199251228288925, "learning_rate": 1.2174051117327972e-06, "loss": 0.0398, "step": 2270 }, { "epoch": 0.5167235494880547, "grad_norm": 1.8177803789021154, "learning_rate": 1.2173766274290864e-06, "loss": 0.0618, "step": 2271 }, { "epoch": 0.5169510807736064, "grad_norm": 2.1534503937526557, "learning_rate": 1.217348131018282e-06, "loss": 0.0946, "step": 2272 }, { "epoch": 0.5171786120591582, "grad_norm": 1.335439582615335, "learning_rate": 1.2173196225009666e-06, "loss": 0.0467, "step": 2273 }, { "epoch": 0.5174061433447099, "grad_norm": 3.39634994855923, "learning_rate": 1.217291101877723e-06, "loss": 0.1597, "step": 2274 }, { "epoch": 0.5176336746302617, "grad_norm": 13.151087600523141, "learning_rate": 1.2172625691491342e-06, "loss": 0.061, "step": 2275 }, { "epoch": 0.5178612059158134, "grad_norm": 0.9388770540310672, "learning_rate": 1.217234024315783e-06, "loss": 0.0469, "step": 2276 }, { "epoch": 0.5180887372013652, "grad_norm": 1.237623086215354, "learning_rate": 1.2172054673782527e-06, "loss": 0.0494, "step": 2277 }, { "epoch": 0.5183162684869169, "grad_norm": 1.3918576506801177, "learning_rate": 1.2171768983371276e-06, "loss": 0.0688, "step": 2278 }, { "epoch": 0.5185437997724687, "grad_norm": 1.2775225468090683, "learning_rate": 1.217148317192991e-06, "loss": 0.034, "step": 2279 }, { "epoch": 0.5187713310580204, "grad_norm": 1.2629250238471204, "learning_rate": 1.2171197239464274e-06, "loss": 0.0477, "step": 2280 }, { "epoch": 0.5189988623435723, "grad_norm": 0.9222994348691986, "learning_rate": 1.217091118598021e-06, "loss": 0.0518, "step": 2281 }, { "epoch": 0.519226393629124, "grad_norm": 1.1733465242259884, "learning_rate": 1.2170625011483565e-06, "loss": 0.0362, "step": 2282 }, { "epoch": 0.5194539249146758, "grad_norm": 1.0540409111201516, "learning_rate": 1.2170338715980187e-06, "loss": 0.0333, "step": 2283 }, { "epoch": 0.5196814562002275, "grad_norm": 1.0390405907126188, "learning_rate": 1.217005229947593e-06, "loss": 0.0307, "step": 2284 }, { "epoch": 0.5199089874857793, "grad_norm": 0.7575429344893744, "learning_rate": 1.2169765761976646e-06, "loss": 0.0416, "step": 2285 }, { "epoch": 0.520136518771331, "grad_norm": 1.3326371343495533, "learning_rate": 1.216947910348819e-06, "loss": 0.054, "step": 2286 }, { "epoch": 0.5203640500568828, "grad_norm": 1.386966435546428, "learning_rate": 1.2169192324016423e-06, "loss": 0.0552, "step": 2287 }, { "epoch": 0.5205915813424346, "grad_norm": 1.957649195968788, "learning_rate": 1.2168905423567205e-06, "loss": 0.0941, "step": 2288 }, { "epoch": 0.5208191126279863, "grad_norm": 2.448802297286155, "learning_rate": 1.21686184021464e-06, "loss": 0.1116, "step": 2289 }, { "epoch": 0.5210466439135382, "grad_norm": 1.1844985987836856, "learning_rate": 1.2168331259759875e-06, "loss": 0.0449, "step": 2290 }, { "epoch": 0.5212741751990899, "grad_norm": 1.8104263725287626, "learning_rate": 1.2168043996413497e-06, "loss": 0.0575, "step": 2291 }, { "epoch": 0.5215017064846417, "grad_norm": 1.1147839216238626, "learning_rate": 1.216775661211314e-06, "loss": 0.05, "step": 2292 }, { "epoch": 0.5217292377701934, "grad_norm": 1.543703515430326, "learning_rate": 1.2167469106864673e-06, "loss": 0.0813, "step": 2293 }, { "epoch": 0.5219567690557452, "grad_norm": 1.5787563749864177, "learning_rate": 1.2167181480673977e-06, "loss": 0.0843, "step": 2294 }, { "epoch": 0.5221843003412969, "grad_norm": 1.3389543319065593, "learning_rate": 1.2166893733546927e-06, "loss": 0.0854, "step": 2295 }, { "epoch": 0.5224118316268487, "grad_norm": 1.79843376105108, "learning_rate": 1.2166605865489406e-06, "loss": 0.111, "step": 2296 }, { "epoch": 0.5226393629124004, "grad_norm": 1.2843560350462406, "learning_rate": 1.2166317876507296e-06, "loss": 0.0617, "step": 2297 }, { "epoch": 0.5228668941979522, "grad_norm": 1.2028371451089743, "learning_rate": 1.2166029766606486e-06, "loss": 0.0721, "step": 2298 }, { "epoch": 0.5230944254835039, "grad_norm": 2.882673882589606, "learning_rate": 1.2165741535792861e-06, "loss": 0.1223, "step": 2299 }, { "epoch": 0.5233219567690558, "grad_norm": 1.3155493992738794, "learning_rate": 1.2165453184072312e-06, "loss": 0.0591, "step": 2300 }, { "epoch": 0.5235494880546075, "grad_norm": 1.6629360145623788, "learning_rate": 1.2165164711450735e-06, "loss": 0.0775, "step": 2301 }, { "epoch": 0.5237770193401593, "grad_norm": 1.3013507571586798, "learning_rate": 1.2164876117934024e-06, "loss": 0.0638, "step": 2302 }, { "epoch": 0.524004550625711, "grad_norm": 2.648828931200833, "learning_rate": 1.2164587403528078e-06, "loss": 0.0864, "step": 2303 }, { "epoch": 0.5242320819112628, "grad_norm": 1.4929308932558032, "learning_rate": 1.2164298568238797e-06, "loss": 0.0679, "step": 2304 }, { "epoch": 0.5244596131968146, "grad_norm": 0.8718556925311854, "learning_rate": 1.2164009612072085e-06, "loss": 0.0359, "step": 2305 }, { "epoch": 0.5246871444823663, "grad_norm": 0.9783095053030553, "learning_rate": 1.2163720535033845e-06, "loss": 0.0442, "step": 2306 }, { "epoch": 0.5249146757679181, "grad_norm": 1.5130471240864227, "learning_rate": 1.2163431337129993e-06, "loss": 0.0512, "step": 2307 }, { "epoch": 0.5251422070534698, "grad_norm": 1.5432509321722079, "learning_rate": 1.216314201836643e-06, "loss": 0.0704, "step": 2308 }, { "epoch": 0.5253697383390216, "grad_norm": 1.069477672250475, "learning_rate": 1.2162852578749076e-06, "loss": 0.0445, "step": 2309 }, { "epoch": 0.5255972696245734, "grad_norm": 1.831811758429303, "learning_rate": 1.2162563018283843e-06, "loss": 0.086, "step": 2310 }, { "epoch": 0.5258248009101252, "grad_norm": 0.9600707211513121, "learning_rate": 1.216227333697665e-06, "loss": 0.0387, "step": 2311 }, { "epoch": 0.5260523321956769, "grad_norm": 1.0398262718871747, "learning_rate": 1.216198353483342e-06, "loss": 0.0336, "step": 2312 }, { "epoch": 0.5262798634812287, "grad_norm": 0.7659268750272631, "learning_rate": 1.2161693611860072e-06, "loss": 0.0292, "step": 2313 }, { "epoch": 0.5265073947667804, "grad_norm": 1.003194143526719, "learning_rate": 1.2161403568062533e-06, "loss": 0.0446, "step": 2314 }, { "epoch": 0.5267349260523322, "grad_norm": 0.8822650643996454, "learning_rate": 1.2161113403446733e-06, "loss": 0.036, "step": 2315 }, { "epoch": 0.5269624573378839, "grad_norm": 1.2445369505027863, "learning_rate": 1.21608231180186e-06, "loss": 0.056, "step": 2316 }, { "epoch": 0.5271899886234357, "grad_norm": 1.9110079675084388, "learning_rate": 1.2160532711784066e-06, "loss": 0.085, "step": 2317 }, { "epoch": 0.5274175199089874, "grad_norm": 1.3201545262151622, "learning_rate": 1.2160242184749069e-06, "loss": 0.0596, "step": 2318 }, { "epoch": 0.5276450511945392, "grad_norm": 1.3929491912851009, "learning_rate": 1.2159951536919547e-06, "loss": 0.0575, "step": 2319 }, { "epoch": 0.5278725824800911, "grad_norm": 1.4551403932656901, "learning_rate": 1.2159660768301438e-06, "loss": 0.0492, "step": 2320 }, { "epoch": 0.5281001137656428, "grad_norm": 1.4130548917935428, "learning_rate": 1.2159369878900687e-06, "loss": 0.047, "step": 2321 }, { "epoch": 0.5283276450511946, "grad_norm": 1.4452980530222412, "learning_rate": 1.2159078868723238e-06, "loss": 0.0525, "step": 2322 }, { "epoch": 0.5285551763367463, "grad_norm": 1.6104531531662376, "learning_rate": 1.2158787737775037e-06, "loss": 0.0755, "step": 2323 }, { "epoch": 0.5287827076222981, "grad_norm": 0.8559219056147712, "learning_rate": 1.2158496486062039e-06, "loss": 0.0415, "step": 2324 }, { "epoch": 0.5290102389078498, "grad_norm": 1.789699350536378, "learning_rate": 1.215820511359019e-06, "loss": 0.081, "step": 2325 }, { "epoch": 0.5292377701934016, "grad_norm": 1.3025849756724603, "learning_rate": 1.215791362036545e-06, "loss": 0.0485, "step": 2326 }, { "epoch": 0.5294653014789533, "grad_norm": 1.8682155789508528, "learning_rate": 1.2157622006393777e-06, "loss": 0.0634, "step": 2327 }, { "epoch": 0.5296928327645051, "grad_norm": 1.176130717608952, "learning_rate": 1.2157330271681129e-06, "loss": 0.039, "step": 2328 }, { "epoch": 0.5299203640500569, "grad_norm": 1.5070209136147386, "learning_rate": 1.215703841623347e-06, "loss": 0.0515, "step": 2329 }, { "epoch": 0.5301478953356087, "grad_norm": 1.2847130041310237, "learning_rate": 1.2156746440056762e-06, "loss": 0.0515, "step": 2330 }, { "epoch": 0.5303754266211604, "grad_norm": 2.141191883255034, "learning_rate": 1.2156454343156976e-06, "loss": 0.0828, "step": 2331 }, { "epoch": 0.5306029579067122, "grad_norm": 0.9805091402589389, "learning_rate": 1.2156162125540078e-06, "loss": 0.0315, "step": 2332 }, { "epoch": 0.5308304891922639, "grad_norm": 0.7809668560849007, "learning_rate": 1.2155869787212046e-06, "loss": 0.0365, "step": 2333 }, { "epoch": 0.5310580204778157, "grad_norm": 1.0616410014514985, "learning_rate": 1.215557732817885e-06, "loss": 0.0754, "step": 2334 }, { "epoch": 0.5312855517633674, "grad_norm": 1.74228651169713, "learning_rate": 1.2155284748446469e-06, "loss": 0.0628, "step": 2335 }, { "epoch": 0.5315130830489192, "grad_norm": 1.8258581718716098, "learning_rate": 1.2154992048020882e-06, "loss": 0.0957, "step": 2336 }, { "epoch": 0.531740614334471, "grad_norm": 1.5209500494569268, "learning_rate": 1.2154699226908072e-06, "loss": 0.0719, "step": 2337 }, { "epoch": 0.5319681456200227, "grad_norm": 0.9184689674787639, "learning_rate": 1.2154406285114025e-06, "loss": 0.0512, "step": 2338 }, { "epoch": 0.5321956769055746, "grad_norm": 0.92211428137911, "learning_rate": 1.2154113222644727e-06, "loss": 0.048, "step": 2339 }, { "epoch": 0.5324232081911263, "grad_norm": 1.2486653702619341, "learning_rate": 1.2153820039506167e-06, "loss": 0.036, "step": 2340 }, { "epoch": 0.5326507394766781, "grad_norm": 1.4084664381645906, "learning_rate": 1.2153526735704337e-06, "loss": 0.0641, "step": 2341 }, { "epoch": 0.5328782707622298, "grad_norm": 1.006089995573004, "learning_rate": 1.2153233311245234e-06, "loss": 0.0471, "step": 2342 }, { "epoch": 0.5331058020477816, "grad_norm": 1.2533267107040431, "learning_rate": 1.2152939766134852e-06, "loss": 0.044, "step": 2343 }, { "epoch": 0.5333333333333333, "grad_norm": 1.7169848907323204, "learning_rate": 1.2152646100379193e-06, "loss": 0.0979, "step": 2344 }, { "epoch": 0.5335608646188851, "grad_norm": 1.1457088139957912, "learning_rate": 1.2152352313984257e-06, "loss": 0.0539, "step": 2345 }, { "epoch": 0.5337883959044368, "grad_norm": 0.9527962003003786, "learning_rate": 1.2152058406956049e-06, "loss": 0.0389, "step": 2346 }, { "epoch": 0.5340159271899886, "grad_norm": 1.5909759190824926, "learning_rate": 1.2151764379300578e-06, "loss": 0.058, "step": 2347 }, { "epoch": 0.5342434584755403, "grad_norm": 1.065418317407388, "learning_rate": 1.2151470231023851e-06, "loss": 0.0304, "step": 2348 }, { "epoch": 0.5344709897610922, "grad_norm": 1.007958090684921, "learning_rate": 1.2151175962131881e-06, "loss": 0.049, "step": 2349 }, { "epoch": 0.534698521046644, "grad_norm": 1.7395647605444997, "learning_rate": 1.215088157263068e-06, "loss": 0.06, "step": 2350 }, { "epoch": 0.5349260523321957, "grad_norm": 1.0555560500880805, "learning_rate": 1.2150587062526267e-06, "loss": 0.0466, "step": 2351 }, { "epoch": 0.5351535836177475, "grad_norm": 1.4330290353052704, "learning_rate": 1.2150292431824663e-06, "loss": 0.1029, "step": 2352 }, { "epoch": 0.5353811149032992, "grad_norm": 1.0120120325632629, "learning_rate": 1.2149997680531886e-06, "loss": 0.0372, "step": 2353 }, { "epoch": 0.535608646188851, "grad_norm": 1.6445264766335106, "learning_rate": 1.214970280865396e-06, "loss": 0.0601, "step": 2354 }, { "epoch": 0.5358361774744027, "grad_norm": 1.479298949629959, "learning_rate": 1.2149407816196917e-06, "loss": 0.0713, "step": 2355 }, { "epoch": 0.5360637087599545, "grad_norm": 1.0674415697842976, "learning_rate": 1.214911270316678e-06, "loss": 0.0529, "step": 2356 }, { "epoch": 0.5362912400455062, "grad_norm": 1.1601322180484739, "learning_rate": 1.2148817469569584e-06, "loss": 0.0762, "step": 2357 }, { "epoch": 0.5365187713310581, "grad_norm": 0.6289268827845146, "learning_rate": 1.214852211541136e-06, "loss": 0.0271, "step": 2358 }, { "epoch": 0.5367463026166098, "grad_norm": 1.127391002234741, "learning_rate": 1.2148226640698148e-06, "loss": 0.0374, "step": 2359 }, { "epoch": 0.5369738339021616, "grad_norm": 1.4660398732133397, "learning_rate": 1.2147931045435988e-06, "loss": 0.0851, "step": 2360 }, { "epoch": 0.5372013651877133, "grad_norm": 1.4378197818055753, "learning_rate": 1.2147635329630916e-06, "loss": 0.0887, "step": 2361 }, { "epoch": 0.5374288964732651, "grad_norm": 1.0407552632451906, "learning_rate": 1.214733949328898e-06, "loss": 0.0274, "step": 2362 }, { "epoch": 0.5376564277588168, "grad_norm": 1.2421238364546752, "learning_rate": 1.2147043536416226e-06, "loss": 0.0547, "step": 2363 }, { "epoch": 0.5378839590443686, "grad_norm": 1.9182602099931712, "learning_rate": 1.21467474590187e-06, "loss": 0.1061, "step": 2364 }, { "epoch": 0.5381114903299203, "grad_norm": 1.7756462089099831, "learning_rate": 1.2146451261102458e-06, "loss": 0.0633, "step": 2365 }, { "epoch": 0.5383390216154721, "grad_norm": 0.9945218699288574, "learning_rate": 1.2146154942673548e-06, "loss": 0.0458, "step": 2366 }, { "epoch": 0.5385665529010238, "grad_norm": 0.8213845393107763, "learning_rate": 1.2145858503738032e-06, "loss": 0.0425, "step": 2367 }, { "epoch": 0.5387940841865757, "grad_norm": 1.8457471393460891, "learning_rate": 1.2145561944301963e-06, "loss": 0.0922, "step": 2368 }, { "epoch": 0.5390216154721275, "grad_norm": 1.264742540597221, "learning_rate": 1.2145265264371406e-06, "loss": 0.0668, "step": 2369 }, { "epoch": 0.5392491467576792, "grad_norm": 0.8301569695908675, "learning_rate": 1.2144968463952425e-06, "loss": 0.0312, "step": 2370 }, { "epoch": 0.539476678043231, "grad_norm": 1.4005462540595164, "learning_rate": 1.2144671543051085e-06, "loss": 0.0385, "step": 2371 }, { "epoch": 0.5397042093287827, "grad_norm": 1.0427937857498775, "learning_rate": 1.2144374501673454e-06, "loss": 0.0405, "step": 2372 }, { "epoch": 0.5399317406143345, "grad_norm": 2.509030531929466, "learning_rate": 1.2144077339825603e-06, "loss": 0.129, "step": 2373 }, { "epoch": 0.5401592718998862, "grad_norm": 1.4490653559115954, "learning_rate": 1.2143780057513605e-06, "loss": 0.076, "step": 2374 }, { "epoch": 0.540386803185438, "grad_norm": 1.1167399295770026, "learning_rate": 1.2143482654743535e-06, "loss": 0.0433, "step": 2375 }, { "epoch": 0.5406143344709897, "grad_norm": 1.1732119937209518, "learning_rate": 1.2143185131521475e-06, "loss": 0.0639, "step": 2376 }, { "epoch": 0.5408418657565415, "grad_norm": 1.083852720432599, "learning_rate": 1.2142887487853503e-06, "loss": 0.0534, "step": 2377 }, { "epoch": 0.5410693970420933, "grad_norm": 1.2045837567267914, "learning_rate": 1.2142589723745705e-06, "loss": 0.047, "step": 2378 }, { "epoch": 0.5412969283276451, "grad_norm": 1.149610953840798, "learning_rate": 1.2142291839204163e-06, "loss": 0.0451, "step": 2379 }, { "epoch": 0.5415244596131968, "grad_norm": 1.17145770612206, "learning_rate": 1.2141993834234967e-06, "loss": 0.0594, "step": 2380 }, { "epoch": 0.5417519908987486, "grad_norm": 0.8279362230767302, "learning_rate": 1.2141695708844209e-06, "loss": 0.0331, "step": 2381 }, { "epoch": 0.5419795221843003, "grad_norm": 1.1783984776248138, "learning_rate": 1.214139746303798e-06, "loss": 0.0479, "step": 2382 }, { "epoch": 0.5422070534698521, "grad_norm": 1.6263426873577607, "learning_rate": 1.2141099096822376e-06, "loss": 0.0788, "step": 2383 }, { "epoch": 0.5424345847554038, "grad_norm": 1.339603186745851, "learning_rate": 1.2140800610203497e-06, "loss": 0.0819, "step": 2384 }, { "epoch": 0.5426621160409556, "grad_norm": 1.1850152950049393, "learning_rate": 1.214050200318744e-06, "loss": 0.0515, "step": 2385 }, { "epoch": 0.5428896473265074, "grad_norm": 1.4608498983467832, "learning_rate": 1.214020327578031e-06, "loss": 0.0528, "step": 2386 }, { "epoch": 0.5431171786120591, "grad_norm": 1.1158219646844105, "learning_rate": 1.2139904427988213e-06, "loss": 0.0328, "step": 2387 }, { "epoch": 0.543344709897611, "grad_norm": 0.8262393644685219, "learning_rate": 1.2139605459817259e-06, "loss": 0.0415, "step": 2388 }, { "epoch": 0.5435722411831627, "grad_norm": 1.096976415569224, "learning_rate": 1.2139306371273552e-06, "loss": 0.0619, "step": 2389 }, { "epoch": 0.5437997724687145, "grad_norm": 0.8704738503788053, "learning_rate": 1.213900716236321e-06, "loss": 0.0522, "step": 2390 }, { "epoch": 0.5440273037542662, "grad_norm": 0.9955866102135448, "learning_rate": 1.2138707833092345e-06, "loss": 0.0606, "step": 2391 }, { "epoch": 0.544254835039818, "grad_norm": 1.488547151459133, "learning_rate": 1.213840838346708e-06, "loss": 0.0871, "step": 2392 }, { "epoch": 0.5444823663253697, "grad_norm": 1.3949089434078865, "learning_rate": 1.213810881349353e-06, "loss": 0.0833, "step": 2393 }, { "epoch": 0.5447098976109215, "grad_norm": 1.5448754824040467, "learning_rate": 1.213780912317782e-06, "loss": 0.0567, "step": 2394 }, { "epoch": 0.5449374288964732, "grad_norm": 1.2930562589697765, "learning_rate": 1.2137509312526074e-06, "loss": 0.0486, "step": 2395 }, { "epoch": 0.545164960182025, "grad_norm": 0.8149733951933631, "learning_rate": 1.213720938154442e-06, "loss": 0.0447, "step": 2396 }, { "epoch": 0.5453924914675768, "grad_norm": 1.5207149596968195, "learning_rate": 1.2136909330238988e-06, "loss": 0.0712, "step": 2397 }, { "epoch": 0.5456200227531286, "grad_norm": 1.6094750936001516, "learning_rate": 1.213660915861591e-06, "loss": 0.0717, "step": 2398 }, { "epoch": 0.5458475540386803, "grad_norm": 1.0354263989549572, "learning_rate": 1.2136308866681323e-06, "loss": 0.0543, "step": 2399 }, { "epoch": 0.5460750853242321, "grad_norm": 1.312237280962152, "learning_rate": 1.2136008454441363e-06, "loss": 0.0651, "step": 2400 }, { "epoch": 0.5463026166097839, "grad_norm": 0.9265942352701293, "learning_rate": 1.213570792190217e-06, "loss": 0.0399, "step": 2401 }, { "epoch": 0.5465301478953356, "grad_norm": 1.3165932395587605, "learning_rate": 1.2135407269069885e-06, "loss": 0.0711, "step": 2402 }, { "epoch": 0.5467576791808874, "grad_norm": 1.3250744955676776, "learning_rate": 1.2135106495950655e-06, "loss": 0.055, "step": 2403 }, { "epoch": 0.5469852104664391, "grad_norm": 1.1269361373493476, "learning_rate": 1.2134805602550625e-06, "loss": 0.0391, "step": 2404 }, { "epoch": 0.5472127417519909, "grad_norm": 1.6294590878339321, "learning_rate": 1.2134504588875948e-06, "loss": 0.0752, "step": 2405 }, { "epoch": 0.5474402730375426, "grad_norm": 1.942161162101733, "learning_rate": 1.2134203454932772e-06, "loss": 0.111, "step": 2406 }, { "epoch": 0.5476678043230945, "grad_norm": 0.9091105494763078, "learning_rate": 1.2133902200727256e-06, "loss": 0.0366, "step": 2407 }, { "epoch": 0.5478953356086462, "grad_norm": 0.8734404453486602, "learning_rate": 1.2133600826265555e-06, "loss": 0.0339, "step": 2408 }, { "epoch": 0.548122866894198, "grad_norm": 1.4745463801324197, "learning_rate": 1.2133299331553826e-06, "loss": 0.0446, "step": 2409 }, { "epoch": 0.5483503981797497, "grad_norm": 0.9275974290923358, "learning_rate": 1.2132997716598236e-06, "loss": 0.0395, "step": 2410 }, { "epoch": 0.5485779294653015, "grad_norm": 0.8344512585289461, "learning_rate": 1.2132695981404943e-06, "loss": 0.0315, "step": 2411 }, { "epoch": 0.5488054607508532, "grad_norm": 1.6903517654405738, "learning_rate": 1.2132394125980122e-06, "loss": 0.0618, "step": 2412 }, { "epoch": 0.549032992036405, "grad_norm": 1.5596571212256216, "learning_rate": 1.2132092150329936e-06, "loss": 0.0623, "step": 2413 }, { "epoch": 0.5492605233219567, "grad_norm": 1.5970478431414654, "learning_rate": 1.213179005446056e-06, "loss": 0.0676, "step": 2414 }, { "epoch": 0.5494880546075085, "grad_norm": 1.1358043470989139, "learning_rate": 1.2131487838378167e-06, "loss": 0.0444, "step": 2415 }, { "epoch": 0.5497155858930602, "grad_norm": 0.7736657460795575, "learning_rate": 1.2131185502088932e-06, "loss": 0.0234, "step": 2416 }, { "epoch": 0.5499431171786121, "grad_norm": 1.0171839684581578, "learning_rate": 1.213088304559904e-06, "loss": 0.0354, "step": 2417 }, { "epoch": 0.5501706484641639, "grad_norm": 1.5638848346839926, "learning_rate": 1.2130580468914665e-06, "loss": 0.0907, "step": 2418 }, { "epoch": 0.5503981797497156, "grad_norm": 1.195018951013028, "learning_rate": 1.2130277772041999e-06, "loss": 0.0492, "step": 2419 }, { "epoch": 0.5506257110352674, "grad_norm": 1.0599309139929776, "learning_rate": 1.2129974954987222e-06, "loss": 0.0468, "step": 2420 }, { "epoch": 0.5508532423208191, "grad_norm": 0.9174363109363153, "learning_rate": 1.2129672017756524e-06, "loss": 0.0396, "step": 2421 }, { "epoch": 0.5510807736063709, "grad_norm": 0.996062027313234, "learning_rate": 1.2129368960356102e-06, "loss": 0.0461, "step": 2422 }, { "epoch": 0.5513083048919226, "grad_norm": 1.1748800644809907, "learning_rate": 1.2129065782792142e-06, "loss": 0.0468, "step": 2423 }, { "epoch": 0.5515358361774744, "grad_norm": 1.0572109261778357, "learning_rate": 1.2128762485070848e-06, "loss": 0.0337, "step": 2424 }, { "epoch": 0.5517633674630261, "grad_norm": 0.7078117825584842, "learning_rate": 1.2128459067198414e-06, "loss": 0.0279, "step": 2425 }, { "epoch": 0.5519908987485779, "grad_norm": 1.2504565872577411, "learning_rate": 1.2128155529181042e-06, "loss": 0.0432, "step": 2426 }, { "epoch": 0.5522184300341297, "grad_norm": 2.0545311286439656, "learning_rate": 1.2127851871024937e-06, "loss": 0.13, "step": 2427 }, { "epoch": 0.5524459613196815, "grad_norm": 1.6325039766646547, "learning_rate": 1.2127548092736305e-06, "loss": 0.0712, "step": 2428 }, { "epoch": 0.5526734926052332, "grad_norm": 1.4302972588358862, "learning_rate": 1.2127244194321353e-06, "loss": 0.0928, "step": 2429 }, { "epoch": 0.552901023890785, "grad_norm": 0.9939753923200273, "learning_rate": 1.2126940175786294e-06, "loss": 0.0537, "step": 2430 }, { "epoch": 0.5531285551763367, "grad_norm": 1.1837217608085497, "learning_rate": 1.212663603713734e-06, "loss": 0.0411, "step": 2431 }, { "epoch": 0.5533560864618885, "grad_norm": 1.0395121186554928, "learning_rate": 1.212633177838071e-06, "loss": 0.0499, "step": 2432 }, { "epoch": 0.5535836177474402, "grad_norm": 0.9514659467888016, "learning_rate": 1.2126027399522617e-06, "loss": 0.0453, "step": 2433 }, { "epoch": 0.553811149032992, "grad_norm": 1.6880603772633955, "learning_rate": 1.2125722900569288e-06, "loss": 0.0745, "step": 2434 }, { "epoch": 0.5540386803185438, "grad_norm": 1.8855045585505437, "learning_rate": 1.2125418281526944e-06, "loss": 0.1082, "step": 2435 }, { "epoch": 0.5542662116040956, "grad_norm": 1.2963290000539638, "learning_rate": 1.212511354240181e-06, "loss": 0.0571, "step": 2436 }, { "epoch": 0.5544937428896474, "grad_norm": 1.075287013825043, "learning_rate": 1.2124808683200113e-06, "loss": 0.0522, "step": 2437 }, { "epoch": 0.5547212741751991, "grad_norm": 1.5923929041091236, "learning_rate": 1.2124503703928088e-06, "loss": 0.0634, "step": 2438 }, { "epoch": 0.5549488054607509, "grad_norm": 1.832683570099282, "learning_rate": 1.2124198604591965e-06, "loss": 0.0659, "step": 2439 }, { "epoch": 0.5551763367463026, "grad_norm": 1.4530576893525877, "learning_rate": 1.2123893385197982e-06, "loss": 0.0508, "step": 2440 }, { "epoch": 0.5554038680318544, "grad_norm": 2.2165292342455967, "learning_rate": 1.2123588045752373e-06, "loss": 0.0788, "step": 2441 }, { "epoch": 0.5556313993174061, "grad_norm": 0.7214271689112302, "learning_rate": 1.2123282586261384e-06, "loss": 0.0373, "step": 2442 }, { "epoch": 0.5558589306029579, "grad_norm": 1.733249440041101, "learning_rate": 1.2122977006731256e-06, "loss": 0.0849, "step": 2443 }, { "epoch": 0.5560864618885096, "grad_norm": 0.9877695265064453, "learning_rate": 1.2122671307168232e-06, "loss": 0.0293, "step": 2444 }, { "epoch": 0.5563139931740614, "grad_norm": 2.369119537407654, "learning_rate": 1.212236548757856e-06, "loss": 0.1193, "step": 2445 }, { "epoch": 0.5565415244596132, "grad_norm": 0.9124531328873139, "learning_rate": 1.2122059547968496e-06, "loss": 0.0304, "step": 2446 }, { "epoch": 0.556769055745165, "grad_norm": 2.08793896339006, "learning_rate": 1.2121753488344286e-06, "loss": 0.102, "step": 2447 }, { "epoch": 0.5569965870307167, "grad_norm": 1.128455134895707, "learning_rate": 1.212144730871219e-06, "loss": 0.0446, "step": 2448 }, { "epoch": 0.5572241183162685, "grad_norm": 1.6401919927448718, "learning_rate": 1.2121141009078462e-06, "loss": 0.0557, "step": 2449 }, { "epoch": 0.5574516496018203, "grad_norm": 1.058585660351393, "learning_rate": 1.2120834589449368e-06, "loss": 0.0397, "step": 2450 }, { "epoch": 0.557679180887372, "grad_norm": 1.850497858074919, "learning_rate": 1.2120528049831165e-06, "loss": 0.0898, "step": 2451 }, { "epoch": 0.5579067121729238, "grad_norm": 2.1238500476076614, "learning_rate": 1.212022139023012e-06, "loss": 0.0775, "step": 2452 }, { "epoch": 0.5581342434584755, "grad_norm": 1.1451793439610103, "learning_rate": 1.21199146106525e-06, "loss": 0.0395, "step": 2453 }, { "epoch": 0.5583617747440273, "grad_norm": 0.8655064333461899, "learning_rate": 1.2119607711104574e-06, "loss": 0.0414, "step": 2454 }, { "epoch": 0.558589306029579, "grad_norm": 0.7739545180233858, "learning_rate": 1.211930069159262e-06, "loss": 0.0376, "step": 2455 }, { "epoch": 0.5588168373151309, "grad_norm": 1.1255668925149045, "learning_rate": 1.2118993552122907e-06, "loss": 0.0615, "step": 2456 }, { "epoch": 0.5590443686006826, "grad_norm": 0.8837865527117891, "learning_rate": 1.2118686292701715e-06, "loss": 0.033, "step": 2457 }, { "epoch": 0.5592718998862344, "grad_norm": 1.4857021615037345, "learning_rate": 1.211837891333532e-06, "loss": 0.0515, "step": 2458 }, { "epoch": 0.5594994311717861, "grad_norm": 1.687364075587055, "learning_rate": 1.211807141403001e-06, "loss": 0.0788, "step": 2459 }, { "epoch": 0.5597269624573379, "grad_norm": 1.1101722611046627, "learning_rate": 1.211776379479207e-06, "loss": 0.0361, "step": 2460 }, { "epoch": 0.5599544937428896, "grad_norm": 1.4952697824972245, "learning_rate": 1.211745605562778e-06, "loss": 0.0557, "step": 2461 }, { "epoch": 0.5601820250284414, "grad_norm": 0.8923044749255172, "learning_rate": 1.2117148196543436e-06, "loss": 0.0577, "step": 2462 }, { "epoch": 0.5604095563139931, "grad_norm": 2.562794461200439, "learning_rate": 1.2116840217545329e-06, "loss": 0.1001, "step": 2463 }, { "epoch": 0.5606370875995449, "grad_norm": 0.9586201769696352, "learning_rate": 1.211653211863975e-06, "loss": 0.0502, "step": 2464 }, { "epoch": 0.5608646188850968, "grad_norm": 1.4257435635791937, "learning_rate": 1.2116223899833e-06, "loss": 0.0544, "step": 2465 }, { "epoch": 0.5610921501706485, "grad_norm": 1.1082780136920223, "learning_rate": 1.2115915561131376e-06, "loss": 0.0368, "step": 2466 }, { "epoch": 0.5613196814562003, "grad_norm": 1.1191301112997847, "learning_rate": 1.211560710254118e-06, "loss": 0.0346, "step": 2467 }, { "epoch": 0.561547212741752, "grad_norm": 1.5912996699076145, "learning_rate": 1.211529852406872e-06, "loss": 0.0596, "step": 2468 }, { "epoch": 0.5617747440273038, "grad_norm": 1.3053443148766963, "learning_rate": 1.2114989825720298e-06, "loss": 0.0617, "step": 2469 }, { "epoch": 0.5620022753128555, "grad_norm": 1.935737911886316, "learning_rate": 1.2114681007502227e-06, "loss": 0.0713, "step": 2470 }, { "epoch": 0.5622298065984073, "grad_norm": 1.6784690811076477, "learning_rate": 1.2114372069420815e-06, "loss": 0.0898, "step": 2471 }, { "epoch": 0.562457337883959, "grad_norm": 1.6677773674255931, "learning_rate": 1.2114063011482378e-06, "loss": 0.0515, "step": 2472 }, { "epoch": 0.5626848691695108, "grad_norm": 0.7751790351928534, "learning_rate": 1.2113753833693234e-06, "loss": 0.0276, "step": 2473 }, { "epoch": 0.5629124004550625, "grad_norm": 1.5172528416500193, "learning_rate": 1.2113444536059699e-06, "loss": 0.078, "step": 2474 }, { "epoch": 0.5631399317406144, "grad_norm": 1.0306067244802375, "learning_rate": 1.2113135118588096e-06, "loss": 0.0601, "step": 2475 }, { "epoch": 0.5633674630261661, "grad_norm": 0.9276551177060361, "learning_rate": 1.2112825581284752e-06, "loss": 0.055, "step": 2476 }, { "epoch": 0.5635949943117179, "grad_norm": 1.2647808751954976, "learning_rate": 1.2112515924155987e-06, "loss": 0.048, "step": 2477 }, { "epoch": 0.5638225255972696, "grad_norm": 1.1803809621410315, "learning_rate": 1.2112206147208134e-06, "loss": 0.0513, "step": 2478 }, { "epoch": 0.5640500568828214, "grad_norm": 1.8816100409430934, "learning_rate": 1.2111896250447525e-06, "loss": 0.0972, "step": 2479 }, { "epoch": 0.5642775881683731, "grad_norm": 1.2512746487710684, "learning_rate": 1.211158623388049e-06, "loss": 0.0416, "step": 2480 }, { "epoch": 0.5645051194539249, "grad_norm": 1.1080723770280754, "learning_rate": 1.2111276097513369e-06, "loss": 0.044, "step": 2481 }, { "epoch": 0.5647326507394766, "grad_norm": 1.060740374175569, "learning_rate": 1.2110965841352498e-06, "loss": 0.0633, "step": 2482 }, { "epoch": 0.5649601820250284, "grad_norm": 1.0075017492283267, "learning_rate": 1.211065546540422e-06, "loss": 0.0341, "step": 2483 }, { "epoch": 0.5651877133105802, "grad_norm": 0.7064865281725817, "learning_rate": 1.2110344969674877e-06, "loss": 0.0286, "step": 2484 }, { "epoch": 0.565415244596132, "grad_norm": 1.1946068302141277, "learning_rate": 1.2110034354170816e-06, "loss": 0.0659, "step": 2485 }, { "epoch": 0.5656427758816838, "grad_norm": 0.9370117142292799, "learning_rate": 1.2109723618898383e-06, "loss": 0.0288, "step": 2486 }, { "epoch": 0.5658703071672355, "grad_norm": 2.2018132198270046, "learning_rate": 1.2109412763863933e-06, "loss": 0.1172, "step": 2487 }, { "epoch": 0.5660978384527873, "grad_norm": 1.1995855232529598, "learning_rate": 1.2109101789073815e-06, "loss": 0.0511, "step": 2488 }, { "epoch": 0.566325369738339, "grad_norm": 1.1031248857035114, "learning_rate": 1.2108790694534389e-06, "loss": 0.064, "step": 2489 }, { "epoch": 0.5665529010238908, "grad_norm": 1.738059897032761, "learning_rate": 1.2108479480252011e-06, "loss": 0.0756, "step": 2490 }, { "epoch": 0.5667804323094425, "grad_norm": 1.8262819909910217, "learning_rate": 1.210816814623304e-06, "loss": 0.1204, "step": 2491 }, { "epoch": 0.5670079635949943, "grad_norm": 0.9990130696869628, "learning_rate": 1.2107856692483843e-06, "loss": 0.0357, "step": 2492 }, { "epoch": 0.567235494880546, "grad_norm": 1.3613754270975984, "learning_rate": 1.2107545119010783e-06, "loss": 0.0638, "step": 2493 }, { "epoch": 0.5674630261660978, "grad_norm": 1.2474975066272391, "learning_rate": 1.2107233425820229e-06, "loss": 0.0599, "step": 2494 }, { "epoch": 0.5676905574516496, "grad_norm": 1.0265382923896895, "learning_rate": 1.2106921612918549e-06, "loss": 0.0385, "step": 2495 }, { "epoch": 0.5679180887372014, "grad_norm": 1.9111381375185976, "learning_rate": 1.2106609680312117e-06, "loss": 0.0686, "step": 2496 }, { "epoch": 0.5681456200227532, "grad_norm": 1.2560597740440176, "learning_rate": 1.210629762800731e-06, "loss": 0.0642, "step": 2497 }, { "epoch": 0.5683731513083049, "grad_norm": 1.0400297969186523, "learning_rate": 1.2105985456010506e-06, "loss": 0.0419, "step": 2498 }, { "epoch": 0.5686006825938567, "grad_norm": 1.5436476934145886, "learning_rate": 1.2105673164328081e-06, "loss": 0.0771, "step": 2499 }, { "epoch": 0.5688282138794084, "grad_norm": 1.2147723738279594, "learning_rate": 1.2105360752966424e-06, "loss": 0.044, "step": 2500 }, { "epoch": 0.5690557451649602, "grad_norm": 1.2940548926007058, "learning_rate": 1.2105048221931915e-06, "loss": 0.0556, "step": 2501 }, { "epoch": 0.5692832764505119, "grad_norm": 1.1343959115342286, "learning_rate": 1.2104735571230944e-06, "loss": 0.0325, "step": 2502 }, { "epoch": 0.5695108077360637, "grad_norm": 0.7696868285921366, "learning_rate": 1.21044228008699e-06, "loss": 0.0321, "step": 2503 }, { "epoch": 0.5697383390216155, "grad_norm": 1.277277873025092, "learning_rate": 1.2104109910855176e-06, "loss": 0.0597, "step": 2504 }, { "epoch": 0.5699658703071673, "grad_norm": 1.433825874950651, "learning_rate": 1.2103796901193166e-06, "loss": 0.0763, "step": 2505 }, { "epoch": 0.570193401592719, "grad_norm": 0.8577837659660162, "learning_rate": 1.210348377189027e-06, "loss": 0.0284, "step": 2506 }, { "epoch": 0.5704209328782708, "grad_norm": 1.1944391480100813, "learning_rate": 1.2103170522952885e-06, "loss": 0.0562, "step": 2507 }, { "epoch": 0.5706484641638225, "grad_norm": 1.283294042740657, "learning_rate": 1.2102857154387413e-06, "loss": 0.0468, "step": 2508 }, { "epoch": 0.5708759954493743, "grad_norm": 1.7437964279202711, "learning_rate": 1.210254366620026e-06, "loss": 0.097, "step": 2509 }, { "epoch": 0.571103526734926, "grad_norm": 1.224329122924591, "learning_rate": 1.2102230058397832e-06, "loss": 0.0595, "step": 2510 }, { "epoch": 0.5713310580204778, "grad_norm": 1.3007053728417897, "learning_rate": 1.210191633098654e-06, "loss": 0.0685, "step": 2511 }, { "epoch": 0.5715585893060295, "grad_norm": 1.1429406193383025, "learning_rate": 1.2101602483972797e-06, "loss": 0.048, "step": 2512 }, { "epoch": 0.5717861205915813, "grad_norm": 0.7595899866293734, "learning_rate": 1.2101288517363016e-06, "loss": 0.0194, "step": 2513 }, { "epoch": 0.5720136518771332, "grad_norm": 1.025256317864455, "learning_rate": 1.2100974431163614e-06, "loss": 0.0423, "step": 2514 }, { "epoch": 0.5722411831626849, "grad_norm": 1.053909591806734, "learning_rate": 1.2100660225381008e-06, "loss": 0.0284, "step": 2515 }, { "epoch": 0.5724687144482367, "grad_norm": 1.384456570911114, "learning_rate": 1.2100345900021624e-06, "loss": 0.0659, "step": 2516 }, { "epoch": 0.5726962457337884, "grad_norm": 1.3325135654239626, "learning_rate": 1.2100031455091883e-06, "loss": 0.0675, "step": 2517 }, { "epoch": 0.5729237770193402, "grad_norm": 1.179139361018548, "learning_rate": 1.2099716890598212e-06, "loss": 0.0475, "step": 2518 }, { "epoch": 0.5731513083048919, "grad_norm": 1.1347488592526416, "learning_rate": 1.2099402206547042e-06, "loss": 0.0563, "step": 2519 }, { "epoch": 0.5733788395904437, "grad_norm": 1.2845284052496178, "learning_rate": 1.2099087402944805e-06, "loss": 0.053, "step": 2520 }, { "epoch": 0.5736063708759954, "grad_norm": 0.9939680061445038, "learning_rate": 1.2098772479797933e-06, "loss": 0.0388, "step": 2521 }, { "epoch": 0.5738339021615472, "grad_norm": 1.2948668621318795, "learning_rate": 1.2098457437112862e-06, "loss": 0.0451, "step": 2522 }, { "epoch": 0.5740614334470989, "grad_norm": 1.6486523140737381, "learning_rate": 1.2098142274896033e-06, "loss": 0.0722, "step": 2523 }, { "epoch": 0.5742889647326508, "grad_norm": 1.1690776697359815, "learning_rate": 1.2097826993153886e-06, "loss": 0.0504, "step": 2524 }, { "epoch": 0.5745164960182025, "grad_norm": 0.8812949923396778, "learning_rate": 1.2097511591892863e-06, "loss": 0.0232, "step": 2525 }, { "epoch": 0.5747440273037543, "grad_norm": 1.6847623228179178, "learning_rate": 1.2097196071119415e-06, "loss": 0.0643, "step": 2526 }, { "epoch": 0.574971558589306, "grad_norm": 1.5124757371829332, "learning_rate": 1.2096880430839985e-06, "loss": 0.0859, "step": 2527 }, { "epoch": 0.5751990898748578, "grad_norm": 1.6240299660391282, "learning_rate": 1.2096564671061031e-06, "loss": 0.0637, "step": 2528 }, { "epoch": 0.5754266211604095, "grad_norm": 1.5970094647815496, "learning_rate": 1.2096248791789e-06, "loss": 0.0654, "step": 2529 }, { "epoch": 0.5756541524459613, "grad_norm": 1.381679821195058, "learning_rate": 1.2095932793030353e-06, "loss": 0.0513, "step": 2530 }, { "epoch": 0.575881683731513, "grad_norm": 1.468449586854502, "learning_rate": 1.2095616674791543e-06, "loss": 0.064, "step": 2531 }, { "epoch": 0.5761092150170648, "grad_norm": 1.3812350043911665, "learning_rate": 1.2095300437079034e-06, "loss": 0.0532, "step": 2532 }, { "epoch": 0.5763367463026167, "grad_norm": 0.9730226451517775, "learning_rate": 1.2094984079899292e-06, "loss": 0.0355, "step": 2533 }, { "epoch": 0.5765642775881684, "grad_norm": 1.0694961505250529, "learning_rate": 1.2094667603258779e-06, "loss": 0.0404, "step": 2534 }, { "epoch": 0.5767918088737202, "grad_norm": 1.2344551784501863, "learning_rate": 1.2094351007163962e-06, "loss": 0.0472, "step": 2535 }, { "epoch": 0.5770193401592719, "grad_norm": 1.256788629460907, "learning_rate": 1.2094034291621315e-06, "loss": 0.0744, "step": 2536 }, { "epoch": 0.5772468714448237, "grad_norm": 1.1106837372555467, "learning_rate": 1.2093717456637311e-06, "loss": 0.0338, "step": 2537 }, { "epoch": 0.5774744027303754, "grad_norm": 1.0354356057027125, "learning_rate": 1.2093400502218422e-06, "loss": 0.0525, "step": 2538 }, { "epoch": 0.5777019340159272, "grad_norm": 1.3796812821808617, "learning_rate": 1.209308342837113e-06, "loss": 0.0582, "step": 2539 }, { "epoch": 0.5779294653014789, "grad_norm": 1.0890955420481698, "learning_rate": 1.2092766235101917e-06, "loss": 0.0597, "step": 2540 }, { "epoch": 0.5781569965870307, "grad_norm": 1.0327303160169794, "learning_rate": 1.2092448922417258e-06, "loss": 0.0549, "step": 2541 }, { "epoch": 0.5783845278725824, "grad_norm": 1.0446209152101336, "learning_rate": 1.2092131490323644e-06, "loss": 0.0512, "step": 2542 }, { "epoch": 0.5786120591581343, "grad_norm": 1.1872918690030627, "learning_rate": 1.2091813938827563e-06, "loss": 0.0467, "step": 2543 }, { "epoch": 0.578839590443686, "grad_norm": 1.6213845013127128, "learning_rate": 1.2091496267935502e-06, "loss": 0.0913, "step": 2544 }, { "epoch": 0.5790671217292378, "grad_norm": 0.25107695804250185, "learning_rate": 1.2091178477653957e-06, "loss": 0.017, "step": 2545 }, { "epoch": 0.5792946530147896, "grad_norm": 0.8996867474301544, "learning_rate": 1.209086056798942e-06, "loss": 0.0386, "step": 2546 }, { "epoch": 0.5795221843003413, "grad_norm": 2.2594305507095944, "learning_rate": 1.2090542538948392e-06, "loss": 0.1133, "step": 2547 }, { "epoch": 0.579749715585893, "grad_norm": 1.1393658831306852, "learning_rate": 1.209022439053737e-06, "loss": 0.0445, "step": 2548 }, { "epoch": 0.5799772468714448, "grad_norm": 1.2977706300782228, "learning_rate": 1.2089906122762859e-06, "loss": 0.0754, "step": 2549 }, { "epoch": 0.5802047781569966, "grad_norm": 1.8878513858209192, "learning_rate": 1.208958773563136e-06, "loss": 0.0896, "step": 2550 }, { "epoch": 0.5804323094425483, "grad_norm": 1.8406180655967013, "learning_rate": 1.2089269229149383e-06, "loss": 0.0884, "step": 2551 }, { "epoch": 0.5806598407281001, "grad_norm": 1.0239444959470219, "learning_rate": 1.2088950603323438e-06, "loss": 0.0461, "step": 2552 }, { "epoch": 0.5808873720136519, "grad_norm": 1.0773801965795455, "learning_rate": 1.2088631858160033e-06, "loss": 0.0415, "step": 2553 }, { "epoch": 0.5811149032992037, "grad_norm": 0.9310673305069316, "learning_rate": 1.2088312993665689e-06, "loss": 0.042, "step": 2554 }, { "epoch": 0.5813424345847554, "grad_norm": 1.3375692899110723, "learning_rate": 1.208799400984692e-06, "loss": 0.0727, "step": 2555 }, { "epoch": 0.5815699658703072, "grad_norm": 1.4881435052347582, "learning_rate": 1.2087674906710242e-06, "loss": 0.0482, "step": 2556 }, { "epoch": 0.5817974971558589, "grad_norm": 0.9553759696828109, "learning_rate": 1.2087355684262183e-06, "loss": 0.0631, "step": 2557 }, { "epoch": 0.5820250284414107, "grad_norm": 0.6274163364554146, "learning_rate": 1.2087036342509265e-06, "loss": 0.0219, "step": 2558 }, { "epoch": 0.5822525597269624, "grad_norm": 1.3757750182752106, "learning_rate": 1.208671688145801e-06, "loss": 0.0626, "step": 2559 }, { "epoch": 0.5824800910125142, "grad_norm": 2.481636121235935, "learning_rate": 1.2086397301114955e-06, "loss": 0.1418, "step": 2560 }, { "epoch": 0.5827076222980659, "grad_norm": 1.0378265608573296, "learning_rate": 1.2086077601486627e-06, "loss": 0.0576, "step": 2561 }, { "epoch": 0.5829351535836177, "grad_norm": 1.4924581569226714, "learning_rate": 1.2085757782579562e-06, "loss": 0.0816, "step": 2562 }, { "epoch": 0.5831626848691696, "grad_norm": 0.8886308787621965, "learning_rate": 1.2085437844400293e-06, "loss": 0.0278, "step": 2563 }, { "epoch": 0.5833902161547213, "grad_norm": 1.9559855395157613, "learning_rate": 1.2085117786955366e-06, "loss": 0.107, "step": 2564 }, { "epoch": 0.5836177474402731, "grad_norm": 1.3285689681822581, "learning_rate": 1.2084797610251314e-06, "loss": 0.0593, "step": 2565 }, { "epoch": 0.5838452787258248, "grad_norm": 1.042908390638745, "learning_rate": 1.2084477314294688e-06, "loss": 0.0299, "step": 2566 }, { "epoch": 0.5840728100113766, "grad_norm": 0.9604010784829523, "learning_rate": 1.2084156899092028e-06, "loss": 0.0336, "step": 2567 }, { "epoch": 0.5843003412969283, "grad_norm": 0.9621929837602977, "learning_rate": 1.2083836364649888e-06, "loss": 0.0431, "step": 2568 }, { "epoch": 0.5845278725824801, "grad_norm": 1.2097619610555346, "learning_rate": 1.2083515710974813e-06, "loss": 0.0446, "step": 2569 }, { "epoch": 0.5847554038680318, "grad_norm": 1.3118996444529896, "learning_rate": 1.2083194938073363e-06, "loss": 0.0491, "step": 2570 }, { "epoch": 0.5849829351535836, "grad_norm": 2.2788662451246915, "learning_rate": 1.2082874045952094e-06, "loss": 0.0905, "step": 2571 }, { "epoch": 0.5852104664391354, "grad_norm": 1.7999056722845332, "learning_rate": 1.2082553034617559e-06, "loss": 0.0798, "step": 2572 }, { "epoch": 0.5854379977246872, "grad_norm": 1.1859950837264095, "learning_rate": 1.2082231904076323e-06, "loss": 0.0361, "step": 2573 }, { "epoch": 0.5856655290102389, "grad_norm": 2.3662421804783618, "learning_rate": 1.2081910654334948e-06, "loss": 0.0962, "step": 2574 }, { "epoch": 0.5858930602957907, "grad_norm": 1.248609581967699, "learning_rate": 1.2081589285399998e-06, "loss": 0.0509, "step": 2575 }, { "epoch": 0.5861205915813424, "grad_norm": 1.1728856094231628, "learning_rate": 1.2081267797278043e-06, "loss": 0.0474, "step": 2576 }, { "epoch": 0.5863481228668942, "grad_norm": 1.3290932236966655, "learning_rate": 1.2080946189975656e-06, "loss": 0.0608, "step": 2577 }, { "epoch": 0.586575654152446, "grad_norm": 1.3099426521521451, "learning_rate": 1.2080624463499407e-06, "loss": 0.0697, "step": 2578 }, { "epoch": 0.5868031854379977, "grad_norm": 1.4265140149288442, "learning_rate": 1.2080302617855874e-06, "loss": 0.0802, "step": 2579 }, { "epoch": 0.5870307167235495, "grad_norm": 0.6748962342724948, "learning_rate": 1.2079980653051629e-06, "loss": 0.0194, "step": 2580 }, { "epoch": 0.5872582480091012, "grad_norm": 1.1857133228322694, "learning_rate": 1.207965856909326e-06, "loss": 0.0603, "step": 2581 }, { "epoch": 0.5874857792946531, "grad_norm": 1.4269876330750941, "learning_rate": 1.2079336365987345e-06, "loss": 0.0674, "step": 2582 }, { "epoch": 0.5877133105802048, "grad_norm": 1.1171535242191324, "learning_rate": 1.2079014043740471e-06, "loss": 0.0474, "step": 2583 }, { "epoch": 0.5879408418657566, "grad_norm": 1.049798577134009, "learning_rate": 1.2078691602359224e-06, "loss": 0.0515, "step": 2584 }, { "epoch": 0.5881683731513083, "grad_norm": 1.4079703732165962, "learning_rate": 1.2078369041850197e-06, "loss": 0.0614, "step": 2585 }, { "epoch": 0.5883959044368601, "grad_norm": 0.6740789590290865, "learning_rate": 1.207804636221998e-06, "loss": 0.0259, "step": 2586 }, { "epoch": 0.5886234357224118, "grad_norm": 1.189214147562108, "learning_rate": 1.207772356347517e-06, "loss": 0.0527, "step": 2587 }, { "epoch": 0.5888509670079636, "grad_norm": 1.1360915553171849, "learning_rate": 1.2077400645622363e-06, "loss": 0.0458, "step": 2588 }, { "epoch": 0.5890784982935153, "grad_norm": 0.9198535003511792, "learning_rate": 1.207707760866816e-06, "loss": 0.0402, "step": 2589 }, { "epoch": 0.5893060295790671, "grad_norm": 1.174521842214451, "learning_rate": 1.207675445261916e-06, "loss": 0.0421, "step": 2590 }, { "epoch": 0.5895335608646188, "grad_norm": 2.8929933375186287, "learning_rate": 1.207643117748197e-06, "loss": 0.1536, "step": 2591 }, { "epoch": 0.5897610921501707, "grad_norm": 1.5474448385855322, "learning_rate": 1.2076107783263199e-06, "loss": 0.0567, "step": 2592 }, { "epoch": 0.5899886234357224, "grad_norm": 0.8451598056278512, "learning_rate": 1.2075784269969457e-06, "loss": 0.0362, "step": 2593 }, { "epoch": 0.5902161547212742, "grad_norm": 0.7031869485161736, "learning_rate": 1.2075460637607351e-06, "loss": 0.0312, "step": 2594 }, { "epoch": 0.590443686006826, "grad_norm": 1.449466507606748, "learning_rate": 1.20751368861835e-06, "loss": 0.0573, "step": 2595 }, { "epoch": 0.5906712172923777, "grad_norm": 1.1927092104354498, "learning_rate": 1.2074813015704518e-06, "loss": 0.0402, "step": 2596 }, { "epoch": 0.5908987485779295, "grad_norm": 1.636503401466634, "learning_rate": 1.2074489026177024e-06, "loss": 0.0657, "step": 2597 }, { "epoch": 0.5911262798634812, "grad_norm": 1.4909987612945546, "learning_rate": 1.2074164917607644e-06, "loss": 0.0658, "step": 2598 }, { "epoch": 0.591353811149033, "grad_norm": 1.1952099163133536, "learning_rate": 1.2073840690003e-06, "loss": 0.0603, "step": 2599 }, { "epoch": 0.5915813424345847, "grad_norm": 1.0622134341599558, "learning_rate": 1.2073516343369717e-06, "loss": 0.0459, "step": 2600 }, { "epoch": 0.5918088737201365, "grad_norm": 0.8032008417926766, "learning_rate": 1.2073191877714424e-06, "loss": 0.0368, "step": 2601 }, { "epoch": 0.5920364050056883, "grad_norm": 1.1983356217235557, "learning_rate": 1.2072867293043755e-06, "loss": 0.045, "step": 2602 }, { "epoch": 0.5922639362912401, "grad_norm": 1.1811729971845715, "learning_rate": 1.2072542589364343e-06, "loss": 0.0413, "step": 2603 }, { "epoch": 0.5924914675767918, "grad_norm": 0.9199950791965811, "learning_rate": 1.2072217766682822e-06, "loss": 0.0471, "step": 2604 }, { "epoch": 0.5927189988623436, "grad_norm": 1.5522989255979738, "learning_rate": 1.2071892825005835e-06, "loss": 0.0531, "step": 2605 }, { "epoch": 0.5929465301478953, "grad_norm": 2.34185081988946, "learning_rate": 1.207156776434002e-06, "loss": 0.1052, "step": 2606 }, { "epoch": 0.5931740614334471, "grad_norm": 1.6203202208735543, "learning_rate": 1.2071242584692022e-06, "loss": 0.0824, "step": 2607 }, { "epoch": 0.5934015927189988, "grad_norm": 1.8225272194053617, "learning_rate": 1.2070917286068486e-06, "loss": 0.0816, "step": 2608 }, { "epoch": 0.5936291240045506, "grad_norm": 1.0482127292144585, "learning_rate": 1.2070591868476062e-06, "loss": 0.0336, "step": 2609 }, { "epoch": 0.5938566552901023, "grad_norm": 1.2383829160698303, "learning_rate": 1.20702663319214e-06, "loss": 0.0558, "step": 2610 }, { "epoch": 0.5940841865756542, "grad_norm": 1.0803065071888958, "learning_rate": 1.2069940676411154e-06, "loss": 0.0663, "step": 2611 }, { "epoch": 0.594311717861206, "grad_norm": 1.5171982637367767, "learning_rate": 1.2069614901951978e-06, "loss": 0.0601, "step": 2612 }, { "epoch": 0.5945392491467577, "grad_norm": 1.3847567182951876, "learning_rate": 1.2069289008550533e-06, "loss": 0.0611, "step": 2613 }, { "epoch": 0.5947667804323095, "grad_norm": 0.9955435148692463, "learning_rate": 1.2068962996213476e-06, "loss": 0.0403, "step": 2614 }, { "epoch": 0.5949943117178612, "grad_norm": 1.13405892693637, "learning_rate": 1.2068636864947475e-06, "loss": 0.0519, "step": 2615 }, { "epoch": 0.595221843003413, "grad_norm": 0.7900528990598964, "learning_rate": 1.206831061475919e-06, "loss": 0.027, "step": 2616 }, { "epoch": 0.5954493742889647, "grad_norm": 0.6257774504076663, "learning_rate": 1.2067984245655292e-06, "loss": 0.0174, "step": 2617 }, { "epoch": 0.5956769055745165, "grad_norm": 0.7037772555599539, "learning_rate": 1.2067657757642453e-06, "loss": 0.0168, "step": 2618 }, { "epoch": 0.5959044368600682, "grad_norm": 0.8538766117483473, "learning_rate": 1.2067331150727343e-06, "loss": 0.0382, "step": 2619 }, { "epoch": 0.59613196814562, "grad_norm": 0.9862142615994054, "learning_rate": 1.206700442491664e-06, "loss": 0.0359, "step": 2620 }, { "epoch": 0.5963594994311718, "grad_norm": 1.5267273993929875, "learning_rate": 1.2066677580217018e-06, "loss": 0.0625, "step": 2621 }, { "epoch": 0.5965870307167236, "grad_norm": 1.5028260871816748, "learning_rate": 1.2066350616635159e-06, "loss": 0.0545, "step": 2622 }, { "epoch": 0.5968145620022753, "grad_norm": 2.64736254484509, "learning_rate": 1.2066023534177746e-06, "loss": 0.1266, "step": 2623 }, { "epoch": 0.5970420932878271, "grad_norm": 1.4120526861644525, "learning_rate": 1.2065696332851463e-06, "loss": 0.0591, "step": 2624 }, { "epoch": 0.5972696245733788, "grad_norm": 0.9067523643094818, "learning_rate": 1.2065369012662999e-06, "loss": 0.0331, "step": 2625 }, { "epoch": 0.5974971558589306, "grad_norm": 1.0988059318675343, "learning_rate": 1.206504157361904e-06, "loss": 0.0605, "step": 2626 }, { "epoch": 0.5977246871444823, "grad_norm": 1.2270739952449312, "learning_rate": 1.2064714015726283e-06, "loss": 0.056, "step": 2627 }, { "epoch": 0.5979522184300341, "grad_norm": 0.9257304826598889, "learning_rate": 1.2064386338991423e-06, "loss": 0.0433, "step": 2628 }, { "epoch": 0.5981797497155859, "grad_norm": 1.8761997788068991, "learning_rate": 1.2064058543421152e-06, "loss": 0.084, "step": 2629 }, { "epoch": 0.5984072810011376, "grad_norm": 1.5801176338760017, "learning_rate": 1.2063730629022173e-06, "loss": 0.0419, "step": 2630 }, { "epoch": 0.5986348122866895, "grad_norm": 1.1971080645956096, "learning_rate": 1.2063402595801187e-06, "loss": 0.0438, "step": 2631 }, { "epoch": 0.5988623435722412, "grad_norm": 1.405844210266892, "learning_rate": 1.2063074443764897e-06, "loss": 0.0624, "step": 2632 }, { "epoch": 0.599089874857793, "grad_norm": 1.5341316627448498, "learning_rate": 1.2062746172920014e-06, "loss": 0.088, "step": 2633 }, { "epoch": 0.5993174061433447, "grad_norm": 1.0531520309166145, "learning_rate": 1.2062417783273246e-06, "loss": 0.0474, "step": 2634 }, { "epoch": 0.5995449374288965, "grad_norm": 1.36821624035935, "learning_rate": 1.2062089274831299e-06, "loss": 0.0677, "step": 2635 }, { "epoch": 0.5997724687144482, "grad_norm": 0.7040976905373539, "learning_rate": 1.2061760647600894e-06, "loss": 0.0308, "step": 2636 }, { "epoch": 0.6, "grad_norm": 0.9178208746030599, "learning_rate": 1.2061431901588747e-06, "loss": 0.0205, "step": 2637 }, { "epoch": 0.6002275312855517, "grad_norm": 0.9105858796703735, "learning_rate": 1.2061103036801573e-06, "loss": 0.0442, "step": 2638 }, { "epoch": 0.6004550625711035, "grad_norm": 1.40951436107761, "learning_rate": 1.2060774053246096e-06, "loss": 0.0644, "step": 2639 }, { "epoch": 0.6006825938566553, "grad_norm": 1.1409764583060835, "learning_rate": 1.2060444950929038e-06, "loss": 0.0705, "step": 2640 }, { "epoch": 0.6009101251422071, "grad_norm": 1.1909546375772935, "learning_rate": 1.2060115729857128e-06, "loss": 0.0515, "step": 2641 }, { "epoch": 0.6011376564277588, "grad_norm": 1.2179103001644596, "learning_rate": 1.2059786390037093e-06, "loss": 0.0387, "step": 2642 }, { "epoch": 0.6013651877133106, "grad_norm": 1.135969807370913, "learning_rate": 1.2059456931475663e-06, "loss": 0.0224, "step": 2643 }, { "epoch": 0.6015927189988624, "grad_norm": 1.6455009465660149, "learning_rate": 1.2059127354179573e-06, "loss": 0.1139, "step": 2644 }, { "epoch": 0.6018202502844141, "grad_norm": 1.0904940664660454, "learning_rate": 1.205879765815556e-06, "loss": 0.0409, "step": 2645 }, { "epoch": 0.6020477815699659, "grad_norm": 1.8187560107849106, "learning_rate": 1.205846784341036e-06, "loss": 0.0746, "step": 2646 }, { "epoch": 0.6022753128555176, "grad_norm": 1.2846083666249735, "learning_rate": 1.2058137909950719e-06, "loss": 0.0397, "step": 2647 }, { "epoch": 0.6025028441410694, "grad_norm": 1.3139384696968406, "learning_rate": 1.2057807857783371e-06, "loss": 0.0513, "step": 2648 }, { "epoch": 0.6027303754266211, "grad_norm": 1.5334002395835504, "learning_rate": 1.205747768691507e-06, "loss": 0.0451, "step": 2649 }, { "epoch": 0.602957906712173, "grad_norm": 0.9668095767338745, "learning_rate": 1.2057147397352559e-06, "loss": 0.0519, "step": 2650 }, { "epoch": 0.6031854379977247, "grad_norm": 1.1043891282747957, "learning_rate": 1.2056816989102591e-06, "loss": 0.0421, "step": 2651 }, { "epoch": 0.6034129692832765, "grad_norm": 1.63627667644995, "learning_rate": 1.2056486462171918e-06, "loss": 0.0715, "step": 2652 }, { "epoch": 0.6036405005688282, "grad_norm": 0.986630375032043, "learning_rate": 1.2056155816567297e-06, "loss": 0.0497, "step": 2653 }, { "epoch": 0.60386803185438, "grad_norm": 1.198264842052037, "learning_rate": 1.2055825052295486e-06, "loss": 0.0565, "step": 2654 }, { "epoch": 0.6040955631399317, "grad_norm": 0.7380211191011931, "learning_rate": 1.205549416936324e-06, "loss": 0.0423, "step": 2655 }, { "epoch": 0.6043230944254835, "grad_norm": 0.8003290473544856, "learning_rate": 1.2055163167777328e-06, "loss": 0.057, "step": 2656 }, { "epoch": 0.6045506257110352, "grad_norm": 1.2699158922395328, "learning_rate": 1.2054832047544512e-06, "loss": 0.0588, "step": 2657 }, { "epoch": 0.604778156996587, "grad_norm": 0.7610615046539442, "learning_rate": 1.205450080867156e-06, "loss": 0.0343, "step": 2658 }, { "epoch": 0.6050056882821387, "grad_norm": 1.3103630700346172, "learning_rate": 1.205416945116524e-06, "loss": 0.0406, "step": 2659 }, { "epoch": 0.6052332195676906, "grad_norm": 1.0165185597393296, "learning_rate": 1.2053837975032328e-06, "loss": 0.0337, "step": 2660 }, { "epoch": 0.6054607508532424, "grad_norm": 1.387250593088855, "learning_rate": 1.2053506380279597e-06, "loss": 0.0603, "step": 2661 }, { "epoch": 0.6056882821387941, "grad_norm": 1.335719349119118, "learning_rate": 1.2053174666913826e-06, "loss": 0.0729, "step": 2662 }, { "epoch": 0.6059158134243459, "grad_norm": 0.7777911730162685, "learning_rate": 1.2052842834941791e-06, "loss": 0.0351, "step": 2663 }, { "epoch": 0.6061433447098976, "grad_norm": 0.9791113605180757, "learning_rate": 1.2052510884370274e-06, "loss": 0.0447, "step": 2664 }, { "epoch": 0.6063708759954494, "grad_norm": 0.967866940301246, "learning_rate": 1.2052178815206064e-06, "loss": 0.0371, "step": 2665 }, { "epoch": 0.6065984072810011, "grad_norm": 1.534725370801602, "learning_rate": 1.2051846627455946e-06, "loss": 0.0826, "step": 2666 }, { "epoch": 0.6068259385665529, "grad_norm": 0.9691101488135158, "learning_rate": 1.2051514321126705e-06, "loss": 0.0486, "step": 2667 }, { "epoch": 0.6070534698521046, "grad_norm": 1.352223067560777, "learning_rate": 1.2051181896225139e-06, "loss": 0.0496, "step": 2668 }, { "epoch": 0.6072810011376564, "grad_norm": 0.9959104730289088, "learning_rate": 1.205084935275804e-06, "loss": 0.0458, "step": 2669 }, { "epoch": 0.6075085324232082, "grad_norm": 1.8958304200279301, "learning_rate": 1.20505166907322e-06, "loss": 0.0702, "step": 2670 }, { "epoch": 0.60773606370876, "grad_norm": 1.1373672856837178, "learning_rate": 1.2050183910154425e-06, "loss": 0.0642, "step": 2671 }, { "epoch": 0.6079635949943117, "grad_norm": 1.5146568276899794, "learning_rate": 1.2049851011031514e-06, "loss": 0.0615, "step": 2672 }, { "epoch": 0.6081911262798635, "grad_norm": 1.3486666030528254, "learning_rate": 1.2049517993370269e-06, "loss": 0.0407, "step": 2673 }, { "epoch": 0.6084186575654152, "grad_norm": 1.7553759733095198, "learning_rate": 1.2049184857177498e-06, "loss": 0.089, "step": 2674 }, { "epoch": 0.608646188850967, "grad_norm": 1.6341921000073825, "learning_rate": 1.204885160246001e-06, "loss": 0.1034, "step": 2675 }, { "epoch": 0.6088737201365187, "grad_norm": 1.2886908765685168, "learning_rate": 1.2048518229224613e-06, "loss": 0.0371, "step": 2676 }, { "epoch": 0.6091012514220705, "grad_norm": 1.147794266112336, "learning_rate": 1.2048184737478124e-06, "loss": 0.0513, "step": 2677 }, { "epoch": 0.6093287827076223, "grad_norm": 1.108848937036823, "learning_rate": 1.2047851127227358e-06, "loss": 0.0372, "step": 2678 }, { "epoch": 0.6095563139931741, "grad_norm": 0.9494871582849728, "learning_rate": 1.2047517398479135e-06, "loss": 0.0313, "step": 2679 }, { "epoch": 0.6097838452787259, "grad_norm": 1.4403996402965795, "learning_rate": 1.204718355124027e-06, "loss": 0.0648, "step": 2680 }, { "epoch": 0.6100113765642776, "grad_norm": 1.1467814441502786, "learning_rate": 1.2046849585517595e-06, "loss": 0.0363, "step": 2681 }, { "epoch": 0.6102389078498294, "grad_norm": 0.8769285585937754, "learning_rate": 1.2046515501317927e-06, "loss": 0.0439, "step": 2682 }, { "epoch": 0.6104664391353811, "grad_norm": 1.0907507673380405, "learning_rate": 1.2046181298648101e-06, "loss": 0.0362, "step": 2683 }, { "epoch": 0.6106939704209329, "grad_norm": 1.4400540200529313, "learning_rate": 1.2045846977514943e-06, "loss": 0.0695, "step": 2684 }, { "epoch": 0.6109215017064846, "grad_norm": 0.926017693420237, "learning_rate": 1.2045512537925287e-06, "loss": 0.0319, "step": 2685 }, { "epoch": 0.6111490329920364, "grad_norm": 1.9422364837523665, "learning_rate": 1.2045177979885969e-06, "loss": 0.0678, "step": 2686 }, { "epoch": 0.6113765642775881, "grad_norm": 1.0780377835715493, "learning_rate": 1.2044843303403827e-06, "loss": 0.051, "step": 2687 }, { "epoch": 0.6116040955631399, "grad_norm": 1.522022597658668, "learning_rate": 1.20445085084857e-06, "loss": 0.0917, "step": 2688 }, { "epoch": 0.6118316268486917, "grad_norm": 1.0239752154710169, "learning_rate": 1.204417359513843e-06, "loss": 0.0286, "step": 2689 }, { "epoch": 0.6120591581342435, "grad_norm": 1.150678969827508, "learning_rate": 1.2043838563368865e-06, "loss": 0.0554, "step": 2690 }, { "epoch": 0.6122866894197952, "grad_norm": 1.2280896133913035, "learning_rate": 1.204350341318385e-06, "loss": 0.0562, "step": 2691 }, { "epoch": 0.612514220705347, "grad_norm": 1.0374592547169612, "learning_rate": 1.2043168144590237e-06, "loss": 0.0483, "step": 2692 }, { "epoch": 0.6127417519908988, "grad_norm": 1.8353693399206994, "learning_rate": 1.2042832757594875e-06, "loss": 0.0901, "step": 2693 }, { "epoch": 0.6129692832764505, "grad_norm": 1.6915158293911816, "learning_rate": 1.204249725220462e-06, "loss": 0.0741, "step": 2694 }, { "epoch": 0.6131968145620023, "grad_norm": 1.18563782209136, "learning_rate": 1.204216162842633e-06, "loss": 0.0406, "step": 2695 }, { "epoch": 0.613424345847554, "grad_norm": 1.2961769715929403, "learning_rate": 1.2041825886266866e-06, "loss": 0.0449, "step": 2696 }, { "epoch": 0.6136518771331058, "grad_norm": 1.5168600612694154, "learning_rate": 1.2041490025733089e-06, "loss": 0.0853, "step": 2697 }, { "epoch": 0.6138794084186575, "grad_norm": 1.0552035992692197, "learning_rate": 1.2041154046831859e-06, "loss": 0.0459, "step": 2698 }, { "epoch": 0.6141069397042094, "grad_norm": 1.0052589171106947, "learning_rate": 1.2040817949570046e-06, "loss": 0.0519, "step": 2699 }, { "epoch": 0.6143344709897611, "grad_norm": 1.5847978638614886, "learning_rate": 1.2040481733954523e-06, "loss": 0.0489, "step": 2700 }, { "epoch": 0.6145620022753129, "grad_norm": 1.5433588402630525, "learning_rate": 1.2040145399992157e-06, "loss": 0.0554, "step": 2701 }, { "epoch": 0.6147895335608646, "grad_norm": 0.9199676231545262, "learning_rate": 1.203980894768982e-06, "loss": 0.051, "step": 2702 }, { "epoch": 0.6150170648464164, "grad_norm": 1.6984060525727276, "learning_rate": 1.2039472377054395e-06, "loss": 0.0951, "step": 2703 }, { "epoch": 0.6152445961319681, "grad_norm": 1.9254054628589912, "learning_rate": 1.2039135688092757e-06, "loss": 0.0614, "step": 2704 }, { "epoch": 0.6154721274175199, "grad_norm": 1.3832191004617072, "learning_rate": 1.203879888081179e-06, "loss": 0.0666, "step": 2705 }, { "epoch": 0.6156996587030716, "grad_norm": 1.915733750626152, "learning_rate": 1.203846195521837e-06, "loss": 0.0622, "step": 2706 }, { "epoch": 0.6159271899886234, "grad_norm": 1.1800428967306633, "learning_rate": 1.2038124911319393e-06, "loss": 0.0428, "step": 2707 }, { "epoch": 0.6161547212741753, "grad_norm": 1.464495992586222, "learning_rate": 1.2037787749121741e-06, "loss": 0.069, "step": 2708 }, { "epoch": 0.616382252559727, "grad_norm": 1.1760191210505098, "learning_rate": 1.203745046863231e-06, "loss": 0.0503, "step": 2709 }, { "epoch": 0.6166097838452788, "grad_norm": 1.2442850515401647, "learning_rate": 1.203711306985799e-06, "loss": 0.04, "step": 2710 }, { "epoch": 0.6168373151308305, "grad_norm": 0.865019588027552, "learning_rate": 1.2036775552805674e-06, "loss": 0.0294, "step": 2711 }, { "epoch": 0.6170648464163823, "grad_norm": 2.6756015679269978, "learning_rate": 1.2036437917482267e-06, "loss": 0.0814, "step": 2712 }, { "epoch": 0.617292377701934, "grad_norm": 0.8893160007809519, "learning_rate": 1.2036100163894665e-06, "loss": 0.0434, "step": 2713 }, { "epoch": 0.6175199089874858, "grad_norm": 1.5121328039736281, "learning_rate": 1.2035762292049772e-06, "loss": 0.0848, "step": 2714 }, { "epoch": 0.6177474402730375, "grad_norm": 1.1556104934545364, "learning_rate": 1.2035424301954496e-06, "loss": 0.0564, "step": 2715 }, { "epoch": 0.6179749715585893, "grad_norm": 1.344567899518094, "learning_rate": 1.2035086193615743e-06, "loss": 0.0449, "step": 2716 }, { "epoch": 0.618202502844141, "grad_norm": 1.6415013868308683, "learning_rate": 1.203474796704042e-06, "loss": 0.0598, "step": 2717 }, { "epoch": 0.6184300341296929, "grad_norm": 0.9852915602376644, "learning_rate": 1.2034409622235444e-06, "loss": 0.0285, "step": 2718 }, { "epoch": 0.6186575654152446, "grad_norm": 1.1301434136989037, "learning_rate": 1.203407115920773e-06, "loss": 0.0473, "step": 2719 }, { "epoch": 0.6188850967007964, "grad_norm": 1.3150440764406408, "learning_rate": 1.2033732577964194e-06, "loss": 0.0717, "step": 2720 }, { "epoch": 0.6191126279863481, "grad_norm": 1.4850057002676174, "learning_rate": 1.2033393878511756e-06, "loss": 0.0589, "step": 2721 }, { "epoch": 0.6193401592718999, "grad_norm": 1.5995015548370441, "learning_rate": 1.203305506085734e-06, "loss": 0.0705, "step": 2722 }, { "epoch": 0.6195676905574516, "grad_norm": 0.8404790504075466, "learning_rate": 1.2032716125007868e-06, "loss": 0.0437, "step": 2723 }, { "epoch": 0.6197952218430034, "grad_norm": 1.3772954832300879, "learning_rate": 1.2032377070970268e-06, "loss": 0.0318, "step": 2724 }, { "epoch": 0.6200227531285551, "grad_norm": 1.4515256482801804, "learning_rate": 1.2032037898751475e-06, "loss": 0.0473, "step": 2725 }, { "epoch": 0.6202502844141069, "grad_norm": 0.7545612524043191, "learning_rate": 1.2031698608358414e-06, "loss": 0.0269, "step": 2726 }, { "epoch": 0.6204778156996587, "grad_norm": 0.743007367054589, "learning_rate": 1.2031359199798021e-06, "loss": 0.0283, "step": 2727 }, { "epoch": 0.6207053469852105, "grad_norm": 1.2718184804954529, "learning_rate": 1.2031019673077237e-06, "loss": 0.0634, "step": 2728 }, { "epoch": 0.6209328782707623, "grad_norm": 1.775193564914863, "learning_rate": 1.2030680028202995e-06, "loss": 0.1335, "step": 2729 }, { "epoch": 0.621160409556314, "grad_norm": 1.1547031507192722, "learning_rate": 1.2030340265182242e-06, "loss": 0.026, "step": 2730 }, { "epoch": 0.6213879408418658, "grad_norm": 1.81101747698367, "learning_rate": 1.2030000384021919e-06, "loss": 0.1018, "step": 2731 }, { "epoch": 0.6216154721274175, "grad_norm": 1.5054245916089843, "learning_rate": 1.2029660384728974e-06, "loss": 0.0495, "step": 2732 }, { "epoch": 0.6218430034129693, "grad_norm": 1.6121127283946548, "learning_rate": 1.2029320267310359e-06, "loss": 0.0782, "step": 2733 }, { "epoch": 0.622070534698521, "grad_norm": 1.6042883442076186, "learning_rate": 1.2028980031773018e-06, "loss": 0.0589, "step": 2734 }, { "epoch": 0.6222980659840728, "grad_norm": 1.2540922996339912, "learning_rate": 1.202863967812391e-06, "loss": 0.0774, "step": 2735 }, { "epoch": 0.6225255972696245, "grad_norm": 1.518635659663246, "learning_rate": 1.2028299206369991e-06, "loss": 0.1072, "step": 2736 }, { "epoch": 0.6227531285551763, "grad_norm": 1.3308844577317762, "learning_rate": 1.2027958616518218e-06, "loss": 0.0445, "step": 2737 }, { "epoch": 0.6229806598407281, "grad_norm": 2.209735756502863, "learning_rate": 1.2027617908575553e-06, "loss": 0.1124, "step": 2738 }, { "epoch": 0.6232081911262799, "grad_norm": 1.3929457690467317, "learning_rate": 1.2027277082548958e-06, "loss": 0.0545, "step": 2739 }, { "epoch": 0.6234357224118316, "grad_norm": 1.051968724737917, "learning_rate": 1.20269361384454e-06, "loss": 0.033, "step": 2740 }, { "epoch": 0.6236632536973834, "grad_norm": 1.6764017429869924, "learning_rate": 1.2026595076271848e-06, "loss": 0.0834, "step": 2741 }, { "epoch": 0.6238907849829352, "grad_norm": 1.6397713261851863, "learning_rate": 1.2026253896035273e-06, "loss": 0.0693, "step": 2742 }, { "epoch": 0.6241183162684869, "grad_norm": 1.591510561765584, "learning_rate": 1.2025912597742646e-06, "loss": 0.0768, "step": 2743 }, { "epoch": 0.6243458475540387, "grad_norm": 1.5008432457963459, "learning_rate": 1.2025571181400944e-06, "loss": 0.0771, "step": 2744 }, { "epoch": 0.6245733788395904, "grad_norm": 1.362876261935557, "learning_rate": 1.2025229647017145e-06, "loss": 0.0523, "step": 2745 }, { "epoch": 0.6248009101251422, "grad_norm": 2.2117120292707977, "learning_rate": 1.2024887994598227e-06, "loss": 0.0941, "step": 2746 }, { "epoch": 0.625028441410694, "grad_norm": 0.962824420377618, "learning_rate": 1.2024546224151176e-06, "loss": 0.0339, "step": 2747 }, { "epoch": 0.6252559726962458, "grad_norm": 0.8088610019406368, "learning_rate": 1.2024204335682977e-06, "loss": 0.0269, "step": 2748 }, { "epoch": 0.6254835039817975, "grad_norm": 2.415791185781217, "learning_rate": 1.2023862329200613e-06, "loss": 0.0925, "step": 2749 }, { "epoch": 0.6257110352673493, "grad_norm": 1.0450350621113902, "learning_rate": 1.2023520204711078e-06, "loss": 0.0539, "step": 2750 }, { "epoch": 0.625938566552901, "grad_norm": 1.3226699575436216, "learning_rate": 1.2023177962221366e-06, "loss": 0.098, "step": 2751 }, { "epoch": 0.6261660978384528, "grad_norm": 1.6169539390839551, "learning_rate": 1.2022835601738467e-06, "loss": 0.0674, "step": 2752 }, { "epoch": 0.6263936291240045, "grad_norm": 0.926101104299797, "learning_rate": 1.2022493123269383e-06, "loss": 0.0326, "step": 2753 }, { "epoch": 0.6266211604095563, "grad_norm": 1.7687331075293982, "learning_rate": 1.202215052682111e-06, "loss": 0.1007, "step": 2754 }, { "epoch": 0.626848691695108, "grad_norm": 1.2611712167362608, "learning_rate": 1.2021807812400652e-06, "loss": 0.0739, "step": 2755 }, { "epoch": 0.6270762229806598, "grad_norm": 1.5800647590282382, "learning_rate": 1.2021464980015014e-06, "loss": 0.078, "step": 2756 }, { "epoch": 0.6273037542662117, "grad_norm": 1.2594753097389422, "learning_rate": 1.20211220296712e-06, "loss": 0.0642, "step": 2757 }, { "epoch": 0.6275312855517634, "grad_norm": 1.121495104083563, "learning_rate": 1.2020778961376223e-06, "loss": 0.0328, "step": 2758 }, { "epoch": 0.6277588168373152, "grad_norm": 1.4683080872027747, "learning_rate": 1.202043577513709e-06, "loss": 0.0902, "step": 2759 }, { "epoch": 0.6279863481228669, "grad_norm": 1.8245567164154206, "learning_rate": 1.202009247096082e-06, "loss": 0.0761, "step": 2760 }, { "epoch": 0.6282138794084187, "grad_norm": 1.1125392914595493, "learning_rate": 1.2019749048854426e-06, "loss": 0.0646, "step": 2761 }, { "epoch": 0.6284414106939704, "grad_norm": 0.8642533280519576, "learning_rate": 1.2019405508824927e-06, "loss": 0.0324, "step": 2762 }, { "epoch": 0.6286689419795222, "grad_norm": 1.7625633245035697, "learning_rate": 1.201906185087935e-06, "loss": 0.0776, "step": 2763 }, { "epoch": 0.6288964732650739, "grad_norm": 1.9920964212571328, "learning_rate": 1.201871807502471e-06, "loss": 0.0628, "step": 2764 }, { "epoch": 0.6291240045506257, "grad_norm": 1.371538527134396, "learning_rate": 1.2018374181268039e-06, "loss": 0.054, "step": 2765 }, { "epoch": 0.6293515358361774, "grad_norm": 1.603991934514247, "learning_rate": 1.2018030169616363e-06, "loss": 0.0675, "step": 2766 }, { "epoch": 0.6295790671217293, "grad_norm": 1.3082298269977088, "learning_rate": 1.2017686040076715e-06, "loss": 0.059, "step": 2767 }, { "epoch": 0.629806598407281, "grad_norm": 1.0629082940323715, "learning_rate": 1.2017341792656129e-06, "loss": 0.0526, "step": 2768 }, { "epoch": 0.6300341296928328, "grad_norm": 1.5246818248480305, "learning_rate": 1.2016997427361635e-06, "loss": 0.0489, "step": 2769 }, { "epoch": 0.6302616609783845, "grad_norm": 1.4969549226937944, "learning_rate": 1.201665294420028e-06, "loss": 0.079, "step": 2770 }, { "epoch": 0.6304891922639363, "grad_norm": 1.195617244868444, "learning_rate": 1.2016308343179098e-06, "loss": 0.038, "step": 2771 }, { "epoch": 0.630716723549488, "grad_norm": 1.0734726876278198, "learning_rate": 1.2015963624305132e-06, "loss": 0.0528, "step": 2772 }, { "epoch": 0.6309442548350398, "grad_norm": 0.9060818686627535, "learning_rate": 1.201561878758543e-06, "loss": 0.0321, "step": 2773 }, { "epoch": 0.6311717861205915, "grad_norm": 1.9200212018830873, "learning_rate": 1.2015273833027041e-06, "loss": 0.0827, "step": 2774 }, { "epoch": 0.6313993174061433, "grad_norm": 1.1822527617116831, "learning_rate": 1.2014928760637014e-06, "loss": 0.0537, "step": 2775 }, { "epoch": 0.631626848691695, "grad_norm": 1.618374665019338, "learning_rate": 1.20145835704224e-06, "loss": 0.0563, "step": 2776 }, { "epoch": 0.6318543799772469, "grad_norm": 2.134594638963389, "learning_rate": 1.2014238262390254e-06, "loss": 0.0709, "step": 2777 }, { "epoch": 0.6320819112627987, "grad_norm": 2.1175666116051968, "learning_rate": 1.2013892836547635e-06, "loss": 0.0924, "step": 2778 }, { "epoch": 0.6323094425483504, "grad_norm": 1.5175276686793735, "learning_rate": 1.2013547292901605e-06, "loss": 0.0818, "step": 2779 }, { "epoch": 0.6325369738339022, "grad_norm": 1.1839298832479785, "learning_rate": 1.2013201631459222e-06, "loss": 0.0709, "step": 2780 }, { "epoch": 0.6327645051194539, "grad_norm": 2.288412160525939, "learning_rate": 1.201285585222755e-06, "loss": 0.105, "step": 2781 }, { "epoch": 0.6329920364050057, "grad_norm": 1.569225058207049, "learning_rate": 1.2012509955213664e-06, "loss": 0.0477, "step": 2782 }, { "epoch": 0.6332195676905574, "grad_norm": 0.7422297833198885, "learning_rate": 1.2012163940424624e-06, "loss": 0.0359, "step": 2783 }, { "epoch": 0.6334470989761092, "grad_norm": 1.009892072927878, "learning_rate": 1.2011817807867507e-06, "loss": 0.0472, "step": 2784 }, { "epoch": 0.6336746302616609, "grad_norm": 1.8711194663634534, "learning_rate": 1.2011471557549387e-06, "loss": 0.0621, "step": 2785 }, { "epoch": 0.6339021615472128, "grad_norm": 1.0981859377257484, "learning_rate": 1.2011125189477339e-06, "loss": 0.0407, "step": 2786 }, { "epoch": 0.6341296928327645, "grad_norm": 2.3667510812808903, "learning_rate": 1.2010778703658441e-06, "loss": 0.103, "step": 2787 }, { "epoch": 0.6343572241183163, "grad_norm": 1.4344646107903631, "learning_rate": 1.2010432100099781e-06, "loss": 0.0796, "step": 2788 }, { "epoch": 0.634584755403868, "grad_norm": 1.6112614680461406, "learning_rate": 1.2010085378808437e-06, "loss": 0.0635, "step": 2789 }, { "epoch": 0.6348122866894198, "grad_norm": 1.8958308802132453, "learning_rate": 1.2009738539791497e-06, "loss": 0.1027, "step": 2790 }, { "epoch": 0.6350398179749716, "grad_norm": 1.4317662705377017, "learning_rate": 1.2009391583056048e-06, "loss": 0.0689, "step": 2791 }, { "epoch": 0.6352673492605233, "grad_norm": 1.2007483338261875, "learning_rate": 1.2009044508609182e-06, "loss": 0.0584, "step": 2792 }, { "epoch": 0.6354948805460751, "grad_norm": 1.9010319599598573, "learning_rate": 1.2008697316457997e-06, "loss": 0.0966, "step": 2793 }, { "epoch": 0.6357224118316268, "grad_norm": 0.9068372322632697, "learning_rate": 1.2008350006609584e-06, "loss": 0.0319, "step": 2794 }, { "epoch": 0.6359499431171786, "grad_norm": 1.9186704917037192, "learning_rate": 1.2008002579071043e-06, "loss": 0.0834, "step": 2795 }, { "epoch": 0.6361774744027304, "grad_norm": 1.5719062287923162, "learning_rate": 1.2007655033849474e-06, "loss": 0.0719, "step": 2796 }, { "epoch": 0.6364050056882822, "grad_norm": 0.8675484564571867, "learning_rate": 1.2007307370951983e-06, "loss": 0.0382, "step": 2797 }, { "epoch": 0.6366325369738339, "grad_norm": 1.2487471267893597, "learning_rate": 1.200695959038567e-06, "loss": 0.0541, "step": 2798 }, { "epoch": 0.6368600682593857, "grad_norm": 1.340853119074798, "learning_rate": 1.2006611692157648e-06, "loss": 0.0625, "step": 2799 }, { "epoch": 0.6370875995449374, "grad_norm": 1.0090275039380872, "learning_rate": 1.2006263676275026e-06, "loss": 0.0386, "step": 2800 }, { "epoch": 0.6373151308304892, "grad_norm": 1.3604638479008277, "learning_rate": 1.2005915542744915e-06, "loss": 0.0579, "step": 2801 }, { "epoch": 0.6375426621160409, "grad_norm": 1.3626136251906178, "learning_rate": 1.2005567291574434e-06, "loss": 0.0506, "step": 2802 }, { "epoch": 0.6377701934015927, "grad_norm": 2.24330408064307, "learning_rate": 1.2005218922770695e-06, "loss": 0.0934, "step": 2803 }, { "epoch": 0.6379977246871444, "grad_norm": 1.652449071090047, "learning_rate": 1.2004870436340824e-06, "loss": 0.0916, "step": 2804 }, { "epoch": 0.6382252559726962, "grad_norm": 1.335757404355839, "learning_rate": 1.2004521832291943e-06, "loss": 0.0319, "step": 2805 }, { "epoch": 0.6384527872582481, "grad_norm": 1.2669579033763387, "learning_rate": 1.200417311063117e-06, "loss": 0.0452, "step": 2806 }, { "epoch": 0.6386803185437998, "grad_norm": 1.3480996457776724, "learning_rate": 1.200382427136564e-06, "loss": 0.0664, "step": 2807 }, { "epoch": 0.6389078498293516, "grad_norm": 1.6036199545645866, "learning_rate": 1.2003475314502477e-06, "loss": 0.0715, "step": 2808 }, { "epoch": 0.6391353811149033, "grad_norm": 1.0938499350027229, "learning_rate": 1.200312624004882e-06, "loss": 0.0467, "step": 2809 }, { "epoch": 0.6393629124004551, "grad_norm": 1.301558947668022, "learning_rate": 1.2002777048011794e-06, "loss": 0.0611, "step": 2810 }, { "epoch": 0.6395904436860068, "grad_norm": 1.2950309349789517, "learning_rate": 1.2002427738398543e-06, "loss": 0.0501, "step": 2811 }, { "epoch": 0.6398179749715586, "grad_norm": 1.074515099435565, "learning_rate": 1.2002078311216205e-06, "loss": 0.0273, "step": 2812 }, { "epoch": 0.6400455062571103, "grad_norm": 0.9534944533034787, "learning_rate": 1.2001728766471919e-06, "loss": 0.0517, "step": 2813 }, { "epoch": 0.6402730375426621, "grad_norm": 1.5158088143964041, "learning_rate": 1.2001379104172832e-06, "loss": 0.0638, "step": 2814 }, { "epoch": 0.6405005688282139, "grad_norm": 1.247966598668176, "learning_rate": 1.2001029324326087e-06, "loss": 0.054, "step": 2815 }, { "epoch": 0.6407281001137657, "grad_norm": 0.9514285262896064, "learning_rate": 1.2000679426938838e-06, "loss": 0.0324, "step": 2816 }, { "epoch": 0.6409556313993174, "grad_norm": 1.5357576363213208, "learning_rate": 1.2000329412018233e-06, "loss": 0.0504, "step": 2817 }, { "epoch": 0.6411831626848692, "grad_norm": 1.3766532946507657, "learning_rate": 1.1999979279571425e-06, "loss": 0.0785, "step": 2818 }, { "epoch": 0.6414106939704209, "grad_norm": 1.1854802730452816, "learning_rate": 1.1999629029605572e-06, "loss": 0.0471, "step": 2819 }, { "epoch": 0.6416382252559727, "grad_norm": 0.9120829198047485, "learning_rate": 1.1999278662127832e-06, "loss": 0.0465, "step": 2820 }, { "epoch": 0.6418657565415244, "grad_norm": 1.215725225022476, "learning_rate": 1.1998928177145363e-06, "loss": 0.0539, "step": 2821 }, { "epoch": 0.6420932878270762, "grad_norm": 1.6055605292211583, "learning_rate": 1.1998577574665334e-06, "loss": 0.0794, "step": 2822 }, { "epoch": 0.642320819112628, "grad_norm": 1.5194681984361975, "learning_rate": 1.1998226854694906e-06, "loss": 0.0775, "step": 2823 }, { "epoch": 0.6425483503981797, "grad_norm": 1.217305433486093, "learning_rate": 1.1997876017241248e-06, "loss": 0.0363, "step": 2824 }, { "epoch": 0.6427758816837316, "grad_norm": 1.2554857295036752, "learning_rate": 1.199752506231153e-06, "loss": 0.0451, "step": 2825 }, { "epoch": 0.6430034129692833, "grad_norm": 1.4903034342688748, "learning_rate": 1.1997173989912928e-06, "loss": 0.0651, "step": 2826 }, { "epoch": 0.6432309442548351, "grad_norm": 0.9479490152328633, "learning_rate": 1.1996822800052614e-06, "loss": 0.0416, "step": 2827 }, { "epoch": 0.6434584755403868, "grad_norm": 1.3630232299101448, "learning_rate": 1.1996471492737767e-06, "loss": 0.053, "step": 2828 }, { "epoch": 0.6436860068259386, "grad_norm": 0.8310504363214145, "learning_rate": 1.1996120067975568e-06, "loss": 0.0401, "step": 2829 }, { "epoch": 0.6439135381114903, "grad_norm": 0.7817607162647777, "learning_rate": 1.1995768525773195e-06, "loss": 0.0381, "step": 2830 }, { "epoch": 0.6441410693970421, "grad_norm": 1.19151900025748, "learning_rate": 1.1995416866137837e-06, "loss": 0.0401, "step": 2831 }, { "epoch": 0.6443686006825938, "grad_norm": 0.8480287553862164, "learning_rate": 1.1995065089076682e-06, "loss": 0.0427, "step": 2832 }, { "epoch": 0.6445961319681456, "grad_norm": 1.3823956321024167, "learning_rate": 1.199471319459692e-06, "loss": 0.0678, "step": 2833 }, { "epoch": 0.6448236632536973, "grad_norm": 1.0881726943732215, "learning_rate": 1.199436118270574e-06, "loss": 0.0463, "step": 2834 }, { "epoch": 0.6450511945392492, "grad_norm": 0.9603443906924007, "learning_rate": 1.1994009053410336e-06, "loss": 0.0334, "step": 2835 }, { "epoch": 0.645278725824801, "grad_norm": 0.7257173068914297, "learning_rate": 1.1993656806717906e-06, "loss": 0.022, "step": 2836 }, { "epoch": 0.6455062571103527, "grad_norm": 1.3460255228704814, "learning_rate": 1.199330444263565e-06, "loss": 0.0589, "step": 2837 }, { "epoch": 0.6457337883959045, "grad_norm": 1.8598440039360982, "learning_rate": 1.1992951961170774e-06, "loss": 0.0884, "step": 2838 }, { "epoch": 0.6459613196814562, "grad_norm": 1.7631231436296406, "learning_rate": 1.1992599362330474e-06, "loss": 0.0691, "step": 2839 }, { "epoch": 0.646188850967008, "grad_norm": 1.0081908118372565, "learning_rate": 1.199224664612196e-06, "loss": 0.046, "step": 2840 }, { "epoch": 0.6464163822525597, "grad_norm": 1.5846986206428932, "learning_rate": 1.199189381255244e-06, "loss": 0.0819, "step": 2841 }, { "epoch": 0.6466439135381115, "grad_norm": 1.3965515241420483, "learning_rate": 1.199154086162913e-06, "loss": 0.0675, "step": 2842 }, { "epoch": 0.6468714448236632, "grad_norm": 1.7830102963673449, "learning_rate": 1.1991187793359239e-06, "loss": 0.1173, "step": 2843 }, { "epoch": 0.647098976109215, "grad_norm": 0.7979390598594543, "learning_rate": 1.1990834607749981e-06, "loss": 0.0288, "step": 2844 }, { "epoch": 0.6473265073947668, "grad_norm": 1.2345551184177335, "learning_rate": 1.199048130480858e-06, "loss": 0.0823, "step": 2845 }, { "epoch": 0.6475540386803186, "grad_norm": 1.3892698568903075, "learning_rate": 1.1990127884542251e-06, "loss": 0.0739, "step": 2846 }, { "epoch": 0.6477815699658703, "grad_norm": 1.5536465235111332, "learning_rate": 1.1989774346958225e-06, "loss": 0.0725, "step": 2847 }, { "epoch": 0.6480091012514221, "grad_norm": 1.482520572829848, "learning_rate": 1.1989420692063723e-06, "loss": 0.0902, "step": 2848 }, { "epoch": 0.6482366325369738, "grad_norm": 1.0973672503057945, "learning_rate": 1.198906691986597e-06, "loss": 0.0567, "step": 2849 }, { "epoch": 0.6484641638225256, "grad_norm": 1.974255381506691, "learning_rate": 1.1988713030372202e-06, "loss": 0.1019, "step": 2850 }, { "epoch": 0.6486916951080773, "grad_norm": 0.9822063316024513, "learning_rate": 1.198835902358965e-06, "loss": 0.0556, "step": 2851 }, { "epoch": 0.6489192263936291, "grad_norm": 0.9883294090430303, "learning_rate": 1.1988004899525547e-06, "loss": 0.0511, "step": 2852 }, { "epoch": 0.6491467576791808, "grad_norm": 1.7242005203993112, "learning_rate": 1.1987650658187133e-06, "loss": 0.0684, "step": 2853 }, { "epoch": 0.6493742889647327, "grad_norm": 1.163292058133356, "learning_rate": 1.1987296299581648e-06, "loss": 0.055, "step": 2854 }, { "epoch": 0.6496018202502845, "grad_norm": 0.842300844455492, "learning_rate": 1.1986941823716333e-06, "loss": 0.0361, "step": 2855 }, { "epoch": 0.6498293515358362, "grad_norm": 0.7877836086747282, "learning_rate": 1.1986587230598437e-06, "loss": 0.0269, "step": 2856 }, { "epoch": 0.650056882821388, "grad_norm": 1.1854138002017856, "learning_rate": 1.19862325202352e-06, "loss": 0.0789, "step": 2857 }, { "epoch": 0.6502844141069397, "grad_norm": 1.5193193171125443, "learning_rate": 1.198587769263388e-06, "loss": 0.0647, "step": 2858 }, { "epoch": 0.6505119453924915, "grad_norm": 0.9471414553218402, "learning_rate": 1.198552274780172e-06, "loss": 0.0394, "step": 2859 }, { "epoch": 0.6507394766780432, "grad_norm": 1.142561403387133, "learning_rate": 1.1985167685745982e-06, "loss": 0.0427, "step": 2860 }, { "epoch": 0.650967007963595, "grad_norm": 1.163361004688848, "learning_rate": 1.198481250647392e-06, "loss": 0.0583, "step": 2861 }, { "epoch": 0.6511945392491467, "grad_norm": 1.2956201317841325, "learning_rate": 1.1984457209992792e-06, "loss": 0.0497, "step": 2862 }, { "epoch": 0.6514220705346985, "grad_norm": 1.0871624809389135, "learning_rate": 1.1984101796309862e-06, "loss": 0.0719, "step": 2863 }, { "epoch": 0.6516496018202503, "grad_norm": 1.3146485801211771, "learning_rate": 1.1983746265432392e-06, "loss": 0.0589, "step": 2864 }, { "epoch": 0.6518771331058021, "grad_norm": 1.5681925610927387, "learning_rate": 1.1983390617367649e-06, "loss": 0.0701, "step": 2865 }, { "epoch": 0.6521046643913538, "grad_norm": 2.2916879452331926, "learning_rate": 1.1983034852122902e-06, "loss": 0.1033, "step": 2866 }, { "epoch": 0.6523321956769056, "grad_norm": 1.0785758663467682, "learning_rate": 1.1982678969705425e-06, "loss": 0.0264, "step": 2867 }, { "epoch": 0.6525597269624573, "grad_norm": 1.8613288950042697, "learning_rate": 1.1982322970122485e-06, "loss": 0.066, "step": 2868 }, { "epoch": 0.6527872582480091, "grad_norm": 1.1933202650904684, "learning_rate": 1.1981966853381364e-06, "loss": 0.0553, "step": 2869 }, { "epoch": 0.6530147895335608, "grad_norm": 0.9553057216020039, "learning_rate": 1.1981610619489337e-06, "loss": 0.0391, "step": 2870 }, { "epoch": 0.6532423208191126, "grad_norm": 1.1836315703365476, "learning_rate": 1.1981254268453684e-06, "loss": 0.054, "step": 2871 }, { "epoch": 0.6534698521046644, "grad_norm": 1.276765336289362, "learning_rate": 1.1980897800281694e-06, "loss": 0.0585, "step": 2872 }, { "epoch": 0.6536973833902161, "grad_norm": 1.5829433588482489, "learning_rate": 1.1980541214980646e-06, "loss": 0.0811, "step": 2873 }, { "epoch": 0.653924914675768, "grad_norm": 2.2299790037798854, "learning_rate": 1.1980184512557833e-06, "loss": 0.0838, "step": 2874 }, { "epoch": 0.6541524459613197, "grad_norm": 1.8246918071788611, "learning_rate": 1.1979827693020541e-06, "loss": 0.0778, "step": 2875 }, { "epoch": 0.6543799772468715, "grad_norm": 0.8023113638584155, "learning_rate": 1.1979470756376064e-06, "loss": 0.0374, "step": 2876 }, { "epoch": 0.6546075085324232, "grad_norm": 1.0665996787354195, "learning_rate": 1.1979113702631697e-06, "loss": 0.0642, "step": 2877 }, { "epoch": 0.654835039817975, "grad_norm": 1.0607142907030562, "learning_rate": 1.197875653179474e-06, "loss": 0.0379, "step": 2878 }, { "epoch": 0.6550625711035267, "grad_norm": 1.509330774335647, "learning_rate": 1.1978399243872492e-06, "loss": 0.0429, "step": 2879 }, { "epoch": 0.6552901023890785, "grad_norm": 1.1080169650246614, "learning_rate": 1.1978041838872253e-06, "loss": 0.041, "step": 2880 }, { "epoch": 0.6555176336746302, "grad_norm": 0.8616722922724181, "learning_rate": 1.197768431680133e-06, "loss": 0.0308, "step": 2881 }, { "epoch": 0.655745164960182, "grad_norm": 1.625281473798501, "learning_rate": 1.197732667766703e-06, "loss": 0.0732, "step": 2882 }, { "epoch": 0.6559726962457337, "grad_norm": 2.0203110844795056, "learning_rate": 1.1976968921476662e-06, "loss": 0.0885, "step": 2883 }, { "epoch": 0.6562002275312856, "grad_norm": 1.325667144252201, "learning_rate": 1.1976611048237534e-06, "loss": 0.0471, "step": 2884 }, { "epoch": 0.6564277588168373, "grad_norm": 1.108094823287875, "learning_rate": 1.1976253057956968e-06, "loss": 0.0486, "step": 2885 }, { "epoch": 0.6566552901023891, "grad_norm": 1.5070679596895156, "learning_rate": 1.1975894950642276e-06, "loss": 0.0609, "step": 2886 }, { "epoch": 0.6568828213879409, "grad_norm": 1.8747406123784733, "learning_rate": 1.1975536726300776e-06, "loss": 0.084, "step": 2887 }, { "epoch": 0.6571103526734926, "grad_norm": 1.0968771480268504, "learning_rate": 1.1975178384939793e-06, "loss": 0.0518, "step": 2888 }, { "epoch": 0.6573378839590444, "grad_norm": 1.2149776100702219, "learning_rate": 1.197481992656665e-06, "loss": 0.0663, "step": 2889 }, { "epoch": 0.6575654152445961, "grad_norm": 1.0219972948941853, "learning_rate": 1.1974461351188668e-06, "loss": 0.0421, "step": 2890 }, { "epoch": 0.6577929465301479, "grad_norm": 1.0668851298436286, "learning_rate": 1.1974102658813183e-06, "loss": 0.0372, "step": 2891 }, { "epoch": 0.6580204778156996, "grad_norm": 0.9280161130235592, "learning_rate": 1.1973743849447522e-06, "loss": 0.041, "step": 2892 }, { "epoch": 0.6582480091012515, "grad_norm": 1.5811942617888033, "learning_rate": 1.197338492309902e-06, "loss": 0.0644, "step": 2893 }, { "epoch": 0.6584755403868032, "grad_norm": 0.9301441187543878, "learning_rate": 1.1973025879775011e-06, "loss": 0.0452, "step": 2894 }, { "epoch": 0.658703071672355, "grad_norm": 1.4309181679010228, "learning_rate": 1.1972666719482833e-06, "loss": 0.0594, "step": 2895 }, { "epoch": 0.6589306029579067, "grad_norm": 1.2896695199681876, "learning_rate": 1.197230744222983e-06, "loss": 0.0462, "step": 2896 }, { "epoch": 0.6591581342434585, "grad_norm": 1.3825152304918809, "learning_rate": 1.1971948048023343e-06, "loss": 0.0645, "step": 2897 }, { "epoch": 0.6593856655290102, "grad_norm": 1.5004332728601224, "learning_rate": 1.1971588536870717e-06, "loss": 0.0709, "step": 2898 }, { "epoch": 0.659613196814562, "grad_norm": 1.4884045373736343, "learning_rate": 1.19712289087793e-06, "loss": 0.0779, "step": 2899 }, { "epoch": 0.6598407281001137, "grad_norm": 1.8129968316744782, "learning_rate": 1.197086916375644e-06, "loss": 0.0848, "step": 2900 }, { "epoch": 0.6600682593856655, "grad_norm": 0.6798855950472126, "learning_rate": 1.1970509301809493e-06, "loss": 0.0182, "step": 2901 }, { "epoch": 0.6602957906712172, "grad_norm": 1.3632444561301862, "learning_rate": 1.1970149322945812e-06, "loss": 0.0885, "step": 2902 }, { "epoch": 0.6605233219567691, "grad_norm": 1.1160426689933205, "learning_rate": 1.1969789227172755e-06, "loss": 0.0467, "step": 2903 }, { "epoch": 0.6607508532423209, "grad_norm": 0.9677303090234676, "learning_rate": 1.1969429014497684e-06, "loss": 0.0257, "step": 2904 }, { "epoch": 0.6609783845278726, "grad_norm": 17.353739180619908, "learning_rate": 1.1969068684927954e-06, "loss": 0.0818, "step": 2905 }, { "epoch": 0.6612059158134244, "grad_norm": 1.2792498241135908, "learning_rate": 1.1968708238470936e-06, "loss": 0.058, "step": 2906 }, { "epoch": 0.6614334470989761, "grad_norm": 1.4555868612084297, "learning_rate": 1.1968347675133995e-06, "loss": 0.0562, "step": 2907 }, { "epoch": 0.6616609783845279, "grad_norm": 1.5729405842398636, "learning_rate": 1.19679869949245e-06, "loss": 0.0901, "step": 2908 }, { "epoch": 0.6618885096700796, "grad_norm": 2.205720994640489, "learning_rate": 1.1967626197849824e-06, "loss": 0.0712, "step": 2909 }, { "epoch": 0.6621160409556314, "grad_norm": 2.2706627078782478, "learning_rate": 1.1967265283917339e-06, "loss": 0.1286, "step": 2910 }, { "epoch": 0.6623435722411831, "grad_norm": 1.3583089823132908, "learning_rate": 1.1966904253134422e-06, "loss": 0.051, "step": 2911 }, { "epoch": 0.6625711035267349, "grad_norm": 1.2040227987956196, "learning_rate": 1.1966543105508454e-06, "loss": 0.0303, "step": 2912 }, { "epoch": 0.6627986348122867, "grad_norm": 1.8898241749294495, "learning_rate": 1.1966181841046812e-06, "loss": 0.1013, "step": 2913 }, { "epoch": 0.6630261660978385, "grad_norm": 1.1212373105622575, "learning_rate": 1.1965820459756882e-06, "loss": 0.0423, "step": 2914 }, { "epoch": 0.6632536973833902, "grad_norm": 1.4942787777207431, "learning_rate": 1.1965458961646051e-06, "loss": 0.074, "step": 2915 }, { "epoch": 0.663481228668942, "grad_norm": 1.2554067286772215, "learning_rate": 1.1965097346721707e-06, "loss": 0.0569, "step": 2916 }, { "epoch": 0.6637087599544937, "grad_norm": 2.064257552726771, "learning_rate": 1.1964735614991237e-06, "loss": 0.1222, "step": 2917 }, { "epoch": 0.6639362912400455, "grad_norm": 1.7127511151118642, "learning_rate": 1.196437376646204e-06, "loss": 0.059, "step": 2918 }, { "epoch": 0.6641638225255972, "grad_norm": 1.747734548367446, "learning_rate": 1.1964011801141505e-06, "loss": 0.0699, "step": 2919 }, { "epoch": 0.664391353811149, "grad_norm": 1.7260170721527037, "learning_rate": 1.1963649719037037e-06, "loss": 0.0682, "step": 2920 }, { "epoch": 0.6646188850967008, "grad_norm": 1.0412792846070889, "learning_rate": 1.196328752015603e-06, "loss": 0.0469, "step": 2921 }, { "epoch": 0.6648464163822526, "grad_norm": 1.0652258289806211, "learning_rate": 1.1962925204505894e-06, "loss": 0.0292, "step": 2922 }, { "epoch": 0.6650739476678044, "grad_norm": 1.05165136662707, "learning_rate": 1.1962562772094024e-06, "loss": 0.0707, "step": 2923 }, { "epoch": 0.6653014789533561, "grad_norm": 0.877541367497127, "learning_rate": 1.1962200222927836e-06, "loss": 0.0382, "step": 2924 }, { "epoch": 0.6655290102389079, "grad_norm": 1.0400570459306038, "learning_rate": 1.1961837557014736e-06, "loss": 0.0334, "step": 2925 }, { "epoch": 0.6657565415244596, "grad_norm": 1.6730636365537126, "learning_rate": 1.196147477436214e-06, "loss": 0.1133, "step": 2926 }, { "epoch": 0.6659840728100114, "grad_norm": 0.9388002444280849, "learning_rate": 1.1961111874977455e-06, "loss": 0.0425, "step": 2927 }, { "epoch": 0.6662116040955631, "grad_norm": 1.2971959782913498, "learning_rate": 1.1960748858868104e-06, "loss": 0.0598, "step": 2928 }, { "epoch": 0.6664391353811149, "grad_norm": 1.6257500523277735, "learning_rate": 1.1960385726041507e-06, "loss": 0.0773, "step": 2929 }, { "epoch": 0.6666666666666666, "grad_norm": 1.113728651042982, "learning_rate": 1.1960022476505082e-06, "loss": 0.0406, "step": 2930 }, { "epoch": 0.6668941979522184, "grad_norm": 0.8908601808254871, "learning_rate": 1.1959659110266256e-06, "loss": 0.0392, "step": 2931 }, { "epoch": 0.6671217292377702, "grad_norm": 1.6390262789672483, "learning_rate": 1.1959295627332454e-06, "loss": 0.0888, "step": 2932 }, { "epoch": 0.667349260523322, "grad_norm": 1.4346808601596435, "learning_rate": 1.1958932027711106e-06, "loss": 0.0534, "step": 2933 }, { "epoch": 0.6675767918088737, "grad_norm": 0.9348135708089949, "learning_rate": 1.1958568311409643e-06, "loss": 0.0472, "step": 2934 }, { "epoch": 0.6678043230944255, "grad_norm": 1.159822973816305, "learning_rate": 1.1958204478435497e-06, "loss": 0.0602, "step": 2935 }, { "epoch": 0.6680318543799773, "grad_norm": 1.0013241030005304, "learning_rate": 1.195784052879611e-06, "loss": 0.0347, "step": 2936 }, { "epoch": 0.668259385665529, "grad_norm": 0.7712539814391252, "learning_rate": 1.195747646249891e-06, "loss": 0.0238, "step": 2937 }, { "epoch": 0.6684869169510808, "grad_norm": 1.8913673239815798, "learning_rate": 1.1957112279551347e-06, "loss": 0.0967, "step": 2938 }, { "epoch": 0.6687144482366325, "grad_norm": 1.3385349237767592, "learning_rate": 1.195674797996086e-06, "loss": 0.0452, "step": 2939 }, { "epoch": 0.6689419795221843, "grad_norm": 1.8314127610766537, "learning_rate": 1.1956383563734897e-06, "loss": 0.0888, "step": 2940 }, { "epoch": 0.669169510807736, "grad_norm": 1.4820883842924408, "learning_rate": 1.1956019030880902e-06, "loss": 0.0629, "step": 2941 }, { "epoch": 0.6693970420932879, "grad_norm": 0.9377075991001586, "learning_rate": 1.1955654381406331e-06, "loss": 0.0541, "step": 2942 }, { "epoch": 0.6696245733788396, "grad_norm": 1.1625402940924978, "learning_rate": 1.1955289615318632e-06, "loss": 0.0583, "step": 2943 }, { "epoch": 0.6698521046643914, "grad_norm": 0.9043356889096931, "learning_rate": 1.1954924732625264e-06, "loss": 0.0386, "step": 2944 }, { "epoch": 0.6700796359499431, "grad_norm": 1.0364482997261704, "learning_rate": 1.1954559733333681e-06, "loss": 0.0353, "step": 2945 }, { "epoch": 0.6703071672354949, "grad_norm": 1.6526326184491673, "learning_rate": 1.1954194617451345e-06, "loss": 0.0531, "step": 2946 }, { "epoch": 0.6705346985210466, "grad_norm": 0.9766619653449208, "learning_rate": 1.1953829384985716e-06, "loss": 0.0391, "step": 2947 }, { "epoch": 0.6707622298065984, "grad_norm": 2.163216907401221, "learning_rate": 1.1953464035944262e-06, "loss": 0.0835, "step": 2948 }, { "epoch": 0.6709897610921501, "grad_norm": 1.4844803459994427, "learning_rate": 1.195309857033445e-06, "loss": 0.0842, "step": 2949 }, { "epoch": 0.6712172923777019, "grad_norm": 0.6644024917990391, "learning_rate": 1.1952732988163745e-06, "loss": 0.0347, "step": 2950 }, { "epoch": 0.6714448236632536, "grad_norm": 1.3392424005542798, "learning_rate": 1.1952367289439624e-06, "loss": 0.0593, "step": 2951 }, { "epoch": 0.6716723549488055, "grad_norm": 1.5693502580306737, "learning_rate": 1.1952001474169558e-06, "loss": 0.0723, "step": 2952 }, { "epoch": 0.6718998862343573, "grad_norm": 1.3882537221447417, "learning_rate": 1.1951635542361025e-06, "loss": 0.0482, "step": 2953 }, { "epoch": 0.672127417519909, "grad_norm": 1.6367400788566016, "learning_rate": 1.1951269494021503e-06, "loss": 0.0541, "step": 2954 }, { "epoch": 0.6723549488054608, "grad_norm": 1.2264958404782558, "learning_rate": 1.1950903329158475e-06, "loss": 0.0543, "step": 2955 }, { "epoch": 0.6725824800910125, "grad_norm": 0.9638518358259208, "learning_rate": 1.1950537047779424e-06, "loss": 0.0425, "step": 2956 }, { "epoch": 0.6728100113765643, "grad_norm": 0.7797808668666366, "learning_rate": 1.1950170649891836e-06, "loss": 0.0266, "step": 2957 }, { "epoch": 0.673037542662116, "grad_norm": 1.256662868306741, "learning_rate": 1.1949804135503196e-06, "loss": 0.0762, "step": 2958 }, { "epoch": 0.6732650739476678, "grad_norm": 1.0816193366991107, "learning_rate": 1.1949437504621e-06, "loss": 0.047, "step": 2959 }, { "epoch": 0.6734926052332195, "grad_norm": 1.5164857137673318, "learning_rate": 1.194907075725274e-06, "loss": 0.0995, "step": 2960 }, { "epoch": 0.6737201365187714, "grad_norm": 1.1360766280459287, "learning_rate": 1.1948703893405911e-06, "loss": 0.0427, "step": 2961 }, { "epoch": 0.6739476678043231, "grad_norm": 1.342451075159084, "learning_rate": 1.194833691308801e-06, "loss": 0.0422, "step": 2962 }, { "epoch": 0.6741751990898749, "grad_norm": 1.7047601826121286, "learning_rate": 1.194796981630654e-06, "loss": 0.0532, "step": 2963 }, { "epoch": 0.6744027303754266, "grad_norm": 1.3289514422481006, "learning_rate": 1.1947602603069002e-06, "loss": 0.0648, "step": 2964 }, { "epoch": 0.6746302616609784, "grad_norm": 1.8203278250275807, "learning_rate": 1.19472352733829e-06, "loss": 0.0591, "step": 2965 }, { "epoch": 0.6748577929465301, "grad_norm": 0.9130861662390535, "learning_rate": 1.1946867827255744e-06, "loss": 0.0392, "step": 2966 }, { "epoch": 0.6750853242320819, "grad_norm": 2.1792488178961866, "learning_rate": 1.1946500264695044e-06, "loss": 0.0852, "step": 2967 }, { "epoch": 0.6753128555176336, "grad_norm": 1.3430961956554572, "learning_rate": 1.194613258570831e-06, "loss": 0.054, "step": 2968 }, { "epoch": 0.6755403868031854, "grad_norm": 1.0194337724675724, "learning_rate": 1.1945764790303059e-06, "loss": 0.0577, "step": 2969 }, { "epoch": 0.6757679180887372, "grad_norm": 1.414677659719635, "learning_rate": 1.1945396878486805e-06, "loss": 0.0575, "step": 2970 }, { "epoch": 0.675995449374289, "grad_norm": 1.3881918329356409, "learning_rate": 1.194502885026707e-06, "loss": 0.07, "step": 2971 }, { "epoch": 0.6762229806598408, "grad_norm": 0.8742447812198808, "learning_rate": 1.1944660705651375e-06, "loss": 0.0545, "step": 2972 }, { "epoch": 0.6764505119453925, "grad_norm": 0.9000949345293963, "learning_rate": 1.1944292444647248e-06, "loss": 0.0465, "step": 2973 }, { "epoch": 0.6766780432309443, "grad_norm": 1.0764234435889581, "learning_rate": 1.1943924067262208e-06, "loss": 0.0432, "step": 2974 }, { "epoch": 0.676905574516496, "grad_norm": 1.249115475491158, "learning_rate": 1.194355557350379e-06, "loss": 0.0548, "step": 2975 }, { "epoch": 0.6771331058020478, "grad_norm": 1.067373727393787, "learning_rate": 1.1943186963379522e-06, "loss": 0.0528, "step": 2976 }, { "epoch": 0.6773606370875995, "grad_norm": 1.5676460836455215, "learning_rate": 1.194281823689694e-06, "loss": 0.0672, "step": 2977 }, { "epoch": 0.6775881683731513, "grad_norm": 1.334005489011124, "learning_rate": 1.194244939406358e-06, "loss": 0.0565, "step": 2978 }, { "epoch": 0.677815699658703, "grad_norm": 1.324440667565256, "learning_rate": 1.1942080434886978e-06, "loss": 0.0488, "step": 2979 }, { "epoch": 0.6780432309442548, "grad_norm": 1.155754728183158, "learning_rate": 1.1941711359374678e-06, "loss": 0.0622, "step": 2980 }, { "epoch": 0.6782707622298066, "grad_norm": 0.7220306306891887, "learning_rate": 1.194134216753422e-06, "loss": 0.0363, "step": 2981 }, { "epoch": 0.6784982935153584, "grad_norm": 2.259664003454956, "learning_rate": 1.1940972859373151e-06, "loss": 0.084, "step": 2982 }, { "epoch": 0.6787258248009101, "grad_norm": 0.9209026025756932, "learning_rate": 1.194060343489902e-06, "loss": 0.0247, "step": 2983 }, { "epoch": 0.6789533560864619, "grad_norm": 2.237931268641456, "learning_rate": 1.1940233894119377e-06, "loss": 0.1003, "step": 2984 }, { "epoch": 0.6791808873720137, "grad_norm": 1.45745994111441, "learning_rate": 1.1939864237041774e-06, "loss": 0.0656, "step": 2985 }, { "epoch": 0.6794084186575654, "grad_norm": 0.9550126427103799, "learning_rate": 1.1939494463673767e-06, "loss": 0.0442, "step": 2986 }, { "epoch": 0.6796359499431172, "grad_norm": 1.9833777656739968, "learning_rate": 1.1939124574022914e-06, "loss": 0.0904, "step": 2987 }, { "epoch": 0.6798634812286689, "grad_norm": 1.3661977813438515, "learning_rate": 1.1938754568096771e-06, "loss": 0.0644, "step": 2988 }, { "epoch": 0.6800910125142207, "grad_norm": 0.8616871036933312, "learning_rate": 1.1938384445902905e-06, "loss": 0.0393, "step": 2989 }, { "epoch": 0.6803185437997725, "grad_norm": 1.1588349083327987, "learning_rate": 1.1938014207448877e-06, "loss": 0.047, "step": 2990 }, { "epoch": 0.6805460750853243, "grad_norm": 1.0841199268841493, "learning_rate": 1.1937643852742258e-06, "loss": 0.0315, "step": 2991 }, { "epoch": 0.680773606370876, "grad_norm": 4.593658878524584, "learning_rate": 1.1937273381790615e-06, "loss": 0.1461, "step": 2992 }, { "epoch": 0.6810011376564278, "grad_norm": 1.1490732435329922, "learning_rate": 1.1936902794601518e-06, "loss": 0.0479, "step": 2993 }, { "epoch": 0.6812286689419795, "grad_norm": 1.450896272579947, "learning_rate": 1.1936532091182544e-06, "loss": 0.0613, "step": 2994 }, { "epoch": 0.6814562002275313, "grad_norm": 1.2205030830111818, "learning_rate": 1.1936161271541268e-06, "loss": 0.0626, "step": 2995 }, { "epoch": 0.681683731513083, "grad_norm": 1.1653942327121265, "learning_rate": 1.1935790335685272e-06, "loss": 0.0464, "step": 2996 }, { "epoch": 0.6819112627986348, "grad_norm": 1.9153169444858846, "learning_rate": 1.193541928362213e-06, "loss": 0.0732, "step": 2997 }, { "epoch": 0.6821387940841865, "grad_norm": 1.4815686264003924, "learning_rate": 1.1935048115359432e-06, "loss": 0.0638, "step": 2998 }, { "epoch": 0.6823663253697383, "grad_norm": 1.6915818417968453, "learning_rate": 1.1934676830904763e-06, "loss": 0.0773, "step": 2999 }, { "epoch": 0.6825938566552902, "grad_norm": 1.3182256509758368, "learning_rate": 1.193430543026571e-06, "loss": 0.0627, "step": 3000 }, { "epoch": 0.6828213879408419, "grad_norm": 1.6374773596627972, "learning_rate": 1.1933933913449867e-06, "loss": 0.1202, "step": 3001 }, { "epoch": 0.6830489192263937, "grad_norm": 1.6225368015279966, "learning_rate": 1.193356228046482e-06, "loss": 0.1016, "step": 3002 }, { "epoch": 0.6832764505119454, "grad_norm": 1.158897733977453, "learning_rate": 1.1933190531318172e-06, "loss": 0.0455, "step": 3003 }, { "epoch": 0.6835039817974972, "grad_norm": 1.2180725170363247, "learning_rate": 1.1932818666017516e-06, "loss": 0.0335, "step": 3004 }, { "epoch": 0.6837315130830489, "grad_norm": 1.5088158398690277, "learning_rate": 1.1932446684570455e-06, "loss": 0.0478, "step": 3005 }, { "epoch": 0.6839590443686007, "grad_norm": 1.0577989164533361, "learning_rate": 1.1932074586984592e-06, "loss": 0.0411, "step": 3006 }, { "epoch": 0.6841865756541524, "grad_norm": 1.265620028127131, "learning_rate": 1.1931702373267527e-06, "loss": 0.0373, "step": 3007 }, { "epoch": 0.6844141069397042, "grad_norm": 1.015589756317499, "learning_rate": 1.1931330043426872e-06, "loss": 0.0701, "step": 3008 }, { "epoch": 0.6846416382252559, "grad_norm": 0.9468195016052887, "learning_rate": 1.1930957597470238e-06, "loss": 0.0502, "step": 3009 }, { "epoch": 0.6848691695108078, "grad_norm": 1.4788516581774798, "learning_rate": 1.1930585035405235e-06, "loss": 0.0579, "step": 3010 }, { "epoch": 0.6850967007963595, "grad_norm": 1.5755558972332642, "learning_rate": 1.1930212357239475e-06, "loss": 0.0525, "step": 3011 }, { "epoch": 0.6853242320819113, "grad_norm": 1.2998868209788172, "learning_rate": 1.1929839562980579e-06, "loss": 0.0585, "step": 3012 }, { "epoch": 0.685551763367463, "grad_norm": 1.5356635666634295, "learning_rate": 1.1929466652636164e-06, "loss": 0.0604, "step": 3013 }, { "epoch": 0.6857792946530148, "grad_norm": 2.0893811231577524, "learning_rate": 1.1929093626213852e-06, "loss": 0.1015, "step": 3014 }, { "epoch": 0.6860068259385665, "grad_norm": 0.5321529547637093, "learning_rate": 1.1928720483721269e-06, "loss": 0.0139, "step": 3015 }, { "epoch": 0.6862343572241183, "grad_norm": 1.0580052416688368, "learning_rate": 1.1928347225166035e-06, "loss": 0.0499, "step": 3016 }, { "epoch": 0.68646188850967, "grad_norm": 1.3320645495655943, "learning_rate": 1.1927973850555785e-06, "loss": 0.0465, "step": 3017 }, { "epoch": 0.6866894197952218, "grad_norm": 1.3194332531036985, "learning_rate": 1.192760035989815e-06, "loss": 0.0489, "step": 3018 }, { "epoch": 0.6869169510807736, "grad_norm": 1.7873894152489311, "learning_rate": 1.192722675320076e-06, "loss": 0.0683, "step": 3019 }, { "epoch": 0.6871444823663254, "grad_norm": 1.533578323181438, "learning_rate": 1.1926853030471253e-06, "loss": 0.0536, "step": 3020 }, { "epoch": 0.6873720136518772, "grad_norm": 0.9261434578357977, "learning_rate": 1.1926479191717267e-06, "loss": 0.0437, "step": 3021 }, { "epoch": 0.6875995449374289, "grad_norm": 1.1444794832316916, "learning_rate": 1.1926105236946443e-06, "loss": 0.0577, "step": 3022 }, { "epoch": 0.6878270762229807, "grad_norm": 1.335750162762178, "learning_rate": 1.192573116616642e-06, "loss": 0.0529, "step": 3023 }, { "epoch": 0.6880546075085324, "grad_norm": 1.434078699306857, "learning_rate": 1.192535697938485e-06, "loss": 0.0529, "step": 3024 }, { "epoch": 0.6882821387940842, "grad_norm": 1.3416490998400092, "learning_rate": 1.1924982676609377e-06, "loss": 0.074, "step": 3025 }, { "epoch": 0.6885096700796359, "grad_norm": 1.3232433001494412, "learning_rate": 1.1924608257847651e-06, "loss": 0.0451, "step": 3026 }, { "epoch": 0.6887372013651877, "grad_norm": 0.8764669981180951, "learning_rate": 1.1924233723107322e-06, "loss": 0.0484, "step": 3027 }, { "epoch": 0.6889647326507394, "grad_norm": 1.1655688024633921, "learning_rate": 1.1923859072396051e-06, "loss": 0.0606, "step": 3028 }, { "epoch": 0.6891922639362913, "grad_norm": 1.0179281217422282, "learning_rate": 1.1923484305721489e-06, "loss": 0.0466, "step": 3029 }, { "epoch": 0.689419795221843, "grad_norm": 0.8659456244009691, "learning_rate": 1.19231094230913e-06, "loss": 0.0404, "step": 3030 }, { "epoch": 0.6896473265073948, "grad_norm": 2.0636576063861565, "learning_rate": 1.1922734424513144e-06, "loss": 0.0784, "step": 3031 }, { "epoch": 0.6898748577929465, "grad_norm": 0.8695263646618282, "learning_rate": 1.1922359309994685e-06, "loss": 0.0277, "step": 3032 }, { "epoch": 0.6901023890784983, "grad_norm": 1.4190056652396834, "learning_rate": 1.192198407954359e-06, "loss": 0.0616, "step": 3033 }, { "epoch": 0.69032992036405, "grad_norm": 1.4753814874094142, "learning_rate": 1.192160873316753e-06, "loss": 0.0517, "step": 3034 }, { "epoch": 0.6905574516496018, "grad_norm": 1.5554050029090873, "learning_rate": 1.1921233270874174e-06, "loss": 0.0563, "step": 3035 }, { "epoch": 0.6907849829351536, "grad_norm": 0.7022912796986149, "learning_rate": 1.1920857692671196e-06, "loss": 0.0312, "step": 3036 }, { "epoch": 0.6910125142207053, "grad_norm": 2.0009143095316646, "learning_rate": 1.192048199856627e-06, "loss": 0.0994, "step": 3037 }, { "epoch": 0.6912400455062571, "grad_norm": 2.134476815905357, "learning_rate": 1.192010618856708e-06, "loss": 0.1024, "step": 3038 }, { "epoch": 0.6914675767918089, "grad_norm": 1.1849138073132004, "learning_rate": 1.1919730262681304e-06, "loss": 0.055, "step": 3039 }, { "epoch": 0.6916951080773607, "grad_norm": 1.845998643453397, "learning_rate": 1.1919354220916624e-06, "loss": 0.0983, "step": 3040 }, { "epoch": 0.6919226393629124, "grad_norm": 2.449072929938712, "learning_rate": 1.1918978063280726e-06, "loss": 0.0849, "step": 3041 }, { "epoch": 0.6921501706484642, "grad_norm": 0.8729079689297836, "learning_rate": 1.1918601789781299e-06, "loss": 0.0306, "step": 3042 }, { "epoch": 0.6923777019340159, "grad_norm": 1.780329225544261, "learning_rate": 1.1918225400426032e-06, "loss": 0.0689, "step": 3043 }, { "epoch": 0.6926052332195677, "grad_norm": 1.063152792810893, "learning_rate": 1.191784889522262e-06, "loss": 0.0408, "step": 3044 }, { "epoch": 0.6928327645051194, "grad_norm": 1.6901254212789465, "learning_rate": 1.1917472274178757e-06, "loss": 0.0846, "step": 3045 }, { "epoch": 0.6930602957906712, "grad_norm": 1.4002132139198793, "learning_rate": 1.191709553730214e-06, "loss": 0.0772, "step": 3046 }, { "epoch": 0.6932878270762229, "grad_norm": 1.5076734596253167, "learning_rate": 1.1916718684600469e-06, "loss": 0.0819, "step": 3047 }, { "epoch": 0.6935153583617747, "grad_norm": 1.0960152159965912, "learning_rate": 1.1916341716081446e-06, "loss": 0.049, "step": 3048 }, { "epoch": 0.6937428896473266, "grad_norm": 2.557391206512559, "learning_rate": 1.1915964631752775e-06, "loss": 0.0876, "step": 3049 }, { "epoch": 0.6939704209328783, "grad_norm": 0.920645510461067, "learning_rate": 1.1915587431622164e-06, "loss": 0.0307, "step": 3050 }, { "epoch": 0.6941979522184301, "grad_norm": 1.04173789253993, "learning_rate": 1.1915210115697324e-06, "loss": 0.0539, "step": 3051 }, { "epoch": 0.6944254835039818, "grad_norm": 1.6810913949726196, "learning_rate": 1.1914832683985962e-06, "loss": 0.0544, "step": 3052 }, { "epoch": 0.6946530147895336, "grad_norm": 1.850644903401837, "learning_rate": 1.1914455136495796e-06, "loss": 0.0899, "step": 3053 }, { "epoch": 0.6948805460750853, "grad_norm": 0.7506486967154027, "learning_rate": 1.191407747323454e-06, "loss": 0.0265, "step": 3054 }, { "epoch": 0.6951080773606371, "grad_norm": 2.1012986938124696, "learning_rate": 1.1913699694209914e-06, "loss": 0.1144, "step": 3055 }, { "epoch": 0.6953356086461888, "grad_norm": 1.2089722077198737, "learning_rate": 1.191332179942964e-06, "loss": 0.0609, "step": 3056 }, { "epoch": 0.6955631399317406, "grad_norm": 1.5324615273992814, "learning_rate": 1.1912943788901438e-06, "loss": 0.067, "step": 3057 }, { "epoch": 0.6957906712172923, "grad_norm": 1.4377403085013738, "learning_rate": 1.191256566263304e-06, "loss": 0.0841, "step": 3058 }, { "epoch": 0.6960182025028442, "grad_norm": 1.7028519956627337, "learning_rate": 1.1912187420632165e-06, "loss": 0.0796, "step": 3059 }, { "epoch": 0.6962457337883959, "grad_norm": 1.535145639508393, "learning_rate": 1.1911809062906552e-06, "loss": 0.0581, "step": 3060 }, { "epoch": 0.6964732650739477, "grad_norm": 1.3876603600761275, "learning_rate": 1.1911430589463931e-06, "loss": 0.0548, "step": 3061 }, { "epoch": 0.6967007963594994, "grad_norm": 2.3585378108380057, "learning_rate": 1.1911052000312038e-06, "loss": 0.1131, "step": 3062 }, { "epoch": 0.6969283276450512, "grad_norm": 1.224179370923149, "learning_rate": 1.1910673295458607e-06, "loss": 0.0733, "step": 3063 }, { "epoch": 0.697155858930603, "grad_norm": 1.080344689666754, "learning_rate": 1.1910294474911382e-06, "loss": 0.0339, "step": 3064 }, { "epoch": 0.6973833902161547, "grad_norm": 1.132793254903834, "learning_rate": 1.1909915538678105e-06, "loss": 0.0668, "step": 3065 }, { "epoch": 0.6976109215017064, "grad_norm": 0.9670480449446429, "learning_rate": 1.1909536486766522e-06, "loss": 0.0613, "step": 3066 }, { "epoch": 0.6978384527872582, "grad_norm": 0.8333929861107748, "learning_rate": 1.1909157319184373e-06, "loss": 0.0313, "step": 3067 }, { "epoch": 0.6980659840728101, "grad_norm": 1.5088053682741849, "learning_rate": 1.1908778035939416e-06, "loss": 0.0723, "step": 3068 }, { "epoch": 0.6982935153583618, "grad_norm": 2.496852043007627, "learning_rate": 1.1908398637039398e-06, "loss": 0.1006, "step": 3069 }, { "epoch": 0.6985210466439136, "grad_norm": 1.340716909671611, "learning_rate": 1.1908019122492077e-06, "loss": 0.0447, "step": 3070 }, { "epoch": 0.6987485779294653, "grad_norm": 2.0378089803817887, "learning_rate": 1.1907639492305205e-06, "loss": 0.0808, "step": 3071 }, { "epoch": 0.6989761092150171, "grad_norm": 0.7800931599177211, "learning_rate": 1.1907259746486547e-06, "loss": 0.0358, "step": 3072 }, { "epoch": 0.6992036405005688, "grad_norm": 0.9202110065520366, "learning_rate": 1.1906879885043856e-06, "loss": 0.0571, "step": 3073 }, { "epoch": 0.6994311717861206, "grad_norm": 0.7009452078498103, "learning_rate": 1.1906499907984903e-06, "loss": 0.0318, "step": 3074 }, { "epoch": 0.6996587030716723, "grad_norm": 1.1314308444671177, "learning_rate": 1.190611981531745e-06, "loss": 0.0446, "step": 3075 }, { "epoch": 0.6998862343572241, "grad_norm": 1.332217799137746, "learning_rate": 1.1905739607049267e-06, "loss": 0.0744, "step": 3076 }, { "epoch": 0.7001137656427758, "grad_norm": 0.9698337291959647, "learning_rate": 1.1905359283188126e-06, "loss": 0.0368, "step": 3077 }, { "epoch": 0.7003412969283277, "grad_norm": 0.5221877787168306, "learning_rate": 1.1904978843741796e-06, "loss": 0.0167, "step": 3078 }, { "epoch": 0.7005688282138794, "grad_norm": 1.4498948354718102, "learning_rate": 1.1904598288718055e-06, "loss": 0.0727, "step": 3079 }, { "epoch": 0.7007963594994312, "grad_norm": 1.5160712367792761, "learning_rate": 1.1904217618124684e-06, "loss": 0.0787, "step": 3080 }, { "epoch": 0.701023890784983, "grad_norm": 1.0604730462463068, "learning_rate": 1.1903836831969458e-06, "loss": 0.0639, "step": 3081 }, { "epoch": 0.7012514220705347, "grad_norm": 0.7275851025633676, "learning_rate": 1.190345593026016e-06, "loss": 0.0286, "step": 3082 }, { "epoch": 0.7014789533560865, "grad_norm": 1.4202168912069701, "learning_rate": 1.1903074913004579e-06, "loss": 0.0717, "step": 3083 }, { "epoch": 0.7017064846416382, "grad_norm": 1.6514017164531183, "learning_rate": 1.19026937802105e-06, "loss": 0.0787, "step": 3084 }, { "epoch": 0.70193401592719, "grad_norm": 0.6956539774170061, "learning_rate": 1.1902312531885712e-06, "loss": 0.0252, "step": 3085 }, { "epoch": 0.7021615472127417, "grad_norm": 1.5569684152870062, "learning_rate": 1.1901931168038007e-06, "loss": 0.0685, "step": 3086 }, { "epoch": 0.7023890784982935, "grad_norm": 1.2406976952968178, "learning_rate": 1.190154968867518e-06, "loss": 0.0454, "step": 3087 }, { "epoch": 0.7026166097838453, "grad_norm": 0.813921596018141, "learning_rate": 1.190116809380503e-06, "loss": 0.0253, "step": 3088 }, { "epoch": 0.7028441410693971, "grad_norm": 1.0275424113403453, "learning_rate": 1.190078638343535e-06, "loss": 0.0514, "step": 3089 }, { "epoch": 0.7030716723549488, "grad_norm": 1.0727110658094747, "learning_rate": 1.1900404557573948e-06, "loss": 0.0453, "step": 3090 }, { "epoch": 0.7032992036405006, "grad_norm": 1.6426665504516007, "learning_rate": 1.1900022616228624e-06, "loss": 0.0832, "step": 3091 }, { "epoch": 0.7035267349260523, "grad_norm": 1.9180762135067262, "learning_rate": 1.1899640559407186e-06, "loss": 0.0934, "step": 3092 }, { "epoch": 0.7037542662116041, "grad_norm": 1.7251637250688259, "learning_rate": 1.189925838711744e-06, "loss": 0.0744, "step": 3093 }, { "epoch": 0.7039817974971558, "grad_norm": 2.301009331337415, "learning_rate": 1.18988760993672e-06, "loss": 0.1061, "step": 3094 }, { "epoch": 0.7042093287827076, "grad_norm": 0.8730951635346749, "learning_rate": 1.1898493696164279e-06, "loss": 0.0251, "step": 3095 }, { "epoch": 0.7044368600682593, "grad_norm": 0.6226893588047131, "learning_rate": 1.1898111177516488e-06, "loss": 0.0396, "step": 3096 }, { "epoch": 0.7046643913538112, "grad_norm": 2.39998461661108, "learning_rate": 1.1897728543431653e-06, "loss": 0.1219, "step": 3097 }, { "epoch": 0.704891922639363, "grad_norm": 1.834814384170055, "learning_rate": 1.1897345793917589e-06, "loss": 0.0615, "step": 3098 }, { "epoch": 0.7051194539249147, "grad_norm": 1.0235189868055947, "learning_rate": 1.1896962928982116e-06, "loss": 0.0256, "step": 3099 }, { "epoch": 0.7053469852104665, "grad_norm": 1.084986587012269, "learning_rate": 1.1896579948633067e-06, "loss": 0.0839, "step": 3100 }, { "epoch": 0.7055745164960182, "grad_norm": 1.0758159672428005, "learning_rate": 1.1896196852878262e-06, "loss": 0.0554, "step": 3101 }, { "epoch": 0.70580204778157, "grad_norm": 1.653037063661443, "learning_rate": 1.1895813641725535e-06, "loss": 0.0592, "step": 3102 }, { "epoch": 0.7060295790671217, "grad_norm": 0.888432730316198, "learning_rate": 1.1895430315182719e-06, "loss": 0.0346, "step": 3103 }, { "epoch": 0.7062571103526735, "grad_norm": 0.7144453665457149, "learning_rate": 1.1895046873257644e-06, "loss": 0.0339, "step": 3104 }, { "epoch": 0.7064846416382252, "grad_norm": 1.1974179578050035, "learning_rate": 1.189466331595815e-06, "loss": 0.0584, "step": 3105 }, { "epoch": 0.706712172923777, "grad_norm": 2.0074894159842804, "learning_rate": 1.1894279643292074e-06, "loss": 0.0702, "step": 3106 }, { "epoch": 0.7069397042093288, "grad_norm": 1.1258269572183623, "learning_rate": 1.1893895855267262e-06, "loss": 0.0483, "step": 3107 }, { "epoch": 0.7071672354948806, "grad_norm": 1.8909664822572771, "learning_rate": 1.1893511951891553e-06, "loss": 0.0426, "step": 3108 }, { "epoch": 0.7073947667804323, "grad_norm": 0.9858584509738625, "learning_rate": 1.1893127933172794e-06, "loss": 0.0358, "step": 3109 }, { "epoch": 0.7076222980659841, "grad_norm": 1.6798238240138743, "learning_rate": 1.1892743799118838e-06, "loss": 0.0926, "step": 3110 }, { "epoch": 0.7078498293515358, "grad_norm": 1.8662422210150158, "learning_rate": 1.189235954973753e-06, "loss": 0.0913, "step": 3111 }, { "epoch": 0.7080773606370876, "grad_norm": 1.2592951116026574, "learning_rate": 1.189197518503673e-06, "loss": 0.0834, "step": 3112 }, { "epoch": 0.7083048919226393, "grad_norm": 2.2263635127158707, "learning_rate": 1.1891590705024288e-06, "loss": 0.1094, "step": 3113 }, { "epoch": 0.7085324232081911, "grad_norm": 1.2398530882425465, "learning_rate": 1.1891206109708065e-06, "loss": 0.0634, "step": 3114 }, { "epoch": 0.7087599544937428, "grad_norm": 0.8888283163380168, "learning_rate": 1.1890821399095917e-06, "loss": 0.0518, "step": 3115 }, { "epoch": 0.7089874857792946, "grad_norm": 1.7877536696152463, "learning_rate": 1.1890436573195714e-06, "loss": 0.0799, "step": 3116 }, { "epoch": 0.7092150170648465, "grad_norm": 0.6507653429669747, "learning_rate": 1.1890051632015315e-06, "loss": 0.0177, "step": 3117 }, { "epoch": 0.7094425483503982, "grad_norm": 0.810817143611541, "learning_rate": 1.1889666575562593e-06, "loss": 0.0294, "step": 3118 }, { "epoch": 0.70967007963595, "grad_norm": 1.351260714026528, "learning_rate": 1.1889281403845413e-06, "loss": 0.0764, "step": 3119 }, { "epoch": 0.7098976109215017, "grad_norm": 0.9662989115635772, "learning_rate": 1.1888896116871649e-06, "loss": 0.0592, "step": 3120 }, { "epoch": 0.7101251422070535, "grad_norm": 1.3131475158616286, "learning_rate": 1.1888510714649176e-06, "loss": 0.0485, "step": 3121 }, { "epoch": 0.7103526734926052, "grad_norm": 1.4877436902230308, "learning_rate": 1.1888125197185867e-06, "loss": 0.0786, "step": 3122 }, { "epoch": 0.710580204778157, "grad_norm": 0.9404627141658739, "learning_rate": 1.1887739564489608e-06, "loss": 0.0363, "step": 3123 }, { "epoch": 0.7108077360637087, "grad_norm": 1.664364740849866, "learning_rate": 1.1887353816568277e-06, "loss": 0.0836, "step": 3124 }, { "epoch": 0.7110352673492605, "grad_norm": 1.1581179507287511, "learning_rate": 1.188696795342976e-06, "loss": 0.0447, "step": 3125 }, { "epoch": 0.7112627986348122, "grad_norm": 1.4278480990598597, "learning_rate": 1.188658197508194e-06, "loss": 0.0702, "step": 3126 }, { "epoch": 0.7114903299203641, "grad_norm": 1.8166789381335957, "learning_rate": 1.1886195881532705e-06, "loss": 0.0756, "step": 3127 }, { "epoch": 0.7117178612059158, "grad_norm": 1.0532098621527515, "learning_rate": 1.1885809672789953e-06, "loss": 0.0427, "step": 3128 }, { "epoch": 0.7119453924914676, "grad_norm": 1.1496591085120342, "learning_rate": 1.188542334886157e-06, "loss": 0.0345, "step": 3129 }, { "epoch": 0.7121729237770194, "grad_norm": 1.3871333419583287, "learning_rate": 1.1885036909755454e-06, "loss": 0.0591, "step": 3130 }, { "epoch": 0.7124004550625711, "grad_norm": 1.4115338701891487, "learning_rate": 1.1884650355479505e-06, "loss": 0.0824, "step": 3131 }, { "epoch": 0.7126279863481229, "grad_norm": 1.4867197919013324, "learning_rate": 1.1884263686041622e-06, "loss": 0.0793, "step": 3132 }, { "epoch": 0.7128555176336746, "grad_norm": 1.2824607789793006, "learning_rate": 1.1883876901449707e-06, "loss": 0.0451, "step": 3133 }, { "epoch": 0.7130830489192264, "grad_norm": 0.6708570443551615, "learning_rate": 1.1883490001711667e-06, "loss": 0.0219, "step": 3134 }, { "epoch": 0.7133105802047781, "grad_norm": 2.015395007692978, "learning_rate": 1.1883102986835408e-06, "loss": 0.0757, "step": 3135 }, { "epoch": 0.71353811149033, "grad_norm": 1.6702641230416622, "learning_rate": 1.1882715856828842e-06, "loss": 0.075, "step": 3136 }, { "epoch": 0.7137656427758817, "grad_norm": 0.912883335407981, "learning_rate": 1.1882328611699879e-06, "loss": 0.0388, "step": 3137 }, { "epoch": 0.7139931740614335, "grad_norm": 1.9915888368964108, "learning_rate": 1.1881941251456434e-06, "loss": 0.0736, "step": 3138 }, { "epoch": 0.7142207053469852, "grad_norm": 1.432753078445199, "learning_rate": 1.1881553776106423e-06, "loss": 0.0686, "step": 3139 }, { "epoch": 0.714448236632537, "grad_norm": 1.8572484999557712, "learning_rate": 1.1881166185657765e-06, "loss": 0.0639, "step": 3140 }, { "epoch": 0.7146757679180887, "grad_norm": 1.1698682146531856, "learning_rate": 1.1880778480118388e-06, "loss": 0.0506, "step": 3141 }, { "epoch": 0.7149032992036405, "grad_norm": 1.4247011889264531, "learning_rate": 1.1880390659496207e-06, "loss": 0.0688, "step": 3142 }, { "epoch": 0.7151308304891922, "grad_norm": 1.1897422778029059, "learning_rate": 1.1880002723799155e-06, "loss": 0.0488, "step": 3143 }, { "epoch": 0.715358361774744, "grad_norm": 0.7067410656877349, "learning_rate": 1.1879614673035158e-06, "loss": 0.0316, "step": 3144 }, { "epoch": 0.7155858930602957, "grad_norm": 1.1073184850973958, "learning_rate": 1.1879226507212146e-06, "loss": 0.04, "step": 3145 }, { "epoch": 0.7158134243458476, "grad_norm": 1.551329079095373, "learning_rate": 1.1878838226338054e-06, "loss": 0.0449, "step": 3146 }, { "epoch": 0.7160409556313994, "grad_norm": 1.2160812373705194, "learning_rate": 1.187844983042082e-06, "loss": 0.0547, "step": 3147 }, { "epoch": 0.7162684869169511, "grad_norm": 1.463195276621994, "learning_rate": 1.1878061319468376e-06, "loss": 0.0604, "step": 3148 }, { "epoch": 0.7164960182025029, "grad_norm": 0.7715205659161936, "learning_rate": 1.1877672693488669e-06, "loss": 0.0395, "step": 3149 }, { "epoch": 0.7167235494880546, "grad_norm": 0.9856101752123785, "learning_rate": 1.1877283952489636e-06, "loss": 0.0419, "step": 3150 }, { "epoch": 0.7169510807736064, "grad_norm": 1.1172173173435798, "learning_rate": 1.1876895096479226e-06, "loss": 0.044, "step": 3151 }, { "epoch": 0.7171786120591581, "grad_norm": 1.7824313127710856, "learning_rate": 1.1876506125465386e-06, "loss": 0.0882, "step": 3152 }, { "epoch": 0.7174061433447099, "grad_norm": 0.6797130682621123, "learning_rate": 1.1876117039456065e-06, "loss": 0.0241, "step": 3153 }, { "epoch": 0.7176336746302616, "grad_norm": 1.8479763283658723, "learning_rate": 1.1875727838459213e-06, "loss": 0.0718, "step": 3154 }, { "epoch": 0.7178612059158134, "grad_norm": 0.8348967152826982, "learning_rate": 1.187533852248279e-06, "loss": 0.0293, "step": 3155 }, { "epoch": 0.7180887372013652, "grad_norm": 1.0039148482185, "learning_rate": 1.1874949091534749e-06, "loss": 0.0465, "step": 3156 }, { "epoch": 0.718316268486917, "grad_norm": 1.6124637787966882, "learning_rate": 1.1874559545623049e-06, "loss": 0.093, "step": 3157 }, { "epoch": 0.7185437997724687, "grad_norm": 1.3428421165694695, "learning_rate": 1.1874169884755654e-06, "loss": 0.068, "step": 3158 }, { "epoch": 0.7187713310580205, "grad_norm": 0.9564008146931468, "learning_rate": 1.1873780108940527e-06, "loss": 0.0424, "step": 3159 }, { "epoch": 0.7189988623435722, "grad_norm": 0.8282933582416673, "learning_rate": 1.1873390218185636e-06, "loss": 0.0237, "step": 3160 }, { "epoch": 0.719226393629124, "grad_norm": 0.8573276208421515, "learning_rate": 1.1873000212498942e-06, "loss": 0.0431, "step": 3161 }, { "epoch": 0.7194539249146757, "grad_norm": 0.9698959738736916, "learning_rate": 1.1872610091888426e-06, "loss": 0.0444, "step": 3162 }, { "epoch": 0.7196814562002275, "grad_norm": 1.8230237032369256, "learning_rate": 1.1872219856362057e-06, "loss": 0.0878, "step": 3163 }, { "epoch": 0.7199089874857793, "grad_norm": 1.1950764632445416, "learning_rate": 1.187182950592781e-06, "loss": 0.0559, "step": 3164 }, { "epoch": 0.7201365187713311, "grad_norm": 1.1584194920218946, "learning_rate": 1.1871439040593663e-06, "loss": 0.0659, "step": 3165 }, { "epoch": 0.7203640500568829, "grad_norm": 1.149871056931848, "learning_rate": 1.1871048460367598e-06, "loss": 0.0385, "step": 3166 }, { "epoch": 0.7205915813424346, "grad_norm": 1.1250408047699638, "learning_rate": 1.1870657765257595e-06, "loss": 0.041, "step": 3167 }, { "epoch": 0.7208191126279864, "grad_norm": 0.9750498880779663, "learning_rate": 1.1870266955271645e-06, "loss": 0.0286, "step": 3168 }, { "epoch": 0.7210466439135381, "grad_norm": 1.93920042584572, "learning_rate": 1.186987603041773e-06, "loss": 0.1038, "step": 3169 }, { "epoch": 0.7212741751990899, "grad_norm": 1.1716818159995082, "learning_rate": 1.1869484990703839e-06, "loss": 0.0567, "step": 3170 }, { "epoch": 0.7215017064846416, "grad_norm": 1.39986755402065, "learning_rate": 1.1869093836137968e-06, "loss": 0.0763, "step": 3171 }, { "epoch": 0.7217292377701934, "grad_norm": 1.3078245254309557, "learning_rate": 1.186870256672811e-06, "loss": 0.061, "step": 3172 }, { "epoch": 0.7219567690557451, "grad_norm": 1.3174384504374115, "learning_rate": 1.1868311182482262e-06, "loss": 0.0416, "step": 3173 }, { "epoch": 0.7221843003412969, "grad_norm": 1.404891211071726, "learning_rate": 1.1867919683408421e-06, "loss": 0.0272, "step": 3174 }, { "epoch": 0.7224118316268487, "grad_norm": 1.4160327712086356, "learning_rate": 1.1867528069514591e-06, "loss": 0.0863, "step": 3175 }, { "epoch": 0.7226393629124005, "grad_norm": 1.400708180969138, "learning_rate": 1.1867136340808778e-06, "loss": 0.0502, "step": 3176 }, { "epoch": 0.7228668941979522, "grad_norm": 1.6148464588484233, "learning_rate": 1.1866744497298982e-06, "loss": 0.0693, "step": 3177 }, { "epoch": 0.723094425483504, "grad_norm": 1.0345610557360225, "learning_rate": 1.1866352538993216e-06, "loss": 0.0441, "step": 3178 }, { "epoch": 0.7233219567690558, "grad_norm": 1.2686233759159034, "learning_rate": 1.1865960465899492e-06, "loss": 0.0658, "step": 3179 }, { "epoch": 0.7235494880546075, "grad_norm": 1.0561518957357936, "learning_rate": 1.186556827802582e-06, "loss": 0.0316, "step": 3180 }, { "epoch": 0.7237770193401593, "grad_norm": 1.2859639053927798, "learning_rate": 1.1865175975380218e-06, "loss": 0.0748, "step": 3181 }, { "epoch": 0.724004550625711, "grad_norm": 1.0612859349624024, "learning_rate": 1.18647835579707e-06, "loss": 0.0533, "step": 3182 }, { "epoch": 0.7242320819112628, "grad_norm": 1.252216188797781, "learning_rate": 1.186439102580529e-06, "loss": 0.0582, "step": 3183 }, { "epoch": 0.7244596131968145, "grad_norm": 1.4352458095090412, "learning_rate": 1.1863998378892011e-06, "loss": 0.0498, "step": 3184 }, { "epoch": 0.7246871444823664, "grad_norm": 2.834373155221246, "learning_rate": 1.1863605617238885e-06, "loss": 0.1159, "step": 3185 }, { "epoch": 0.7249146757679181, "grad_norm": 1.1067917536466692, "learning_rate": 1.1863212740853941e-06, "loss": 0.0721, "step": 3186 }, { "epoch": 0.7251422070534699, "grad_norm": 1.2158906431759549, "learning_rate": 1.1862819749745212e-06, "loss": 0.0272, "step": 3187 }, { "epoch": 0.7253697383390216, "grad_norm": 1.6050627766428684, "learning_rate": 1.1862426643920722e-06, "loss": 0.0567, "step": 3188 }, { "epoch": 0.7255972696245734, "grad_norm": 1.0752979084787344, "learning_rate": 1.1862033423388513e-06, "loss": 0.0442, "step": 3189 }, { "epoch": 0.7258248009101251, "grad_norm": 0.9789837014009594, "learning_rate": 1.1861640088156617e-06, "loss": 0.0524, "step": 3190 }, { "epoch": 0.7260523321956769, "grad_norm": 1.085883546609774, "learning_rate": 1.1861246638233077e-06, "loss": 0.043, "step": 3191 }, { "epoch": 0.7262798634812286, "grad_norm": 2.3885050844926305, "learning_rate": 1.1860853073625931e-06, "loss": 0.052, "step": 3192 }, { "epoch": 0.7265073947667804, "grad_norm": 1.8498560328174254, "learning_rate": 1.1860459394343223e-06, "loss": 0.1156, "step": 3193 }, { "epoch": 0.7267349260523321, "grad_norm": 1.1970597608121667, "learning_rate": 1.1860065600393002e-06, "loss": 0.0409, "step": 3194 }, { "epoch": 0.726962457337884, "grad_norm": 1.5076572874006593, "learning_rate": 1.1859671691783315e-06, "loss": 0.0561, "step": 3195 }, { "epoch": 0.7271899886234358, "grad_norm": 0.7392429484644525, "learning_rate": 1.1859277668522209e-06, "loss": 0.0228, "step": 3196 }, { "epoch": 0.7274175199089875, "grad_norm": 2.2057392375122973, "learning_rate": 1.1858883530617743e-06, "loss": 0.1463, "step": 3197 }, { "epoch": 0.7276450511945393, "grad_norm": 2.23022548380175, "learning_rate": 1.185848927807797e-06, "loss": 0.0798, "step": 3198 }, { "epoch": 0.727872582480091, "grad_norm": 1.7101734591640143, "learning_rate": 1.1858094910910945e-06, "loss": 0.0721, "step": 3199 }, { "epoch": 0.7281001137656428, "grad_norm": 1.013052127850482, "learning_rate": 1.1857700429124733e-06, "loss": 0.044, "step": 3200 }, { "epoch": 0.7283276450511945, "grad_norm": 0.9338207707410625, "learning_rate": 1.1857305832727395e-06, "loss": 0.0356, "step": 3201 }, { "epoch": 0.7285551763367463, "grad_norm": 1.151011668858322, "learning_rate": 1.1856911121726993e-06, "loss": 0.055, "step": 3202 }, { "epoch": 0.728782707622298, "grad_norm": 0.882191391961338, "learning_rate": 1.1856516296131596e-06, "loss": 0.0293, "step": 3203 }, { "epoch": 0.7290102389078499, "grad_norm": 1.701498982454748, "learning_rate": 1.1856121355949276e-06, "loss": 0.0632, "step": 3204 }, { "epoch": 0.7292377701934016, "grad_norm": 0.9653458303805711, "learning_rate": 1.18557263011881e-06, "loss": 0.0357, "step": 3205 }, { "epoch": 0.7294653014789534, "grad_norm": 1.2647182576075804, "learning_rate": 1.1855331131856146e-06, "loss": 0.0988, "step": 3206 }, { "epoch": 0.7296928327645051, "grad_norm": 1.2708027008753917, "learning_rate": 1.185493584796149e-06, "loss": 0.0659, "step": 3207 }, { "epoch": 0.7299203640500569, "grad_norm": 1.7886995517155944, "learning_rate": 1.185454044951221e-06, "loss": 0.0779, "step": 3208 }, { "epoch": 0.7301478953356086, "grad_norm": 0.6845065442131042, "learning_rate": 1.1854144936516388e-06, "loss": 0.0325, "step": 3209 }, { "epoch": 0.7303754266211604, "grad_norm": 0.9425373691725761, "learning_rate": 1.1853749308982107e-06, "loss": 0.0353, "step": 3210 }, { "epoch": 0.7306029579067121, "grad_norm": 1.6009640208057585, "learning_rate": 1.1853353566917452e-06, "loss": 0.0689, "step": 3211 }, { "epoch": 0.7308304891922639, "grad_norm": 0.9565911200831438, "learning_rate": 1.1852957710330511e-06, "loss": 0.0438, "step": 3212 }, { "epoch": 0.7310580204778157, "grad_norm": 1.6166034190859908, "learning_rate": 1.1852561739229377e-06, "loss": 0.0521, "step": 3213 }, { "epoch": 0.7312855517633675, "grad_norm": 2.3048115272176983, "learning_rate": 1.1852165653622141e-06, "loss": 0.1036, "step": 3214 }, { "epoch": 0.7315130830489193, "grad_norm": 0.6867861750300609, "learning_rate": 1.18517694535169e-06, "loss": 0.0285, "step": 3215 }, { "epoch": 0.731740614334471, "grad_norm": 0.7222977767557096, "learning_rate": 1.1851373138921752e-06, "loss": 0.0332, "step": 3216 }, { "epoch": 0.7319681456200228, "grad_norm": 1.0427932498841996, "learning_rate": 1.1850976709844792e-06, "loss": 0.0654, "step": 3217 }, { "epoch": 0.7321956769055745, "grad_norm": 0.7626880126713813, "learning_rate": 1.1850580166294127e-06, "loss": 0.0257, "step": 3218 }, { "epoch": 0.7324232081911263, "grad_norm": 1.3204712038737856, "learning_rate": 1.1850183508277862e-06, "loss": 0.0821, "step": 3219 }, { "epoch": 0.732650739476678, "grad_norm": 1.920439901931252, "learning_rate": 1.18497867358041e-06, "loss": 0.0843, "step": 3220 }, { "epoch": 0.7328782707622298, "grad_norm": 1.4946908757395634, "learning_rate": 1.1849389848880955e-06, "loss": 0.075, "step": 3221 }, { "epoch": 0.7331058020477815, "grad_norm": 0.5605078244225655, "learning_rate": 1.1848992847516535e-06, "loss": 0.0188, "step": 3222 }, { "epoch": 0.7333333333333333, "grad_norm": 0.7658587959864014, "learning_rate": 1.1848595731718955e-06, "loss": 0.0246, "step": 3223 }, { "epoch": 0.7335608646188851, "grad_norm": 1.6693233826267546, "learning_rate": 1.1848198501496331e-06, "loss": 0.0709, "step": 3224 }, { "epoch": 0.7337883959044369, "grad_norm": 1.6483972044168969, "learning_rate": 1.1847801156856783e-06, "loss": 0.0735, "step": 3225 }, { "epoch": 0.7340159271899886, "grad_norm": 1.4807700617572261, "learning_rate": 1.1847403697808433e-06, "loss": 0.0838, "step": 3226 }, { "epoch": 0.7342434584755404, "grad_norm": 1.485456481779637, "learning_rate": 1.18470061243594e-06, "loss": 0.0687, "step": 3227 }, { "epoch": 0.7344709897610922, "grad_norm": 1.4565020245267035, "learning_rate": 1.1846608436517813e-06, "loss": 0.0593, "step": 3228 }, { "epoch": 0.7346985210466439, "grad_norm": 1.3974828149244531, "learning_rate": 1.1846210634291799e-06, "loss": 0.0683, "step": 3229 }, { "epoch": 0.7349260523321957, "grad_norm": 0.9824198436565561, "learning_rate": 1.184581271768949e-06, "loss": 0.0474, "step": 3230 }, { "epoch": 0.7351535836177474, "grad_norm": 1.4337603662453289, "learning_rate": 1.1845414686719014e-06, "loss": 0.0621, "step": 3231 }, { "epoch": 0.7353811149032992, "grad_norm": 0.8374788974669934, "learning_rate": 1.1845016541388513e-06, "loss": 0.0388, "step": 3232 }, { "epoch": 0.7356086461888509, "grad_norm": 1.6437938029709063, "learning_rate": 1.184461828170612e-06, "loss": 0.0822, "step": 3233 }, { "epoch": 0.7358361774744028, "grad_norm": 0.9054278416004449, "learning_rate": 1.1844219907679973e-06, "loss": 0.0364, "step": 3234 }, { "epoch": 0.7360637087599545, "grad_norm": 1.2269654818634814, "learning_rate": 1.184382141931822e-06, "loss": 0.0474, "step": 3235 }, { "epoch": 0.7362912400455063, "grad_norm": 1.2170865361290026, "learning_rate": 1.1843422816628998e-06, "loss": 0.0463, "step": 3236 }, { "epoch": 0.736518771331058, "grad_norm": 1.224432806747687, "learning_rate": 1.184302409962046e-06, "loss": 0.047, "step": 3237 }, { "epoch": 0.7367463026166098, "grad_norm": 1.0965112824005279, "learning_rate": 1.1842625268300754e-06, "loss": 0.0359, "step": 3238 }, { "epoch": 0.7369738339021615, "grad_norm": 1.0155537003395356, "learning_rate": 1.1842226322678028e-06, "loss": 0.0325, "step": 3239 }, { "epoch": 0.7372013651877133, "grad_norm": 1.6425941417778662, "learning_rate": 1.1841827262760436e-06, "loss": 0.0666, "step": 3240 }, { "epoch": 0.737428896473265, "grad_norm": 1.0729010350000274, "learning_rate": 1.1841428088556137e-06, "loss": 0.0446, "step": 3241 }, { "epoch": 0.7376564277588168, "grad_norm": 1.5054940909406098, "learning_rate": 1.184102880007329e-06, "loss": 0.0734, "step": 3242 }, { "epoch": 0.7378839590443687, "grad_norm": 1.4685805990987935, "learning_rate": 1.1840629397320052e-06, "loss": 0.0657, "step": 3243 }, { "epoch": 0.7381114903299204, "grad_norm": 1.0491905607582221, "learning_rate": 1.1840229880304589e-06, "loss": 0.0553, "step": 3244 }, { "epoch": 0.7383390216154722, "grad_norm": 1.6183523557552497, "learning_rate": 1.1839830249035062e-06, "loss": 0.0401, "step": 3245 }, { "epoch": 0.7385665529010239, "grad_norm": 2.0313633648707885, "learning_rate": 1.1839430503519645e-06, "loss": 0.0871, "step": 3246 }, { "epoch": 0.7387940841865757, "grad_norm": 2.2817905288401485, "learning_rate": 1.1839030643766505e-06, "loss": 0.0876, "step": 3247 }, { "epoch": 0.7390216154721274, "grad_norm": 1.2662039437671349, "learning_rate": 1.1838630669783814e-06, "loss": 0.0597, "step": 3248 }, { "epoch": 0.7392491467576792, "grad_norm": 2.4757641907590116, "learning_rate": 1.1838230581579746e-06, "loss": 0.0364, "step": 3249 }, { "epoch": 0.7394766780432309, "grad_norm": 1.509162194685489, "learning_rate": 1.183783037916248e-06, "loss": 0.0728, "step": 3250 }, { "epoch": 0.7397042093287827, "grad_norm": 1.8117671336209638, "learning_rate": 1.1837430062540196e-06, "loss": 0.074, "step": 3251 }, { "epoch": 0.7399317406143344, "grad_norm": 1.426185256628181, "learning_rate": 1.1837029631721072e-06, "loss": 0.0663, "step": 3252 }, { "epoch": 0.7401592718998863, "grad_norm": 1.930245275398741, "learning_rate": 1.1836629086713296e-06, "loss": 0.0579, "step": 3253 }, { "epoch": 0.740386803185438, "grad_norm": 2.323978623936963, "learning_rate": 1.1836228427525054e-06, "loss": 0.0314, "step": 3254 }, { "epoch": 0.7406143344709898, "grad_norm": 1.2652157180134698, "learning_rate": 1.183582765416453e-06, "loss": 0.0451, "step": 3255 }, { "epoch": 0.7408418657565415, "grad_norm": 1.1001275189243191, "learning_rate": 1.1835426766639923e-06, "loss": 0.034, "step": 3256 }, { "epoch": 0.7410693970420933, "grad_norm": 1.2716136324777447, "learning_rate": 1.183502576495942e-06, "loss": 0.0434, "step": 3257 }, { "epoch": 0.741296928327645, "grad_norm": 1.8985708463935187, "learning_rate": 1.1834624649131218e-06, "loss": 0.0825, "step": 3258 }, { "epoch": 0.7415244596131968, "grad_norm": 0.9934071400912606, "learning_rate": 1.1834223419163518e-06, "loss": 0.0332, "step": 3259 }, { "epoch": 0.7417519908987485, "grad_norm": 1.9148887694046421, "learning_rate": 1.1833822075064517e-06, "loss": 0.0288, "step": 3260 }, { "epoch": 0.7419795221843003, "grad_norm": 1.1004472895017543, "learning_rate": 1.183342061684242e-06, "loss": 0.0487, "step": 3261 }, { "epoch": 0.742207053469852, "grad_norm": 1.203746668995667, "learning_rate": 1.183301904450543e-06, "loss": 0.0423, "step": 3262 }, { "epoch": 0.7424345847554039, "grad_norm": 2.131599554211613, "learning_rate": 1.1832617358061756e-06, "loss": 0.078, "step": 3263 }, { "epoch": 0.7426621160409557, "grad_norm": 1.3232000144442189, "learning_rate": 1.1832215557519608e-06, "loss": 0.0542, "step": 3264 }, { "epoch": 0.7428896473265074, "grad_norm": 1.3347834617776861, "learning_rate": 1.1831813642887196e-06, "loss": 0.0571, "step": 3265 }, { "epoch": 0.7431171786120592, "grad_norm": 1.491233272879141, "learning_rate": 1.1831411614172735e-06, "loss": 0.0706, "step": 3266 }, { "epoch": 0.7433447098976109, "grad_norm": 2.338157694562792, "learning_rate": 1.1831009471384445e-06, "loss": 0.0591, "step": 3267 }, { "epoch": 0.7435722411831627, "grad_norm": 1.1906122468487437, "learning_rate": 1.1830607214530543e-06, "loss": 0.0693, "step": 3268 }, { "epoch": 0.7437997724687144, "grad_norm": 1.2940810772686644, "learning_rate": 1.1830204843619248e-06, "loss": 0.0517, "step": 3269 }, { "epoch": 0.7440273037542662, "grad_norm": 2.214601640279187, "learning_rate": 1.1829802358658785e-06, "loss": 0.1046, "step": 3270 }, { "epoch": 0.7442548350398179, "grad_norm": 1.039009352489729, "learning_rate": 1.1829399759657383e-06, "loss": 0.0592, "step": 3271 }, { "epoch": 0.7444823663253698, "grad_norm": 1.8684769750059191, "learning_rate": 1.1828997046623267e-06, "loss": 0.093, "step": 3272 }, { "epoch": 0.7447098976109215, "grad_norm": 1.823521554100394, "learning_rate": 1.1828594219564669e-06, "loss": 0.049, "step": 3273 }, { "epoch": 0.7449374288964733, "grad_norm": 1.8271992779263462, "learning_rate": 1.182819127848982e-06, "loss": 0.0303, "step": 3274 }, { "epoch": 0.745164960182025, "grad_norm": 3.0795350516203928, "learning_rate": 1.1827788223406959e-06, "loss": 0.1191, "step": 3275 }, { "epoch": 0.7453924914675768, "grad_norm": 1.7589178709629525, "learning_rate": 1.1827385054324323e-06, "loss": 0.0616, "step": 3276 }, { "epoch": 0.7456200227531286, "grad_norm": 2.4742706309940976, "learning_rate": 1.1826981771250148e-06, "loss": 0.077, "step": 3277 }, { "epoch": 0.7458475540386803, "grad_norm": 1.9750018572038446, "learning_rate": 1.1826578374192681e-06, "loss": 0.0795, "step": 3278 }, { "epoch": 0.7460750853242321, "grad_norm": 1.5314832082240002, "learning_rate": 1.1826174863160168e-06, "loss": 0.0694, "step": 3279 }, { "epoch": 0.7463026166097838, "grad_norm": 0.9199631256267508, "learning_rate": 1.182577123816085e-06, "loss": 0.0407, "step": 3280 }, { "epoch": 0.7465301478953356, "grad_norm": 1.6996895039706184, "learning_rate": 1.1825367499202978e-06, "loss": 0.0648, "step": 3281 }, { "epoch": 0.7467576791808874, "grad_norm": 1.538865347545186, "learning_rate": 1.1824963646294806e-06, "loss": 0.0483, "step": 3282 }, { "epoch": 0.7469852104664392, "grad_norm": 1.5246278126883535, "learning_rate": 1.1824559679444588e-06, "loss": 0.0652, "step": 3283 }, { "epoch": 0.7472127417519909, "grad_norm": 0.7990923695187152, "learning_rate": 1.182415559866058e-06, "loss": 0.0416, "step": 3284 }, { "epoch": 0.7474402730375427, "grad_norm": 1.0564862923399663, "learning_rate": 1.182375140395104e-06, "loss": 0.0414, "step": 3285 }, { "epoch": 0.7476678043230944, "grad_norm": 1.1846269731523205, "learning_rate": 1.1823347095324228e-06, "loss": 0.0621, "step": 3286 }, { "epoch": 0.7478953356086462, "grad_norm": 0.8569640753381744, "learning_rate": 1.1822942672788409e-06, "loss": 0.0278, "step": 3287 }, { "epoch": 0.7481228668941979, "grad_norm": 1.571726141718777, "learning_rate": 1.1822538136351849e-06, "loss": 0.0613, "step": 3288 }, { "epoch": 0.7483503981797497, "grad_norm": 1.1282851528775475, "learning_rate": 1.1822133486022815e-06, "loss": 0.0392, "step": 3289 }, { "epoch": 0.7485779294653014, "grad_norm": 1.0669556173185228, "learning_rate": 1.1821728721809577e-06, "loss": 0.0294, "step": 3290 }, { "epoch": 0.7488054607508532, "grad_norm": 2.466980162201729, "learning_rate": 1.1821323843720408e-06, "loss": 0.0745, "step": 3291 }, { "epoch": 0.749032992036405, "grad_norm": 1.8125453554610331, "learning_rate": 1.1820918851763582e-06, "loss": 0.0798, "step": 3292 }, { "epoch": 0.7492605233219568, "grad_norm": 1.6160104087285256, "learning_rate": 1.182051374594738e-06, "loss": 0.0769, "step": 3293 }, { "epoch": 0.7494880546075086, "grad_norm": 1.5687919942801445, "learning_rate": 1.1820108526280076e-06, "loss": 0.0626, "step": 3294 }, { "epoch": 0.7497155858930603, "grad_norm": 0.9398572843854899, "learning_rate": 1.1819703192769955e-06, "loss": 0.0482, "step": 3295 }, { "epoch": 0.7499431171786121, "grad_norm": 12.232356704164316, "learning_rate": 1.1819297745425304e-06, "loss": 0.0627, "step": 3296 }, { "epoch": 0.7501706484641638, "grad_norm": 1.4183355936794655, "learning_rate": 1.1818892184254404e-06, "loss": 0.0731, "step": 3297 }, { "epoch": 0.7503981797497156, "grad_norm": 0.7929826048625227, "learning_rate": 1.1818486509265547e-06, "loss": 0.028, "step": 3298 }, { "epoch": 0.7506257110352673, "grad_norm": 0.9321132746109633, "learning_rate": 1.1818080720467026e-06, "loss": 0.0406, "step": 3299 }, { "epoch": 0.7508532423208191, "grad_norm": 0.7184037331999952, "learning_rate": 1.1817674817867131e-06, "loss": 0.0292, "step": 3300 }, { "epoch": 0.7510807736063708, "grad_norm": 1.7585855147942302, "learning_rate": 1.181726880147416e-06, "loss": 0.074, "step": 3301 }, { "epoch": 0.7513083048919227, "grad_norm": 0.9632647926307707, "learning_rate": 1.181686267129641e-06, "loss": 0.0318, "step": 3302 }, { "epoch": 0.7515358361774744, "grad_norm": 1.2947286461920264, "learning_rate": 1.1816456427342181e-06, "loss": 0.0649, "step": 3303 }, { "epoch": 0.7517633674630262, "grad_norm": 0.9051783521749419, "learning_rate": 1.181605006961978e-06, "loss": 0.0318, "step": 3304 }, { "epoch": 0.7519908987485779, "grad_norm": 1.9818984386721539, "learning_rate": 1.1815643598137507e-06, "loss": 0.0909, "step": 3305 }, { "epoch": 0.7522184300341297, "grad_norm": 1.1059310163256162, "learning_rate": 1.1815237012903675e-06, "loss": 0.0422, "step": 3306 }, { "epoch": 0.7524459613196814, "grad_norm": 2.4093572946300865, "learning_rate": 1.1814830313926589e-06, "loss": 0.0944, "step": 3307 }, { "epoch": 0.7526734926052332, "grad_norm": 1.2458152163270595, "learning_rate": 1.1814423501214562e-06, "loss": 0.0422, "step": 3308 }, { "epoch": 0.752901023890785, "grad_norm": 1.1846230368435793, "learning_rate": 1.1814016574775909e-06, "loss": 0.0459, "step": 3309 }, { "epoch": 0.7531285551763367, "grad_norm": 0.7274927229665066, "learning_rate": 1.1813609534618948e-06, "loss": 0.031, "step": 3310 }, { "epoch": 0.7533560864618886, "grad_norm": 1.0084301910129103, "learning_rate": 1.1813202380751998e-06, "loss": 0.022, "step": 3311 }, { "epoch": 0.7535836177474403, "grad_norm": 1.2248641094215589, "learning_rate": 1.1812795113183378e-06, "loss": 0.0519, "step": 3312 }, { "epoch": 0.7538111490329921, "grad_norm": 1.2989392125838315, "learning_rate": 1.1812387731921415e-06, "loss": 0.0418, "step": 3313 }, { "epoch": 0.7540386803185438, "grad_norm": 1.8035598035830265, "learning_rate": 1.1811980236974435e-06, "loss": 0.047, "step": 3314 }, { "epoch": 0.7542662116040956, "grad_norm": 0.9445160919482642, "learning_rate": 1.1811572628350764e-06, "loss": 0.053, "step": 3315 }, { "epoch": 0.7544937428896473, "grad_norm": 1.820531092229777, "learning_rate": 1.1811164906058735e-06, "loss": 0.0898, "step": 3316 }, { "epoch": 0.7547212741751991, "grad_norm": 1.7017364368903272, "learning_rate": 1.1810757070106678e-06, "loss": 0.064, "step": 3317 }, { "epoch": 0.7549488054607508, "grad_norm": 1.589156926554848, "learning_rate": 1.1810349120502932e-06, "loss": 0.069, "step": 3318 }, { "epoch": 0.7551763367463026, "grad_norm": 0.9315330165580284, "learning_rate": 1.1809941057255834e-06, "loss": 0.043, "step": 3319 }, { "epoch": 0.7554038680318543, "grad_norm": 1.7002202165396425, "learning_rate": 1.1809532880373721e-06, "loss": 0.0662, "step": 3320 }, { "epoch": 0.7556313993174062, "grad_norm": 1.4641161178529223, "learning_rate": 1.180912458986494e-06, "loss": 0.0696, "step": 3321 }, { "epoch": 0.755858930602958, "grad_norm": 1.5316937073382941, "learning_rate": 1.180871618573783e-06, "loss": 0.0532, "step": 3322 }, { "epoch": 0.7560864618885097, "grad_norm": 1.5604240478591438, "learning_rate": 1.1808307668000745e-06, "loss": 0.0511, "step": 3323 }, { "epoch": 0.7563139931740614, "grad_norm": 1.7255746868374173, "learning_rate": 1.180789903666203e-06, "loss": 0.0646, "step": 3324 }, { "epoch": 0.7565415244596132, "grad_norm": 1.3143153209229907, "learning_rate": 1.1807490291730036e-06, "loss": 0.0792, "step": 3325 }, { "epoch": 0.756769055745165, "grad_norm": 0.8135022202076733, "learning_rate": 1.1807081433213122e-06, "loss": 0.0432, "step": 3326 }, { "epoch": 0.7569965870307167, "grad_norm": 1.4086605703937913, "learning_rate": 1.1806672461119637e-06, "loss": 0.0774, "step": 3327 }, { "epoch": 0.7572241183162685, "grad_norm": 1.7398967165213481, "learning_rate": 1.1806263375457947e-06, "loss": 0.0598, "step": 3328 }, { "epoch": 0.7574516496018202, "grad_norm": 1.4489241741578003, "learning_rate": 1.1805854176236406e-06, "loss": 0.0655, "step": 3329 }, { "epoch": 0.757679180887372, "grad_norm": 2.000585789486283, "learning_rate": 1.1805444863463384e-06, "loss": 0.1195, "step": 3330 }, { "epoch": 0.7579067121729238, "grad_norm": 1.1274809608332625, "learning_rate": 1.180503543714724e-06, "loss": 0.0355, "step": 3331 }, { "epoch": 0.7581342434584756, "grad_norm": 0.8018211568884192, "learning_rate": 1.1804625897296345e-06, "loss": 0.0354, "step": 3332 }, { "epoch": 0.7583617747440273, "grad_norm": 1.8306901932048323, "learning_rate": 1.1804216243919074e-06, "loss": 0.0604, "step": 3333 }, { "epoch": 0.7585893060295791, "grad_norm": 1.0198114562975762, "learning_rate": 1.1803806477023792e-06, "loss": 0.034, "step": 3334 }, { "epoch": 0.7588168373151308, "grad_norm": 1.0883117507836249, "learning_rate": 1.1803396596618878e-06, "loss": 0.0476, "step": 3335 }, { "epoch": 0.7590443686006826, "grad_norm": 1.2568287515264616, "learning_rate": 1.1802986602712705e-06, "loss": 0.0415, "step": 3336 }, { "epoch": 0.7592718998862343, "grad_norm": 1.5230920956011404, "learning_rate": 1.1802576495313657e-06, "loss": 0.0516, "step": 3337 }, { "epoch": 0.7594994311717861, "grad_norm": 3.0018010476610044, "learning_rate": 1.1802166274430116e-06, "loss": 0.0773, "step": 3338 }, { "epoch": 0.7597269624573378, "grad_norm": 0.8881506140039246, "learning_rate": 1.1801755940070464e-06, "loss": 0.0332, "step": 3339 }, { "epoch": 0.7599544937428896, "grad_norm": 1.5510159044642893, "learning_rate": 1.1801345492243087e-06, "loss": 0.0653, "step": 3340 }, { "epoch": 0.7601820250284415, "grad_norm": 1.732180484199383, "learning_rate": 1.1800934930956378e-06, "loss": 0.0681, "step": 3341 }, { "epoch": 0.7604095563139932, "grad_norm": 1.0655757603385652, "learning_rate": 1.1800524256218724e-06, "loss": 0.0635, "step": 3342 }, { "epoch": 0.760637087599545, "grad_norm": 1.3481251487359558, "learning_rate": 1.1800113468038518e-06, "loss": 0.0685, "step": 3343 }, { "epoch": 0.7608646188850967, "grad_norm": 2.2560803054292418, "learning_rate": 1.1799702566424159e-06, "loss": 0.1302, "step": 3344 }, { "epoch": 0.7610921501706485, "grad_norm": 1.6837334743270123, "learning_rate": 1.1799291551384042e-06, "loss": 0.0842, "step": 3345 }, { "epoch": 0.7613196814562002, "grad_norm": 1.3300039883273982, "learning_rate": 1.1798880422926568e-06, "loss": 0.0518, "step": 3346 }, { "epoch": 0.761547212741752, "grad_norm": 1.8839734220786841, "learning_rate": 1.1798469181060143e-06, "loss": 0.0994, "step": 3347 }, { "epoch": 0.7617747440273037, "grad_norm": 1.600150172121613, "learning_rate": 1.1798057825793167e-06, "loss": 0.0651, "step": 3348 }, { "epoch": 0.7620022753128555, "grad_norm": 1.3818697167200884, "learning_rate": 1.179764635713405e-06, "loss": 0.0473, "step": 3349 }, { "epoch": 0.7622298065984073, "grad_norm": 0.8317911257581198, "learning_rate": 1.1797234775091204e-06, "loss": 0.0337, "step": 3350 }, { "epoch": 0.7624573378839591, "grad_norm": 1.1808932366446725, "learning_rate": 1.1796823079673036e-06, "loss": 0.0376, "step": 3351 }, { "epoch": 0.7626848691695108, "grad_norm": 1.6710801191008726, "learning_rate": 1.1796411270887965e-06, "loss": 0.0592, "step": 3352 }, { "epoch": 0.7629124004550626, "grad_norm": 1.0835763404496392, "learning_rate": 1.1795999348744403e-06, "loss": 0.0525, "step": 3353 }, { "epoch": 0.7631399317406143, "grad_norm": 1.2436113476656212, "learning_rate": 1.1795587313250773e-06, "loss": 0.0451, "step": 3354 }, { "epoch": 0.7633674630261661, "grad_norm": 1.1914849618578496, "learning_rate": 1.1795175164415493e-06, "loss": 0.0578, "step": 3355 }, { "epoch": 0.7635949943117178, "grad_norm": 2.3441953010272867, "learning_rate": 1.1794762902246988e-06, "loss": 0.1105, "step": 3356 }, { "epoch": 0.7638225255972696, "grad_norm": 2.1756802775613364, "learning_rate": 1.1794350526753688e-06, "loss": 0.1026, "step": 3357 }, { "epoch": 0.7640500568828213, "grad_norm": 2.3594222993387652, "learning_rate": 1.1793938037944014e-06, "loss": 0.1163, "step": 3358 }, { "epoch": 0.7642775881683731, "grad_norm": 0.9175490163523685, "learning_rate": 1.1793525435826399e-06, "loss": 0.0287, "step": 3359 }, { "epoch": 0.764505119453925, "grad_norm": 1.9177635612847674, "learning_rate": 1.1793112720409277e-06, "loss": 0.0945, "step": 3360 }, { "epoch": 0.7647326507394767, "grad_norm": 1.0136874739515975, "learning_rate": 1.1792699891701085e-06, "loss": 0.0389, "step": 3361 }, { "epoch": 0.7649601820250285, "grad_norm": 0.9880415043802684, "learning_rate": 1.1792286949710254e-06, "loss": 0.052, "step": 3362 }, { "epoch": 0.7651877133105802, "grad_norm": 1.5402287968172736, "learning_rate": 1.1791873894445233e-06, "loss": 0.0698, "step": 3363 }, { "epoch": 0.765415244596132, "grad_norm": 1.3968644273802429, "learning_rate": 1.1791460725914455e-06, "loss": 0.065, "step": 3364 }, { "epoch": 0.7656427758816837, "grad_norm": 1.8023364682651095, "learning_rate": 1.179104744412637e-06, "loss": 0.0653, "step": 3365 }, { "epoch": 0.7658703071672355, "grad_norm": 1.3938733296222532, "learning_rate": 1.1790634049089425e-06, "loss": 0.0604, "step": 3366 }, { "epoch": 0.7660978384527872, "grad_norm": 1.6406363966121917, "learning_rate": 1.1790220540812063e-06, "loss": 0.0781, "step": 3367 }, { "epoch": 0.766325369738339, "grad_norm": 1.2609639774979144, "learning_rate": 1.1789806919302743e-06, "loss": 0.0429, "step": 3368 }, { "epoch": 0.7665529010238907, "grad_norm": 1.4055922666036842, "learning_rate": 1.1789393184569914e-06, "loss": 0.0447, "step": 3369 }, { "epoch": 0.7667804323094426, "grad_norm": 1.2844015411070886, "learning_rate": 1.1788979336622034e-06, "loss": 0.0686, "step": 3370 }, { "epoch": 0.7670079635949943, "grad_norm": 1.2894781437661027, "learning_rate": 1.178856537546756e-06, "loss": 0.0676, "step": 3371 }, { "epoch": 0.7672354948805461, "grad_norm": 1.0960231571013044, "learning_rate": 1.1788151301114952e-06, "loss": 0.0472, "step": 3372 }, { "epoch": 0.7674630261660979, "grad_norm": 1.504396802843696, "learning_rate": 1.1787737113572678e-06, "loss": 0.0588, "step": 3373 }, { "epoch": 0.7676905574516496, "grad_norm": 1.6683614299229446, "learning_rate": 1.1787322812849196e-06, "loss": 0.0711, "step": 3374 }, { "epoch": 0.7679180887372014, "grad_norm": 2.581634805020921, "learning_rate": 1.1786908398952977e-06, "loss": 0.1279, "step": 3375 }, { "epoch": 0.7681456200227531, "grad_norm": 1.1979082190366048, "learning_rate": 1.1786493871892491e-06, "loss": 0.0387, "step": 3376 }, { "epoch": 0.7683731513083049, "grad_norm": 0.7863088959802915, "learning_rate": 1.178607923167621e-06, "loss": 0.032, "step": 3377 }, { "epoch": 0.7686006825938566, "grad_norm": 1.395956623470284, "learning_rate": 1.1785664478312607e-06, "loss": 0.0557, "step": 3378 }, { "epoch": 0.7688282138794085, "grad_norm": 1.3990313414663118, "learning_rate": 1.1785249611810163e-06, "loss": 0.0675, "step": 3379 }, { "epoch": 0.7690557451649602, "grad_norm": 1.5727682457288055, "learning_rate": 1.1784834632177352e-06, "loss": 0.0488, "step": 3380 }, { "epoch": 0.769283276450512, "grad_norm": 1.2997891762689524, "learning_rate": 1.178441953942266e-06, "loss": 0.0497, "step": 3381 }, { "epoch": 0.7695108077360637, "grad_norm": 1.1951369765878084, "learning_rate": 1.1784004333554565e-06, "loss": 0.043, "step": 3382 }, { "epoch": 0.7697383390216155, "grad_norm": 1.3760280462231655, "learning_rate": 1.178358901458156e-06, "loss": 0.0637, "step": 3383 }, { "epoch": 0.7699658703071672, "grad_norm": 1.167678907961144, "learning_rate": 1.1783173582512127e-06, "loss": 0.0504, "step": 3384 }, { "epoch": 0.770193401592719, "grad_norm": 1.1188494063998051, "learning_rate": 1.178275803735476e-06, "loss": 0.0396, "step": 3385 }, { "epoch": 0.7704209328782707, "grad_norm": 1.6858173282426545, "learning_rate": 1.1782342379117954e-06, "loss": 0.0749, "step": 3386 }, { "epoch": 0.7706484641638225, "grad_norm": 1.6532847814641132, "learning_rate": 1.17819266078102e-06, "loss": 0.0531, "step": 3387 }, { "epoch": 0.7708759954493742, "grad_norm": 1.309511727163334, "learning_rate": 1.1781510723439995e-06, "loss": 0.0619, "step": 3388 }, { "epoch": 0.7711035267349261, "grad_norm": 1.8164710685094503, "learning_rate": 1.1781094726015842e-06, "loss": 0.0849, "step": 3389 }, { "epoch": 0.7713310580204779, "grad_norm": 1.5604092892535402, "learning_rate": 1.1780678615546245e-06, "loss": 0.0718, "step": 3390 }, { "epoch": 0.7715585893060296, "grad_norm": 1.3284971139146458, "learning_rate": 1.1780262392039706e-06, "loss": 0.0532, "step": 3391 }, { "epoch": 0.7717861205915814, "grad_norm": 0.8981286940096231, "learning_rate": 1.177984605550473e-06, "loss": 0.038, "step": 3392 }, { "epoch": 0.7720136518771331, "grad_norm": 1.2803703590807574, "learning_rate": 1.177942960594983e-06, "loss": 0.0381, "step": 3393 }, { "epoch": 0.7722411831626849, "grad_norm": 1.0532494793515876, "learning_rate": 1.1779013043383516e-06, "loss": 0.0274, "step": 3394 }, { "epoch": 0.7724687144482366, "grad_norm": 1.0370979550305721, "learning_rate": 1.17785963678143e-06, "loss": 0.0527, "step": 3395 }, { "epoch": 0.7726962457337884, "grad_norm": 0.8794501213484224, "learning_rate": 1.1778179579250699e-06, "loss": 0.0252, "step": 3396 }, { "epoch": 0.7729237770193401, "grad_norm": 1.2333346024054503, "learning_rate": 1.1777762677701232e-06, "loss": 0.0443, "step": 3397 }, { "epoch": 0.7731513083048919, "grad_norm": 1.0027591833023217, "learning_rate": 1.1777345663174419e-06, "loss": 0.0689, "step": 3398 }, { "epoch": 0.7733788395904437, "grad_norm": 1.1808711247974837, "learning_rate": 1.1776928535678784e-06, "loss": 0.0575, "step": 3399 }, { "epoch": 0.7736063708759955, "grad_norm": 1.3093615145015158, "learning_rate": 1.1776511295222852e-06, "loss": 0.0602, "step": 3400 }, { "epoch": 0.7738339021615472, "grad_norm": 1.488841199985786, "learning_rate": 1.177609394181515e-06, "loss": 0.0713, "step": 3401 }, { "epoch": 0.774061433447099, "grad_norm": 2.280972914382851, "learning_rate": 1.177567647546421e-06, "loss": 0.1191, "step": 3402 }, { "epoch": 0.7742889647326507, "grad_norm": 1.6714940535505132, "learning_rate": 1.177525889617856e-06, "loss": 0.0872, "step": 3403 }, { "epoch": 0.7745164960182025, "grad_norm": 0.7472197048359, "learning_rate": 1.177484120396674e-06, "loss": 0.0307, "step": 3404 }, { "epoch": 0.7747440273037542, "grad_norm": 0.9070022095243302, "learning_rate": 1.1774423398837282e-06, "loss": 0.0269, "step": 3405 }, { "epoch": 0.774971558589306, "grad_norm": 0.8847029817903657, "learning_rate": 1.177400548079873e-06, "loss": 0.038, "step": 3406 }, { "epoch": 0.7751990898748577, "grad_norm": 1.0038791040094233, "learning_rate": 1.177358744985962e-06, "loss": 0.0514, "step": 3407 }, { "epoch": 0.7754266211604095, "grad_norm": 0.9653957279075387, "learning_rate": 1.1773169306028498e-06, "loss": 0.0376, "step": 3408 }, { "epoch": 0.7756541524459614, "grad_norm": 1.025381489223499, "learning_rate": 1.1772751049313911e-06, "loss": 0.0404, "step": 3409 }, { "epoch": 0.7758816837315131, "grad_norm": 4.749905802391231, "learning_rate": 1.1772332679724408e-06, "loss": 0.0609, "step": 3410 }, { "epoch": 0.7761092150170649, "grad_norm": 0.9550524873306326, "learning_rate": 1.1771914197268538e-06, "loss": 0.0303, "step": 3411 }, { "epoch": 0.7763367463026166, "grad_norm": 0.7920355825602112, "learning_rate": 1.1771495601954856e-06, "loss": 0.0357, "step": 3412 }, { "epoch": 0.7765642775881684, "grad_norm": 1.0204752879838752, "learning_rate": 1.1771076893791914e-06, "loss": 0.0296, "step": 3413 }, { "epoch": 0.7767918088737201, "grad_norm": 1.1240956814281524, "learning_rate": 1.1770658072788272e-06, "loss": 0.0326, "step": 3414 }, { "epoch": 0.7770193401592719, "grad_norm": 2.188614006253551, "learning_rate": 1.1770239138952492e-06, "loss": 0.0814, "step": 3415 }, { "epoch": 0.7772468714448236, "grad_norm": 2.1028001694138916, "learning_rate": 1.1769820092293132e-06, "loss": 0.1016, "step": 3416 }, { "epoch": 0.7774744027303754, "grad_norm": 1.3041493538358495, "learning_rate": 1.1769400932818758e-06, "loss": 0.0689, "step": 3417 }, { "epoch": 0.7777019340159272, "grad_norm": 0.8082774287076608, "learning_rate": 1.1768981660537938e-06, "loss": 0.0465, "step": 3418 }, { "epoch": 0.777929465301479, "grad_norm": 0.9743628621052991, "learning_rate": 1.1768562275459242e-06, "loss": 0.0515, "step": 3419 }, { "epoch": 0.7781569965870307, "grad_norm": 1.3767418297539598, "learning_rate": 1.1768142777591235e-06, "loss": 0.0765, "step": 3420 }, { "epoch": 0.7783845278725825, "grad_norm": 3.29832363084199, "learning_rate": 1.17677231669425e-06, "loss": 0.1072, "step": 3421 }, { "epoch": 0.7786120591581343, "grad_norm": 1.3112653588991108, "learning_rate": 1.1767303443521608e-06, "loss": 0.0412, "step": 3422 }, { "epoch": 0.778839590443686, "grad_norm": 1.3040229403784145, "learning_rate": 1.1766883607337137e-06, "loss": 0.0748, "step": 3423 }, { "epoch": 0.7790671217292378, "grad_norm": 1.5632822983270742, "learning_rate": 1.176646365839767e-06, "loss": 0.0565, "step": 3424 }, { "epoch": 0.7792946530147895, "grad_norm": 0.8997709014511377, "learning_rate": 1.1766043596711787e-06, "loss": 0.0612, "step": 3425 }, { "epoch": 0.7795221843003413, "grad_norm": 1.0705681786627015, "learning_rate": 1.1765623422288078e-06, "loss": 0.0353, "step": 3426 }, { "epoch": 0.779749715585893, "grad_norm": 0.9644671179029716, "learning_rate": 1.1765203135135126e-06, "loss": 0.0371, "step": 3427 }, { "epoch": 0.7799772468714449, "grad_norm": 1.1638233882315414, "learning_rate": 1.176478273526152e-06, "loss": 0.0519, "step": 3428 }, { "epoch": 0.7802047781569966, "grad_norm": 1.6611610210478276, "learning_rate": 1.1764362222675857e-06, "loss": 0.0733, "step": 3429 }, { "epoch": 0.7804323094425484, "grad_norm": 1.3309978458504417, "learning_rate": 1.176394159738673e-06, "loss": 0.0658, "step": 3430 }, { "epoch": 0.7806598407281001, "grad_norm": 1.379255981946512, "learning_rate": 1.1763520859402735e-06, "loss": 0.0803, "step": 3431 }, { "epoch": 0.7808873720136519, "grad_norm": 1.2786437648131908, "learning_rate": 1.176310000873247e-06, "loss": 0.0523, "step": 3432 }, { "epoch": 0.7811149032992036, "grad_norm": 1.382541657289892, "learning_rate": 1.1762679045384537e-06, "loss": 0.0593, "step": 3433 }, { "epoch": 0.7813424345847554, "grad_norm": 1.141255046714792, "learning_rate": 1.1762257969367543e-06, "loss": 0.0404, "step": 3434 }, { "epoch": 0.7815699658703071, "grad_norm": 1.6145161686719605, "learning_rate": 1.176183678069009e-06, "loss": 0.0636, "step": 3435 }, { "epoch": 0.7817974971558589, "grad_norm": 2.0980684433512, "learning_rate": 1.1761415479360784e-06, "loss": 0.0895, "step": 3436 }, { "epoch": 0.7820250284414106, "grad_norm": 1.1114214017016597, "learning_rate": 1.1760994065388246e-06, "loss": 0.0443, "step": 3437 }, { "epoch": 0.7822525597269625, "grad_norm": 1.8826452294482943, "learning_rate": 1.1760572538781077e-06, "loss": 0.0543, "step": 3438 }, { "epoch": 0.7824800910125143, "grad_norm": 0.8873377874884448, "learning_rate": 1.17601508995479e-06, "loss": 0.0376, "step": 3439 }, { "epoch": 0.782707622298066, "grad_norm": 1.113759690507731, "learning_rate": 1.175972914769733e-06, "loss": 0.0387, "step": 3440 }, { "epoch": 0.7829351535836178, "grad_norm": 0.880447245871124, "learning_rate": 1.1759307283237986e-06, "loss": 0.0364, "step": 3441 }, { "epoch": 0.7831626848691695, "grad_norm": 0.687251787996786, "learning_rate": 1.175888530617849e-06, "loss": 0.0272, "step": 3442 }, { "epoch": 0.7833902161547213, "grad_norm": 1.9024845236880248, "learning_rate": 1.175846321652747e-06, "loss": 0.0815, "step": 3443 }, { "epoch": 0.783617747440273, "grad_norm": 0.9590041079160219, "learning_rate": 1.1758041014293548e-06, "loss": 0.0503, "step": 3444 }, { "epoch": 0.7838452787258248, "grad_norm": 0.8857462180203992, "learning_rate": 1.1757618699485353e-06, "loss": 0.0472, "step": 3445 }, { "epoch": 0.7840728100113765, "grad_norm": 1.299303333784786, "learning_rate": 1.1757196272111524e-06, "loss": 0.0555, "step": 3446 }, { "epoch": 0.7843003412969284, "grad_norm": 0.845660192083495, "learning_rate": 1.1756773732180684e-06, "loss": 0.025, "step": 3447 }, { "epoch": 0.7845278725824801, "grad_norm": 2.1486155155967737, "learning_rate": 1.1756351079701477e-06, "loss": 0.086, "step": 3448 }, { "epoch": 0.7847554038680319, "grad_norm": 1.373331795777772, "learning_rate": 1.1755928314682537e-06, "loss": 0.0432, "step": 3449 }, { "epoch": 0.7849829351535836, "grad_norm": 0.8852881088520247, "learning_rate": 1.1755505437132507e-06, "loss": 0.0291, "step": 3450 }, { "epoch": 0.7852104664391354, "grad_norm": 2.454713357414872, "learning_rate": 1.1755082447060029e-06, "loss": 0.1266, "step": 3451 }, { "epoch": 0.7854379977246871, "grad_norm": 1.4196724985388591, "learning_rate": 1.1754659344473747e-06, "loss": 0.0548, "step": 3452 }, { "epoch": 0.7856655290102389, "grad_norm": 1.0229434892505915, "learning_rate": 1.175423612938231e-06, "loss": 0.0558, "step": 3453 }, { "epoch": 0.7858930602957906, "grad_norm": 1.3800295427652078, "learning_rate": 1.1753812801794368e-06, "loss": 0.0617, "step": 3454 }, { "epoch": 0.7861205915813424, "grad_norm": 1.5307677991589903, "learning_rate": 1.175338936171857e-06, "loss": 0.0551, "step": 3455 }, { "epoch": 0.7863481228668942, "grad_norm": 1.1267953232901513, "learning_rate": 1.1752965809163574e-06, "loss": 0.0427, "step": 3456 }, { "epoch": 0.786575654152446, "grad_norm": 1.227541921588263, "learning_rate": 1.1752542144138033e-06, "loss": 0.0486, "step": 3457 }, { "epoch": 0.7868031854379978, "grad_norm": 1.679516124458468, "learning_rate": 1.1752118366650608e-06, "loss": 0.0589, "step": 3458 }, { "epoch": 0.7870307167235495, "grad_norm": 1.0236231221195786, "learning_rate": 1.1751694476709962e-06, "loss": 0.0306, "step": 3459 }, { "epoch": 0.7872582480091013, "grad_norm": 1.3331183973827108, "learning_rate": 1.1751270474324757e-06, "loss": 0.0651, "step": 3460 }, { "epoch": 0.787485779294653, "grad_norm": 1.3868556370726, "learning_rate": 1.1750846359503657e-06, "loss": 0.0753, "step": 3461 }, { "epoch": 0.7877133105802048, "grad_norm": 1.2017443079409735, "learning_rate": 1.1750422132255334e-06, "loss": 0.0626, "step": 3462 }, { "epoch": 0.7879408418657565, "grad_norm": 1.4474690077464614, "learning_rate": 1.1749997792588453e-06, "loss": 0.0449, "step": 3463 }, { "epoch": 0.7881683731513083, "grad_norm": 1.2518797374298931, "learning_rate": 1.1749573340511693e-06, "loss": 0.0621, "step": 3464 }, { "epoch": 0.78839590443686, "grad_norm": 1.639438514025765, "learning_rate": 1.1749148776033723e-06, "loss": 0.063, "step": 3465 }, { "epoch": 0.7886234357224118, "grad_norm": 0.8717910913192211, "learning_rate": 1.1748724099163225e-06, "loss": 0.042, "step": 3466 }, { "epoch": 0.7888509670079636, "grad_norm": 1.144252834465786, "learning_rate": 1.1748299309908878e-06, "loss": 0.0396, "step": 3467 }, { "epoch": 0.7890784982935154, "grad_norm": 1.2552868730516495, "learning_rate": 1.174787440827936e-06, "loss": 0.0561, "step": 3468 }, { "epoch": 0.7893060295790671, "grad_norm": 0.9915360367623126, "learning_rate": 1.1747449394283361e-06, "loss": 0.0417, "step": 3469 }, { "epoch": 0.7895335608646189, "grad_norm": 2.2809743574627612, "learning_rate": 1.174702426792956e-06, "loss": 0.122, "step": 3470 }, { "epoch": 0.7897610921501707, "grad_norm": 0.9319050863528758, "learning_rate": 1.1746599029226656e-06, "loss": 0.024, "step": 3471 }, { "epoch": 0.7899886234357224, "grad_norm": 1.0243621293291936, "learning_rate": 1.174617367818333e-06, "loss": 0.0334, "step": 3472 }, { "epoch": 0.7902161547212742, "grad_norm": 0.5162499932362775, "learning_rate": 1.174574821480828e-06, "loss": 0.028, "step": 3473 }, { "epoch": 0.7904436860068259, "grad_norm": 1.1251631282315664, "learning_rate": 1.1745322639110203e-06, "loss": 0.0314, "step": 3474 }, { "epoch": 0.7906712172923777, "grad_norm": 0.881794120596432, "learning_rate": 1.1744896951097798e-06, "loss": 0.0453, "step": 3475 }, { "epoch": 0.7908987485779294, "grad_norm": 0.7316213076139558, "learning_rate": 1.1744471150779758e-06, "loss": 0.0183, "step": 3476 }, { "epoch": 0.7911262798634813, "grad_norm": 1.6604024020521415, "learning_rate": 1.1744045238164793e-06, "loss": 0.0587, "step": 3477 }, { "epoch": 0.791353811149033, "grad_norm": 1.3826539577284072, "learning_rate": 1.1743619213261604e-06, "loss": 0.0626, "step": 3478 }, { "epoch": 0.7915813424345848, "grad_norm": 0.9873190669247738, "learning_rate": 1.1743193076078901e-06, "loss": 0.0407, "step": 3479 }, { "epoch": 0.7918088737201365, "grad_norm": 2.2069878657118243, "learning_rate": 1.174276682662539e-06, "loss": 0.0907, "step": 3480 }, { "epoch": 0.7920364050056883, "grad_norm": 0.9695329009047923, "learning_rate": 1.1742340464909786e-06, "loss": 0.0444, "step": 3481 }, { "epoch": 0.79226393629124, "grad_norm": 1.18743378027036, "learning_rate": 1.1741913990940801e-06, "loss": 0.0417, "step": 3482 }, { "epoch": 0.7924914675767918, "grad_norm": 1.2335459566427895, "learning_rate": 1.174148740472715e-06, "loss": 0.0357, "step": 3483 }, { "epoch": 0.7927189988623435, "grad_norm": 1.7123789864089465, "learning_rate": 1.1741060706277557e-06, "loss": 0.0549, "step": 3484 }, { "epoch": 0.7929465301478953, "grad_norm": 1.0088683967525243, "learning_rate": 1.1740633895600738e-06, "loss": 0.0487, "step": 3485 }, { "epoch": 0.7931740614334472, "grad_norm": 0.9729774800349912, "learning_rate": 1.1740206972705418e-06, "loss": 0.0487, "step": 3486 }, { "epoch": 0.7934015927189989, "grad_norm": 1.357974224485339, "learning_rate": 1.1739779937600323e-06, "loss": 0.0501, "step": 3487 }, { "epoch": 0.7936291240045507, "grad_norm": 1.2508424825549256, "learning_rate": 1.1739352790294177e-06, "loss": 0.0373, "step": 3488 }, { "epoch": 0.7938566552901024, "grad_norm": 1.3686653687913515, "learning_rate": 1.1738925530795716e-06, "loss": 0.0517, "step": 3489 }, { "epoch": 0.7940841865756542, "grad_norm": 1.3919271731699832, "learning_rate": 1.173849815911367e-06, "loss": 0.0513, "step": 3490 }, { "epoch": 0.7943117178612059, "grad_norm": 1.0732983866451746, "learning_rate": 1.1738070675256773e-06, "loss": 0.0485, "step": 3491 }, { "epoch": 0.7945392491467577, "grad_norm": 0.8920921807243749, "learning_rate": 1.1737643079233763e-06, "loss": 0.0423, "step": 3492 }, { "epoch": 0.7947667804323094, "grad_norm": 0.8985878758463217, "learning_rate": 1.1737215371053376e-06, "loss": 0.0315, "step": 3493 }, { "epoch": 0.7949943117178612, "grad_norm": 1.4217871233611445, "learning_rate": 1.1736787550724357e-06, "loss": 0.0634, "step": 3494 }, { "epoch": 0.7952218430034129, "grad_norm": 1.6601308191313218, "learning_rate": 1.1736359618255452e-06, "loss": 0.072, "step": 3495 }, { "epoch": 0.7954493742889648, "grad_norm": 1.9516506015444255, "learning_rate": 1.1735931573655402e-06, "loss": 0.0845, "step": 3496 }, { "epoch": 0.7956769055745165, "grad_norm": 13.731764530376946, "learning_rate": 1.1735503416932957e-06, "loss": 0.3361, "step": 3497 }, { "epoch": 0.7959044368600683, "grad_norm": 0.8458278086989922, "learning_rate": 1.1735075148096869e-06, "loss": 0.0459, "step": 3498 }, { "epoch": 0.79613196814562, "grad_norm": 1.0325577020668653, "learning_rate": 1.173464676715589e-06, "loss": 0.0491, "step": 3499 }, { "epoch": 0.7963594994311718, "grad_norm": 1.0941809224515442, "learning_rate": 1.1734218274118775e-06, "loss": 0.0509, "step": 3500 }, { "epoch": 0.7965870307167235, "grad_norm": 1.4998412599053896, "learning_rate": 1.1733789668994285e-06, "loss": 0.0674, "step": 3501 }, { "epoch": 0.7968145620022753, "grad_norm": 1.4510084408943071, "learning_rate": 1.1733360951791176e-06, "loss": 0.078, "step": 3502 }, { "epoch": 0.797042093287827, "grad_norm": 0.8199454039615341, "learning_rate": 1.173293212251821e-06, "loss": 0.0277, "step": 3503 }, { "epoch": 0.7972696245733788, "grad_norm": 1.4797806837589549, "learning_rate": 1.1732503181184155e-06, "loss": 0.0457, "step": 3504 }, { "epoch": 0.7974971558589306, "grad_norm": 1.6954088656786952, "learning_rate": 1.1732074127797773e-06, "loss": 0.0431, "step": 3505 }, { "epoch": 0.7977246871444824, "grad_norm": 0.78592343457484, "learning_rate": 1.1731644962367838e-06, "loss": 0.0289, "step": 3506 }, { "epoch": 0.7979522184300342, "grad_norm": 1.0800031446768374, "learning_rate": 1.173121568490312e-06, "loss": 0.068, "step": 3507 }, { "epoch": 0.7981797497155859, "grad_norm": 1.4924469013770905, "learning_rate": 1.173078629541239e-06, "loss": 0.0986, "step": 3508 }, { "epoch": 0.7984072810011377, "grad_norm": 1.0339163270277598, "learning_rate": 1.1730356793904426e-06, "loss": 0.0393, "step": 3509 }, { "epoch": 0.7986348122866894, "grad_norm": 2.1650674647440002, "learning_rate": 1.1729927180388008e-06, "loss": 0.0748, "step": 3510 }, { "epoch": 0.7988623435722412, "grad_norm": 1.3477613309939473, "learning_rate": 1.172949745487191e-06, "loss": 0.0511, "step": 3511 }, { "epoch": 0.7990898748577929, "grad_norm": 1.941320037871878, "learning_rate": 1.1729067617364923e-06, "loss": 0.0653, "step": 3512 }, { "epoch": 0.7993174061433447, "grad_norm": 1.3711705573827695, "learning_rate": 1.1728637667875825e-06, "loss": 0.0583, "step": 3513 }, { "epoch": 0.7995449374288964, "grad_norm": 1.4216556543524117, "learning_rate": 1.172820760641341e-06, "loss": 0.0767, "step": 3514 }, { "epoch": 0.7997724687144482, "grad_norm": 1.0065561289408549, "learning_rate": 1.1727777432986465e-06, "loss": 0.0469, "step": 3515 }, { "epoch": 0.8, "grad_norm": 1.3114054375469062, "learning_rate": 1.1727347147603778e-06, "loss": 0.051, "step": 3516 }, { "epoch": 0.8002275312855518, "grad_norm": 1.1635377251522268, "learning_rate": 1.1726916750274148e-06, "loss": 0.0425, "step": 3517 }, { "epoch": 0.8004550625711035, "grad_norm": 0.7109505941253476, "learning_rate": 1.172648624100637e-06, "loss": 0.0335, "step": 3518 }, { "epoch": 0.8006825938566553, "grad_norm": 2.140244010424768, "learning_rate": 1.1726055619809245e-06, "loss": 0.0764, "step": 3519 }, { "epoch": 0.800910125142207, "grad_norm": 1.444555156138925, "learning_rate": 1.172562488669157e-06, "loss": 0.0618, "step": 3520 }, { "epoch": 0.8011376564277588, "grad_norm": 1.7685121918701119, "learning_rate": 1.172519404166215e-06, "loss": 0.0857, "step": 3521 }, { "epoch": 0.8013651877133106, "grad_norm": 1.713827324352964, "learning_rate": 1.1724763084729792e-06, "loss": 0.0633, "step": 3522 }, { "epoch": 0.8015927189988623, "grad_norm": 1.1050834097261653, "learning_rate": 1.1724332015903303e-06, "loss": 0.0519, "step": 3523 }, { "epoch": 0.8018202502844141, "grad_norm": 1.6510556663604237, "learning_rate": 1.1723900835191494e-06, "loss": 0.0789, "step": 3524 }, { "epoch": 0.8020477815699659, "grad_norm": 0.9667126871814873, "learning_rate": 1.1723469542603174e-06, "loss": 0.042, "step": 3525 }, { "epoch": 0.8022753128555177, "grad_norm": 0.8657263063272098, "learning_rate": 1.1723038138147165e-06, "loss": 0.0313, "step": 3526 }, { "epoch": 0.8025028441410694, "grad_norm": 1.856452808149009, "learning_rate": 1.1722606621832278e-06, "loss": 0.0896, "step": 3527 }, { "epoch": 0.8027303754266212, "grad_norm": 0.6848148476167641, "learning_rate": 1.1722174993667335e-06, "loss": 0.0183, "step": 3528 }, { "epoch": 0.8029579067121729, "grad_norm": 1.019323438363341, "learning_rate": 1.1721743253661155e-06, "loss": 0.051, "step": 3529 }, { "epoch": 0.8031854379977247, "grad_norm": 0.6693914231036064, "learning_rate": 1.1721311401822566e-06, "loss": 0.0232, "step": 3530 }, { "epoch": 0.8034129692832764, "grad_norm": 0.920308592294961, "learning_rate": 1.1720879438160391e-06, "loss": 0.0305, "step": 3531 }, { "epoch": 0.8036405005688282, "grad_norm": 1.3695873071245632, "learning_rate": 1.1720447362683458e-06, "loss": 0.045, "step": 3532 }, { "epoch": 0.8038680318543799, "grad_norm": 1.7871009286158004, "learning_rate": 1.1720015175400602e-06, "loss": 0.0834, "step": 3533 }, { "epoch": 0.8040955631399317, "grad_norm": 0.6832287827660394, "learning_rate": 1.1719582876320655e-06, "loss": 0.0263, "step": 3534 }, { "epoch": 0.8043230944254836, "grad_norm": 0.7627184412086807, "learning_rate": 1.1719150465452447e-06, "loss": 0.0262, "step": 3535 }, { "epoch": 0.8045506257110353, "grad_norm": 1.716803621327735, "learning_rate": 1.1718717942804822e-06, "loss": 0.0734, "step": 3536 }, { "epoch": 0.8047781569965871, "grad_norm": 2.0486680528761854, "learning_rate": 1.1718285308386618e-06, "loss": 0.1516, "step": 3537 }, { "epoch": 0.8050056882821388, "grad_norm": 1.3104153747023377, "learning_rate": 1.1717852562206678e-06, "loss": 0.0414, "step": 3538 }, { "epoch": 0.8052332195676906, "grad_norm": 1.5524472443736617, "learning_rate": 1.1717419704273844e-06, "loss": 0.0532, "step": 3539 }, { "epoch": 0.8054607508532423, "grad_norm": 1.8181142023265993, "learning_rate": 1.1716986734596963e-06, "loss": 0.074, "step": 3540 }, { "epoch": 0.8056882821387941, "grad_norm": 1.5770991987541603, "learning_rate": 1.1716553653184887e-06, "loss": 0.0572, "step": 3541 }, { "epoch": 0.8059158134243458, "grad_norm": 1.603046856744777, "learning_rate": 1.1716120460046464e-06, "loss": 0.068, "step": 3542 }, { "epoch": 0.8061433447098976, "grad_norm": 0.9490597103722859, "learning_rate": 1.1715687155190553e-06, "loss": 0.044, "step": 3543 }, { "epoch": 0.8063708759954493, "grad_norm": 1.4475288739954406, "learning_rate": 1.1715253738626005e-06, "loss": 0.0859, "step": 3544 }, { "epoch": 0.8065984072810012, "grad_norm": 2.4651858821386132, "learning_rate": 1.171482021036168e-06, "loss": 0.1099, "step": 3545 }, { "epoch": 0.8068259385665529, "grad_norm": 0.6592130419111449, "learning_rate": 1.1714386570406439e-06, "loss": 0.0271, "step": 3546 }, { "epoch": 0.8070534698521047, "grad_norm": 1.7063955755708686, "learning_rate": 1.1713952818769142e-06, "loss": 0.111, "step": 3547 }, { "epoch": 0.8072810011376564, "grad_norm": 2.169506097036596, "learning_rate": 1.1713518955458657e-06, "loss": 0.0733, "step": 3548 }, { "epoch": 0.8075085324232082, "grad_norm": 1.2288435774277289, "learning_rate": 1.1713084980483849e-06, "loss": 0.042, "step": 3549 }, { "epoch": 0.8077360637087599, "grad_norm": 1.577397877205651, "learning_rate": 1.1712650893853591e-06, "loss": 0.0831, "step": 3550 }, { "epoch": 0.8079635949943117, "grad_norm": 1.1381216086876993, "learning_rate": 1.1712216695576753e-06, "loss": 0.0531, "step": 3551 }, { "epoch": 0.8081911262798634, "grad_norm": 1.1404596418271893, "learning_rate": 1.171178238566221e-06, "loss": 0.043, "step": 3552 }, { "epoch": 0.8084186575654152, "grad_norm": 1.317622472587367, "learning_rate": 1.1711347964118837e-06, "loss": 0.0529, "step": 3553 }, { "epoch": 0.8086461888509671, "grad_norm": 1.065643129987413, "learning_rate": 1.1710913430955514e-06, "loss": 0.0375, "step": 3554 }, { "epoch": 0.8088737201365188, "grad_norm": 1.3409199390597606, "learning_rate": 1.171047878618112e-06, "loss": 0.0424, "step": 3555 }, { "epoch": 0.8091012514220706, "grad_norm": 1.795521953652107, "learning_rate": 1.1710044029804542e-06, "loss": 0.0649, "step": 3556 }, { "epoch": 0.8093287827076223, "grad_norm": 1.4702215735242095, "learning_rate": 1.1709609161834663e-06, "loss": 0.0714, "step": 3557 }, { "epoch": 0.8095563139931741, "grad_norm": 1.4542304303142697, "learning_rate": 1.1709174182280371e-06, "loss": 0.074, "step": 3558 }, { "epoch": 0.8097838452787258, "grad_norm": 0.9222037242585802, "learning_rate": 1.1708739091150557e-06, "loss": 0.0479, "step": 3559 }, { "epoch": 0.8100113765642776, "grad_norm": 1.1706470717553774, "learning_rate": 1.1708303888454113e-06, "loss": 0.0499, "step": 3560 }, { "epoch": 0.8102389078498293, "grad_norm": 1.4734537784856048, "learning_rate": 1.1707868574199934e-06, "loss": 0.0684, "step": 3561 }, { "epoch": 0.8104664391353811, "grad_norm": 2.40284003722661, "learning_rate": 1.1707433148396918e-06, "loss": 0.1156, "step": 3562 }, { "epoch": 0.8106939704209328, "grad_norm": 1.283544001327138, "learning_rate": 1.1706997611053963e-06, "loss": 0.041, "step": 3563 }, { "epoch": 0.8109215017064847, "grad_norm": 1.2655368943151744, "learning_rate": 1.1706561962179968e-06, "loss": 0.0517, "step": 3564 }, { "epoch": 0.8111490329920364, "grad_norm": 1.2742964978863571, "learning_rate": 1.1706126201783844e-06, "loss": 0.0601, "step": 3565 }, { "epoch": 0.8113765642775882, "grad_norm": 1.1638624014319168, "learning_rate": 1.170569032987449e-06, "loss": 0.0408, "step": 3566 }, { "epoch": 0.81160409556314, "grad_norm": 1.3521980505141613, "learning_rate": 1.1705254346460818e-06, "loss": 0.0494, "step": 3567 }, { "epoch": 0.8118316268486917, "grad_norm": 1.002566765933997, "learning_rate": 1.170481825155174e-06, "loss": 0.0608, "step": 3568 }, { "epoch": 0.8120591581342435, "grad_norm": 1.0785551144421794, "learning_rate": 1.1704382045156162e-06, "loss": 0.0443, "step": 3569 }, { "epoch": 0.8122866894197952, "grad_norm": 0.9426048538607938, "learning_rate": 1.1703945727283008e-06, "loss": 0.0293, "step": 3570 }, { "epoch": 0.812514220705347, "grad_norm": 0.9121003153189483, "learning_rate": 1.1703509297941193e-06, "loss": 0.0469, "step": 3571 }, { "epoch": 0.8127417519908987, "grad_norm": 1.1024950352965313, "learning_rate": 1.1703072757139633e-06, "loss": 0.0432, "step": 3572 }, { "epoch": 0.8129692832764505, "grad_norm": 1.5884989646335343, "learning_rate": 1.1702636104887253e-06, "loss": 0.0567, "step": 3573 }, { "epoch": 0.8131968145620023, "grad_norm": 1.3193688692811854, "learning_rate": 1.1702199341192977e-06, "loss": 0.0492, "step": 3574 }, { "epoch": 0.8134243458475541, "grad_norm": 1.278989450905917, "learning_rate": 1.1701762466065733e-06, "loss": 0.0673, "step": 3575 }, { "epoch": 0.8136518771331058, "grad_norm": 1.2742872087629682, "learning_rate": 1.1701325479514446e-06, "loss": 0.0515, "step": 3576 }, { "epoch": 0.8138794084186576, "grad_norm": 1.0146683924087292, "learning_rate": 1.1700888381548052e-06, "loss": 0.0313, "step": 3577 }, { "epoch": 0.8141069397042093, "grad_norm": 1.2456789380747535, "learning_rate": 1.1700451172175482e-06, "loss": 0.0795, "step": 3578 }, { "epoch": 0.8143344709897611, "grad_norm": 1.0481095629438555, "learning_rate": 1.1700013851405673e-06, "loss": 0.0388, "step": 3579 }, { "epoch": 0.8145620022753128, "grad_norm": 2.2650366394721293, "learning_rate": 1.169957641924756e-06, "loss": 0.0791, "step": 3580 }, { "epoch": 0.8147895335608646, "grad_norm": 1.0382772002032885, "learning_rate": 1.1699138875710087e-06, "loss": 0.0378, "step": 3581 }, { "epoch": 0.8150170648464163, "grad_norm": 1.1375187926566632, "learning_rate": 1.1698701220802194e-06, "loss": 0.046, "step": 3582 }, { "epoch": 0.8152445961319681, "grad_norm": 1.1605133417672624, "learning_rate": 1.1698263454532827e-06, "loss": 0.0563, "step": 3583 }, { "epoch": 0.81547212741752, "grad_norm": 1.382705077452947, "learning_rate": 1.1697825576910933e-06, "loss": 0.0388, "step": 3584 }, { "epoch": 0.8156996587030717, "grad_norm": 24.85184337955777, "learning_rate": 1.1697387587945461e-06, "loss": 0.4189, "step": 3585 }, { "epoch": 0.8159271899886235, "grad_norm": 1.375750049796865, "learning_rate": 1.1696949487645365e-06, "loss": 0.0746, "step": 3586 }, { "epoch": 0.8161547212741752, "grad_norm": 1.9135630568236137, "learning_rate": 1.1696511276019595e-06, "loss": 0.0963, "step": 3587 }, { "epoch": 0.816382252559727, "grad_norm": 1.4697481500847787, "learning_rate": 1.169607295307711e-06, "loss": 0.0468, "step": 3588 }, { "epoch": 0.8166097838452787, "grad_norm": 1.8024021280856608, "learning_rate": 1.169563451882687e-06, "loss": 0.0734, "step": 3589 }, { "epoch": 0.8168373151308305, "grad_norm": 1.0001763795597935, "learning_rate": 1.1695195973277831e-06, "loss": 0.0377, "step": 3590 }, { "epoch": 0.8170648464163822, "grad_norm": 1.6468051827674142, "learning_rate": 1.169475731643896e-06, "loss": 0.0757, "step": 3591 }, { "epoch": 0.817292377701934, "grad_norm": 1.1181893907591056, "learning_rate": 1.169431854831922e-06, "loss": 0.0423, "step": 3592 }, { "epoch": 0.8175199089874858, "grad_norm": 1.153273064949388, "learning_rate": 1.169387966892758e-06, "loss": 0.0697, "step": 3593 }, { "epoch": 0.8177474402730376, "grad_norm": 1.694259041690198, "learning_rate": 1.169344067827301e-06, "loss": 0.0942, "step": 3594 }, { "epoch": 0.8179749715585893, "grad_norm": 1.1843299797048426, "learning_rate": 1.1693001576364483e-06, "loss": 0.0649, "step": 3595 }, { "epoch": 0.8182025028441411, "grad_norm": 1.5290468150322878, "learning_rate": 1.1692562363210969e-06, "loss": 0.0586, "step": 3596 }, { "epoch": 0.8184300341296928, "grad_norm": 1.020107068762973, "learning_rate": 1.169212303882145e-06, "loss": 0.045, "step": 3597 }, { "epoch": 0.8186575654152446, "grad_norm": 0.8683619857454433, "learning_rate": 1.1691683603204901e-06, "loss": 0.0628, "step": 3598 }, { "epoch": 0.8188850967007963, "grad_norm": 1.2042022871006006, "learning_rate": 1.1691244056370307e-06, "loss": 0.0461, "step": 3599 }, { "epoch": 0.8191126279863481, "grad_norm": 0.9629993703014715, "learning_rate": 1.169080439832665e-06, "loss": 0.0429, "step": 3600 }, { "epoch": 0.8193401592718998, "grad_norm": 1.3152436305816528, "learning_rate": 1.1690364629082915e-06, "loss": 0.0414, "step": 3601 }, { "epoch": 0.8195676905574516, "grad_norm": 1.2407550380695793, "learning_rate": 1.168992474864809e-06, "loss": 0.0426, "step": 3602 }, { "epoch": 0.8197952218430035, "grad_norm": 1.7099317456821084, "learning_rate": 1.1689484757031167e-06, "loss": 0.0661, "step": 3603 }, { "epoch": 0.8200227531285552, "grad_norm": 2.4921680206760173, "learning_rate": 1.1689044654241136e-06, "loss": 0.0975, "step": 3604 }, { "epoch": 0.820250284414107, "grad_norm": 0.9191594230152784, "learning_rate": 1.1688604440286994e-06, "loss": 0.0427, "step": 3605 }, { "epoch": 0.8204778156996587, "grad_norm": 1.5850443718709968, "learning_rate": 1.1688164115177737e-06, "loss": 0.064, "step": 3606 }, { "epoch": 0.8207053469852105, "grad_norm": 1.4447088659158112, "learning_rate": 1.1687723678922367e-06, "loss": 0.0644, "step": 3607 }, { "epoch": 0.8209328782707622, "grad_norm": 1.1876210795397029, "learning_rate": 1.1687283131529882e-06, "loss": 0.05, "step": 3608 }, { "epoch": 0.821160409556314, "grad_norm": 1.1124560475691172, "learning_rate": 1.1686842473009286e-06, "loss": 0.052, "step": 3609 }, { "epoch": 0.8213879408418657, "grad_norm": 1.1061762100158978, "learning_rate": 1.168640170336959e-06, "loss": 0.0386, "step": 3610 }, { "epoch": 0.8216154721274175, "grad_norm": 1.44352318845533, "learning_rate": 1.16859608226198e-06, "loss": 0.063, "step": 3611 }, { "epoch": 0.8218430034129692, "grad_norm": 1.6539718566592532, "learning_rate": 1.1685519830768923e-06, "loss": 0.0874, "step": 3612 }, { "epoch": 0.8220705346985211, "grad_norm": 1.6565633171924414, "learning_rate": 1.1685078727825978e-06, "loss": 0.0589, "step": 3613 }, { "epoch": 0.8222980659840728, "grad_norm": 1.461206103355595, "learning_rate": 1.1684637513799976e-06, "loss": 0.0627, "step": 3614 }, { "epoch": 0.8225255972696246, "grad_norm": 0.9887213175076792, "learning_rate": 1.1684196188699936e-06, "loss": 0.0459, "step": 3615 }, { "epoch": 0.8227531285551763, "grad_norm": 1.3363304037819075, "learning_rate": 1.168375475253488e-06, "loss": 0.0467, "step": 3616 }, { "epoch": 0.8229806598407281, "grad_norm": 1.178055634767022, "learning_rate": 1.168331320531383e-06, "loss": 0.0471, "step": 3617 }, { "epoch": 0.8232081911262799, "grad_norm": 1.586381066611804, "learning_rate": 1.1682871547045805e-06, "loss": 0.0708, "step": 3618 }, { "epoch": 0.8234357224118316, "grad_norm": 1.209859608299182, "learning_rate": 1.1682429777739836e-06, "loss": 0.0513, "step": 3619 }, { "epoch": 0.8236632536973834, "grad_norm": 1.7764116713616094, "learning_rate": 1.168198789740495e-06, "loss": 0.0638, "step": 3620 }, { "epoch": 0.8238907849829351, "grad_norm": 0.7763254752373752, "learning_rate": 1.1681545906050184e-06, "loss": 0.0343, "step": 3621 }, { "epoch": 0.824118316268487, "grad_norm": 1.1001948497692553, "learning_rate": 1.1681103803684564e-06, "loss": 0.056, "step": 3622 }, { "epoch": 0.8243458475540387, "grad_norm": 1.273383386134855, "learning_rate": 1.168066159031713e-06, "loss": 0.0468, "step": 3623 }, { "epoch": 0.8245733788395905, "grad_norm": 1.5388101293640168, "learning_rate": 1.1680219265956918e-06, "loss": 0.0558, "step": 3624 }, { "epoch": 0.8248009101251422, "grad_norm": 1.1316172883077475, "learning_rate": 1.167977683061297e-06, "loss": 0.0434, "step": 3625 }, { "epoch": 0.825028441410694, "grad_norm": 1.3402414111306105, "learning_rate": 1.1679334284294328e-06, "loss": 0.0601, "step": 3626 }, { "epoch": 0.8252559726962457, "grad_norm": 0.8814362527241864, "learning_rate": 1.1678891627010036e-06, "loss": 0.0498, "step": 3627 }, { "epoch": 0.8254835039817975, "grad_norm": 1.1662546055909706, "learning_rate": 1.1678448858769139e-06, "loss": 0.0493, "step": 3628 }, { "epoch": 0.8257110352673492, "grad_norm": 1.8562897841170296, "learning_rate": 1.1678005979580694e-06, "loss": 0.0834, "step": 3629 }, { "epoch": 0.825938566552901, "grad_norm": 0.8447551801193321, "learning_rate": 1.1677562989453745e-06, "loss": 0.0283, "step": 3630 }, { "epoch": 0.8261660978384527, "grad_norm": 1.0072638573362171, "learning_rate": 1.1677119888397348e-06, "loss": 0.049, "step": 3631 }, { "epoch": 0.8263936291240046, "grad_norm": 1.433958970376907, "learning_rate": 1.1676676676420562e-06, "loss": 0.0601, "step": 3632 }, { "epoch": 0.8266211604095564, "grad_norm": 1.123241025021127, "learning_rate": 1.1676233353532442e-06, "loss": 0.046, "step": 3633 }, { "epoch": 0.8268486916951081, "grad_norm": 0.7445525822110918, "learning_rate": 1.167578991974205e-06, "loss": 0.0181, "step": 3634 }, { "epoch": 0.8270762229806599, "grad_norm": 0.7798344340235215, "learning_rate": 1.167534637505845e-06, "loss": 0.0272, "step": 3635 }, { "epoch": 0.8273037542662116, "grad_norm": 1.0329542174985942, "learning_rate": 1.1674902719490704e-06, "loss": 0.0283, "step": 3636 }, { "epoch": 0.8275312855517634, "grad_norm": 1.3105774584535312, "learning_rate": 1.1674458953047883e-06, "loss": 0.0768, "step": 3637 }, { "epoch": 0.8277588168373151, "grad_norm": 1.403061308791836, "learning_rate": 1.1674015075739054e-06, "loss": 0.0524, "step": 3638 }, { "epoch": 0.8279863481228669, "grad_norm": 1.525826516533729, "learning_rate": 1.1673571087573293e-06, "loss": 0.035, "step": 3639 }, { "epoch": 0.8282138794084186, "grad_norm": 1.578985773093579, "learning_rate": 1.167312698855967e-06, "loss": 0.0686, "step": 3640 }, { "epoch": 0.8284414106939704, "grad_norm": 0.8598678068916811, "learning_rate": 1.1672682778707262e-06, "loss": 0.0434, "step": 3641 }, { "epoch": 0.8286689419795222, "grad_norm": 1.1746515581399284, "learning_rate": 1.167223845802515e-06, "loss": 0.0683, "step": 3642 }, { "epoch": 0.828896473265074, "grad_norm": 1.101963672107574, "learning_rate": 1.1671794026522417e-06, "loss": 0.0446, "step": 3643 }, { "epoch": 0.8291240045506257, "grad_norm": 1.29174643335613, "learning_rate": 1.1671349484208142e-06, "loss": 0.0631, "step": 3644 }, { "epoch": 0.8293515358361775, "grad_norm": 14.688190742848866, "learning_rate": 1.1670904831091412e-06, "loss": 0.3068, "step": 3645 }, { "epoch": 0.8295790671217292, "grad_norm": 1.997677369000564, "learning_rate": 1.1670460067181313e-06, "loss": 0.097, "step": 3646 }, { "epoch": 0.829806598407281, "grad_norm": 2.0170155054153844, "learning_rate": 1.167001519248694e-06, "loss": 0.091, "step": 3647 }, { "epoch": 0.8300341296928327, "grad_norm": 1.445412257420854, "learning_rate": 1.1669570207017384e-06, "loss": 0.0451, "step": 3648 }, { "epoch": 0.8302616609783845, "grad_norm": 1.5233401207808903, "learning_rate": 1.1669125110781737e-06, "loss": 0.0606, "step": 3649 }, { "epoch": 0.8304891922639362, "grad_norm": 0.7835188426142936, "learning_rate": 1.1668679903789095e-06, "loss": 0.0332, "step": 3650 }, { "epoch": 0.830716723549488, "grad_norm": 1.4515524685426144, "learning_rate": 1.1668234586048562e-06, "loss": 0.0596, "step": 3651 }, { "epoch": 0.8309442548350399, "grad_norm": 0.9655835502300143, "learning_rate": 1.1667789157569237e-06, "loss": 0.0409, "step": 3652 }, { "epoch": 0.8311717861205916, "grad_norm": 1.393653817047434, "learning_rate": 1.1667343618360224e-06, "loss": 0.0469, "step": 3653 }, { "epoch": 0.8313993174061434, "grad_norm": 1.483284458995605, "learning_rate": 1.166689796843063e-06, "loss": 0.071, "step": 3654 }, { "epoch": 0.8316268486916951, "grad_norm": 2.0171503740210004, "learning_rate": 1.166645220778956e-06, "loss": 0.113, "step": 3655 }, { "epoch": 0.8318543799772469, "grad_norm": 1.1504742515281876, "learning_rate": 1.1666006336446127e-06, "loss": 0.0524, "step": 3656 }, { "epoch": 0.8320819112627986, "grad_norm": 45.01747083144473, "learning_rate": 1.1665560354409444e-06, "loss": 0.4523, "step": 3657 }, { "epoch": 0.8323094425483504, "grad_norm": 1.112433200629069, "learning_rate": 1.1665114261688625e-06, "loss": 0.0668, "step": 3658 }, { "epoch": 0.8325369738339021, "grad_norm": 0.8757946163066259, "learning_rate": 1.1664668058292789e-06, "loss": 0.031, "step": 3659 }, { "epoch": 0.8327645051194539, "grad_norm": 1.4231045164348157, "learning_rate": 1.1664221744231052e-06, "loss": 0.0557, "step": 3660 }, { "epoch": 0.8329920364050057, "grad_norm": 0.817775869454815, "learning_rate": 1.1663775319512539e-06, "loss": 0.033, "step": 3661 }, { "epoch": 0.8332195676905575, "grad_norm": 1.1781931600979176, "learning_rate": 1.1663328784146375e-06, "loss": 0.0642, "step": 3662 }, { "epoch": 0.8334470989761092, "grad_norm": 0.9259664097181735, "learning_rate": 1.1662882138141684e-06, "loss": 0.04, "step": 3663 }, { "epoch": 0.833674630261661, "grad_norm": 0.9223727967515527, "learning_rate": 1.1662435381507595e-06, "loss": 0.0348, "step": 3664 }, { "epoch": 0.8339021615472128, "grad_norm": 2.505531351215165, "learning_rate": 1.1661988514253237e-06, "loss": 0.0541, "step": 3665 }, { "epoch": 0.8341296928327645, "grad_norm": 0.8185437890691741, "learning_rate": 1.1661541536387748e-06, "loss": 0.0304, "step": 3666 }, { "epoch": 0.8343572241183163, "grad_norm": 1.4951738593038986, "learning_rate": 1.166109444792026e-06, "loss": 0.0621, "step": 3667 }, { "epoch": 0.834584755403868, "grad_norm": 1.3025368287248427, "learning_rate": 1.1660647248859913e-06, "loss": 0.0516, "step": 3668 }, { "epoch": 0.8348122866894198, "grad_norm": 1.2218241621957284, "learning_rate": 1.1660199939215845e-06, "loss": 0.0425, "step": 3669 }, { "epoch": 0.8350398179749715, "grad_norm": 1.3776947069628735, "learning_rate": 1.1659752518997197e-06, "loss": 0.0398, "step": 3670 }, { "epoch": 0.8352673492605234, "grad_norm": 1.1397273421298342, "learning_rate": 1.1659304988213115e-06, "loss": 0.065, "step": 3671 }, { "epoch": 0.8354948805460751, "grad_norm": 1.0434688435298627, "learning_rate": 1.1658857346872745e-06, "loss": 0.0566, "step": 3672 }, { "epoch": 0.8357224118316269, "grad_norm": 1.5440568255758365, "learning_rate": 1.165840959498524e-06, "loss": 0.0688, "step": 3673 }, { "epoch": 0.8359499431171786, "grad_norm": 1.115123935337787, "learning_rate": 1.1657961732559745e-06, "loss": 0.032, "step": 3674 }, { "epoch": 0.8361774744027304, "grad_norm": 2.009941649682246, "learning_rate": 1.1657513759605417e-06, "loss": 0.1067, "step": 3675 }, { "epoch": 0.8364050056882821, "grad_norm": 1.0878721650862986, "learning_rate": 1.1657065676131412e-06, "loss": 0.039, "step": 3676 }, { "epoch": 0.8366325369738339, "grad_norm": 1.2823447557648144, "learning_rate": 1.1656617482146886e-06, "loss": 0.0565, "step": 3677 }, { "epoch": 0.8368600682593856, "grad_norm": 0.8640896631578039, "learning_rate": 1.1656169177661e-06, "loss": 0.0286, "step": 3678 }, { "epoch": 0.8370875995449374, "grad_norm": 1.5431241138574887, "learning_rate": 1.165572076268292e-06, "loss": 0.0777, "step": 3679 }, { "epoch": 0.8373151308304891, "grad_norm": 2.182613338071724, "learning_rate": 1.1655272237221804e-06, "loss": 0.0811, "step": 3680 }, { "epoch": 0.837542662116041, "grad_norm": 1.6601496114451646, "learning_rate": 1.1654823601286826e-06, "loss": 0.0705, "step": 3681 }, { "epoch": 0.8377701934015928, "grad_norm": 0.9845739508563496, "learning_rate": 1.165437485488715e-06, "loss": 0.0352, "step": 3682 }, { "epoch": 0.8379977246871445, "grad_norm": 1.7268236108563282, "learning_rate": 1.165392599803195e-06, "loss": 0.0813, "step": 3683 }, { "epoch": 0.8382252559726963, "grad_norm": 1.7016772440974146, "learning_rate": 1.16534770307304e-06, "loss": 0.0686, "step": 3684 }, { "epoch": 0.838452787258248, "grad_norm": 1.1115991957924602, "learning_rate": 1.1653027952991676e-06, "loss": 0.0453, "step": 3685 }, { "epoch": 0.8386803185437998, "grad_norm": 1.1625036802126243, "learning_rate": 1.1652578764824953e-06, "loss": 0.0547, "step": 3686 }, { "epoch": 0.8389078498293515, "grad_norm": 1.2752562383666506, "learning_rate": 1.1652129466239417e-06, "loss": 0.0557, "step": 3687 }, { "epoch": 0.8391353811149033, "grad_norm": 1.1708119141481568, "learning_rate": 1.1651680057244247e-06, "loss": 0.0543, "step": 3688 }, { "epoch": 0.839362912400455, "grad_norm": 1.5686634859706143, "learning_rate": 1.165123053784863e-06, "loss": 0.0631, "step": 3689 }, { "epoch": 0.8395904436860068, "grad_norm": 0.7862204435509706, "learning_rate": 1.1650780908061753e-06, "loss": 0.0268, "step": 3690 }, { "epoch": 0.8398179749715586, "grad_norm": 0.8894683444434565, "learning_rate": 1.1650331167892805e-06, "loss": 0.0447, "step": 3691 }, { "epoch": 0.8400455062571104, "grad_norm": 1.1634157087855133, "learning_rate": 1.1649881317350979e-06, "loss": 0.0563, "step": 3692 }, { "epoch": 0.8402730375426621, "grad_norm": 0.8154942879262764, "learning_rate": 1.1649431356445467e-06, "loss": 0.0286, "step": 3693 }, { "epoch": 0.8405005688282139, "grad_norm": 1.377478587198256, "learning_rate": 1.1648981285185464e-06, "loss": 0.0616, "step": 3694 }, { "epoch": 0.8407281001137656, "grad_norm": 1.4564713307212853, "learning_rate": 1.1648531103580178e-06, "loss": 0.0537, "step": 3695 }, { "epoch": 0.8409556313993174, "grad_norm": 1.1442584647082312, "learning_rate": 1.1648080811638797e-06, "loss": 0.0514, "step": 3696 }, { "epoch": 0.8411831626848691, "grad_norm": 0.6059546954643502, "learning_rate": 1.1647630409370533e-06, "loss": 0.024, "step": 3697 }, { "epoch": 0.8414106939704209, "grad_norm": 0.845020155561466, "learning_rate": 1.1647179896784588e-06, "loss": 0.0297, "step": 3698 }, { "epoch": 0.8416382252559726, "grad_norm": 1.1265316983407672, "learning_rate": 1.1646729273890172e-06, "loss": 0.0385, "step": 3699 }, { "epoch": 0.8418657565415245, "grad_norm": 1.5081431023506024, "learning_rate": 1.1646278540696493e-06, "loss": 0.0791, "step": 3700 }, { "epoch": 0.8420932878270763, "grad_norm": 1.0623449631960677, "learning_rate": 1.1645827697212762e-06, "loss": 0.0366, "step": 3701 }, { "epoch": 0.842320819112628, "grad_norm": 1.0697058302573508, "learning_rate": 1.1645376743448194e-06, "loss": 0.0365, "step": 3702 }, { "epoch": 0.8425483503981798, "grad_norm": 1.6166494157045328, "learning_rate": 1.1644925679412008e-06, "loss": 0.0917, "step": 3703 }, { "epoch": 0.8427758816837315, "grad_norm": 0.8909409395783954, "learning_rate": 1.1644474505113423e-06, "loss": 0.0483, "step": 3704 }, { "epoch": 0.8430034129692833, "grad_norm": 1.2947960727076373, "learning_rate": 1.1644023220561657e-06, "loss": 0.0841, "step": 3705 }, { "epoch": 0.843230944254835, "grad_norm": 2.338681558244469, "learning_rate": 1.1643571825765935e-06, "loss": 0.0884, "step": 3706 }, { "epoch": 0.8434584755403868, "grad_norm": 1.0808444836814672, "learning_rate": 1.1643120320735481e-06, "loss": 0.0468, "step": 3707 }, { "epoch": 0.8436860068259385, "grad_norm": 3.1483674383789007, "learning_rate": 1.164266870547953e-06, "loss": 0.1461, "step": 3708 }, { "epoch": 0.8439135381114903, "grad_norm": 1.1429682418848544, "learning_rate": 1.16422169800073e-06, "loss": 0.0575, "step": 3709 }, { "epoch": 0.8441410693970421, "grad_norm": 1.4092804732703959, "learning_rate": 1.1641765144328035e-06, "loss": 0.0824, "step": 3710 }, { "epoch": 0.8443686006825939, "grad_norm": 0.9907223592123003, "learning_rate": 1.1641313198450966e-06, "loss": 0.0408, "step": 3711 }, { "epoch": 0.8445961319681456, "grad_norm": 1.1953791862365195, "learning_rate": 1.1640861142385326e-06, "loss": 0.0417, "step": 3712 }, { "epoch": 0.8448236632536974, "grad_norm": 1.0300444272783411, "learning_rate": 1.1640408976140358e-06, "loss": 0.0292, "step": 3713 }, { "epoch": 0.8450511945392492, "grad_norm": 0.9396034384804057, "learning_rate": 1.1639956699725303e-06, "loss": 0.0251, "step": 3714 }, { "epoch": 0.8452787258248009, "grad_norm": 1.0450241907142919, "learning_rate": 1.1639504313149403e-06, "loss": 0.0427, "step": 3715 }, { "epoch": 0.8455062571103527, "grad_norm": 1.2352022092638624, "learning_rate": 1.1639051816421906e-06, "loss": 0.0622, "step": 3716 }, { "epoch": 0.8457337883959044, "grad_norm": 1.7465197987251446, "learning_rate": 1.163859920955206e-06, "loss": 0.0646, "step": 3717 }, { "epoch": 0.8459613196814562, "grad_norm": 1.1198862623805916, "learning_rate": 1.1638146492549112e-06, "loss": 0.057, "step": 3718 }, { "epoch": 0.8461888509670079, "grad_norm": 0.9891506478501289, "learning_rate": 1.163769366542232e-06, "loss": 0.043, "step": 3719 }, { "epoch": 0.8464163822525598, "grad_norm": 0.8307264131023004, "learning_rate": 1.1637240728180937e-06, "loss": 0.0374, "step": 3720 }, { "epoch": 0.8466439135381115, "grad_norm": 1.2015516317579826, "learning_rate": 1.163678768083422e-06, "loss": 0.0542, "step": 3721 }, { "epoch": 0.8468714448236633, "grad_norm": 0.8651705828559965, "learning_rate": 1.1636334523391426e-06, "loss": 0.0333, "step": 3722 }, { "epoch": 0.847098976109215, "grad_norm": 1.8721051492604341, "learning_rate": 1.1635881255861821e-06, "loss": 0.1021, "step": 3723 }, { "epoch": 0.8473265073947668, "grad_norm": 1.7947647079315026, "learning_rate": 1.1635427878254667e-06, "loss": 0.0741, "step": 3724 }, { "epoch": 0.8475540386803185, "grad_norm": 2.1673257995211506, "learning_rate": 1.163497439057923e-06, "loss": 0.084, "step": 3725 }, { "epoch": 0.8477815699658703, "grad_norm": 1.1269894105236045, "learning_rate": 1.1634520792844778e-06, "loss": 0.0675, "step": 3726 }, { "epoch": 0.848009101251422, "grad_norm": 1.205510809010935, "learning_rate": 1.1634067085060583e-06, "loss": 0.0495, "step": 3727 }, { "epoch": 0.8482366325369738, "grad_norm": 0.691849376485242, "learning_rate": 1.1633613267235915e-06, "loss": 0.0466, "step": 3728 }, { "epoch": 0.8484641638225257, "grad_norm": 1.7367118780028674, "learning_rate": 1.1633159339380054e-06, "loss": 0.0796, "step": 3729 }, { "epoch": 0.8486916951080774, "grad_norm": 1.0717660762894743, "learning_rate": 1.1632705301502272e-06, "loss": 0.04, "step": 3730 }, { "epoch": 0.8489192263936292, "grad_norm": 1.5860944536440218, "learning_rate": 1.1632251153611855e-06, "loss": 0.0494, "step": 3731 }, { "epoch": 0.8491467576791809, "grad_norm": 1.8503643346395675, "learning_rate": 1.163179689571808e-06, "loss": 0.0747, "step": 3732 }, { "epoch": 0.8493742889647327, "grad_norm": 1.51685414443071, "learning_rate": 1.1631342527830234e-06, "loss": 0.0605, "step": 3733 }, { "epoch": 0.8496018202502844, "grad_norm": 5.054888475581482, "learning_rate": 1.16308880499576e-06, "loss": 0.1453, "step": 3734 }, { "epoch": 0.8498293515358362, "grad_norm": 0.9938442986727953, "learning_rate": 1.163043346210947e-06, "loss": 0.0402, "step": 3735 }, { "epoch": 0.8500568828213879, "grad_norm": 1.5560463292525197, "learning_rate": 1.1629978764295133e-06, "loss": 0.0735, "step": 3736 }, { "epoch": 0.8502844141069397, "grad_norm": 1.6867577558544848, "learning_rate": 1.1629523956523883e-06, "loss": 0.0488, "step": 3737 }, { "epoch": 0.8505119453924914, "grad_norm": 1.0404339887342127, "learning_rate": 1.1629069038805018e-06, "loss": 0.0328, "step": 3738 }, { "epoch": 0.8507394766780433, "grad_norm": 1.2922834659902496, "learning_rate": 1.162861401114783e-06, "loss": 0.0643, "step": 3739 }, { "epoch": 0.850967007963595, "grad_norm": 1.0645197047514645, "learning_rate": 1.1628158873561624e-06, "loss": 0.0527, "step": 3740 }, { "epoch": 0.8511945392491468, "grad_norm": 1.2986459720013372, "learning_rate": 1.16277036260557e-06, "loss": 0.0562, "step": 3741 }, { "epoch": 0.8514220705346985, "grad_norm": 0.9856861581794601, "learning_rate": 1.1627248268639363e-06, "loss": 0.0395, "step": 3742 }, { "epoch": 0.8516496018202503, "grad_norm": 1.0675074473205326, "learning_rate": 1.1626792801321917e-06, "loss": 0.0675, "step": 3743 }, { "epoch": 0.851877133105802, "grad_norm": 2.38754548600943, "learning_rate": 1.1626337224112676e-06, "loss": 0.0971, "step": 3744 }, { "epoch": 0.8521046643913538, "grad_norm": 1.7266469693034243, "learning_rate": 1.1625881537020948e-06, "loss": 0.0671, "step": 3745 }, { "epoch": 0.8523321956769055, "grad_norm": 1.280551624533034, "learning_rate": 1.1625425740056046e-06, "loss": 0.0377, "step": 3746 }, { "epoch": 0.8525597269624573, "grad_norm": 1.237081721406354, "learning_rate": 1.1624969833227287e-06, "loss": 0.059, "step": 3747 }, { "epoch": 0.852787258248009, "grad_norm": 0.9836714855093116, "learning_rate": 1.1624513816543988e-06, "loss": 0.0362, "step": 3748 }, { "epoch": 0.8530147895335609, "grad_norm": 1.9867742968928817, "learning_rate": 1.162405769001547e-06, "loss": 0.0645, "step": 3749 }, { "epoch": 0.8532423208191127, "grad_norm": 0.8182858794812171, "learning_rate": 1.1623601453651053e-06, "loss": 0.0355, "step": 3750 }, { "epoch": 0.8534698521046644, "grad_norm": 1.4917273253143806, "learning_rate": 1.1623145107460065e-06, "loss": 0.0503, "step": 3751 }, { "epoch": 0.8536973833902162, "grad_norm": 0.7287193210043196, "learning_rate": 1.1622688651451833e-06, "loss": 0.0307, "step": 3752 }, { "epoch": 0.8539249146757679, "grad_norm": 1.4100424416120585, "learning_rate": 1.1622232085635683e-06, "loss": 0.0826, "step": 3753 }, { "epoch": 0.8541524459613197, "grad_norm": 1.2920863855722, "learning_rate": 1.162177541002095e-06, "loss": 0.057, "step": 3754 }, { "epoch": 0.8543799772468714, "grad_norm": 0.9760542142070106, "learning_rate": 1.162131862461696e-06, "loss": 0.0424, "step": 3755 }, { "epoch": 0.8546075085324232, "grad_norm": 1.1155922665776505, "learning_rate": 1.1620861729433062e-06, "loss": 0.0548, "step": 3756 }, { "epoch": 0.8548350398179749, "grad_norm": 2.0265637739770646, "learning_rate": 1.1620404724478582e-06, "loss": 0.1044, "step": 3757 }, { "epoch": 0.8550625711035267, "grad_norm": 1.2011014044384718, "learning_rate": 1.1619947609762867e-06, "loss": 0.0564, "step": 3758 }, { "epoch": 0.8552901023890785, "grad_norm": 6.130934132803181, "learning_rate": 1.1619490385295255e-06, "loss": 0.2002, "step": 3759 }, { "epoch": 0.8555176336746303, "grad_norm": 1.113651539392019, "learning_rate": 1.1619033051085094e-06, "loss": 0.0652, "step": 3760 }, { "epoch": 0.855745164960182, "grad_norm": 1.286325265245382, "learning_rate": 1.161857560714173e-06, "loss": 0.0588, "step": 3761 }, { "epoch": 0.8559726962457338, "grad_norm": 1.7600474072446048, "learning_rate": 1.1618118053474514e-06, "loss": 0.0893, "step": 3762 }, { "epoch": 0.8562002275312856, "grad_norm": 1.6589882412706265, "learning_rate": 1.1617660390092794e-06, "loss": 0.0745, "step": 3763 }, { "epoch": 0.8564277588168373, "grad_norm": 1.2790558914988392, "learning_rate": 1.161720261700593e-06, "loss": 0.057, "step": 3764 }, { "epoch": 0.856655290102389, "grad_norm": 1.0102177907448584, "learning_rate": 1.161674473422327e-06, "loss": 0.0413, "step": 3765 }, { "epoch": 0.8568828213879408, "grad_norm": 1.5326235862910704, "learning_rate": 1.1616286741754178e-06, "loss": 0.0605, "step": 3766 }, { "epoch": 0.8571103526734926, "grad_norm": 0.8465015567779428, "learning_rate": 1.1615828639608013e-06, "loss": 0.0312, "step": 3767 }, { "epoch": 0.8573378839590444, "grad_norm": 1.5793350610162067, "learning_rate": 1.1615370427794138e-06, "loss": 0.0945, "step": 3768 }, { "epoch": 0.8575654152445962, "grad_norm": 1.2162812251052018, "learning_rate": 1.1614912106321916e-06, "loss": 0.0752, "step": 3769 }, { "epoch": 0.8577929465301479, "grad_norm": 1.8496686418444572, "learning_rate": 1.1614453675200718e-06, "loss": 0.0722, "step": 3770 }, { "epoch": 0.8580204778156997, "grad_norm": 1.6913533101626717, "learning_rate": 1.1613995134439912e-06, "loss": 0.0725, "step": 3771 }, { "epoch": 0.8582480091012514, "grad_norm": 3.096645898390307, "learning_rate": 1.1613536484048866e-06, "loss": 0.1055, "step": 3772 }, { "epoch": 0.8584755403868032, "grad_norm": 1.6446149120178801, "learning_rate": 1.161307772403696e-06, "loss": 0.0597, "step": 3773 }, { "epoch": 0.8587030716723549, "grad_norm": 1.5393305823965842, "learning_rate": 1.1612618854413566e-06, "loss": 0.0732, "step": 3774 }, { "epoch": 0.8589306029579067, "grad_norm": 1.9421552759578808, "learning_rate": 1.1612159875188065e-06, "loss": 0.0693, "step": 3775 }, { "epoch": 0.8591581342434584, "grad_norm": 1.3707910624756399, "learning_rate": 1.1611700786369835e-06, "loss": 0.0585, "step": 3776 }, { "epoch": 0.8593856655290102, "grad_norm": 1.5332539464674453, "learning_rate": 1.161124158796826e-06, "loss": 0.0679, "step": 3777 }, { "epoch": 0.859613196814562, "grad_norm": 0.8257875510353138, "learning_rate": 1.1610782279992728e-06, "loss": 0.0405, "step": 3778 }, { "epoch": 0.8598407281001138, "grad_norm": 0.8774289661633246, "learning_rate": 1.161032286245262e-06, "loss": 0.0303, "step": 3779 }, { "epoch": 0.8600682593856656, "grad_norm": 2.167218867354055, "learning_rate": 1.1609863335357332e-06, "loss": 0.077, "step": 3780 }, { "epoch": 0.8602957906712173, "grad_norm": 1.8941215544947247, "learning_rate": 1.1609403698716255e-06, "loss": 0.0786, "step": 3781 }, { "epoch": 0.8605233219567691, "grad_norm": 1.1276115513513825, "learning_rate": 1.160894395253878e-06, "loss": 0.0537, "step": 3782 }, { "epoch": 0.8607508532423208, "grad_norm": 1.7706734723923294, "learning_rate": 1.1608484096834306e-06, "loss": 0.0922, "step": 3783 }, { "epoch": 0.8609783845278726, "grad_norm": 1.2558738456157548, "learning_rate": 1.1608024131612231e-06, "loss": 0.0652, "step": 3784 }, { "epoch": 0.8612059158134243, "grad_norm": 0.9032710072829923, "learning_rate": 1.1607564056881953e-06, "loss": 0.0379, "step": 3785 }, { "epoch": 0.8614334470989761, "grad_norm": 1.190052750152249, "learning_rate": 1.160710387265288e-06, "loss": 0.0444, "step": 3786 }, { "epoch": 0.8616609783845278, "grad_norm": 0.9786079939895075, "learning_rate": 1.1606643578934414e-06, "loss": 0.0418, "step": 3787 }, { "epoch": 0.8618885096700797, "grad_norm": 2.273287744213828, "learning_rate": 1.1606183175735963e-06, "loss": 0.0738, "step": 3788 }, { "epoch": 0.8621160409556314, "grad_norm": 1.0946877128994326, "learning_rate": 1.1605722663066938e-06, "loss": 0.0473, "step": 3789 }, { "epoch": 0.8623435722411832, "grad_norm": 1.3819454647043339, "learning_rate": 1.1605262040936752e-06, "loss": 0.0735, "step": 3790 }, { "epoch": 0.8625711035267349, "grad_norm": 1.263046575159459, "learning_rate": 1.1604801309354815e-06, "loss": 0.0595, "step": 3791 }, { "epoch": 0.8627986348122867, "grad_norm": 1.0133652978063783, "learning_rate": 1.1604340468330546e-06, "loss": 0.0307, "step": 3792 }, { "epoch": 0.8630261660978384, "grad_norm": 1.2379613127015137, "learning_rate": 1.1603879517873366e-06, "loss": 0.0517, "step": 3793 }, { "epoch": 0.8632536973833902, "grad_norm": 1.1750697280707245, "learning_rate": 1.160341845799269e-06, "loss": 0.0482, "step": 3794 }, { "epoch": 0.863481228668942, "grad_norm": 1.6791780297526506, "learning_rate": 1.160295728869795e-06, "loss": 0.1042, "step": 3795 }, { "epoch": 0.8637087599544937, "grad_norm": 1.1451895610966802, "learning_rate": 1.1602496009998562e-06, "loss": 0.0483, "step": 3796 }, { "epoch": 0.8639362912400455, "grad_norm": 0.737910160878505, "learning_rate": 1.160203462190396e-06, "loss": 0.0264, "step": 3797 }, { "epoch": 0.8641638225255973, "grad_norm": 1.0917499952318759, "learning_rate": 1.1601573124423573e-06, "loss": 0.0553, "step": 3798 }, { "epoch": 0.8643913538111491, "grad_norm": 1.4396023675409693, "learning_rate": 1.1601111517566831e-06, "loss": 0.0593, "step": 3799 }, { "epoch": 0.8646188850967008, "grad_norm": 1.302598540309529, "learning_rate": 1.1600649801343173e-06, "loss": 0.0761, "step": 3800 }, { "epoch": 0.8648464163822526, "grad_norm": 0.9487038984159104, "learning_rate": 1.1600187975762029e-06, "loss": 0.0533, "step": 3801 }, { "epoch": 0.8650739476678043, "grad_norm": 0.8318957052334727, "learning_rate": 1.159972604083284e-06, "loss": 0.0281, "step": 3802 }, { "epoch": 0.8653014789533561, "grad_norm": 1.2107437712653222, "learning_rate": 1.159926399656505e-06, "loss": 0.0465, "step": 3803 }, { "epoch": 0.8655290102389078, "grad_norm": 1.4023241592897753, "learning_rate": 1.1598801842968103e-06, "loss": 0.0395, "step": 3804 }, { "epoch": 0.8657565415244596, "grad_norm": 1.021600867386226, "learning_rate": 1.1598339580051439e-06, "loss": 0.042, "step": 3805 }, { "epoch": 0.8659840728100113, "grad_norm": 1.0092710955033857, "learning_rate": 1.159787720782451e-06, "loss": 0.0513, "step": 3806 }, { "epoch": 0.8662116040955632, "grad_norm": 1.517102557965297, "learning_rate": 1.1597414726296764e-06, "loss": 0.0823, "step": 3807 }, { "epoch": 0.8664391353811149, "grad_norm": 1.354754585750691, "learning_rate": 1.1596952135477656e-06, "loss": 0.0619, "step": 3808 }, { "epoch": 0.8666666666666667, "grad_norm": 2.107511992431661, "learning_rate": 1.1596489435376638e-06, "loss": 0.1053, "step": 3809 }, { "epoch": 0.8668941979522184, "grad_norm": 1.660224356687729, "learning_rate": 1.1596026626003168e-06, "loss": 0.1028, "step": 3810 }, { "epoch": 0.8671217292377702, "grad_norm": 1.1996277276445466, "learning_rate": 1.1595563707366705e-06, "loss": 0.0802, "step": 3811 }, { "epoch": 0.867349260523322, "grad_norm": 1.3676154758246724, "learning_rate": 1.1595100679476707e-06, "loss": 0.0662, "step": 3812 }, { "epoch": 0.8675767918088737, "grad_norm": 1.2961263261205425, "learning_rate": 1.1594637542342644e-06, "loss": 0.0569, "step": 3813 }, { "epoch": 0.8678043230944255, "grad_norm": 1.0638382967882807, "learning_rate": 1.1594174295973976e-06, "loss": 0.0527, "step": 3814 }, { "epoch": 0.8680318543799772, "grad_norm": 1.2853107355416882, "learning_rate": 1.1593710940380172e-06, "loss": 0.053, "step": 3815 }, { "epoch": 0.868259385665529, "grad_norm": 1.4112153541469374, "learning_rate": 1.1593247475570704e-06, "loss": 0.0652, "step": 3816 }, { "epoch": 0.8684869169510808, "grad_norm": 0.6600043862976881, "learning_rate": 1.1592783901555043e-06, "loss": 0.0235, "step": 3817 }, { "epoch": 0.8687144482366326, "grad_norm": 1.483562739056723, "learning_rate": 1.1592320218342665e-06, "loss": 0.0635, "step": 3818 }, { "epoch": 0.8689419795221843, "grad_norm": 1.0821712914528594, "learning_rate": 1.1591856425943044e-06, "loss": 0.0576, "step": 3819 }, { "epoch": 0.8691695108077361, "grad_norm": 1.2023944040312258, "learning_rate": 1.159139252436566e-06, "loss": 0.0448, "step": 3820 }, { "epoch": 0.8693970420932878, "grad_norm": 1.2261008981178048, "learning_rate": 1.1590928513619997e-06, "loss": 0.079, "step": 3821 }, { "epoch": 0.8696245733788396, "grad_norm": 0.9103768778718383, "learning_rate": 1.1590464393715536e-06, "loss": 0.0319, "step": 3822 }, { "epoch": 0.8698521046643913, "grad_norm": 1.0450339587824766, "learning_rate": 1.1590000164661763e-06, "loss": 0.0632, "step": 3823 }, { "epoch": 0.8700796359499431, "grad_norm": 1.0245424665524807, "learning_rate": 1.1589535826468168e-06, "loss": 0.0615, "step": 3824 }, { "epoch": 0.8703071672354948, "grad_norm": 1.624369945490237, "learning_rate": 1.158907137914424e-06, "loss": 0.0988, "step": 3825 }, { "epoch": 0.8705346985210466, "grad_norm": 1.1284177897194427, "learning_rate": 1.158860682269947e-06, "loss": 0.0488, "step": 3826 }, { "epoch": 0.8707622298065985, "grad_norm": 1.2276202548958175, "learning_rate": 1.1588142157143353e-06, "loss": 0.0608, "step": 3827 }, { "epoch": 0.8709897610921502, "grad_norm": 1.3822371198286005, "learning_rate": 1.1587677382485386e-06, "loss": 0.0468, "step": 3828 }, { "epoch": 0.871217292377702, "grad_norm": 1.106888167025077, "learning_rate": 1.158721249873507e-06, "loss": 0.0636, "step": 3829 }, { "epoch": 0.8714448236632537, "grad_norm": 0.9393892132586369, "learning_rate": 1.1586747505901904e-06, "loss": 0.0416, "step": 3830 }, { "epoch": 0.8716723549488055, "grad_norm": 1.1085193545572236, "learning_rate": 1.1586282403995395e-06, "loss": 0.0541, "step": 3831 }, { "epoch": 0.8718998862343572, "grad_norm": 1.345809504190513, "learning_rate": 1.1585817193025046e-06, "loss": 0.0513, "step": 3832 }, { "epoch": 0.872127417519909, "grad_norm": 0.8494118924798915, "learning_rate": 1.1585351873000365e-06, "loss": 0.03, "step": 3833 }, { "epoch": 0.8723549488054607, "grad_norm": 1.2837019051092946, "learning_rate": 1.1584886443930863e-06, "loss": 0.0529, "step": 3834 }, { "epoch": 0.8725824800910125, "grad_norm": 1.6558407244456526, "learning_rate": 1.1584420905826051e-06, "loss": 0.0833, "step": 3835 }, { "epoch": 0.8728100113765643, "grad_norm": 2.9844675595475887, "learning_rate": 1.1583955258695447e-06, "loss": 0.1321, "step": 3836 }, { "epoch": 0.8730375426621161, "grad_norm": 1.2652070426430007, "learning_rate": 1.1583489502548566e-06, "loss": 0.0795, "step": 3837 }, { "epoch": 0.8732650739476678, "grad_norm": 2.0137801774238344, "learning_rate": 1.1583023637394928e-06, "loss": 0.066, "step": 3838 }, { "epoch": 0.8734926052332196, "grad_norm": 1.2580173572147197, "learning_rate": 1.1582557663244052e-06, "loss": 0.0768, "step": 3839 }, { "epoch": 0.8737201365187713, "grad_norm": 1.300183703835354, "learning_rate": 1.1582091580105464e-06, "loss": 0.0609, "step": 3840 }, { "epoch": 0.8739476678043231, "grad_norm": 1.6278123129636566, "learning_rate": 1.158162538798869e-06, "loss": 0.0459, "step": 3841 }, { "epoch": 0.8741751990898748, "grad_norm": 1.4397538286299336, "learning_rate": 1.158115908690326e-06, "loss": 0.0895, "step": 3842 }, { "epoch": 0.8744027303754266, "grad_norm": 0.6209497855296535, "learning_rate": 1.1580692676858699e-06, "loss": 0.0383, "step": 3843 }, { "epoch": 0.8746302616609783, "grad_norm": 1.7840447917226478, "learning_rate": 1.1580226157864542e-06, "loss": 0.0761, "step": 3844 }, { "epoch": 0.8748577929465301, "grad_norm": 1.3094120438469554, "learning_rate": 1.1579759529930324e-06, "loss": 0.0403, "step": 3845 }, { "epoch": 0.875085324232082, "grad_norm": 2.141642949759066, "learning_rate": 1.1579292793065583e-06, "loss": 0.0849, "step": 3846 }, { "epoch": 0.8753128555176337, "grad_norm": 1.3320582954398776, "learning_rate": 1.157882594727986e-06, "loss": 0.0581, "step": 3847 }, { "epoch": 0.8755403868031855, "grad_norm": 2.7977842973683478, "learning_rate": 1.1578358992582689e-06, "loss": 0.0733, "step": 3848 }, { "epoch": 0.8757679180887372, "grad_norm": 1.273538584951798, "learning_rate": 1.1577891928983622e-06, "loss": 0.075, "step": 3849 }, { "epoch": 0.875995449374289, "grad_norm": 1.189871225174409, "learning_rate": 1.15774247564922e-06, "loss": 0.0533, "step": 3850 }, { "epoch": 0.8762229806598407, "grad_norm": 1.3416154016304795, "learning_rate": 1.1576957475117973e-06, "loss": 0.0687, "step": 3851 }, { "epoch": 0.8764505119453925, "grad_norm": 1.6440039914240385, "learning_rate": 1.1576490084870493e-06, "loss": 0.0573, "step": 3852 }, { "epoch": 0.8766780432309442, "grad_norm": 1.2895981463406705, "learning_rate": 1.1576022585759308e-06, "loss": 0.0626, "step": 3853 }, { "epoch": 0.876905574516496, "grad_norm": 1.0617961429852751, "learning_rate": 1.1575554977793975e-06, "loss": 0.0425, "step": 3854 }, { "epoch": 0.8771331058020477, "grad_norm": 0.7670265616309823, "learning_rate": 1.1575087260984056e-06, "loss": 0.0297, "step": 3855 }, { "epoch": 0.8773606370875996, "grad_norm": 0.9091034119562531, "learning_rate": 1.1574619435339101e-06, "loss": 0.0505, "step": 3856 }, { "epoch": 0.8775881683731513, "grad_norm": 0.721924678198625, "learning_rate": 1.157415150086868e-06, "loss": 0.0334, "step": 3857 }, { "epoch": 0.8778156996587031, "grad_norm": 1.028224217020184, "learning_rate": 1.1573683457582349e-06, "loss": 0.0504, "step": 3858 }, { "epoch": 0.8780432309442548, "grad_norm": 0.9265903973410824, "learning_rate": 1.157321530548968e-06, "loss": 0.0384, "step": 3859 }, { "epoch": 0.8782707622298066, "grad_norm": 1.0369597512628028, "learning_rate": 1.157274704460024e-06, "loss": 0.0421, "step": 3860 }, { "epoch": 0.8784982935153584, "grad_norm": 0.8116397369448615, "learning_rate": 1.1572278674923598e-06, "loss": 0.0382, "step": 3861 }, { "epoch": 0.8787258248009101, "grad_norm": 1.366642693958556, "learning_rate": 1.1571810196469326e-06, "loss": 0.0886, "step": 3862 }, { "epoch": 0.8789533560864619, "grad_norm": 0.9890955350681031, "learning_rate": 1.1571341609247003e-06, "loss": 0.0449, "step": 3863 }, { "epoch": 0.8791808873720136, "grad_norm": 1.4331436845492822, "learning_rate": 1.1570872913266202e-06, "loss": 0.0823, "step": 3864 }, { "epoch": 0.8794084186575654, "grad_norm": 0.9885779670626057, "learning_rate": 1.1570404108536501e-06, "loss": 0.0291, "step": 3865 }, { "epoch": 0.8796359499431172, "grad_norm": 1.0999651301917803, "learning_rate": 1.1569935195067487e-06, "loss": 0.041, "step": 3866 }, { "epoch": 0.879863481228669, "grad_norm": 1.1538168389141554, "learning_rate": 1.1569466172868737e-06, "loss": 0.0308, "step": 3867 }, { "epoch": 0.8800910125142207, "grad_norm": 0.737445292579344, "learning_rate": 1.1568997041949843e-06, "loss": 0.0304, "step": 3868 }, { "epoch": 0.8803185437997725, "grad_norm": 1.0078853967937846, "learning_rate": 1.156852780232039e-06, "loss": 0.0373, "step": 3869 }, { "epoch": 0.8805460750853242, "grad_norm": 1.1880834513401282, "learning_rate": 1.156805845398997e-06, "loss": 0.0483, "step": 3870 }, { "epoch": 0.880773606370876, "grad_norm": 1.0071545839978109, "learning_rate": 1.1567588996968173e-06, "loss": 0.0379, "step": 3871 }, { "epoch": 0.8810011376564277, "grad_norm": 1.0549498497637813, "learning_rate": 1.1567119431264598e-06, "loss": 0.058, "step": 3872 }, { "epoch": 0.8812286689419795, "grad_norm": 1.5102339027747615, "learning_rate": 1.156664975688884e-06, "loss": 0.0607, "step": 3873 }, { "epoch": 0.8814562002275312, "grad_norm": 1.8905446477817547, "learning_rate": 1.1566179973850496e-06, "loss": 0.11, "step": 3874 }, { "epoch": 0.8816837315130831, "grad_norm": 0.6411112129672515, "learning_rate": 1.156571008215917e-06, "loss": 0.0307, "step": 3875 }, { "epoch": 0.8819112627986349, "grad_norm": 0.748342490109174, "learning_rate": 1.1565240081824466e-06, "loss": 0.0284, "step": 3876 }, { "epoch": 0.8821387940841866, "grad_norm": 1.354485160913724, "learning_rate": 1.1564769972855987e-06, "loss": 0.0491, "step": 3877 }, { "epoch": 0.8823663253697384, "grad_norm": 1.0100612900654617, "learning_rate": 1.1564299755263345e-06, "loss": 0.061, "step": 3878 }, { "epoch": 0.8825938566552901, "grad_norm": 1.5275951944341757, "learning_rate": 1.1563829429056148e-06, "loss": 0.0997, "step": 3879 }, { "epoch": 0.8828213879408419, "grad_norm": 1.2004586776908042, "learning_rate": 1.156335899424401e-06, "loss": 0.0407, "step": 3880 }, { "epoch": 0.8830489192263936, "grad_norm": 1.191033638879632, "learning_rate": 1.1562888450836544e-06, "loss": 0.0705, "step": 3881 }, { "epoch": 0.8832764505119454, "grad_norm": 2.5519305254154117, "learning_rate": 1.156241779884337e-06, "loss": 0.0995, "step": 3882 }, { "epoch": 0.8835039817974971, "grad_norm": 1.1448529875713787, "learning_rate": 1.1561947038274104e-06, "loss": 0.0409, "step": 3883 }, { "epoch": 0.8837315130830489, "grad_norm": 1.3287084471615846, "learning_rate": 1.156147616913837e-06, "loss": 0.0658, "step": 3884 }, { "epoch": 0.8839590443686007, "grad_norm": 1.607480285345695, "learning_rate": 1.156100519144579e-06, "loss": 0.065, "step": 3885 }, { "epoch": 0.8841865756541525, "grad_norm": 0.8350311325241287, "learning_rate": 1.156053410520599e-06, "loss": 0.028, "step": 3886 }, { "epoch": 0.8844141069397042, "grad_norm": 1.2493913843113744, "learning_rate": 1.15600629104286e-06, "loss": 0.054, "step": 3887 }, { "epoch": 0.884641638225256, "grad_norm": 0.8917197871451082, "learning_rate": 1.1559591607123248e-06, "loss": 0.0529, "step": 3888 }, { "epoch": 0.8848691695108077, "grad_norm": 0.9543671098376685, "learning_rate": 1.1559120195299566e-06, "loss": 0.0482, "step": 3889 }, { "epoch": 0.8850967007963595, "grad_norm": 2.016256579364082, "learning_rate": 1.1558648674967191e-06, "loss": 0.0989, "step": 3890 }, { "epoch": 0.8853242320819112, "grad_norm": 1.1851720940142052, "learning_rate": 1.1558177046135761e-06, "loss": 0.0478, "step": 3891 }, { "epoch": 0.885551763367463, "grad_norm": 1.4662655950086683, "learning_rate": 1.1557705308814914e-06, "loss": 0.058, "step": 3892 }, { "epoch": 0.8857792946530147, "grad_norm": 0.7173696835484785, "learning_rate": 1.155723346301429e-06, "loss": 0.0316, "step": 3893 }, { "epoch": 0.8860068259385665, "grad_norm": 1.2706849402987643, "learning_rate": 1.1556761508743532e-06, "loss": 0.0822, "step": 3894 }, { "epoch": 0.8862343572241184, "grad_norm": 1.609473277966293, "learning_rate": 1.1556289446012292e-06, "loss": 0.0687, "step": 3895 }, { "epoch": 0.8864618885096701, "grad_norm": 1.388883872823661, "learning_rate": 1.155581727483021e-06, "loss": 0.0583, "step": 3896 }, { "epoch": 0.8866894197952219, "grad_norm": 1.2281956278668817, "learning_rate": 1.1555344995206941e-06, "loss": 0.0627, "step": 3897 }, { "epoch": 0.8869169510807736, "grad_norm": 0.814456870112058, "learning_rate": 1.1554872607152138e-06, "loss": 0.0294, "step": 3898 }, { "epoch": 0.8871444823663254, "grad_norm": 2.0519731112865855, "learning_rate": 1.1554400110675453e-06, "loss": 0.054, "step": 3899 }, { "epoch": 0.8873720136518771, "grad_norm": 0.61176670012776, "learning_rate": 1.1553927505786543e-06, "loss": 0.0195, "step": 3900 }, { "epoch": 0.8875995449374289, "grad_norm": 1.0712439727415621, "learning_rate": 1.1553454792495072e-06, "loss": 0.0397, "step": 3901 }, { "epoch": 0.8878270762229806, "grad_norm": 1.1593590248854935, "learning_rate": 1.1552981970810694e-06, "loss": 0.0551, "step": 3902 }, { "epoch": 0.8880546075085324, "grad_norm": 1.082392405172485, "learning_rate": 1.1552509040743078e-06, "loss": 0.0423, "step": 3903 }, { "epoch": 0.8882821387940842, "grad_norm": 1.2264806717691155, "learning_rate": 1.1552036002301891e-06, "loss": 0.0614, "step": 3904 }, { "epoch": 0.888509670079636, "grad_norm": 1.8769370812382, "learning_rate": 1.1551562855496796e-06, "loss": 0.1051, "step": 3905 }, { "epoch": 0.8887372013651877, "grad_norm": 0.8452780282160288, "learning_rate": 1.1551089600337465e-06, "loss": 0.0456, "step": 3906 }, { "epoch": 0.8889647326507395, "grad_norm": 1.0073082653206435, "learning_rate": 1.1550616236833574e-06, "loss": 0.0527, "step": 3907 }, { "epoch": 0.8891922639362912, "grad_norm": 1.4600634269759383, "learning_rate": 1.155014276499479e-06, "loss": 0.0589, "step": 3908 }, { "epoch": 0.889419795221843, "grad_norm": 0.9260386118996091, "learning_rate": 1.1549669184830796e-06, "loss": 0.0296, "step": 3909 }, { "epoch": 0.8896473265073948, "grad_norm": 1.4635888779797275, "learning_rate": 1.1549195496351271e-06, "loss": 0.092, "step": 3910 }, { "epoch": 0.8898748577929465, "grad_norm": 1.0776139556085673, "learning_rate": 1.1548721699565896e-06, "loss": 0.0517, "step": 3911 }, { "epoch": 0.8901023890784983, "grad_norm": 1.9827438270104123, "learning_rate": 1.1548247794484353e-06, "loss": 0.0953, "step": 3912 }, { "epoch": 0.89032992036405, "grad_norm": 1.0572692038363223, "learning_rate": 1.1547773781116326e-06, "loss": 0.0493, "step": 3913 }, { "epoch": 0.8905574516496019, "grad_norm": 0.9208219945202761, "learning_rate": 1.1547299659471509e-06, "loss": 0.0347, "step": 3914 }, { "epoch": 0.8907849829351536, "grad_norm": 0.7636650309650683, "learning_rate": 1.1546825429559585e-06, "loss": 0.0394, "step": 3915 }, { "epoch": 0.8910125142207054, "grad_norm": 1.8570270747724098, "learning_rate": 1.1546351091390253e-06, "loss": 0.0756, "step": 3916 }, { "epoch": 0.8912400455062571, "grad_norm": 2.2137237056291625, "learning_rate": 1.1545876644973202e-06, "loss": 0.0552, "step": 3917 }, { "epoch": 0.8914675767918089, "grad_norm": 1.314419860717642, "learning_rate": 1.1545402090318133e-06, "loss": 0.0746, "step": 3918 }, { "epoch": 0.8916951080773606, "grad_norm": 1.0660978255955464, "learning_rate": 1.1544927427434743e-06, "loss": 0.0544, "step": 3919 }, { "epoch": 0.8919226393629124, "grad_norm": 1.2951418524657141, "learning_rate": 1.1544452656332733e-06, "loss": 0.0585, "step": 3920 }, { "epoch": 0.8921501706484641, "grad_norm": 1.266569155658392, "learning_rate": 1.1543977777021808e-06, "loss": 0.0593, "step": 3921 }, { "epoch": 0.8923777019340159, "grad_norm": 1.0985430730465215, "learning_rate": 1.1543502789511671e-06, "loss": 0.057, "step": 3922 }, { "epoch": 0.8926052332195676, "grad_norm": 1.0051340610905692, "learning_rate": 1.1543027693812033e-06, "loss": 0.044, "step": 3923 }, { "epoch": 0.8928327645051195, "grad_norm": 0.8979646754734382, "learning_rate": 1.15425524899326e-06, "loss": 0.0381, "step": 3924 }, { "epoch": 0.8930602957906713, "grad_norm": 1.8955096750982607, "learning_rate": 1.154207717788309e-06, "loss": 0.0698, "step": 3925 }, { "epoch": 0.893287827076223, "grad_norm": 0.7837134343494108, "learning_rate": 1.1541601757673216e-06, "loss": 0.021, "step": 3926 }, { "epoch": 0.8935153583617748, "grad_norm": 1.406918667470208, "learning_rate": 1.154112622931269e-06, "loss": 0.0633, "step": 3927 }, { "epoch": 0.8937428896473265, "grad_norm": 1.6626757430888262, "learning_rate": 1.1540650592811233e-06, "loss": 0.0802, "step": 3928 }, { "epoch": 0.8939704209328783, "grad_norm": 2.7246018561325025, "learning_rate": 1.154017484817857e-06, "loss": 0.1196, "step": 3929 }, { "epoch": 0.89419795221843, "grad_norm": 1.3964197751853908, "learning_rate": 1.1539698995424423e-06, "loss": 0.0619, "step": 3930 }, { "epoch": 0.8944254835039818, "grad_norm": 1.2631409164792344, "learning_rate": 1.1539223034558513e-06, "loss": 0.0513, "step": 3931 }, { "epoch": 0.8946530147895335, "grad_norm": 1.0255766384543599, "learning_rate": 1.1538746965590572e-06, "loss": 0.0428, "step": 3932 }, { "epoch": 0.8948805460750853, "grad_norm": 0.4848782114580742, "learning_rate": 1.153827078853033e-06, "loss": 0.0267, "step": 3933 }, { "epoch": 0.8951080773606371, "grad_norm": 1.0020187672794154, "learning_rate": 1.1537794503387516e-06, "loss": 0.0494, "step": 3934 }, { "epoch": 0.8953356086461889, "grad_norm": 1.83637730371515, "learning_rate": 1.1537318110171867e-06, "loss": 0.0759, "step": 3935 }, { "epoch": 0.8955631399317406, "grad_norm": 1.9684228773442944, "learning_rate": 1.153684160889312e-06, "loss": 0.1322, "step": 3936 }, { "epoch": 0.8957906712172924, "grad_norm": 2.0616350638849172, "learning_rate": 1.1536364999561011e-06, "loss": 0.0855, "step": 3937 }, { "epoch": 0.8960182025028441, "grad_norm": 4.2442677487614, "learning_rate": 1.1535888282185283e-06, "loss": 0.1942, "step": 3938 }, { "epoch": 0.8962457337883959, "grad_norm": 1.5621794154117725, "learning_rate": 1.1535411456775682e-06, "loss": 0.0611, "step": 3939 }, { "epoch": 0.8964732650739476, "grad_norm": 1.0048473835575604, "learning_rate": 1.1534934523341952e-06, "loss": 0.0484, "step": 3940 }, { "epoch": 0.8967007963594994, "grad_norm": 0.9747410927457769, "learning_rate": 1.1534457481893834e-06, "loss": 0.0321, "step": 3941 }, { "epoch": 0.8969283276450511, "grad_norm": 0.8739461699351664, "learning_rate": 1.1533980332441085e-06, "loss": 0.0363, "step": 3942 }, { "epoch": 0.897155858930603, "grad_norm": 1.1216851883585208, "learning_rate": 1.1533503074993455e-06, "loss": 0.0423, "step": 3943 }, { "epoch": 0.8973833902161548, "grad_norm": 0.9832239028189882, "learning_rate": 1.15330257095607e-06, "loss": 0.0458, "step": 3944 }, { "epoch": 0.8976109215017065, "grad_norm": 0.8256408828846264, "learning_rate": 1.1532548236152574e-06, "loss": 0.0376, "step": 3945 }, { "epoch": 0.8978384527872583, "grad_norm": 2.409677737424434, "learning_rate": 1.1532070654778838e-06, "loss": 0.1075, "step": 3946 }, { "epoch": 0.89806598407281, "grad_norm": 1.6618831194214136, "learning_rate": 1.1531592965449249e-06, "loss": 0.0724, "step": 3947 }, { "epoch": 0.8982935153583618, "grad_norm": 1.1965129320838301, "learning_rate": 1.1531115168173574e-06, "loss": 0.0563, "step": 3948 }, { "epoch": 0.8985210466439135, "grad_norm": 1.6580223852928602, "learning_rate": 1.1530637262961574e-06, "loss": 0.0901, "step": 3949 }, { "epoch": 0.8987485779294653, "grad_norm": 2.0705952551342124, "learning_rate": 1.1530159249823022e-06, "loss": 0.1004, "step": 3950 }, { "epoch": 0.898976109215017, "grad_norm": 1.0142357819912748, "learning_rate": 1.1529681128767686e-06, "loss": 0.0412, "step": 3951 }, { "epoch": 0.8992036405005688, "grad_norm": 1.1350339928761526, "learning_rate": 1.1529202899805336e-06, "loss": 0.0538, "step": 3952 }, { "epoch": 0.8994311717861206, "grad_norm": 1.5799378264286232, "learning_rate": 1.1528724562945748e-06, "loss": 0.0833, "step": 3953 }, { "epoch": 0.8996587030716724, "grad_norm": 1.1208895769557774, "learning_rate": 1.1528246118198697e-06, "loss": 0.039, "step": 3954 }, { "epoch": 0.8998862343572241, "grad_norm": 1.160212672909766, "learning_rate": 1.1527767565573961e-06, "loss": 0.0271, "step": 3955 }, { "epoch": 0.9001137656427759, "grad_norm": 1.689340759598001, "learning_rate": 1.152728890508132e-06, "loss": 0.085, "step": 3956 }, { "epoch": 0.9003412969283277, "grad_norm": 6.049824984821008, "learning_rate": 1.1526810136730562e-06, "loss": 0.0767, "step": 3957 }, { "epoch": 0.9005688282138794, "grad_norm": 1.5293674377800182, "learning_rate": 1.1526331260531467e-06, "loss": 0.0652, "step": 3958 }, { "epoch": 0.9007963594994312, "grad_norm": 0.9268964770876272, "learning_rate": 1.1525852276493825e-06, "loss": 0.0373, "step": 3959 }, { "epoch": 0.9010238907849829, "grad_norm": 1.8729631573608785, "learning_rate": 1.1525373184627426e-06, "loss": 0.0883, "step": 3960 }, { "epoch": 0.9012514220705347, "grad_norm": 1.3972976693669097, "learning_rate": 1.1524893984942059e-06, "loss": 0.056, "step": 3961 }, { "epoch": 0.9014789533560864, "grad_norm": 1.0645332178479103, "learning_rate": 1.152441467744752e-06, "loss": 0.0482, "step": 3962 }, { "epoch": 0.9017064846416383, "grad_norm": 0.8491387015963779, "learning_rate": 1.1523935262153604e-06, "loss": 0.037, "step": 3963 }, { "epoch": 0.90193401592719, "grad_norm": 1.9173276752149597, "learning_rate": 1.152345573907011e-06, "loss": 0.0756, "step": 3964 }, { "epoch": 0.9021615472127418, "grad_norm": 1.0482343731205466, "learning_rate": 1.1522976108206838e-06, "loss": 0.0426, "step": 3965 }, { "epoch": 0.9023890784982935, "grad_norm": 1.5912063871488282, "learning_rate": 1.1522496369573592e-06, "loss": 0.0635, "step": 3966 }, { "epoch": 0.9026166097838453, "grad_norm": 1.0885136963654516, "learning_rate": 1.1522016523180177e-06, "loss": 0.0467, "step": 3967 }, { "epoch": 0.902844141069397, "grad_norm": 1.0253021604975354, "learning_rate": 1.15215365690364e-06, "loss": 0.0402, "step": 3968 }, { "epoch": 0.9030716723549488, "grad_norm": 1.6954250726492197, "learning_rate": 1.1521056507152068e-06, "loss": 0.0775, "step": 3969 }, { "epoch": 0.9032992036405005, "grad_norm": 2.193731037581251, "learning_rate": 1.1520576337536995e-06, "loss": 0.1161, "step": 3970 }, { "epoch": 0.9035267349260523, "grad_norm": 1.7077975253874977, "learning_rate": 1.1520096060200995e-06, "loss": 0.0947, "step": 3971 }, { "epoch": 0.903754266211604, "grad_norm": 1.5399563597984878, "learning_rate": 1.1519615675153884e-06, "loss": 0.1044, "step": 3972 }, { "epoch": 0.9039817974971559, "grad_norm": 1.7617983578353158, "learning_rate": 1.1519135182405477e-06, "loss": 0.0684, "step": 3973 }, { "epoch": 0.9042093287827077, "grad_norm": 1.3968116012863456, "learning_rate": 1.1518654581965597e-06, "loss": 0.0688, "step": 3974 }, { "epoch": 0.9044368600682594, "grad_norm": 1.1385754513385686, "learning_rate": 1.1518173873844068e-06, "loss": 0.0349, "step": 3975 }, { "epoch": 0.9046643913538112, "grad_norm": 0.8893866615862518, "learning_rate": 1.1517693058050714e-06, "loss": 0.0309, "step": 3976 }, { "epoch": 0.9048919226393629, "grad_norm": 1.539477830489965, "learning_rate": 1.151721213459536e-06, "loss": 0.0689, "step": 3977 }, { "epoch": 0.9051194539249147, "grad_norm": 1.1195140916495427, "learning_rate": 1.1516731103487836e-06, "loss": 0.051, "step": 3978 }, { "epoch": 0.9053469852104664, "grad_norm": 0.7220926512528832, "learning_rate": 1.1516249964737974e-06, "loss": 0.019, "step": 3979 }, { "epoch": 0.9055745164960182, "grad_norm": 1.0047441878180272, "learning_rate": 1.1515768718355607e-06, "loss": 0.0536, "step": 3980 }, { "epoch": 0.9058020477815699, "grad_norm": 1.1154458357876431, "learning_rate": 1.1515287364350573e-06, "loss": 0.0436, "step": 3981 }, { "epoch": 0.9060295790671218, "grad_norm": 1.083449314088406, "learning_rate": 1.1514805902732706e-06, "loss": 0.0403, "step": 3982 }, { "epoch": 0.9062571103526735, "grad_norm": 1.0839313718633037, "learning_rate": 1.151432433351185e-06, "loss": 0.0494, "step": 3983 }, { "epoch": 0.9064846416382253, "grad_norm": 1.6843996604661877, "learning_rate": 1.1513842656697844e-06, "loss": 0.0731, "step": 3984 }, { "epoch": 0.906712172923777, "grad_norm": 0.8893497549287934, "learning_rate": 1.1513360872300535e-06, "loss": 0.024, "step": 3985 }, { "epoch": 0.9069397042093288, "grad_norm": 1.6031300617012416, "learning_rate": 1.1512878980329771e-06, "loss": 0.0705, "step": 3986 }, { "epoch": 0.9071672354948805, "grad_norm": 1.129106422024724, "learning_rate": 1.1512396980795399e-06, "loss": 0.0568, "step": 3987 }, { "epoch": 0.9073947667804323, "grad_norm": 1.228932149349224, "learning_rate": 1.1511914873707269e-06, "loss": 0.0462, "step": 3988 }, { "epoch": 0.907622298065984, "grad_norm": 1.636371433136822, "learning_rate": 1.1511432659075234e-06, "loss": 0.0733, "step": 3989 }, { "epoch": 0.9078498293515358, "grad_norm": 1.1425256617133182, "learning_rate": 1.1510950336909154e-06, "loss": 0.0628, "step": 3990 }, { "epoch": 0.9080773606370875, "grad_norm": 1.01819882213196, "learning_rate": 1.1510467907218883e-06, "loss": 0.0442, "step": 3991 }, { "epoch": 0.9083048919226394, "grad_norm": 1.245888941943274, "learning_rate": 1.1509985370014283e-06, "loss": 0.0551, "step": 3992 }, { "epoch": 0.9085324232081912, "grad_norm": 1.3036968814546137, "learning_rate": 1.1509502725305214e-06, "loss": 0.0531, "step": 3993 }, { "epoch": 0.9087599544937429, "grad_norm": 0.9765098064954344, "learning_rate": 1.1509019973101542e-06, "loss": 0.0415, "step": 3994 }, { "epoch": 0.9089874857792947, "grad_norm": 0.8156281011799231, "learning_rate": 1.1508537113413134e-06, "loss": 0.041, "step": 3995 }, { "epoch": 0.9092150170648464, "grad_norm": 1.3936521974976825, "learning_rate": 1.1508054146249858e-06, "loss": 0.0397, "step": 3996 }, { "epoch": 0.9094425483503982, "grad_norm": 1.0646687699415507, "learning_rate": 1.1507571071621585e-06, "loss": 0.0357, "step": 3997 }, { "epoch": 0.9096700796359499, "grad_norm": 1.1522419098336925, "learning_rate": 1.1507087889538186e-06, "loss": 0.048, "step": 3998 }, { "epoch": 0.9098976109215017, "grad_norm": 1.2076402547928258, "learning_rate": 1.1506604600009542e-06, "loss": 0.0586, "step": 3999 }, { "epoch": 0.9101251422070534, "grad_norm": 1.0133219849273498, "learning_rate": 1.1506121203045524e-06, "loss": 0.0619, "step": 4000 }, { "epoch": 0.9103526734926052, "grad_norm": 0.8291805370083495, "learning_rate": 1.1505637698656014e-06, "loss": 0.0474, "step": 4001 }, { "epoch": 0.910580204778157, "grad_norm": 1.5524783948323588, "learning_rate": 1.1505154086850898e-06, "loss": 0.0696, "step": 4002 }, { "epoch": 0.9108077360637088, "grad_norm": 1.0896886188901207, "learning_rate": 1.1504670367640054e-06, "loss": 0.0349, "step": 4003 }, { "epoch": 0.9110352673492605, "grad_norm": 1.4368712196848394, "learning_rate": 1.1504186541033371e-06, "loss": 0.0972, "step": 4004 }, { "epoch": 0.9112627986348123, "grad_norm": 1.1609052845323367, "learning_rate": 1.150370260704074e-06, "loss": 0.0344, "step": 4005 }, { "epoch": 0.911490329920364, "grad_norm": 0.7261107332342037, "learning_rate": 1.1503218565672047e-06, "loss": 0.0378, "step": 4006 }, { "epoch": 0.9117178612059158, "grad_norm": 1.3224085375415395, "learning_rate": 1.1502734416937188e-06, "loss": 0.0806, "step": 4007 }, { "epoch": 0.9119453924914676, "grad_norm": 1.3655919422766778, "learning_rate": 1.150225016084606e-06, "loss": 0.0822, "step": 4008 }, { "epoch": 0.9121729237770193, "grad_norm": 0.8123755839338497, "learning_rate": 1.1501765797408558e-06, "loss": 0.0337, "step": 4009 }, { "epoch": 0.9124004550625711, "grad_norm": 1.8652137193155605, "learning_rate": 1.1501281326634578e-06, "loss": 0.0612, "step": 4010 }, { "epoch": 0.9126279863481229, "grad_norm": 0.9706539819037835, "learning_rate": 1.1500796748534026e-06, "loss": 0.0559, "step": 4011 }, { "epoch": 0.9128555176336747, "grad_norm": 0.6605470896041641, "learning_rate": 1.1500312063116803e-06, "loss": 0.0277, "step": 4012 }, { "epoch": 0.9130830489192264, "grad_norm": 2.0635463828713445, "learning_rate": 1.149982727039282e-06, "loss": 0.1237, "step": 4013 }, { "epoch": 0.9133105802047782, "grad_norm": 1.2700127801482208, "learning_rate": 1.149934237037198e-06, "loss": 0.0519, "step": 4014 }, { "epoch": 0.9135381114903299, "grad_norm": 1.0543579404973509, "learning_rate": 1.1498857363064198e-06, "loss": 0.0318, "step": 4015 }, { "epoch": 0.9137656427758817, "grad_norm": 1.3887452700800633, "learning_rate": 1.1498372248479383e-06, "loss": 0.0776, "step": 4016 }, { "epoch": 0.9139931740614334, "grad_norm": 1.1548765049955128, "learning_rate": 1.1497887026627451e-06, "loss": 0.0631, "step": 4017 }, { "epoch": 0.9142207053469852, "grad_norm": 1.4155291898606746, "learning_rate": 1.1497401697518318e-06, "loss": 0.0757, "step": 4018 }, { "epoch": 0.9144482366325369, "grad_norm": 1.5411778856885947, "learning_rate": 1.1496916261161908e-06, "loss": 0.0845, "step": 4019 }, { "epoch": 0.9146757679180887, "grad_norm": 0.8243491395066448, "learning_rate": 1.1496430717568136e-06, "loss": 0.0304, "step": 4020 }, { "epoch": 0.9149032992036406, "grad_norm": 1.0014800679630642, "learning_rate": 1.149594506674693e-06, "loss": 0.0443, "step": 4021 }, { "epoch": 0.9151308304891923, "grad_norm": 0.9825886288199939, "learning_rate": 1.1495459308708212e-06, "loss": 0.0285, "step": 4022 }, { "epoch": 0.9153583617747441, "grad_norm": 0.6708245548677098, "learning_rate": 1.1494973443461915e-06, "loss": 0.0258, "step": 4023 }, { "epoch": 0.9155858930602958, "grad_norm": 1.0354775698510426, "learning_rate": 1.1494487471017965e-06, "loss": 0.0516, "step": 4024 }, { "epoch": 0.9158134243458476, "grad_norm": 1.4705781324392022, "learning_rate": 1.1494001391386298e-06, "loss": 0.0588, "step": 4025 }, { "epoch": 0.9160409556313993, "grad_norm": 1.4496056117656928, "learning_rate": 1.1493515204576844e-06, "loss": 0.0453, "step": 4026 }, { "epoch": 0.9162684869169511, "grad_norm": 1.2256180174274225, "learning_rate": 1.1493028910599544e-06, "loss": 0.0474, "step": 4027 }, { "epoch": 0.9164960182025028, "grad_norm": 1.241479459058262, "learning_rate": 1.1492542509464333e-06, "loss": 0.0499, "step": 4028 }, { "epoch": 0.9167235494880546, "grad_norm": 1.0655237448875803, "learning_rate": 1.1492056001181157e-06, "loss": 0.04, "step": 4029 }, { "epoch": 0.9169510807736063, "grad_norm": 0.9311299042715484, "learning_rate": 1.1491569385759953e-06, "loss": 0.0353, "step": 4030 }, { "epoch": 0.9171786120591582, "grad_norm": 1.9476966702422809, "learning_rate": 1.1491082663210675e-06, "loss": 0.0822, "step": 4031 }, { "epoch": 0.9174061433447099, "grad_norm": 1.7080369705282124, "learning_rate": 1.1490595833543263e-06, "loss": 0.0921, "step": 4032 }, { "epoch": 0.9176336746302617, "grad_norm": 0.9700416110415793, "learning_rate": 1.1490108896767672e-06, "loss": 0.0376, "step": 4033 }, { "epoch": 0.9178612059158134, "grad_norm": 1.79218464701242, "learning_rate": 1.1489621852893849e-06, "loss": 0.0793, "step": 4034 }, { "epoch": 0.9180887372013652, "grad_norm": 1.5337857838361597, "learning_rate": 1.1489134701931753e-06, "loss": 0.0734, "step": 4035 }, { "epoch": 0.9183162684869169, "grad_norm": 2.2908857950335393, "learning_rate": 1.1488647443891339e-06, "loss": 0.1363, "step": 4036 }, { "epoch": 0.9185437997724687, "grad_norm": 0.9807386709388579, "learning_rate": 1.1488160078782565e-06, "loss": 0.0553, "step": 4037 }, { "epoch": 0.9187713310580204, "grad_norm": 1.610140836174014, "learning_rate": 1.148767260661539e-06, "loss": 0.0535, "step": 4038 }, { "epoch": 0.9189988623435722, "grad_norm": 1.8010870128097782, "learning_rate": 1.1487185027399783e-06, "loss": 0.0809, "step": 4039 }, { "epoch": 0.919226393629124, "grad_norm": 0.9946177354686938, "learning_rate": 1.1486697341145703e-06, "loss": 0.0308, "step": 4040 }, { "epoch": 0.9194539249146758, "grad_norm": 1.1243952505434611, "learning_rate": 1.148620954786312e-06, "loss": 0.0403, "step": 4041 }, { "epoch": 0.9196814562002276, "grad_norm": 1.1676663366717939, "learning_rate": 1.1485721647562005e-06, "loss": 0.0642, "step": 4042 }, { "epoch": 0.9199089874857793, "grad_norm": 1.2805472497836035, "learning_rate": 1.1485233640252328e-06, "loss": 0.0495, "step": 4043 }, { "epoch": 0.9201365187713311, "grad_norm": 1.176529756201314, "learning_rate": 1.1484745525944063e-06, "loss": 0.0425, "step": 4044 }, { "epoch": 0.9203640500568828, "grad_norm": 1.4292232195143082, "learning_rate": 1.1484257304647187e-06, "loss": 0.0792, "step": 4045 }, { "epoch": 0.9205915813424346, "grad_norm": 1.7932020137107854, "learning_rate": 1.1483768976371677e-06, "loss": 0.0727, "step": 4046 }, { "epoch": 0.9208191126279863, "grad_norm": 1.6644682540806308, "learning_rate": 1.1483280541127513e-06, "loss": 0.062, "step": 4047 }, { "epoch": 0.9210466439135381, "grad_norm": 1.1510951655579869, "learning_rate": 1.1482791998924681e-06, "loss": 0.0434, "step": 4048 }, { "epoch": 0.9212741751990898, "grad_norm": 1.19496202376025, "learning_rate": 1.1482303349773164e-06, "loss": 0.0679, "step": 4049 }, { "epoch": 0.9215017064846417, "grad_norm": 1.154368424237523, "learning_rate": 1.1481814593682946e-06, "loss": 0.0396, "step": 4050 }, { "epoch": 0.9217292377701934, "grad_norm": 1.3809033535643431, "learning_rate": 1.1481325730664023e-06, "loss": 0.0772, "step": 4051 }, { "epoch": 0.9219567690557452, "grad_norm": 0.9785189844102623, "learning_rate": 1.148083676072638e-06, "loss": 0.039, "step": 4052 }, { "epoch": 0.922184300341297, "grad_norm": 1.4126999693949231, "learning_rate": 1.1480347683880016e-06, "loss": 0.0704, "step": 4053 }, { "epoch": 0.9224118316268487, "grad_norm": 0.8860494719620977, "learning_rate": 1.1479858500134924e-06, "loss": 0.0269, "step": 4054 }, { "epoch": 0.9226393629124005, "grad_norm": 1.4070564902702778, "learning_rate": 1.14793692095011e-06, "loss": 0.0775, "step": 4055 }, { "epoch": 0.9228668941979522, "grad_norm": 1.1117624571631268, "learning_rate": 1.147887981198855e-06, "loss": 0.0474, "step": 4056 }, { "epoch": 0.923094425483504, "grad_norm": 1.4621020662224937, "learning_rate": 1.147839030760727e-06, "loss": 0.069, "step": 4057 }, { "epoch": 0.9233219567690557, "grad_norm": 1.4729100301695202, "learning_rate": 1.1477900696367269e-06, "loss": 0.0696, "step": 4058 }, { "epoch": 0.9235494880546075, "grad_norm": 0.9208872591470431, "learning_rate": 1.147741097827855e-06, "loss": 0.0365, "step": 4059 }, { "epoch": 0.9237770193401593, "grad_norm": 0.9240892354847577, "learning_rate": 1.1476921153351126e-06, "loss": 0.0405, "step": 4060 }, { "epoch": 0.9240045506257111, "grad_norm": 1.4959558432174869, "learning_rate": 1.1476431221595005e-06, "loss": 0.0463, "step": 4061 }, { "epoch": 0.9242320819112628, "grad_norm": 1.332519816167777, "learning_rate": 1.1475941183020203e-06, "loss": 0.0658, "step": 4062 }, { "epoch": 0.9244596131968146, "grad_norm": 2.187146405056203, "learning_rate": 1.1475451037636733e-06, "loss": 0.0943, "step": 4063 }, { "epoch": 0.9246871444823663, "grad_norm": 1.3435930034506374, "learning_rate": 1.1474960785454615e-06, "loss": 0.0726, "step": 4064 }, { "epoch": 0.9249146757679181, "grad_norm": 1.3012028567242737, "learning_rate": 1.1474470426483868e-06, "loss": 0.0528, "step": 4065 }, { "epoch": 0.9251422070534698, "grad_norm": 1.0819099650921233, "learning_rate": 1.1473979960734513e-06, "loss": 0.0426, "step": 4066 }, { "epoch": 0.9253697383390216, "grad_norm": 0.781744966406713, "learning_rate": 1.1473489388216574e-06, "loss": 0.0367, "step": 4067 }, { "epoch": 0.9255972696245733, "grad_norm": 1.151095346965489, "learning_rate": 1.1472998708940079e-06, "loss": 0.0619, "step": 4068 }, { "epoch": 0.9258248009101251, "grad_norm": 1.3142684365826751, "learning_rate": 1.1472507922915056e-06, "loss": 0.0446, "step": 4069 }, { "epoch": 0.926052332195677, "grad_norm": 1.3392798642883632, "learning_rate": 1.1472017030151536e-06, "loss": 0.0675, "step": 4070 }, { "epoch": 0.9262798634812287, "grad_norm": 0.7396122034506598, "learning_rate": 1.147152603065955e-06, "loss": 0.0436, "step": 4071 }, { "epoch": 0.9265073947667805, "grad_norm": 1.3292817664212946, "learning_rate": 1.1471034924449133e-06, "loss": 0.0562, "step": 4072 }, { "epoch": 0.9267349260523322, "grad_norm": 1.9912088396901564, "learning_rate": 1.1470543711530328e-06, "loss": 0.0629, "step": 4073 }, { "epoch": 0.926962457337884, "grad_norm": 1.3570840904239112, "learning_rate": 1.147005239191317e-06, "loss": 0.0667, "step": 4074 }, { "epoch": 0.9271899886234357, "grad_norm": 1.2559298173099012, "learning_rate": 1.1469560965607699e-06, "loss": 0.0518, "step": 4075 }, { "epoch": 0.9274175199089875, "grad_norm": 1.0348081180884323, "learning_rate": 1.1469069432623965e-06, "loss": 0.0517, "step": 4076 }, { "epoch": 0.9276450511945392, "grad_norm": 1.0484875570709722, "learning_rate": 1.1468577792972004e-06, "loss": 0.0465, "step": 4077 }, { "epoch": 0.927872582480091, "grad_norm": 1.09203642071682, "learning_rate": 1.1468086046661874e-06, "loss": 0.0548, "step": 4078 }, { "epoch": 0.9281001137656428, "grad_norm": 1.7712907658090584, "learning_rate": 1.146759419370362e-06, "loss": 0.1043, "step": 4079 }, { "epoch": 0.9283276450511946, "grad_norm": 0.8782441484375744, "learning_rate": 1.14671022341073e-06, "loss": 0.0241, "step": 4080 }, { "epoch": 0.9285551763367463, "grad_norm": 1.3964199439198308, "learning_rate": 1.1466610167882963e-06, "loss": 0.0789, "step": 4081 }, { "epoch": 0.9287827076222981, "grad_norm": 1.3164169043622294, "learning_rate": 1.1466117995040666e-06, "loss": 0.0635, "step": 4082 }, { "epoch": 0.9290102389078498, "grad_norm": 1.2381160175958454, "learning_rate": 1.1465625715590473e-06, "loss": 0.0427, "step": 4083 }, { "epoch": 0.9292377701934016, "grad_norm": 1.0698402996548797, "learning_rate": 1.146513332954244e-06, "loss": 0.0626, "step": 4084 }, { "epoch": 0.9294653014789533, "grad_norm": 1.534739951538693, "learning_rate": 1.1464640836906635e-06, "loss": 0.0587, "step": 4085 }, { "epoch": 0.9296928327645051, "grad_norm": 1.0552598639917363, "learning_rate": 1.146414823769312e-06, "loss": 0.046, "step": 4086 }, { "epoch": 0.9299203640500568, "grad_norm": 0.7826783824026263, "learning_rate": 1.1463655531911963e-06, "loss": 0.0308, "step": 4087 }, { "epoch": 0.9301478953356086, "grad_norm": 0.8273022304812389, "learning_rate": 1.1463162719573236e-06, "loss": 0.031, "step": 4088 }, { "epoch": 0.9303754266211605, "grad_norm": 1.3288942117067748, "learning_rate": 1.1462669800687012e-06, "loss": 0.0564, "step": 4089 }, { "epoch": 0.9306029579067122, "grad_norm": 0.8195424504406558, "learning_rate": 1.1462176775263365e-06, "loss": 0.0349, "step": 4090 }, { "epoch": 0.930830489192264, "grad_norm": 1.69823108141483, "learning_rate": 1.1461683643312366e-06, "loss": 0.0698, "step": 4091 }, { "epoch": 0.9310580204778157, "grad_norm": 1.5933893580265006, "learning_rate": 1.1461190404844103e-06, "loss": 0.0748, "step": 4092 }, { "epoch": 0.9312855517633675, "grad_norm": 1.4412482142890468, "learning_rate": 1.1460697059868648e-06, "loss": 0.086, "step": 4093 }, { "epoch": 0.9315130830489192, "grad_norm": 1.1113383236310055, "learning_rate": 1.146020360839609e-06, "loss": 0.0379, "step": 4094 }, { "epoch": 0.931740614334471, "grad_norm": 0.7200940731856813, "learning_rate": 1.1459710050436513e-06, "loss": 0.0356, "step": 4095 }, { "epoch": 0.9319681456200227, "grad_norm": 1.4853749484298682, "learning_rate": 1.1459216385999999e-06, "loss": 0.0559, "step": 4096 }, { "epoch": 0.9321956769055745, "grad_norm": 0.9574272073778142, "learning_rate": 1.1458722615096648e-06, "loss": 0.0276, "step": 4097 }, { "epoch": 0.9324232081911262, "grad_norm": 0.8174617084003479, "learning_rate": 1.1458228737736542e-06, "loss": 0.0327, "step": 4098 }, { "epoch": 0.9326507394766781, "grad_norm": 1.3250207983841498, "learning_rate": 1.145773475392978e-06, "loss": 0.0516, "step": 4099 }, { "epoch": 0.9328782707622298, "grad_norm": 1.22249857755224, "learning_rate": 1.145724066368646e-06, "loss": 0.0637, "step": 4100 }, { "epoch": 0.9331058020477816, "grad_norm": 1.4663762999847116, "learning_rate": 1.1456746467016675e-06, "loss": 0.0567, "step": 4101 }, { "epoch": 0.9333333333333333, "grad_norm": 1.4030554644294082, "learning_rate": 1.1456252163930528e-06, "loss": 0.0584, "step": 4102 }, { "epoch": 0.9335608646188851, "grad_norm": 1.5109418446571456, "learning_rate": 1.1455757754438122e-06, "loss": 0.0497, "step": 4103 }, { "epoch": 0.9337883959044369, "grad_norm": 1.0191531582003377, "learning_rate": 1.1455263238549563e-06, "loss": 0.0562, "step": 4104 }, { "epoch": 0.9340159271899886, "grad_norm": 0.9931115670125917, "learning_rate": 1.1454768616274955e-06, "loss": 0.0494, "step": 4105 }, { "epoch": 0.9342434584755404, "grad_norm": 1.0861122916963688, "learning_rate": 1.1454273887624407e-06, "loss": 0.0548, "step": 4106 }, { "epoch": 0.9344709897610921, "grad_norm": 0.9828952905986, "learning_rate": 1.1453779052608032e-06, "loss": 0.0374, "step": 4107 }, { "epoch": 0.9346985210466439, "grad_norm": 1.5222459429366253, "learning_rate": 1.1453284111235947e-06, "loss": 0.0818, "step": 4108 }, { "epoch": 0.9349260523321957, "grad_norm": 1.1153500509733376, "learning_rate": 1.145278906351826e-06, "loss": 0.0409, "step": 4109 }, { "epoch": 0.9351535836177475, "grad_norm": 1.0587413994265493, "learning_rate": 1.1452293909465095e-06, "loss": 0.0669, "step": 4110 }, { "epoch": 0.9353811149032992, "grad_norm": 0.809823129379949, "learning_rate": 1.145179864908657e-06, "loss": 0.0304, "step": 4111 }, { "epoch": 0.935608646188851, "grad_norm": 1.173627001973459, "learning_rate": 1.1451303282392808e-06, "loss": 0.046, "step": 4112 }, { "epoch": 0.9358361774744027, "grad_norm": 1.301208608992828, "learning_rate": 1.145080780939393e-06, "loss": 0.0457, "step": 4113 }, { "epoch": 0.9360637087599545, "grad_norm": 1.1311235005804838, "learning_rate": 1.145031223010007e-06, "loss": 0.0293, "step": 4114 }, { "epoch": 0.9362912400455062, "grad_norm": 1.754429989040699, "learning_rate": 1.1449816544521347e-06, "loss": 0.0962, "step": 4115 }, { "epoch": 0.936518771331058, "grad_norm": 1.1523409260488855, "learning_rate": 1.1449320752667898e-06, "loss": 0.0622, "step": 4116 }, { "epoch": 0.9367463026166097, "grad_norm": 0.9453806040405214, "learning_rate": 1.1448824854549856e-06, "loss": 0.0303, "step": 4117 }, { "epoch": 0.9369738339021616, "grad_norm": 1.270090887527193, "learning_rate": 1.1448328850177356e-06, "loss": 0.057, "step": 4118 }, { "epoch": 0.9372013651877134, "grad_norm": 1.1742323486339012, "learning_rate": 1.1447832739560533e-06, "loss": 0.0689, "step": 4119 }, { "epoch": 0.9374288964732651, "grad_norm": 1.0965078798779573, "learning_rate": 1.1447336522709528e-06, "loss": 0.0616, "step": 4120 }, { "epoch": 0.9376564277588169, "grad_norm": 1.1403738547142621, "learning_rate": 1.1446840199634483e-06, "loss": 0.0681, "step": 4121 }, { "epoch": 0.9378839590443686, "grad_norm": 1.176539532491646, "learning_rate": 1.1446343770345544e-06, "loss": 0.0369, "step": 4122 }, { "epoch": 0.9381114903299204, "grad_norm": 0.8950308841444714, "learning_rate": 1.1445847234852853e-06, "loss": 0.0508, "step": 4123 }, { "epoch": 0.9383390216154721, "grad_norm": 2.6033063822464553, "learning_rate": 1.1445350593166559e-06, "loss": 0.0865, "step": 4124 }, { "epoch": 0.9385665529010239, "grad_norm": 1.4419905128708395, "learning_rate": 1.1444853845296816e-06, "loss": 0.0922, "step": 4125 }, { "epoch": 0.9387940841865756, "grad_norm": 0.810209427625322, "learning_rate": 1.1444356991253774e-06, "loss": 0.0302, "step": 4126 }, { "epoch": 0.9390216154721274, "grad_norm": 1.4059215847764233, "learning_rate": 1.1443860031047589e-06, "loss": 0.0584, "step": 4127 }, { "epoch": 0.9392491467576792, "grad_norm": 1.3946503335439686, "learning_rate": 1.1443362964688416e-06, "loss": 0.0665, "step": 4128 }, { "epoch": 0.939476678043231, "grad_norm": 2.123996937731276, "learning_rate": 1.1442865792186413e-06, "loss": 0.0869, "step": 4129 }, { "epoch": 0.9397042093287827, "grad_norm": 1.0634024569590812, "learning_rate": 1.1442368513551746e-06, "loss": 0.0418, "step": 4130 }, { "epoch": 0.9399317406143345, "grad_norm": 1.063642439356403, "learning_rate": 1.1441871128794576e-06, "loss": 0.0341, "step": 4131 }, { "epoch": 0.9401592718998862, "grad_norm": 0.9460769468351368, "learning_rate": 1.1441373637925068e-06, "loss": 0.0533, "step": 4132 }, { "epoch": 0.940386803185438, "grad_norm": 1.764351905359598, "learning_rate": 1.1440876040953392e-06, "loss": 0.1242, "step": 4133 }, { "epoch": 0.9406143344709897, "grad_norm": 1.4558757328006684, "learning_rate": 1.1440378337889713e-06, "loss": 0.0763, "step": 4134 }, { "epoch": 0.9408418657565415, "grad_norm": 0.9021601245337508, "learning_rate": 1.143988052874421e-06, "loss": 0.0409, "step": 4135 }, { "epoch": 0.9410693970420932, "grad_norm": 1.7188429838557109, "learning_rate": 1.143938261352705e-06, "loss": 0.069, "step": 4136 }, { "epoch": 0.941296928327645, "grad_norm": 1.2572237568519273, "learning_rate": 1.1438884592248416e-06, "loss": 0.0368, "step": 4137 }, { "epoch": 0.9415244596131969, "grad_norm": 1.2286297911170685, "learning_rate": 1.1438386464918483e-06, "loss": 0.0371, "step": 4138 }, { "epoch": 0.9417519908987486, "grad_norm": 1.664747740752945, "learning_rate": 1.1437888231547434e-06, "loss": 0.0793, "step": 4139 }, { "epoch": 0.9419795221843004, "grad_norm": 0.9027458023794878, "learning_rate": 1.143738989214545e-06, "loss": 0.0359, "step": 4140 }, { "epoch": 0.9422070534698521, "grad_norm": 0.9844227932686808, "learning_rate": 1.1436891446722718e-06, "loss": 0.0398, "step": 4141 }, { "epoch": 0.9424345847554039, "grad_norm": 2.473079458381896, "learning_rate": 1.1436392895289423e-06, "loss": 0.1426, "step": 4142 }, { "epoch": 0.9426621160409556, "grad_norm": 1.6695549407826555, "learning_rate": 1.1435894237855754e-06, "loss": 0.0569, "step": 4143 }, { "epoch": 0.9428896473265074, "grad_norm": 0.9951741584954418, "learning_rate": 1.1435395474431906e-06, "loss": 0.0354, "step": 4144 }, { "epoch": 0.9431171786120591, "grad_norm": 1.3165779040583256, "learning_rate": 1.143489660502807e-06, "loss": 0.062, "step": 4145 }, { "epoch": 0.9433447098976109, "grad_norm": 1.1419121851198617, "learning_rate": 1.1434397629654445e-06, "loss": 0.0628, "step": 4146 }, { "epoch": 0.9435722411831626, "grad_norm": 1.1143992408718018, "learning_rate": 1.1433898548321226e-06, "loss": 0.0461, "step": 4147 }, { "epoch": 0.9437997724687145, "grad_norm": 1.4419058001216918, "learning_rate": 1.1433399361038614e-06, "loss": 0.0622, "step": 4148 }, { "epoch": 0.9440273037542662, "grad_norm": 1.0878632797009244, "learning_rate": 1.1432900067816813e-06, "loss": 0.051, "step": 4149 }, { "epoch": 0.944254835039818, "grad_norm": 0.8678017410178684, "learning_rate": 1.1432400668666028e-06, "loss": 0.036, "step": 4150 }, { "epoch": 0.9444823663253697, "grad_norm": 2.5565159682952325, "learning_rate": 1.1431901163596462e-06, "loss": 0.1481, "step": 4151 }, { "epoch": 0.9447098976109215, "grad_norm": 1.1719746596557403, "learning_rate": 1.1431401552618327e-06, "loss": 0.0654, "step": 4152 }, { "epoch": 0.9449374288964733, "grad_norm": 0.7399300932811994, "learning_rate": 1.1430901835741833e-06, "loss": 0.0426, "step": 4153 }, { "epoch": 0.945164960182025, "grad_norm": 1.5392915168318702, "learning_rate": 1.1430402012977195e-06, "loss": 0.064, "step": 4154 }, { "epoch": 0.9453924914675768, "grad_norm": 0.8642603027613659, "learning_rate": 1.1429902084334627e-06, "loss": 0.0391, "step": 4155 }, { "epoch": 0.9456200227531285, "grad_norm": 1.5792578097649426, "learning_rate": 1.1429402049824348e-06, "loss": 0.0864, "step": 4156 }, { "epoch": 0.9458475540386804, "grad_norm": 1.0656243196471682, "learning_rate": 1.1428901909456575e-06, "loss": 0.0643, "step": 4157 }, { "epoch": 0.9460750853242321, "grad_norm": 0.7834798421355951, "learning_rate": 1.1428401663241533e-06, "loss": 0.0255, "step": 4158 }, { "epoch": 0.9463026166097839, "grad_norm": 0.7290481198035724, "learning_rate": 1.1427901311189444e-06, "loss": 0.0347, "step": 4159 }, { "epoch": 0.9465301478953356, "grad_norm": 1.0906839196972904, "learning_rate": 1.1427400853310536e-06, "loss": 0.0416, "step": 4160 }, { "epoch": 0.9467576791808874, "grad_norm": 0.7797257513708735, "learning_rate": 1.1426900289615034e-06, "loss": 0.0332, "step": 4161 }, { "epoch": 0.9469852104664391, "grad_norm": 0.9828933034646482, "learning_rate": 1.1426399620113174e-06, "loss": 0.0426, "step": 4162 }, { "epoch": 0.9472127417519909, "grad_norm": 1.0815564353692642, "learning_rate": 1.1425898844815183e-06, "loss": 0.03, "step": 4163 }, { "epoch": 0.9474402730375426, "grad_norm": 1.3070693062923497, "learning_rate": 1.1425397963731303e-06, "loss": 0.0761, "step": 4164 }, { "epoch": 0.9476678043230944, "grad_norm": 1.9681632277415542, "learning_rate": 1.1424896976871763e-06, "loss": 0.0751, "step": 4165 }, { "epoch": 0.9478953356086461, "grad_norm": 1.3403750162598331, "learning_rate": 1.1424395884246808e-06, "loss": 0.0645, "step": 4166 }, { "epoch": 0.948122866894198, "grad_norm": 1.4856178047017083, "learning_rate": 1.1423894685866677e-06, "loss": 0.0524, "step": 4167 }, { "epoch": 0.9483503981797498, "grad_norm": 2.390346331051792, "learning_rate": 1.1423393381741614e-06, "loss": 0.1007, "step": 4168 }, { "epoch": 0.9485779294653015, "grad_norm": 1.9335775336716192, "learning_rate": 1.1422891971881867e-06, "loss": 0.0909, "step": 4169 }, { "epoch": 0.9488054607508533, "grad_norm": 1.4846712557080712, "learning_rate": 1.142239045629768e-06, "loss": 0.0706, "step": 4170 }, { "epoch": 0.949032992036405, "grad_norm": 1.2534969921049077, "learning_rate": 1.1421888834999306e-06, "loss": 0.0637, "step": 4171 }, { "epoch": 0.9492605233219568, "grad_norm": 1.5488362147727934, "learning_rate": 1.1421387107996993e-06, "loss": 0.065, "step": 4172 }, { "epoch": 0.9494880546075085, "grad_norm": 1.8544510157017915, "learning_rate": 1.1420885275301001e-06, "loss": 0.0783, "step": 4173 }, { "epoch": 0.9497155858930603, "grad_norm": 1.2775832861301255, "learning_rate": 1.1420383336921583e-06, "loss": 0.0628, "step": 4174 }, { "epoch": 0.949943117178612, "grad_norm": 1.1882749927515044, "learning_rate": 1.1419881292869e-06, "loss": 0.053, "step": 4175 }, { "epoch": 0.9501706484641638, "grad_norm": 1.1112594425917803, "learning_rate": 1.1419379143153511e-06, "loss": 0.0391, "step": 4176 }, { "epoch": 0.9503981797497156, "grad_norm": 1.4092744149498044, "learning_rate": 1.1418876887785379e-06, "loss": 0.0588, "step": 4177 }, { "epoch": 0.9506257110352674, "grad_norm": 1.2859140834039406, "learning_rate": 1.1418374526774872e-06, "loss": 0.0449, "step": 4178 }, { "epoch": 0.9508532423208191, "grad_norm": 1.7602664958939818, "learning_rate": 1.1417872060132251e-06, "loss": 0.066, "step": 4179 }, { "epoch": 0.9510807736063709, "grad_norm": 1.7687663881511184, "learning_rate": 1.1417369487867793e-06, "loss": 0.0737, "step": 4180 }, { "epoch": 0.9513083048919226, "grad_norm": 1.0504917072097133, "learning_rate": 1.1416866809991763e-06, "loss": 0.0571, "step": 4181 }, { "epoch": 0.9515358361774744, "grad_norm": 0.9175209845403075, "learning_rate": 1.1416364026514443e-06, "loss": 0.0473, "step": 4182 }, { "epoch": 0.9517633674630261, "grad_norm": 1.1584022734622255, "learning_rate": 1.1415861137446099e-06, "loss": 0.0615, "step": 4183 }, { "epoch": 0.9519908987485779, "grad_norm": 0.9605566486118688, "learning_rate": 1.1415358142797018e-06, "loss": 0.0468, "step": 4184 }, { "epoch": 0.9522184300341296, "grad_norm": 1.0547428180217657, "learning_rate": 1.1414855042577474e-06, "loss": 0.0578, "step": 4185 }, { "epoch": 0.9524459613196815, "grad_norm": 1.664335132859367, "learning_rate": 1.1414351836797755e-06, "loss": 0.0929, "step": 4186 }, { "epoch": 0.9526734926052333, "grad_norm": 1.326192413657784, "learning_rate": 1.1413848525468139e-06, "loss": 0.0583, "step": 4187 }, { "epoch": 0.952901023890785, "grad_norm": 1.475570664457709, "learning_rate": 1.1413345108598916e-06, "loss": 0.0626, "step": 4188 }, { "epoch": 0.9531285551763368, "grad_norm": 1.7815203562597235, "learning_rate": 1.1412841586200378e-06, "loss": 0.0717, "step": 4189 }, { "epoch": 0.9533560864618885, "grad_norm": 1.7331207453848758, "learning_rate": 1.1412337958282812e-06, "loss": 0.0621, "step": 4190 }, { "epoch": 0.9535836177474403, "grad_norm": 0.5286320379408398, "learning_rate": 1.1411834224856514e-06, "loss": 0.0279, "step": 4191 }, { "epoch": 0.953811149032992, "grad_norm": 0.8095894659074508, "learning_rate": 1.1411330385931776e-06, "loss": 0.0342, "step": 4192 }, { "epoch": 0.9540386803185438, "grad_norm": 1.1720468896203664, "learning_rate": 1.1410826441518898e-06, "loss": 0.0769, "step": 4193 }, { "epoch": 0.9542662116040955, "grad_norm": 1.6215729451396426, "learning_rate": 1.1410322391628179e-06, "loss": 0.064, "step": 4194 }, { "epoch": 0.9544937428896473, "grad_norm": 0.9975722592735233, "learning_rate": 1.140981823626992e-06, "loss": 0.033, "step": 4195 }, { "epoch": 0.9547212741751991, "grad_norm": 0.961294779276341, "learning_rate": 1.1409313975454429e-06, "loss": 0.0556, "step": 4196 }, { "epoch": 0.9549488054607509, "grad_norm": 1.195259518530016, "learning_rate": 1.1408809609192007e-06, "loss": 0.0685, "step": 4197 }, { "epoch": 0.9551763367463026, "grad_norm": 1.2021438816670518, "learning_rate": 1.1408305137492963e-06, "loss": 0.0567, "step": 4198 }, { "epoch": 0.9554038680318544, "grad_norm": 1.0136210761213766, "learning_rate": 1.1407800560367612e-06, "loss": 0.0304, "step": 4199 }, { "epoch": 0.9556313993174061, "grad_norm": 0.8478875067932324, "learning_rate": 1.140729587782626e-06, "loss": 0.0439, "step": 4200 }, { "epoch": 0.9558589306029579, "grad_norm": 1.660804148976066, "learning_rate": 1.1406791089879229e-06, "loss": 0.0731, "step": 4201 }, { "epoch": 0.9560864618885097, "grad_norm": 0.8895932107889849, "learning_rate": 1.1406286196536832e-06, "loss": 0.0363, "step": 4202 }, { "epoch": 0.9563139931740614, "grad_norm": 2.248321391825998, "learning_rate": 1.1405781197809388e-06, "loss": 0.0736, "step": 4203 }, { "epoch": 0.9565415244596132, "grad_norm": 0.5970206546121392, "learning_rate": 1.1405276093707218e-06, "loss": 0.02, "step": 4204 }, { "epoch": 0.9567690557451649, "grad_norm": 1.263024166664947, "learning_rate": 1.1404770884240645e-06, "loss": 0.0523, "step": 4205 }, { "epoch": 0.9569965870307168, "grad_norm": 1.3072039796205928, "learning_rate": 1.1404265569419998e-06, "loss": 0.0606, "step": 4206 }, { "epoch": 0.9572241183162685, "grad_norm": 1.2388621360387058, "learning_rate": 1.14037601492556e-06, "loss": 0.065, "step": 4207 }, { "epoch": 0.9574516496018203, "grad_norm": 0.6867586774624046, "learning_rate": 1.1403254623757785e-06, "loss": 0.0258, "step": 4208 }, { "epoch": 0.957679180887372, "grad_norm": 1.296038153739218, "learning_rate": 1.1402748992936881e-06, "loss": 0.0563, "step": 4209 }, { "epoch": 0.9579067121729238, "grad_norm": 1.3170165094768458, "learning_rate": 1.1402243256803228e-06, "loss": 0.0537, "step": 4210 }, { "epoch": 0.9581342434584755, "grad_norm": 2.115381990971279, "learning_rate": 1.1401737415367157e-06, "loss": 0.1093, "step": 4211 }, { "epoch": 0.9583617747440273, "grad_norm": 1.1322388794198333, "learning_rate": 1.1401231468639008e-06, "loss": 0.0477, "step": 4212 }, { "epoch": 0.958589306029579, "grad_norm": 1.7074659597722202, "learning_rate": 1.140072541662912e-06, "loss": 0.0883, "step": 4213 }, { "epoch": 0.9588168373151308, "grad_norm": 1.6179437373612184, "learning_rate": 1.1400219259347842e-06, "loss": 0.0583, "step": 4214 }, { "epoch": 0.9590443686006825, "grad_norm": 1.0176578115657482, "learning_rate": 1.139971299680551e-06, "loss": 0.0551, "step": 4215 }, { "epoch": 0.9592718998862344, "grad_norm": 1.07590619993706, "learning_rate": 1.1399206629012478e-06, "loss": 0.0367, "step": 4216 }, { "epoch": 0.9594994311717862, "grad_norm": 0.9678873702253064, "learning_rate": 1.1398700155979092e-06, "loss": 0.0394, "step": 4217 }, { "epoch": 0.9597269624573379, "grad_norm": 1.1238559057706377, "learning_rate": 1.1398193577715705e-06, "loss": 0.0623, "step": 4218 }, { "epoch": 0.9599544937428897, "grad_norm": 1.2051383992211526, "learning_rate": 1.1397686894232671e-06, "loss": 0.048, "step": 4219 }, { "epoch": 0.9601820250284414, "grad_norm": 1.5391029437375636, "learning_rate": 1.1397180105540343e-06, "loss": 0.0575, "step": 4220 }, { "epoch": 0.9604095563139932, "grad_norm": 1.620882109175618, "learning_rate": 1.1396673211649078e-06, "loss": 0.0401, "step": 4221 }, { "epoch": 0.9606370875995449, "grad_norm": 1.4900621743746256, "learning_rate": 1.139616621256924e-06, "loss": 0.0488, "step": 4222 }, { "epoch": 0.9608646188850967, "grad_norm": 1.2199319551548797, "learning_rate": 1.1395659108311192e-06, "loss": 0.0568, "step": 4223 }, { "epoch": 0.9610921501706484, "grad_norm": 0.7265505005040674, "learning_rate": 1.1395151898885293e-06, "loss": 0.03, "step": 4224 }, { "epoch": 0.9613196814562003, "grad_norm": 1.1651056301933327, "learning_rate": 1.1394644584301912e-06, "loss": 0.0496, "step": 4225 }, { "epoch": 0.961547212741752, "grad_norm": 1.3085058571423405, "learning_rate": 1.1394137164571418e-06, "loss": 0.0488, "step": 4226 }, { "epoch": 0.9617747440273038, "grad_norm": 1.3459918801148398, "learning_rate": 1.1393629639704182e-06, "loss": 0.0525, "step": 4227 }, { "epoch": 0.9620022753128555, "grad_norm": 1.8692658121925165, "learning_rate": 1.1393122009710575e-06, "loss": 0.0859, "step": 4228 }, { "epoch": 0.9622298065984073, "grad_norm": 1.1703507912941524, "learning_rate": 1.1392614274600975e-06, "loss": 0.048, "step": 4229 }, { "epoch": 0.962457337883959, "grad_norm": 1.0308219099107716, "learning_rate": 1.1392106434385754e-06, "loss": 0.0502, "step": 4230 }, { "epoch": 0.9626848691695108, "grad_norm": 1.67972447648695, "learning_rate": 1.1391598489075298e-06, "loss": 0.0898, "step": 4231 }, { "epoch": 0.9629124004550625, "grad_norm": 0.9225444046119701, "learning_rate": 1.1391090438679986e-06, "loss": 0.0589, "step": 4232 }, { "epoch": 0.9631399317406143, "grad_norm": 0.8217198859736241, "learning_rate": 1.1390582283210199e-06, "loss": 0.0515, "step": 4233 }, { "epoch": 0.963367463026166, "grad_norm": 1.4166504015117989, "learning_rate": 1.1390074022676325e-06, "loss": 0.0434, "step": 4234 }, { "epoch": 0.9635949943117179, "grad_norm": 1.8246388670882077, "learning_rate": 1.1389565657088752e-06, "loss": 0.0696, "step": 4235 }, { "epoch": 0.9638225255972697, "grad_norm": 1.984425508646575, "learning_rate": 1.1389057186457868e-06, "loss": 0.139, "step": 4236 }, { "epoch": 0.9640500568828214, "grad_norm": 2.480312140442047, "learning_rate": 1.1388548610794069e-06, "loss": 0.1062, "step": 4237 }, { "epoch": 0.9642775881683732, "grad_norm": 1.0531496575917552, "learning_rate": 1.1388039930107747e-06, "loss": 0.0416, "step": 4238 }, { "epoch": 0.9645051194539249, "grad_norm": 1.4873297621523849, "learning_rate": 1.1387531144409297e-06, "loss": 0.0418, "step": 4239 }, { "epoch": 0.9647326507394767, "grad_norm": 0.9204083159334068, "learning_rate": 1.138702225370912e-06, "loss": 0.0365, "step": 4240 }, { "epoch": 0.9649601820250284, "grad_norm": 1.193250713664879, "learning_rate": 1.1386513258017617e-06, "loss": 0.057, "step": 4241 }, { "epoch": 0.9651877133105802, "grad_norm": 1.1349796974488853, "learning_rate": 1.138600415734519e-06, "loss": 0.0321, "step": 4242 }, { "epoch": 0.9654152445961319, "grad_norm": 1.0770052024267605, "learning_rate": 1.1385494951702245e-06, "loss": 0.0683, "step": 4243 }, { "epoch": 0.9656427758816837, "grad_norm": 1.4502511589216216, "learning_rate": 1.1384985641099187e-06, "loss": 0.0729, "step": 4244 }, { "epoch": 0.9658703071672355, "grad_norm": 1.1356940815876257, "learning_rate": 1.1384476225546426e-06, "loss": 0.0366, "step": 4245 }, { "epoch": 0.9660978384527873, "grad_norm": 0.9898762934279589, "learning_rate": 1.1383966705054377e-06, "loss": 0.0408, "step": 4246 }, { "epoch": 0.966325369738339, "grad_norm": 1.7320554731048066, "learning_rate": 1.1383457079633448e-06, "loss": 0.0668, "step": 4247 }, { "epoch": 0.9665529010238908, "grad_norm": 1.1627771449836428, "learning_rate": 1.138294734929406e-06, "loss": 0.0565, "step": 4248 }, { "epoch": 0.9667804323094426, "grad_norm": 1.4667218418425674, "learning_rate": 1.1382437514046627e-06, "loss": 0.0572, "step": 4249 }, { "epoch": 0.9670079635949943, "grad_norm": 1.3102476422588094, "learning_rate": 1.1381927573901572e-06, "loss": 0.045, "step": 4250 }, { "epoch": 0.967235494880546, "grad_norm": 0.9659550904946845, "learning_rate": 1.1381417528869316e-06, "loss": 0.0562, "step": 4251 }, { "epoch": 0.9674630261660978, "grad_norm": 1.208698720888652, "learning_rate": 1.1380907378960282e-06, "loss": 0.0601, "step": 4252 }, { "epoch": 0.9676905574516496, "grad_norm": 1.3703900042435244, "learning_rate": 1.13803971241849e-06, "loss": 0.0498, "step": 4253 }, { "epoch": 0.9679180887372013, "grad_norm": 1.3965879380443744, "learning_rate": 1.1379886764553596e-06, "loss": 0.0457, "step": 4254 }, { "epoch": 0.9681456200227532, "grad_norm": 1.4360003219255868, "learning_rate": 1.1379376300076803e-06, "loss": 0.0689, "step": 4255 }, { "epoch": 0.9683731513083049, "grad_norm": 1.1977161122633277, "learning_rate": 1.137886573076495e-06, "loss": 0.0455, "step": 4256 }, { "epoch": 0.9686006825938567, "grad_norm": 1.5336831401293278, "learning_rate": 1.1378355056628474e-06, "loss": 0.0749, "step": 4257 }, { "epoch": 0.9688282138794084, "grad_norm": 1.842256713795995, "learning_rate": 1.1377844277677815e-06, "loss": 0.0702, "step": 4258 }, { "epoch": 0.9690557451649602, "grad_norm": 1.3257914742909727, "learning_rate": 1.1377333393923408e-06, "loss": 0.0427, "step": 4259 }, { "epoch": 0.9692832764505119, "grad_norm": 0.9651098248872604, "learning_rate": 1.1376822405375698e-06, "loss": 0.0309, "step": 4260 }, { "epoch": 0.9695108077360637, "grad_norm": 1.406730207103498, "learning_rate": 1.1376311312045128e-06, "loss": 0.0467, "step": 4261 }, { "epoch": 0.9697383390216154, "grad_norm": 1.1340714191157526, "learning_rate": 1.1375800113942144e-06, "loss": 0.0414, "step": 4262 }, { "epoch": 0.9699658703071672, "grad_norm": 1.0587038466584116, "learning_rate": 1.137528881107719e-06, "loss": 0.0411, "step": 4263 }, { "epoch": 0.970193401592719, "grad_norm": 1.375758096041912, "learning_rate": 1.137477740346072e-06, "loss": 0.0532, "step": 4264 }, { "epoch": 0.9704209328782708, "grad_norm": 1.3742948141863867, "learning_rate": 1.1374265891103187e-06, "loss": 0.0581, "step": 4265 }, { "epoch": 0.9706484641638226, "grad_norm": 1.8817386199561381, "learning_rate": 1.1373754274015044e-06, "loss": 0.0567, "step": 4266 }, { "epoch": 0.9708759954493743, "grad_norm": 1.8821493407596268, "learning_rate": 1.1373242552206744e-06, "loss": 0.0881, "step": 4267 }, { "epoch": 0.9711035267349261, "grad_norm": 1.2074529205184104, "learning_rate": 1.1372730725688754e-06, "loss": 0.0634, "step": 4268 }, { "epoch": 0.9713310580204778, "grad_norm": 1.193289676669864, "learning_rate": 1.1372218794471527e-06, "loss": 0.0435, "step": 4269 }, { "epoch": 0.9715585893060296, "grad_norm": 1.119752010069939, "learning_rate": 1.1371706758565529e-06, "loss": 0.0391, "step": 4270 }, { "epoch": 0.9717861205915813, "grad_norm": 1.4265721705104009, "learning_rate": 1.1371194617981224e-06, "loss": 0.0483, "step": 4271 }, { "epoch": 0.9720136518771331, "grad_norm": 2.0794425956744775, "learning_rate": 1.137068237272908e-06, "loss": 0.0806, "step": 4272 }, { "epoch": 0.9722411831626848, "grad_norm": 0.8292125442794063, "learning_rate": 1.1370170022819569e-06, "loss": 0.0266, "step": 4273 }, { "epoch": 0.9724687144482367, "grad_norm": 1.5988253258989058, "learning_rate": 1.1369657568263157e-06, "loss": 0.064, "step": 4274 }, { "epoch": 0.9726962457337884, "grad_norm": 1.4884666998450276, "learning_rate": 1.1369145009070323e-06, "loss": 0.0595, "step": 4275 }, { "epoch": 0.9729237770193402, "grad_norm": 1.0787898499294646, "learning_rate": 1.1368632345251538e-06, "loss": 0.0449, "step": 4276 }, { "epoch": 0.9731513083048919, "grad_norm": 1.2761275024398588, "learning_rate": 1.1368119576817283e-06, "loss": 0.0473, "step": 4277 }, { "epoch": 0.9733788395904437, "grad_norm": 0.9499141582616558, "learning_rate": 1.1367606703778037e-06, "loss": 0.0376, "step": 4278 }, { "epoch": 0.9736063708759954, "grad_norm": 1.0046565168504633, "learning_rate": 1.1367093726144283e-06, "loss": 0.0432, "step": 4279 }, { "epoch": 0.9738339021615472, "grad_norm": 1.0659038125870088, "learning_rate": 1.1366580643926506e-06, "loss": 0.0404, "step": 4280 }, { "epoch": 0.974061433447099, "grad_norm": 1.595011229834194, "learning_rate": 1.1366067457135188e-06, "loss": 0.0794, "step": 4281 }, { "epoch": 0.9742889647326507, "grad_norm": 1.968290906084048, "learning_rate": 1.1365554165780823e-06, "loss": 0.0673, "step": 4282 }, { "epoch": 0.9745164960182024, "grad_norm": 1.511125938151934, "learning_rate": 1.13650407698739e-06, "loss": 0.0347, "step": 4283 }, { "epoch": 0.9747440273037543, "grad_norm": 1.6319247438179008, "learning_rate": 1.136452726942491e-06, "loss": 0.0582, "step": 4284 }, { "epoch": 0.9749715585893061, "grad_norm": 1.3918200009388155, "learning_rate": 1.1364013664444351e-06, "loss": 0.053, "step": 4285 }, { "epoch": 0.9751990898748578, "grad_norm": 1.5863047987952235, "learning_rate": 1.1363499954942717e-06, "loss": 0.0704, "step": 4286 }, { "epoch": 0.9754266211604096, "grad_norm": 0.9811314326155632, "learning_rate": 1.1362986140930509e-06, "loss": 0.0341, "step": 4287 }, { "epoch": 0.9756541524459613, "grad_norm": 1.7299011978403993, "learning_rate": 1.1362472222418228e-06, "loss": 0.0831, "step": 4288 }, { "epoch": 0.9758816837315131, "grad_norm": 1.4033619817926257, "learning_rate": 1.1361958199416378e-06, "loss": 0.0559, "step": 4289 }, { "epoch": 0.9761092150170648, "grad_norm": 1.6430727260435547, "learning_rate": 1.1361444071935467e-06, "loss": 0.0666, "step": 4290 }, { "epoch": 0.9763367463026166, "grad_norm": 1.5397750371074748, "learning_rate": 1.1360929839985998e-06, "loss": 0.0954, "step": 4291 }, { "epoch": 0.9765642775881683, "grad_norm": 1.1112858103130956, "learning_rate": 1.1360415503578485e-06, "loss": 0.0524, "step": 4292 }, { "epoch": 0.9767918088737202, "grad_norm": 2.049966956578, "learning_rate": 1.1359901062723437e-06, "loss": 0.1009, "step": 4293 }, { "epoch": 0.9770193401592719, "grad_norm": 2.123284701337966, "learning_rate": 1.1359386517431366e-06, "loss": 0.0741, "step": 4294 }, { "epoch": 0.9772468714448237, "grad_norm": 1.8057163155909235, "learning_rate": 1.1358871867712797e-06, "loss": 0.1112, "step": 4295 }, { "epoch": 0.9774744027303754, "grad_norm": 0.6815528807119272, "learning_rate": 1.1358357113578242e-06, "loss": 0.0279, "step": 4296 }, { "epoch": 0.9777019340159272, "grad_norm": 1.9569360573215808, "learning_rate": 1.1357842255038222e-06, "loss": 0.0712, "step": 4297 }, { "epoch": 0.977929465301479, "grad_norm": 1.2238824938808284, "learning_rate": 1.1357327292103266e-06, "loss": 0.068, "step": 4298 }, { "epoch": 0.9781569965870307, "grad_norm": 1.3361935047633542, "learning_rate": 1.135681222478389e-06, "loss": 0.0593, "step": 4299 }, { "epoch": 0.9783845278725825, "grad_norm": 0.9204735606624523, "learning_rate": 1.1356297053090623e-06, "loss": 0.0433, "step": 4300 }, { "epoch": 0.9786120591581342, "grad_norm": 1.6545159240714982, "learning_rate": 1.1355781777033998e-06, "loss": 0.0716, "step": 4301 }, { "epoch": 0.978839590443686, "grad_norm": 1.3389750327964336, "learning_rate": 1.1355266396624545e-06, "loss": 0.0788, "step": 4302 }, { "epoch": 0.9790671217292378, "grad_norm": 1.2850002640158098, "learning_rate": 1.1354750911872795e-06, "loss": 0.0563, "step": 4303 }, { "epoch": 0.9792946530147896, "grad_norm": 2.1844096287203585, "learning_rate": 1.1354235322789286e-06, "loss": 0.1006, "step": 4304 }, { "epoch": 0.9795221843003413, "grad_norm": 1.2563539759355846, "learning_rate": 1.1353719629384554e-06, "loss": 0.0732, "step": 4305 }, { "epoch": 0.9797497155858931, "grad_norm": 1.4434315214333129, "learning_rate": 1.135320383166914e-06, "loss": 0.0704, "step": 4306 }, { "epoch": 0.9799772468714448, "grad_norm": 1.5960905043054407, "learning_rate": 1.1352687929653586e-06, "loss": 0.0837, "step": 4307 }, { "epoch": 0.9802047781569966, "grad_norm": 1.4152051878506084, "learning_rate": 1.1352171923348438e-06, "loss": 0.06, "step": 4308 }, { "epoch": 0.9804323094425483, "grad_norm": 1.3288142940732306, "learning_rate": 1.1351655812764236e-06, "loss": 0.0604, "step": 4309 }, { "epoch": 0.9806598407281001, "grad_norm": 0.9762848070268094, "learning_rate": 1.1351139597911536e-06, "loss": 0.0392, "step": 4310 }, { "epoch": 0.9808873720136518, "grad_norm": 2.015722359424784, "learning_rate": 1.1350623278800884e-06, "loss": 0.0572, "step": 4311 }, { "epoch": 0.9811149032992036, "grad_norm": 0.7081344517803854, "learning_rate": 1.1350106855442833e-06, "loss": 0.0276, "step": 4312 }, { "epoch": 0.9813424345847555, "grad_norm": 0.846185742491215, "learning_rate": 1.134959032784794e-06, "loss": 0.026, "step": 4313 }, { "epoch": 0.9815699658703072, "grad_norm": 1.3352329035862636, "learning_rate": 1.1349073696026759e-06, "loss": 0.057, "step": 4314 }, { "epoch": 0.981797497155859, "grad_norm": 0.8695753542918584, "learning_rate": 1.1348556959989848e-06, "loss": 0.0229, "step": 4315 }, { "epoch": 0.9820250284414107, "grad_norm": 1.3890644514305241, "learning_rate": 1.1348040119747771e-06, "loss": 0.052, "step": 4316 }, { "epoch": 0.9822525597269625, "grad_norm": 1.870756610162731, "learning_rate": 1.1347523175311092e-06, "loss": 0.0653, "step": 4317 }, { "epoch": 0.9824800910125142, "grad_norm": 1.4997312982720814, "learning_rate": 1.1347006126690377e-06, "loss": 0.1015, "step": 4318 }, { "epoch": 0.982707622298066, "grad_norm": 1.0165185093361495, "learning_rate": 1.1346488973896188e-06, "loss": 0.0291, "step": 4319 }, { "epoch": 0.9829351535836177, "grad_norm": 1.380478111887299, "learning_rate": 1.13459717169391e-06, "loss": 0.0632, "step": 4320 }, { "epoch": 0.9831626848691695, "grad_norm": 1.641625597159548, "learning_rate": 1.1345454355829682e-06, "loss": 0.0921, "step": 4321 }, { "epoch": 0.9833902161547212, "grad_norm": 1.0317618560602355, "learning_rate": 1.1344936890578508e-06, "loss": 0.0407, "step": 4322 }, { "epoch": 0.9836177474402731, "grad_norm": 1.310711884960193, "learning_rate": 1.1344419321196156e-06, "loss": 0.0685, "step": 4323 }, { "epoch": 0.9838452787258248, "grad_norm": 1.1047706035095923, "learning_rate": 1.1343901647693204e-06, "loss": 0.0565, "step": 4324 }, { "epoch": 0.9840728100113766, "grad_norm": 0.7843402054936461, "learning_rate": 1.134338387008023e-06, "loss": 0.0304, "step": 4325 }, { "epoch": 0.9843003412969283, "grad_norm": 1.1819853377536353, "learning_rate": 1.134286598836782e-06, "loss": 0.0661, "step": 4326 }, { "epoch": 0.9845278725824801, "grad_norm": 1.0132047480735697, "learning_rate": 1.1342348002566553e-06, "loss": 0.0327, "step": 4327 }, { "epoch": 0.9847554038680318, "grad_norm": 0.8226016309372456, "learning_rate": 1.1341829912687023e-06, "loss": 0.0437, "step": 4328 }, { "epoch": 0.9849829351535836, "grad_norm": 1.254492363059247, "learning_rate": 1.134131171873981e-06, "loss": 0.048, "step": 4329 }, { "epoch": 0.9852104664391353, "grad_norm": 1.9410455746016961, "learning_rate": 1.1340793420735514e-06, "loss": 0.0688, "step": 4330 }, { "epoch": 0.9854379977246871, "grad_norm": 1.2149172788419882, "learning_rate": 1.1340275018684722e-06, "loss": 0.055, "step": 4331 }, { "epoch": 0.985665529010239, "grad_norm": 1.135352471755647, "learning_rate": 1.133975651259803e-06, "loss": 0.0367, "step": 4332 }, { "epoch": 0.9858930602957907, "grad_norm": 1.3243570338612756, "learning_rate": 1.1339237902486037e-06, "loss": 0.0546, "step": 4333 }, { "epoch": 0.9861205915813425, "grad_norm": 1.3460820001864684, "learning_rate": 1.1338719188359343e-06, "loss": 0.0588, "step": 4334 }, { "epoch": 0.9863481228668942, "grad_norm": 0.9640088217580686, "learning_rate": 1.1338200370228546e-06, "loss": 0.0266, "step": 4335 }, { "epoch": 0.986575654152446, "grad_norm": 1.0087770932683686, "learning_rate": 1.1337681448104254e-06, "loss": 0.0549, "step": 4336 }, { "epoch": 0.9868031854379977, "grad_norm": 1.0628308289235069, "learning_rate": 1.1337162421997072e-06, "loss": 0.06, "step": 4337 }, { "epoch": 0.9870307167235495, "grad_norm": 0.9968869621613908, "learning_rate": 1.1336643291917604e-06, "loss": 0.0406, "step": 4338 }, { "epoch": 0.9872582480091012, "grad_norm": 1.3414457698222066, "learning_rate": 1.1336124057876464e-06, "loss": 0.059, "step": 4339 }, { "epoch": 0.987485779294653, "grad_norm": 1.1669110242397096, "learning_rate": 1.1335604719884264e-06, "loss": 0.0534, "step": 4340 }, { "epoch": 0.9877133105802047, "grad_norm": 2.0594889831279968, "learning_rate": 1.1335085277951616e-06, "loss": 0.108, "step": 4341 }, { "epoch": 0.9879408418657566, "grad_norm": 1.4063745875081306, "learning_rate": 1.1334565732089138e-06, "loss": 0.085, "step": 4342 }, { "epoch": 0.9881683731513083, "grad_norm": 1.501936982721738, "learning_rate": 1.133404608230745e-06, "loss": 0.0857, "step": 4343 }, { "epoch": 0.9883959044368601, "grad_norm": 1.2396246613907491, "learning_rate": 1.1333526328617168e-06, "loss": 0.0783, "step": 4344 }, { "epoch": 0.9886234357224118, "grad_norm": 1.4534107446508395, "learning_rate": 1.133300647102892e-06, "loss": 0.0359, "step": 4345 }, { "epoch": 0.9888509670079636, "grad_norm": 1.5794373443169556, "learning_rate": 1.1332486509553328e-06, "loss": 0.0433, "step": 4346 }, { "epoch": 0.9890784982935154, "grad_norm": 1.2904275476428004, "learning_rate": 1.133196644420102e-06, "loss": 0.0554, "step": 4347 }, { "epoch": 0.9893060295790671, "grad_norm": 1.1570569489010505, "learning_rate": 1.1331446274982625e-06, "loss": 0.0472, "step": 4348 }, { "epoch": 0.9895335608646189, "grad_norm": 1.145673861183969, "learning_rate": 1.1330926001908777e-06, "loss": 0.0458, "step": 4349 }, { "epoch": 0.9897610921501706, "grad_norm": 0.896667580653844, "learning_rate": 1.1330405624990104e-06, "loss": 0.0234, "step": 4350 }, { "epoch": 0.9899886234357224, "grad_norm": 0.9213529689842777, "learning_rate": 1.1329885144237243e-06, "loss": 0.0381, "step": 4351 }, { "epoch": 0.9902161547212742, "grad_norm": 1.0487384771504538, "learning_rate": 1.1329364559660836e-06, "loss": 0.0364, "step": 4352 }, { "epoch": 0.990443686006826, "grad_norm": 1.4233320390454558, "learning_rate": 1.132884387127152e-06, "loss": 0.0527, "step": 4353 }, { "epoch": 0.9906712172923777, "grad_norm": 1.4228059163710602, "learning_rate": 1.1328323079079934e-06, "loss": 0.0554, "step": 4354 }, { "epoch": 0.9908987485779295, "grad_norm": 1.1059331642608046, "learning_rate": 1.1327802183096725e-06, "loss": 0.0341, "step": 4355 }, { "epoch": 0.9911262798634812, "grad_norm": 0.9827406192573771, "learning_rate": 1.1327281183332542e-06, "loss": 0.0367, "step": 4356 }, { "epoch": 0.991353811149033, "grad_norm": 1.1884213703636224, "learning_rate": 1.1326760079798027e-06, "loss": 0.0491, "step": 4357 }, { "epoch": 0.9915813424345847, "grad_norm": 1.656723752079525, "learning_rate": 1.1326238872503837e-06, "loss": 0.045, "step": 4358 }, { "epoch": 0.9918088737201365, "grad_norm": 1.0844028312793037, "learning_rate": 1.1325717561460617e-06, "loss": 0.0339, "step": 4359 }, { "epoch": 0.9920364050056882, "grad_norm": 1.0891260266689944, "learning_rate": 1.132519614667903e-06, "loss": 0.0466, "step": 4360 }, { "epoch": 0.9922639362912401, "grad_norm": 1.7100161526383912, "learning_rate": 1.1324674628169725e-06, "loss": 0.0889, "step": 4361 }, { "epoch": 0.9924914675767919, "grad_norm": 1.0880829131016376, "learning_rate": 1.1324153005943367e-06, "loss": 0.0431, "step": 4362 }, { "epoch": 0.9927189988623436, "grad_norm": 0.8884823165051863, "learning_rate": 1.1323631280010611e-06, "loss": 0.0447, "step": 4363 }, { "epoch": 0.9929465301478954, "grad_norm": 1.8533973089871183, "learning_rate": 1.1323109450382128e-06, "loss": 0.082, "step": 4364 }, { "epoch": 0.9931740614334471, "grad_norm": 0.9974325526425775, "learning_rate": 1.1322587517068576e-06, "loss": 0.0319, "step": 4365 }, { "epoch": 0.9934015927189989, "grad_norm": 0.9460101960780305, "learning_rate": 1.1322065480080625e-06, "loss": 0.0433, "step": 4366 }, { "epoch": 0.9936291240045506, "grad_norm": 2.537972684614883, "learning_rate": 1.1321543339428946e-06, "loss": 0.1302, "step": 4367 }, { "epoch": 0.9938566552901024, "grad_norm": 0.9361607430120513, "learning_rate": 1.132102109512421e-06, "loss": 0.0414, "step": 4368 }, { "epoch": 0.9940841865756541, "grad_norm": 1.3835872703988714, "learning_rate": 1.1320498747177088e-06, "loss": 0.0539, "step": 4369 }, { "epoch": 0.9943117178612059, "grad_norm": 1.4613596711487815, "learning_rate": 1.1319976295598258e-06, "loss": 0.0695, "step": 4370 }, { "epoch": 0.9945392491467577, "grad_norm": 1.3564131183677717, "learning_rate": 1.1319453740398397e-06, "loss": 0.0438, "step": 4371 }, { "epoch": 0.9947667804323095, "grad_norm": 1.312368604701528, "learning_rate": 1.1318931081588188e-06, "loss": 0.0539, "step": 4372 }, { "epoch": 0.9949943117178612, "grad_norm": 2.852729748963175, "learning_rate": 1.1318408319178308e-06, "loss": 0.1051, "step": 4373 }, { "epoch": 0.995221843003413, "grad_norm": 1.174771913378081, "learning_rate": 1.1317885453179448e-06, "loss": 0.0345, "step": 4374 }, { "epoch": 0.9954493742889647, "grad_norm": 1.156608166877398, "learning_rate": 1.131736248360229e-06, "loss": 0.0507, "step": 4375 }, { "epoch": 0.9956769055745165, "grad_norm": 1.7246435858174831, "learning_rate": 1.1316839410457523e-06, "loss": 0.0753, "step": 4376 }, { "epoch": 0.9959044368600682, "grad_norm": 1.190785748773095, "learning_rate": 1.1316316233755837e-06, "loss": 0.048, "step": 4377 }, { "epoch": 0.99613196814562, "grad_norm": 1.3884121672018674, "learning_rate": 1.1315792953507924e-06, "loss": 0.054, "step": 4378 }, { "epoch": 0.9963594994311717, "grad_norm": 1.1099500717065285, "learning_rate": 1.1315269569724483e-06, "loss": 0.0578, "step": 4379 }, { "epoch": 0.9965870307167235, "grad_norm": 1.1511233150222433, "learning_rate": 1.131474608241621e-06, "loss": 0.0709, "step": 4380 }, { "epoch": 0.9968145620022754, "grad_norm": 0.9955642314650807, "learning_rate": 1.1314222491593798e-06, "loss": 0.0358, "step": 4381 }, { "epoch": 0.9970420932878271, "grad_norm": 1.434440905613479, "learning_rate": 1.1313698797267958e-06, "loss": 0.0486, "step": 4382 }, { "epoch": 0.9972696245733789, "grad_norm": 2.8294165726638822, "learning_rate": 1.1313174999449384e-06, "loss": 0.0813, "step": 4383 }, { "epoch": 0.9974971558589306, "grad_norm": 1.2319487216248912, "learning_rate": 1.1312651098148788e-06, "loss": 0.0432, "step": 4384 }, { "epoch": 0.9977246871444824, "grad_norm": 1.2286306009661918, "learning_rate": 1.1312127093376876e-06, "loss": 0.0498, "step": 4385 }, { "epoch": 0.9979522184300341, "grad_norm": 1.4020060740030407, "learning_rate": 1.1311602985144358e-06, "loss": 0.0618, "step": 4386 }, { "epoch": 0.9981797497155859, "grad_norm": 1.2218921395546485, "learning_rate": 1.1311078773461942e-06, "loss": 0.0704, "step": 4387 }, { "epoch": 0.9984072810011376, "grad_norm": 1.2701236465901293, "learning_rate": 1.1310554458340345e-06, "loss": 0.0494, "step": 4388 }, { "epoch": 0.9986348122866894, "grad_norm": 1.2231395933687852, "learning_rate": 1.1310030039790285e-06, "loss": 0.0548, "step": 4389 }, { "epoch": 0.9988623435722411, "grad_norm": 1.228744395379064, "learning_rate": 1.1309505517822476e-06, "loss": 0.0456, "step": 4390 }, { "epoch": 0.999089874857793, "grad_norm": 1.0175121426182296, "learning_rate": 1.1308980892447641e-06, "loss": 0.049, "step": 4391 }, { "epoch": 0.9993174061433447, "grad_norm": 1.144658935981794, "learning_rate": 1.1308456163676501e-06, "loss": 0.0487, "step": 4392 }, { "epoch": 0.9995449374288965, "grad_norm": 1.1385156907253697, "learning_rate": 1.1307931331519783e-06, "loss": 0.051, "step": 4393 }, { "epoch": 0.9997724687144482, "grad_norm": 1.2941484330489015, "learning_rate": 1.1307406395988211e-06, "loss": 0.0587, "step": 4394 }, { "epoch": 1.0, "grad_norm": 0.7645843200332254, "learning_rate": 1.1306881357092513e-06, "loss": 0.0273, "step": 4395 }, { "epoch": 1.0002275312855518, "grad_norm": 0.5440562375961928, "learning_rate": 1.1306356214843423e-06, "loss": 0.0133, "step": 4396 }, { "epoch": 1.0004550625711035, "grad_norm": 1.048557888593317, "learning_rate": 1.1305830969251672e-06, "loss": 0.0234, "step": 4397 }, { "epoch": 1.0006825938566553, "grad_norm": 0.7566376417333784, "learning_rate": 1.1305305620327994e-06, "loss": 0.0208, "step": 4398 }, { "epoch": 1.000910125142207, "grad_norm": 0.6722933508685324, "learning_rate": 1.1304780168083128e-06, "loss": 0.0252, "step": 4399 }, { "epoch": 1.0011376564277588, "grad_norm": 0.9157174579220916, "learning_rate": 1.1304254612527815e-06, "loss": 0.0292, "step": 4400 }, { "epoch": 1.0013651877133105, "grad_norm": 0.7682748184714413, "learning_rate": 1.130372895367279e-06, "loss": 0.0305, "step": 4401 }, { "epoch": 1.0015927189988623, "grad_norm": 0.4692149259132462, "learning_rate": 1.1303203191528803e-06, "loss": 0.011, "step": 4402 }, { "epoch": 1.001820250284414, "grad_norm": 0.6034838800549486, "learning_rate": 1.1302677326106598e-06, "loss": 0.0158, "step": 4403 }, { "epoch": 1.0020477815699658, "grad_norm": 0.5139829099740163, "learning_rate": 1.130215135741692e-06, "loss": 0.0097, "step": 4404 }, { "epoch": 1.0022753128555177, "grad_norm": 0.6233625169929209, "learning_rate": 1.1301625285470522e-06, "loss": 0.0209, "step": 4405 }, { "epoch": 1.0025028441410695, "grad_norm": 0.8715203529693301, "learning_rate": 1.1301099110278156e-06, "loss": 0.0349, "step": 4406 }, { "epoch": 1.0027303754266212, "grad_norm": 0.5809712252863496, "learning_rate": 1.1300572831850574e-06, "loss": 0.0173, "step": 4407 }, { "epoch": 1.002957906712173, "grad_norm": 0.869776635220801, "learning_rate": 1.1300046450198532e-06, "loss": 0.0349, "step": 4408 }, { "epoch": 1.0031854379977247, "grad_norm": 0.6891595017115912, "learning_rate": 1.1299519965332791e-06, "loss": 0.0218, "step": 4409 }, { "epoch": 1.0034129692832765, "grad_norm": 2.771272397471203, "learning_rate": 1.1298993377264108e-06, "loss": 0.1173, "step": 4410 }, { "epoch": 1.0036405005688283, "grad_norm": 0.6496153724080771, "learning_rate": 1.129846668600325e-06, "loss": 0.0165, "step": 4411 }, { "epoch": 1.00386803185438, "grad_norm": 0.9105625202645007, "learning_rate": 1.1297939891560975e-06, "loss": 0.0356, "step": 4412 }, { "epoch": 1.0040955631399318, "grad_norm": 0.954002149635865, "learning_rate": 1.1297412993948054e-06, "loss": 0.0419, "step": 4413 }, { "epoch": 1.0043230944254835, "grad_norm": 0.8420422197036498, "learning_rate": 1.1296885993175255e-06, "loss": 0.0249, "step": 4414 }, { "epoch": 1.0045506257110353, "grad_norm": 1.1194729404929513, "learning_rate": 1.1296358889253351e-06, "loss": 0.0282, "step": 4415 }, { "epoch": 1.004778156996587, "grad_norm": 0.664863679519903, "learning_rate": 1.1295831682193115e-06, "loss": 0.0232, "step": 4416 }, { "epoch": 1.0050056882821388, "grad_norm": 0.7352288120040293, "learning_rate": 1.1295304372005316e-06, "loss": 0.0308, "step": 4417 }, { "epoch": 1.0052332195676905, "grad_norm": 0.9912305346960562, "learning_rate": 1.129477695870074e-06, "loss": 0.0454, "step": 4418 }, { "epoch": 1.0054607508532423, "grad_norm": 0.5967781019650417, "learning_rate": 1.129424944229016e-06, "loss": 0.0145, "step": 4419 }, { "epoch": 1.005688282138794, "grad_norm": 0.8293958411943283, "learning_rate": 1.1293721822784359e-06, "loss": 0.0189, "step": 4420 }, { "epoch": 1.0059158134243458, "grad_norm": 0.9305770629796571, "learning_rate": 1.1293194100194121e-06, "loss": 0.0258, "step": 4421 }, { "epoch": 1.0061433447098975, "grad_norm": 0.7187164975510966, "learning_rate": 1.1292666274530232e-06, "loss": 0.0165, "step": 4422 }, { "epoch": 1.0063708759954493, "grad_norm": 0.6821469033121376, "learning_rate": 1.129213834580348e-06, "loss": 0.0147, "step": 4423 }, { "epoch": 1.006598407281001, "grad_norm": 0.5603983030125896, "learning_rate": 1.1291610314024653e-06, "loss": 0.0135, "step": 4424 }, { "epoch": 1.006825938566553, "grad_norm": 0.7667715549060277, "learning_rate": 1.1291082179204548e-06, "loss": 0.0202, "step": 4425 }, { "epoch": 1.0070534698521048, "grad_norm": 0.5411620244180794, "learning_rate": 1.1290553941353954e-06, "loss": 0.0103, "step": 4426 }, { "epoch": 1.0072810011376565, "grad_norm": 0.6055971311258457, "learning_rate": 1.1290025600483667e-06, "loss": 0.0228, "step": 4427 }, { "epoch": 1.0075085324232083, "grad_norm": 1.2109757139019808, "learning_rate": 1.1289497156604487e-06, "loss": 0.0437, "step": 4428 }, { "epoch": 1.00773606370876, "grad_norm": 0.5370646714370468, "learning_rate": 1.1288968609727216e-06, "loss": 0.0085, "step": 4429 }, { "epoch": 1.0079635949943118, "grad_norm": 0.9137582688267473, "learning_rate": 1.1288439959862654e-06, "loss": 0.0349, "step": 4430 }, { "epoch": 1.0081911262798635, "grad_norm": 0.6202104523174599, "learning_rate": 1.128791120702161e-06, "loss": 0.0244, "step": 4431 }, { "epoch": 1.0084186575654153, "grad_norm": 1.264397001473755, "learning_rate": 1.1287382351214884e-06, "loss": 0.0385, "step": 4432 }, { "epoch": 1.008646188850967, "grad_norm": 0.4733349727968622, "learning_rate": 1.128685339245329e-06, "loss": 0.0157, "step": 4433 }, { "epoch": 1.0088737201365188, "grad_norm": 0.7488096549319501, "learning_rate": 1.1286324330747637e-06, "loss": 0.0237, "step": 4434 }, { "epoch": 1.0091012514220705, "grad_norm": 0.6215090350542927, "learning_rate": 1.1285795166108735e-06, "loss": 0.0165, "step": 4435 }, { "epoch": 1.0093287827076223, "grad_norm": 0.9977188909479956, "learning_rate": 1.1285265898547406e-06, "loss": 0.0269, "step": 4436 }, { "epoch": 1.009556313993174, "grad_norm": 1.3828220847485446, "learning_rate": 1.1284736528074464e-06, "loss": 0.0327, "step": 4437 }, { "epoch": 1.0097838452787258, "grad_norm": 0.8745376030427436, "learning_rate": 1.1284207054700727e-06, "loss": 0.0223, "step": 4438 }, { "epoch": 1.0100113765642775, "grad_norm": 0.710421250041984, "learning_rate": 1.1283677478437016e-06, "loss": 0.0206, "step": 4439 }, { "epoch": 1.0102389078498293, "grad_norm": 0.9294364308935749, "learning_rate": 1.1283147799294158e-06, "loss": 0.023, "step": 4440 }, { "epoch": 1.010466439135381, "grad_norm": 1.1336723842425673, "learning_rate": 1.1282618017282977e-06, "loss": 0.0543, "step": 4441 }, { "epoch": 1.0106939704209328, "grad_norm": 0.6236066585507253, "learning_rate": 1.1282088132414297e-06, "loss": 0.0177, "step": 4442 }, { "epoch": 1.0109215017064845, "grad_norm": 0.4538882994044952, "learning_rate": 1.1281558144698956e-06, "loss": 0.005, "step": 4443 }, { "epoch": 1.0111490329920365, "grad_norm": 0.9922582222331733, "learning_rate": 1.128102805414778e-06, "loss": 0.024, "step": 4444 }, { "epoch": 1.0113765642775883, "grad_norm": 0.939282752346539, "learning_rate": 1.1280497860771603e-06, "loss": 0.0373, "step": 4445 }, { "epoch": 1.01160409556314, "grad_norm": 0.6323505737761085, "learning_rate": 1.1279967564581264e-06, "loss": 0.0121, "step": 4446 }, { "epoch": 1.0118316268486918, "grad_norm": 1.0956673474878567, "learning_rate": 1.12794371655876e-06, "loss": 0.02, "step": 4447 }, { "epoch": 1.0120591581342435, "grad_norm": 0.9129627353008005, "learning_rate": 1.127890666380145e-06, "loss": 0.025, "step": 4448 }, { "epoch": 1.0122866894197953, "grad_norm": 0.46435154090423725, "learning_rate": 1.1278376059233658e-06, "loss": 0.0083, "step": 4449 }, { "epoch": 1.012514220705347, "grad_norm": 0.823583660478901, "learning_rate": 1.127784535189507e-06, "loss": 0.0242, "step": 4450 }, { "epoch": 1.0127417519908988, "grad_norm": 1.244611532738964, "learning_rate": 1.127731454179653e-06, "loss": 0.0477, "step": 4451 }, { "epoch": 1.0129692832764505, "grad_norm": 0.6536691853613382, "learning_rate": 1.1276783628948887e-06, "loss": 0.0203, "step": 4452 }, { "epoch": 1.0131968145620023, "grad_norm": 0.7200287149249455, "learning_rate": 1.1276252613362995e-06, "loss": 0.0238, "step": 4453 }, { "epoch": 1.013424345847554, "grad_norm": 0.8521168883159115, "learning_rate": 1.1275721495049702e-06, "loss": 0.0308, "step": 4454 }, { "epoch": 1.0136518771331058, "grad_norm": 0.6812814467705595, "learning_rate": 1.1275190274019867e-06, "loss": 0.0223, "step": 4455 }, { "epoch": 1.0138794084186575, "grad_norm": 0.8073573367282825, "learning_rate": 1.1274658950284347e-06, "loss": 0.0159, "step": 4456 }, { "epoch": 1.0141069397042093, "grad_norm": 0.9929398468232915, "learning_rate": 1.1274127523854e-06, "loss": 0.0174, "step": 4457 }, { "epoch": 1.014334470989761, "grad_norm": 0.8110259409039485, "learning_rate": 1.1273595994739688e-06, "loss": 0.0231, "step": 4458 }, { "epoch": 1.0145620022753128, "grad_norm": 0.8503939357738989, "learning_rate": 1.1273064362952272e-06, "loss": 0.0181, "step": 4459 }, { "epoch": 1.0147895335608645, "grad_norm": 0.952005853323446, "learning_rate": 1.1272532628502621e-06, "loss": 0.011, "step": 4460 }, { "epoch": 1.0150170648464163, "grad_norm": 0.979632492475044, "learning_rate": 1.1272000791401602e-06, "loss": 0.0289, "step": 4461 }, { "epoch": 1.015244596131968, "grad_norm": 0.9141108737856498, "learning_rate": 1.1271468851660084e-06, "loss": 0.0325, "step": 4462 }, { "epoch": 1.01547212741752, "grad_norm": 0.7032331801339051, "learning_rate": 1.127093680928894e-06, "loss": 0.0188, "step": 4463 }, { "epoch": 1.0156996587030718, "grad_norm": 0.9755139681497196, "learning_rate": 1.1270404664299042e-06, "loss": 0.0212, "step": 4464 }, { "epoch": 1.0159271899886235, "grad_norm": 0.623696536501648, "learning_rate": 1.1269872416701267e-06, "loss": 0.0143, "step": 4465 }, { "epoch": 1.0161547212741753, "grad_norm": 0.5413765005494656, "learning_rate": 1.1269340066506493e-06, "loss": 0.0101, "step": 4466 }, { "epoch": 1.016382252559727, "grad_norm": 0.6689689779774286, "learning_rate": 1.12688076137256e-06, "loss": 0.0114, "step": 4467 }, { "epoch": 1.0166097838452788, "grad_norm": 0.6453292072094783, "learning_rate": 1.1268275058369472e-06, "loss": 0.0107, "step": 4468 }, { "epoch": 1.0168373151308305, "grad_norm": 1.1332110733517493, "learning_rate": 1.1267742400448992e-06, "loss": 0.0217, "step": 4469 }, { "epoch": 1.0170648464163823, "grad_norm": 0.630093494299339, "learning_rate": 1.1267209639975046e-06, "loss": 0.0089, "step": 4470 }, { "epoch": 1.017292377701934, "grad_norm": 1.0118536500106963, "learning_rate": 1.1266676776958523e-06, "loss": 0.0208, "step": 4471 }, { "epoch": 1.0175199089874858, "grad_norm": 1.0518953123729393, "learning_rate": 1.1266143811410317e-06, "loss": 0.0201, "step": 4472 }, { "epoch": 1.0177474402730375, "grad_norm": 0.757424135828587, "learning_rate": 1.1265610743341316e-06, "loss": 0.0217, "step": 4473 }, { "epoch": 1.0179749715585893, "grad_norm": 0.77124606602971, "learning_rate": 1.1265077572762418e-06, "loss": 0.0262, "step": 4474 }, { "epoch": 1.018202502844141, "grad_norm": 1.7312334858326726, "learning_rate": 1.1264544299684518e-06, "loss": 0.0511, "step": 4475 }, { "epoch": 1.0184300341296928, "grad_norm": 0.8077840361458416, "learning_rate": 1.1264010924118518e-06, "loss": 0.0156, "step": 4476 }, { "epoch": 1.0186575654152445, "grad_norm": 0.9380910246867491, "learning_rate": 1.1263477446075315e-06, "loss": 0.0387, "step": 4477 }, { "epoch": 1.0188850967007963, "grad_norm": 0.7652818963146196, "learning_rate": 1.1262943865565818e-06, "loss": 0.0173, "step": 4478 }, { "epoch": 1.019112627986348, "grad_norm": 1.3337882989968501, "learning_rate": 1.1262410182600927e-06, "loss": 0.0396, "step": 4479 }, { "epoch": 1.0193401592718998, "grad_norm": 0.45812987319685416, "learning_rate": 1.1261876397191554e-06, "loss": 0.015, "step": 4480 }, { "epoch": 1.0195676905574516, "grad_norm": 0.7602276965168047, "learning_rate": 1.1261342509348604e-06, "loss": 0.0202, "step": 4481 }, { "epoch": 1.0197952218430033, "grad_norm": 0.9601272476880518, "learning_rate": 1.126080851908299e-06, "loss": 0.0245, "step": 4482 }, { "epoch": 1.0200227531285553, "grad_norm": 0.9302450496938718, "learning_rate": 1.1260274426405629e-06, "loss": 0.0251, "step": 4483 }, { "epoch": 1.020250284414107, "grad_norm": 1.064523523991496, "learning_rate": 1.1259740231327434e-06, "loss": 0.0295, "step": 4484 }, { "epoch": 1.0204778156996588, "grad_norm": 1.4809063729774259, "learning_rate": 1.1259205933859325e-06, "loss": 0.0268, "step": 4485 }, { "epoch": 1.0207053469852105, "grad_norm": 1.2332212440546455, "learning_rate": 1.1258671534012216e-06, "loss": 0.0432, "step": 4486 }, { "epoch": 1.0209328782707623, "grad_norm": 0.7799860888625535, "learning_rate": 1.1258137031797037e-06, "loss": 0.0124, "step": 4487 }, { "epoch": 1.021160409556314, "grad_norm": 0.759267946121651, "learning_rate": 1.125760242722471e-06, "loss": 0.0091, "step": 4488 }, { "epoch": 1.0213879408418658, "grad_norm": 0.891169288145688, "learning_rate": 1.1257067720306159e-06, "loss": 0.0286, "step": 4489 }, { "epoch": 1.0216154721274175, "grad_norm": 1.0432400031887874, "learning_rate": 1.1256532911052313e-06, "loss": 0.0334, "step": 4490 }, { "epoch": 1.0218430034129693, "grad_norm": 1.0537890506909147, "learning_rate": 1.1255997999474105e-06, "loss": 0.02, "step": 4491 }, { "epoch": 1.022070534698521, "grad_norm": 0.7817598770936856, "learning_rate": 1.1255462985582465e-06, "loss": 0.017, "step": 4492 }, { "epoch": 1.0222980659840728, "grad_norm": 0.9383161200612665, "learning_rate": 1.125492786938833e-06, "loss": 0.011, "step": 4493 }, { "epoch": 1.0225255972696246, "grad_norm": 1.7998282210253833, "learning_rate": 1.1254392650902633e-06, "loss": 0.0469, "step": 4494 }, { "epoch": 1.0227531285551763, "grad_norm": 0.9796354836057241, "learning_rate": 1.1253857330136316e-06, "loss": 0.0252, "step": 4495 }, { "epoch": 1.022980659840728, "grad_norm": 0.8615898866534331, "learning_rate": 1.125332190710032e-06, "loss": 0.0103, "step": 4496 }, { "epoch": 1.0232081911262798, "grad_norm": 1.066758301808706, "learning_rate": 1.125278638180559e-06, "loss": 0.0176, "step": 4497 }, { "epoch": 1.0234357224118316, "grad_norm": 0.7537124723757486, "learning_rate": 1.1252250754263064e-06, "loss": 0.0243, "step": 4498 }, { "epoch": 1.0236632536973833, "grad_norm": 0.7310817973699005, "learning_rate": 1.1251715024483695e-06, "loss": 0.0213, "step": 4499 }, { "epoch": 1.023890784982935, "grad_norm": 0.8578238852047406, "learning_rate": 1.125117919247843e-06, "loss": 0.0319, "step": 4500 }, { "epoch": 1.0241183162684868, "grad_norm": 0.9913368014981602, "learning_rate": 1.1250643258258225e-06, "loss": 0.0208, "step": 4501 }, { "epoch": 1.0243458475540388, "grad_norm": 1.2683092562362872, "learning_rate": 1.1250107221834027e-06, "loss": 0.0504, "step": 4502 }, { "epoch": 1.0245733788395905, "grad_norm": 0.7751769469902451, "learning_rate": 1.12495710832168e-06, "loss": 0.0278, "step": 4503 }, { "epoch": 1.0248009101251423, "grad_norm": 0.7428863858706001, "learning_rate": 1.1249034842417489e-06, "loss": 0.0141, "step": 4504 }, { "epoch": 1.025028441410694, "grad_norm": 0.7095783022904831, "learning_rate": 1.1248498499447065e-06, "loss": 0.019, "step": 4505 }, { "epoch": 1.0252559726962458, "grad_norm": 1.0630325639796825, "learning_rate": 1.1247962054316485e-06, "loss": 0.0439, "step": 4506 }, { "epoch": 1.0254835039817976, "grad_norm": 0.8040239643121114, "learning_rate": 1.1247425507036715e-06, "loss": 0.0189, "step": 4507 }, { "epoch": 1.0257110352673493, "grad_norm": 1.8105078367140066, "learning_rate": 1.1246888857618719e-06, "loss": 0.0633, "step": 4508 }, { "epoch": 1.025938566552901, "grad_norm": 1.3028583651888686, "learning_rate": 1.1246352106073466e-06, "loss": 0.0233, "step": 4509 }, { "epoch": 1.0261660978384528, "grad_norm": 1.3773748170809028, "learning_rate": 1.1245815252411928e-06, "loss": 0.0563, "step": 4510 }, { "epoch": 1.0263936291240046, "grad_norm": 1.362685911234451, "learning_rate": 1.1245278296645073e-06, "loss": 0.0257, "step": 4511 }, { "epoch": 1.0266211604095563, "grad_norm": 0.7561719123407112, "learning_rate": 1.124474123878388e-06, "loss": 0.0137, "step": 4512 }, { "epoch": 1.026848691695108, "grad_norm": 0.6802103464690515, "learning_rate": 1.1244204078839325e-06, "loss": 0.0148, "step": 4513 }, { "epoch": 1.0270762229806598, "grad_norm": 1.023770837391378, "learning_rate": 1.1243666816822382e-06, "loss": 0.0383, "step": 4514 }, { "epoch": 1.0273037542662116, "grad_norm": 1.105198417368013, "learning_rate": 1.1243129452744036e-06, "loss": 0.0169, "step": 4515 }, { "epoch": 1.0275312855517633, "grad_norm": 0.8424066208751216, "learning_rate": 1.1242591986615268e-06, "loss": 0.0308, "step": 4516 }, { "epoch": 1.027758816837315, "grad_norm": 0.666774886288571, "learning_rate": 1.1242054418447063e-06, "loss": 0.0093, "step": 4517 }, { "epoch": 1.0279863481228668, "grad_norm": 0.6569322003852172, "learning_rate": 1.1241516748250408e-06, "loss": 0.0118, "step": 4518 }, { "epoch": 1.0282138794084186, "grad_norm": 1.2076404281398103, "learning_rate": 1.1240978976036294e-06, "loss": 0.0489, "step": 4519 }, { "epoch": 1.0284414106939703, "grad_norm": 1.1551438155853788, "learning_rate": 1.124044110181571e-06, "loss": 0.0311, "step": 4520 }, { "epoch": 1.028668941979522, "grad_norm": 1.389595859599812, "learning_rate": 1.1239903125599648e-06, "loss": 0.0491, "step": 4521 }, { "epoch": 1.028896473265074, "grad_norm": 1.0333918284834915, "learning_rate": 1.1239365047399106e-06, "loss": 0.0404, "step": 4522 }, { "epoch": 1.0291240045506258, "grad_norm": 0.8599318194273396, "learning_rate": 1.1238826867225077e-06, "loss": 0.0197, "step": 4523 }, { "epoch": 1.0293515358361776, "grad_norm": 0.8304017967466861, "learning_rate": 1.1238288585088567e-06, "loss": 0.0171, "step": 4524 }, { "epoch": 1.0295790671217293, "grad_norm": 1.136424973741861, "learning_rate": 1.1237750201000574e-06, "loss": 0.0316, "step": 4525 }, { "epoch": 1.029806598407281, "grad_norm": 1.4555472105148153, "learning_rate": 1.1237211714972098e-06, "loss": 0.062, "step": 4526 }, { "epoch": 1.0300341296928328, "grad_norm": 1.3336128205554842, "learning_rate": 1.1236673127014152e-06, "loss": 0.0243, "step": 4527 }, { "epoch": 1.0302616609783846, "grad_norm": 0.9479976161551643, "learning_rate": 1.1236134437137738e-06, "loss": 0.022, "step": 4528 }, { "epoch": 1.0304891922639363, "grad_norm": 0.851610494023467, "learning_rate": 1.1235595645353869e-06, "loss": 0.0304, "step": 4529 }, { "epoch": 1.030716723549488, "grad_norm": 0.8155261530618487, "learning_rate": 1.1235056751673554e-06, "loss": 0.017, "step": 4530 }, { "epoch": 1.0309442548350398, "grad_norm": 1.1243416422484538, "learning_rate": 1.123451775610781e-06, "loss": 0.0248, "step": 4531 }, { "epoch": 1.0311717861205916, "grad_norm": 0.6551716981893974, "learning_rate": 1.1233978658667651e-06, "loss": 0.0251, "step": 4532 }, { "epoch": 1.0313993174061433, "grad_norm": 0.9437616699526395, "learning_rate": 1.1233439459364097e-06, "loss": 0.0228, "step": 4533 }, { "epoch": 1.031626848691695, "grad_norm": 1.245047538582983, "learning_rate": 1.1232900158208166e-06, "loss": 0.0148, "step": 4534 }, { "epoch": 1.0318543799772468, "grad_norm": 0.9658390993690719, "learning_rate": 1.1232360755210883e-06, "loss": 0.0166, "step": 4535 }, { "epoch": 1.0320819112627986, "grad_norm": 1.1047045638558348, "learning_rate": 1.123182125038327e-06, "loss": 0.0316, "step": 4536 }, { "epoch": 1.0323094425483503, "grad_norm": 1.3208237077699243, "learning_rate": 1.1231281643736353e-06, "loss": 0.0303, "step": 4537 }, { "epoch": 1.032536973833902, "grad_norm": 0.7809775193316096, "learning_rate": 1.1230741935281163e-06, "loss": 0.0196, "step": 4538 }, { "epoch": 1.0327645051194538, "grad_norm": 0.991878604252392, "learning_rate": 1.123020212502873e-06, "loss": 0.02, "step": 4539 }, { "epoch": 1.0329920364050056, "grad_norm": 1.3869865701838429, "learning_rate": 1.1229662212990088e-06, "loss": 0.0612, "step": 4540 }, { "epoch": 1.0332195676905576, "grad_norm": 0.619858198046472, "learning_rate": 1.1229122199176268e-06, "loss": 0.0072, "step": 4541 }, { "epoch": 1.0334470989761093, "grad_norm": 0.7040478929448408, "learning_rate": 1.1228582083598311e-06, "loss": 0.0257, "step": 4542 }, { "epoch": 1.033674630261661, "grad_norm": 1.182074276350949, "learning_rate": 1.122804186626725e-06, "loss": 0.0206, "step": 4543 }, { "epoch": 1.0339021615472128, "grad_norm": 0.42845477644473695, "learning_rate": 1.1227501547194133e-06, "loss": 0.0111, "step": 4544 }, { "epoch": 1.0341296928327646, "grad_norm": 1.5120577926077867, "learning_rate": 1.1226961126390001e-06, "loss": 0.0402, "step": 4545 }, { "epoch": 1.0343572241183163, "grad_norm": 1.0258672333609278, "learning_rate": 1.1226420603865898e-06, "loss": 0.0266, "step": 4546 }, { "epoch": 1.034584755403868, "grad_norm": 0.6935436273674827, "learning_rate": 1.122587997963287e-06, "loss": 0.021, "step": 4547 }, { "epoch": 1.0348122866894198, "grad_norm": 2.2382185926621383, "learning_rate": 1.122533925370197e-06, "loss": 0.0311, "step": 4548 }, { "epoch": 1.0350398179749716, "grad_norm": 0.8597189295835992, "learning_rate": 1.1224798426084246e-06, "loss": 0.0251, "step": 4549 }, { "epoch": 1.0352673492605233, "grad_norm": 1.2187642427431231, "learning_rate": 1.1224257496790756e-06, "loss": 0.0307, "step": 4550 }, { "epoch": 1.035494880546075, "grad_norm": 0.8235532448084206, "learning_rate": 1.122371646583255e-06, "loss": 0.016, "step": 4551 }, { "epoch": 1.0357224118316268, "grad_norm": 1.1637372096776504, "learning_rate": 1.1223175333220688e-06, "loss": 0.0249, "step": 4552 }, { "epoch": 1.0359499431171786, "grad_norm": 1.2013323108837044, "learning_rate": 1.122263409896623e-06, "loss": 0.0211, "step": 4553 }, { "epoch": 1.0361774744027303, "grad_norm": 1.5241558913138389, "learning_rate": 1.1222092763080242e-06, "loss": 0.0614, "step": 4554 }, { "epoch": 1.036405005688282, "grad_norm": 0.9609458503216893, "learning_rate": 1.1221551325573779e-06, "loss": 0.0345, "step": 4555 }, { "epoch": 1.0366325369738338, "grad_norm": 1.067326425845944, "learning_rate": 1.1221009786457914e-06, "loss": 0.0251, "step": 4556 }, { "epoch": 1.0368600682593856, "grad_norm": 0.6952960225267327, "learning_rate": 1.1220468145743713e-06, "loss": 0.0158, "step": 4557 }, { "epoch": 1.0370875995449373, "grad_norm": 1.5415742411379196, "learning_rate": 1.1219926403442247e-06, "loss": 0.0398, "step": 4558 }, { "epoch": 1.037315130830489, "grad_norm": 1.0675139745280051, "learning_rate": 1.1219384559564587e-06, "loss": 0.0277, "step": 4559 }, { "epoch": 1.0375426621160408, "grad_norm": 0.7777707864923777, "learning_rate": 1.1218842614121806e-06, "loss": 0.0247, "step": 4560 }, { "epoch": 1.0377701934015928, "grad_norm": 1.6672178552337855, "learning_rate": 1.1218300567124983e-06, "loss": 0.0342, "step": 4561 }, { "epoch": 1.0379977246871446, "grad_norm": 1.0152141504441292, "learning_rate": 1.1217758418585195e-06, "loss": 0.0426, "step": 4562 }, { "epoch": 1.0382252559726963, "grad_norm": 1.0296746623158612, "learning_rate": 1.1217216168513522e-06, "loss": 0.0225, "step": 4563 }, { "epoch": 1.038452787258248, "grad_norm": 1.970275220352162, "learning_rate": 1.1216673816921048e-06, "loss": 0.0876, "step": 4564 }, { "epoch": 1.0386803185437998, "grad_norm": 1.1493211366430227, "learning_rate": 1.1216131363818859e-06, "loss": 0.0191, "step": 4565 }, { "epoch": 1.0389078498293516, "grad_norm": 0.9951641567260908, "learning_rate": 1.1215588809218038e-06, "loss": 0.013, "step": 4566 }, { "epoch": 1.0391353811149033, "grad_norm": 0.8580754292791186, "learning_rate": 1.1215046153129678e-06, "loss": 0.0183, "step": 4567 }, { "epoch": 1.039362912400455, "grad_norm": 1.4552256935681152, "learning_rate": 1.1214503395564866e-06, "loss": 0.0245, "step": 4568 }, { "epoch": 1.0395904436860068, "grad_norm": 0.6094461237074116, "learning_rate": 1.1213960536534698e-06, "loss": 0.0088, "step": 4569 }, { "epoch": 1.0398179749715586, "grad_norm": 1.6872520162524116, "learning_rate": 1.1213417576050267e-06, "loss": 0.0465, "step": 4570 }, { "epoch": 1.0400455062571103, "grad_norm": 0.8147199481078186, "learning_rate": 1.1212874514122669e-06, "loss": 0.0188, "step": 4571 }, { "epoch": 1.040273037542662, "grad_norm": 1.6710841073159661, "learning_rate": 1.1212331350763007e-06, "loss": 0.0183, "step": 4572 }, { "epoch": 1.0405005688282138, "grad_norm": 0.7521582291067465, "learning_rate": 1.1211788085982381e-06, "loss": 0.0141, "step": 4573 }, { "epoch": 1.0407281001137656, "grad_norm": 1.3851917572634034, "learning_rate": 1.1211244719791892e-06, "loss": 0.0306, "step": 4574 }, { "epoch": 1.0409556313993173, "grad_norm": 0.5793785303293703, "learning_rate": 1.1210701252202647e-06, "loss": 0.0106, "step": 4575 }, { "epoch": 1.041183162684869, "grad_norm": 1.0136644099615377, "learning_rate": 1.1210157683225753e-06, "loss": 0.0165, "step": 4576 }, { "epoch": 1.0414106939704209, "grad_norm": 0.45591615471037517, "learning_rate": 1.1209614012872323e-06, "loss": 0.0084, "step": 4577 }, { "epoch": 1.0416382252559726, "grad_norm": 1.3441880570081561, "learning_rate": 1.1209070241153462e-06, "loss": 0.031, "step": 4578 }, { "epoch": 1.0418657565415244, "grad_norm": 1.1464751988060768, "learning_rate": 1.1208526368080288e-06, "loss": 0.0332, "step": 4579 }, { "epoch": 1.0420932878270763, "grad_norm": 0.7884324251000338, "learning_rate": 1.120798239366392e-06, "loss": 0.0168, "step": 4580 }, { "epoch": 1.042320819112628, "grad_norm": 2.046969798862839, "learning_rate": 1.1207438317915468e-06, "loss": 0.0229, "step": 4581 }, { "epoch": 1.0425483503981798, "grad_norm": 0.6622639696340208, "learning_rate": 1.1206894140846055e-06, "loss": 0.0087, "step": 4582 }, { "epoch": 1.0427758816837316, "grad_norm": 0.7403190930069853, "learning_rate": 1.1206349862466807e-06, "loss": 0.015, "step": 4583 }, { "epoch": 1.0430034129692833, "grad_norm": 2.3626414035054175, "learning_rate": 1.1205805482788846e-06, "loss": 0.0417, "step": 4584 }, { "epoch": 1.043230944254835, "grad_norm": 2.471483368904896, "learning_rate": 1.1205261001823293e-06, "loss": 0.044, "step": 4585 }, { "epoch": 1.0434584755403868, "grad_norm": 2.416437888927223, "learning_rate": 1.1204716419581281e-06, "loss": 0.0098, "step": 4586 }, { "epoch": 1.0436860068259386, "grad_norm": 1.467076361857702, "learning_rate": 1.1204171736073942e-06, "loss": 0.0134, "step": 4587 }, { "epoch": 1.0439135381114903, "grad_norm": 1.1268621078307177, "learning_rate": 1.1203626951312405e-06, "loss": 0.0349, "step": 4588 }, { "epoch": 1.044141069397042, "grad_norm": 0.7695063155900477, "learning_rate": 1.1203082065307805e-06, "loss": 0.012, "step": 4589 }, { "epoch": 1.0443686006825939, "grad_norm": 1.446038392903303, "learning_rate": 1.1202537078071277e-06, "loss": 0.0547, "step": 4590 }, { "epoch": 1.0445961319681456, "grad_norm": 0.8969845928910839, "learning_rate": 1.1201991989613963e-06, "loss": 0.0111, "step": 4591 }, { "epoch": 1.0448236632536974, "grad_norm": 0.8115056392147382, "learning_rate": 1.1201446799947003e-06, "loss": 0.0092, "step": 4592 }, { "epoch": 1.045051194539249, "grad_norm": 1.2473917829189507, "learning_rate": 1.1200901509081537e-06, "loss": 0.0137, "step": 4593 }, { "epoch": 1.0452787258248009, "grad_norm": 1.0896608769495246, "learning_rate": 1.120035611702871e-06, "loss": 0.0209, "step": 4594 }, { "epoch": 1.0455062571103526, "grad_norm": 0.7977339624737255, "learning_rate": 1.1199810623799673e-06, "loss": 0.0176, "step": 4595 }, { "epoch": 1.0457337883959044, "grad_norm": 1.1634422015699433, "learning_rate": 1.119926502940557e-06, "loss": 0.0188, "step": 4596 }, { "epoch": 1.0459613196814561, "grad_norm": 1.092858194939433, "learning_rate": 1.1198719333857555e-06, "loss": 0.0312, "step": 4597 }, { "epoch": 1.0461888509670079, "grad_norm": 0.836130183985909, "learning_rate": 1.119817353716678e-06, "loss": 0.012, "step": 4598 }, { "epoch": 1.0464163822525596, "grad_norm": 0.767678738883129, "learning_rate": 1.11976276393444e-06, "loss": 0.0114, "step": 4599 }, { "epoch": 1.0466439135381116, "grad_norm": 1.1278576378072667, "learning_rate": 1.1197081640401572e-06, "loss": 0.0245, "step": 4600 }, { "epoch": 1.0468714448236633, "grad_norm": 1.3358445644446304, "learning_rate": 1.1196535540349453e-06, "loss": 0.0414, "step": 4601 }, { "epoch": 1.047098976109215, "grad_norm": 2.748783161026122, "learning_rate": 1.119598933919921e-06, "loss": 0.0374, "step": 4602 }, { "epoch": 1.0473265073947668, "grad_norm": 0.7346665523199191, "learning_rate": 1.1195443036962002e-06, "loss": 0.0137, "step": 4603 }, { "epoch": 1.0475540386803186, "grad_norm": 0.7926661565300546, "learning_rate": 1.1194896633648996e-06, "loss": 0.0234, "step": 4604 }, { "epoch": 1.0477815699658704, "grad_norm": 1.2266234047001268, "learning_rate": 1.1194350129271358e-06, "loss": 0.0218, "step": 4605 }, { "epoch": 1.048009101251422, "grad_norm": 1.1781905579948284, "learning_rate": 1.119380352384026e-06, "loss": 0.0298, "step": 4606 }, { "epoch": 1.0482366325369739, "grad_norm": 0.8103390645469172, "learning_rate": 1.1193256817366871e-06, "loss": 0.014, "step": 4607 }, { "epoch": 1.0484641638225256, "grad_norm": 1.2189569921919905, "learning_rate": 1.1192710009862365e-06, "loss": 0.0252, "step": 4608 }, { "epoch": 1.0486916951080774, "grad_norm": 0.6581776967473145, "learning_rate": 1.1192163101337921e-06, "loss": 0.0129, "step": 4609 }, { "epoch": 1.0489192263936291, "grad_norm": 0.7853755589384823, "learning_rate": 1.1191616091804712e-06, "loss": 0.0346, "step": 4610 }, { "epoch": 1.0491467576791809, "grad_norm": 0.48263244857420745, "learning_rate": 1.1191068981273919e-06, "loss": 0.0113, "step": 4611 }, { "epoch": 1.0493742889647326, "grad_norm": 0.9545125321950874, "learning_rate": 1.1190521769756729e-06, "loss": 0.0254, "step": 4612 }, { "epoch": 1.0496018202502844, "grad_norm": 1.247134305539268, "learning_rate": 1.118997445726432e-06, "loss": 0.0413, "step": 4613 }, { "epoch": 1.0498293515358361, "grad_norm": 1.1502298928735868, "learning_rate": 1.118942704380788e-06, "loss": 0.0137, "step": 4614 }, { "epoch": 1.0500568828213879, "grad_norm": 1.6594682241469216, "learning_rate": 1.11888795293986e-06, "loss": 0.0326, "step": 4615 }, { "epoch": 1.0502844141069396, "grad_norm": 0.7549275144222661, "learning_rate": 1.1188331914047666e-06, "loss": 0.0138, "step": 4616 }, { "epoch": 1.0505119453924914, "grad_norm": 0.8321159003301328, "learning_rate": 1.1187784197766269e-06, "loss": 0.0184, "step": 4617 }, { "epoch": 1.0507394766780431, "grad_norm": 1.209949785555879, "learning_rate": 1.1187236380565608e-06, "loss": 0.0251, "step": 4618 }, { "epoch": 1.050967007963595, "grad_norm": 0.7955918295120653, "learning_rate": 1.1186688462456879e-06, "loss": 0.0135, "step": 4619 }, { "epoch": 1.0511945392491469, "grad_norm": 1.1615893854076145, "learning_rate": 1.118614044345128e-06, "loss": 0.0282, "step": 4620 }, { "epoch": 1.0514220705346986, "grad_norm": 1.273989641669821, "learning_rate": 1.1185592323560006e-06, "loss": 0.038, "step": 4621 }, { "epoch": 1.0516496018202504, "grad_norm": 1.0165071618076773, "learning_rate": 1.1185044102794267e-06, "loss": 0.0328, "step": 4622 }, { "epoch": 1.051877133105802, "grad_norm": 1.1328617777044239, "learning_rate": 1.1184495781165263e-06, "loss": 0.0274, "step": 4623 }, { "epoch": 1.0521046643913539, "grad_norm": 1.1825756024120404, "learning_rate": 1.1183947358684203e-06, "loss": 0.0276, "step": 4624 }, { "epoch": 1.0523321956769056, "grad_norm": 0.517945392477519, "learning_rate": 1.1183398835362298e-06, "loss": 0.0174, "step": 4625 }, { "epoch": 1.0525597269624574, "grad_norm": 0.5843868560216139, "learning_rate": 1.1182850211210752e-06, "loss": 0.0161, "step": 4626 }, { "epoch": 1.0527872582480091, "grad_norm": 1.2595121663614455, "learning_rate": 1.1182301486240782e-06, "loss": 0.0359, "step": 4627 }, { "epoch": 1.0530147895335609, "grad_norm": 1.0715185413956108, "learning_rate": 1.1181752660463604e-06, "loss": 0.0214, "step": 4628 }, { "epoch": 1.0532423208191126, "grad_norm": 0.7795449522978944, "learning_rate": 1.1181203733890433e-06, "loss": 0.0177, "step": 4629 }, { "epoch": 1.0534698521046644, "grad_norm": 0.9800972489749913, "learning_rate": 1.118065470653249e-06, "loss": 0.0347, "step": 4630 }, { "epoch": 1.0536973833902161, "grad_norm": 1.0970470234975969, "learning_rate": 1.1180105578400993e-06, "loss": 0.0433, "step": 4631 }, { "epoch": 1.0539249146757679, "grad_norm": 1.0800910071281984, "learning_rate": 1.117955634950717e-06, "loss": 0.0227, "step": 4632 }, { "epoch": 1.0541524459613196, "grad_norm": 1.3530664066926406, "learning_rate": 1.117900701986224e-06, "loss": 0.0343, "step": 4633 }, { "epoch": 1.0543799772468714, "grad_norm": 1.4133875078213476, "learning_rate": 1.1178457589477434e-06, "loss": 0.027, "step": 4634 }, { "epoch": 1.0546075085324231, "grad_norm": 0.8440744418991889, "learning_rate": 1.1177908058363984e-06, "loss": 0.0098, "step": 4635 }, { "epoch": 1.0548350398179749, "grad_norm": 1.092273870951362, "learning_rate": 1.1177358426533115e-06, "loss": 0.02, "step": 4636 }, { "epoch": 1.0550625711035266, "grad_norm": 0.9187729071178374, "learning_rate": 1.1176808693996067e-06, "loss": 0.0183, "step": 4637 }, { "epoch": 1.0552901023890784, "grad_norm": 1.1090047987424088, "learning_rate": 1.117625886076407e-06, "loss": 0.0193, "step": 4638 }, { "epoch": 1.0555176336746304, "grad_norm": 1.0008875695337944, "learning_rate": 1.1175708926848363e-06, "loss": 0.0189, "step": 4639 }, { "epoch": 1.0557451649601821, "grad_norm": 2.724829300569884, "learning_rate": 1.1175158892260187e-06, "loss": 0.0362, "step": 4640 }, { "epoch": 1.0559726962457339, "grad_norm": 1.0478197516778056, "learning_rate": 1.1174608757010785e-06, "loss": 0.0152, "step": 4641 }, { "epoch": 1.0562002275312856, "grad_norm": 1.590272661414187, "learning_rate": 1.1174058521111398e-06, "loss": 0.0436, "step": 4642 }, { "epoch": 1.0564277588168374, "grad_norm": 0.8472430737385992, "learning_rate": 1.1173508184573273e-06, "loss": 0.0138, "step": 4643 }, { "epoch": 1.0566552901023891, "grad_norm": 0.8371060858404447, "learning_rate": 1.1172957747407657e-06, "loss": 0.0157, "step": 4644 }, { "epoch": 1.0568828213879409, "grad_norm": 0.9592085999503343, "learning_rate": 1.11724072096258e-06, "loss": 0.0323, "step": 4645 }, { "epoch": 1.0571103526734926, "grad_norm": 1.0524749179147639, "learning_rate": 1.1171856571238958e-06, "loss": 0.0296, "step": 4646 }, { "epoch": 1.0573378839590444, "grad_norm": 0.8834933966797389, "learning_rate": 1.1171305832258378e-06, "loss": 0.0218, "step": 4647 }, { "epoch": 1.0575654152445961, "grad_norm": 0.7743496062659715, "learning_rate": 1.117075499269532e-06, "loss": 0.0183, "step": 4648 }, { "epoch": 1.0577929465301479, "grad_norm": 1.0385592346274115, "learning_rate": 1.1170204052561045e-06, "loss": 0.0205, "step": 4649 }, { "epoch": 1.0580204778156996, "grad_norm": 0.905517805098956, "learning_rate": 1.1169653011866806e-06, "loss": 0.0274, "step": 4650 }, { "epoch": 1.0582480091012514, "grad_norm": 0.7908819622644908, "learning_rate": 1.1169101870623872e-06, "loss": 0.0137, "step": 4651 }, { "epoch": 1.0584755403868031, "grad_norm": 1.061810097297376, "learning_rate": 1.1168550628843506e-06, "loss": 0.0206, "step": 4652 }, { "epoch": 1.058703071672355, "grad_norm": 1.6341823289625474, "learning_rate": 1.116799928653697e-06, "loss": 0.0258, "step": 4653 }, { "epoch": 1.0589306029579066, "grad_norm": 1.4294137058728704, "learning_rate": 1.1167447843715536e-06, "loss": 0.0415, "step": 4654 }, { "epoch": 1.0591581342434584, "grad_norm": 1.9352681748212788, "learning_rate": 1.1166896300390475e-06, "loss": 0.0398, "step": 4655 }, { "epoch": 1.0593856655290101, "grad_norm": 0.623531295073113, "learning_rate": 1.1166344656573058e-06, "loss": 0.0096, "step": 4656 }, { "epoch": 1.059613196814562, "grad_norm": 0.7942878422407874, "learning_rate": 1.116579291227456e-06, "loss": 0.0246, "step": 4657 }, { "epoch": 1.0598407281001139, "grad_norm": 1.2220975624229167, "learning_rate": 1.1165241067506258e-06, "loss": 0.0361, "step": 4658 }, { "epoch": 1.0600682593856656, "grad_norm": 1.1286970072887417, "learning_rate": 1.116468912227943e-06, "loss": 0.0263, "step": 4659 }, { "epoch": 1.0602957906712174, "grad_norm": 1.030142424103505, "learning_rate": 1.1164137076605359e-06, "loss": 0.0179, "step": 4660 }, { "epoch": 1.0605233219567691, "grad_norm": 1.6790275883871597, "learning_rate": 1.1163584930495323e-06, "loss": 0.0422, "step": 4661 }, { "epoch": 1.0607508532423209, "grad_norm": 1.3814052631606326, "learning_rate": 1.1163032683960612e-06, "loss": 0.0209, "step": 4662 }, { "epoch": 1.0609783845278726, "grad_norm": 1.225549316988825, "learning_rate": 1.116248033701251e-06, "loss": 0.0271, "step": 4663 }, { "epoch": 1.0612059158134244, "grad_norm": 0.7598712432018329, "learning_rate": 1.1161927889662307e-06, "loss": 0.009, "step": 4664 }, { "epoch": 1.0614334470989761, "grad_norm": 0.940084790434426, "learning_rate": 1.1161375341921293e-06, "loss": 0.0248, "step": 4665 }, { "epoch": 1.0616609783845279, "grad_norm": 0.5873146342311836, "learning_rate": 1.1160822693800761e-06, "loss": 0.0078, "step": 4666 }, { "epoch": 1.0618885096700796, "grad_norm": 1.4244470289207833, "learning_rate": 1.116026994531201e-06, "loss": 0.0248, "step": 4667 }, { "epoch": 1.0621160409556314, "grad_norm": 1.0520117462705862, "learning_rate": 1.1159717096466332e-06, "loss": 0.0302, "step": 4668 }, { "epoch": 1.0623435722411831, "grad_norm": 1.062118932659138, "learning_rate": 1.1159164147275026e-06, "loss": 0.022, "step": 4669 }, { "epoch": 1.062571103526735, "grad_norm": 1.1122338472373463, "learning_rate": 1.11586110977494e-06, "loss": 0.0164, "step": 4670 }, { "epoch": 1.0627986348122866, "grad_norm": 0.8000529898204216, "learning_rate": 1.1158057947900749e-06, "loss": 0.0217, "step": 4671 }, { "epoch": 1.0630261660978384, "grad_norm": 0.6219194713042089, "learning_rate": 1.1157504697740384e-06, "loss": 0.0138, "step": 4672 }, { "epoch": 1.0632536973833902, "grad_norm": 1.3392201881028478, "learning_rate": 1.1156951347279612e-06, "loss": 0.028, "step": 4673 }, { "epoch": 1.063481228668942, "grad_norm": 1.7333136164138454, "learning_rate": 1.1156397896529739e-06, "loss": 0.05, "step": 4674 }, { "epoch": 1.0637087599544937, "grad_norm": 1.5262332959624183, "learning_rate": 1.1155844345502079e-06, "loss": 0.0312, "step": 4675 }, { "epoch": 1.0639362912400454, "grad_norm": 0.9932229349703479, "learning_rate": 1.1155290694207946e-06, "loss": 0.0226, "step": 4676 }, { "epoch": 1.0641638225255972, "grad_norm": 1.2628573637308051, "learning_rate": 1.1154736942658655e-06, "loss": 0.0214, "step": 4677 }, { "epoch": 1.0643913538111491, "grad_norm": 1.4330647372008305, "learning_rate": 1.1154183090865523e-06, "loss": 0.0256, "step": 4678 }, { "epoch": 1.0646188850967009, "grad_norm": 1.042200851154514, "learning_rate": 1.1153629138839869e-06, "loss": 0.0293, "step": 4679 }, { "epoch": 1.0648464163822526, "grad_norm": 0.9462778481987262, "learning_rate": 1.115307508659302e-06, "loss": 0.0245, "step": 4680 }, { "epoch": 1.0650739476678044, "grad_norm": 1.6564513386390822, "learning_rate": 1.115252093413629e-06, "loss": 0.0496, "step": 4681 }, { "epoch": 1.0653014789533561, "grad_norm": 1.0868647502168836, "learning_rate": 1.1151966681481013e-06, "loss": 0.0113, "step": 4682 }, { "epoch": 1.065529010238908, "grad_norm": 1.115171642211064, "learning_rate": 1.1151412328638516e-06, "loss": 0.0224, "step": 4683 }, { "epoch": 1.0657565415244596, "grad_norm": 0.5398893154965461, "learning_rate": 1.1150857875620129e-06, "loss": 0.0139, "step": 4684 }, { "epoch": 1.0659840728100114, "grad_norm": 1.0871499987134439, "learning_rate": 1.1150303322437179e-06, "loss": 0.0134, "step": 4685 }, { "epoch": 1.0662116040955631, "grad_norm": 1.1070037508570774, "learning_rate": 1.1149748669101005e-06, "loss": 0.0221, "step": 4686 }, { "epoch": 1.066439135381115, "grad_norm": 0.9425411653474299, "learning_rate": 1.1149193915622942e-06, "loss": 0.0398, "step": 4687 }, { "epoch": 1.0666666666666667, "grad_norm": 1.1772890828333267, "learning_rate": 1.1148639062014325e-06, "loss": 0.0211, "step": 4688 }, { "epoch": 1.0668941979522184, "grad_norm": 1.2557238706849523, "learning_rate": 1.11480841082865e-06, "loss": 0.0301, "step": 4689 }, { "epoch": 1.0671217292377702, "grad_norm": 0.7994780640965474, "learning_rate": 1.1147529054450805e-06, "loss": 0.0201, "step": 4690 }, { "epoch": 1.067349260523322, "grad_norm": 1.1399734758992979, "learning_rate": 1.1146973900518587e-06, "loss": 0.0293, "step": 4691 }, { "epoch": 1.0675767918088737, "grad_norm": 1.3046065395944408, "learning_rate": 1.1146418646501189e-06, "loss": 0.0286, "step": 4692 }, { "epoch": 1.0678043230944254, "grad_norm": 0.8748609055579856, "learning_rate": 1.114586329240996e-06, "loss": 0.0211, "step": 4693 }, { "epoch": 1.0680318543799772, "grad_norm": 0.8739676230672317, "learning_rate": 1.1145307838256255e-06, "loss": 0.011, "step": 4694 }, { "epoch": 1.068259385665529, "grad_norm": 1.249091385352741, "learning_rate": 1.1144752284051422e-06, "loss": 0.0301, "step": 4695 }, { "epoch": 1.068486916951081, "grad_norm": 1.562897636481294, "learning_rate": 1.1144196629806817e-06, "loss": 0.0501, "step": 4696 }, { "epoch": 1.0687144482366326, "grad_norm": 0.9143311380884951, "learning_rate": 1.1143640875533795e-06, "loss": 0.0123, "step": 4697 }, { "epoch": 1.0689419795221844, "grad_norm": 1.6200825529791434, "learning_rate": 1.1143085021243717e-06, "loss": 0.0225, "step": 4698 }, { "epoch": 1.0691695108077361, "grad_norm": 1.177872950810007, "learning_rate": 1.1142529066947941e-06, "loss": 0.0346, "step": 4699 }, { "epoch": 1.069397042093288, "grad_norm": 1.108013166594474, "learning_rate": 1.1141973012657834e-06, "loss": 0.0267, "step": 4700 }, { "epoch": 1.0696245733788396, "grad_norm": 1.0112425933651041, "learning_rate": 1.1141416858384753e-06, "loss": 0.0311, "step": 4701 }, { "epoch": 1.0698521046643914, "grad_norm": 1.095144486124691, "learning_rate": 1.1140860604140076e-06, "loss": 0.0286, "step": 4702 }, { "epoch": 1.0700796359499432, "grad_norm": 0.806336687142019, "learning_rate": 1.114030424993516e-06, "loss": 0.0142, "step": 4703 }, { "epoch": 1.070307167235495, "grad_norm": 0.7834952051331864, "learning_rate": 1.1139747795781382e-06, "loss": 0.0101, "step": 4704 }, { "epoch": 1.0705346985210467, "grad_norm": 0.7472560755343798, "learning_rate": 1.1139191241690116e-06, "loss": 0.0139, "step": 4705 }, { "epoch": 1.0707622298065984, "grad_norm": 0.5618125746941464, "learning_rate": 1.1138634587672734e-06, "loss": 0.0075, "step": 4706 }, { "epoch": 1.0709897610921502, "grad_norm": 0.7202976835755431, "learning_rate": 1.1138077833740616e-06, "loss": 0.0207, "step": 4707 }, { "epoch": 1.071217292377702, "grad_norm": 0.7307887063589182, "learning_rate": 1.1137520979905138e-06, "loss": 0.0223, "step": 4708 }, { "epoch": 1.0714448236632537, "grad_norm": 1.1867538184942938, "learning_rate": 1.1136964026177683e-06, "loss": 0.0288, "step": 4709 }, { "epoch": 1.0716723549488054, "grad_norm": 1.2840239837956307, "learning_rate": 1.113640697256963e-06, "loss": 0.0184, "step": 4710 }, { "epoch": 1.0718998862343572, "grad_norm": 1.1345754398737768, "learning_rate": 1.113584981909237e-06, "loss": 0.0238, "step": 4711 }, { "epoch": 1.072127417519909, "grad_norm": 0.9685288187674116, "learning_rate": 1.1135292565757288e-06, "loss": 0.0227, "step": 4712 }, { "epoch": 1.0723549488054607, "grad_norm": 0.7081273094248537, "learning_rate": 1.1134735212575772e-06, "loss": 0.0102, "step": 4713 }, { "epoch": 1.0725824800910124, "grad_norm": 0.9192931039025396, "learning_rate": 1.1134177759559216e-06, "loss": 0.0244, "step": 4714 }, { "epoch": 1.0728100113765642, "grad_norm": 0.9284668402003036, "learning_rate": 1.1133620206719011e-06, "loss": 0.0243, "step": 4715 }, { "epoch": 1.073037542662116, "grad_norm": 0.9115758932784177, "learning_rate": 1.1133062554066551e-06, "loss": 0.0205, "step": 4716 }, { "epoch": 1.073265073947668, "grad_norm": 0.8734771500689966, "learning_rate": 1.1132504801613237e-06, "loss": 0.0213, "step": 4717 }, { "epoch": 1.0734926052332197, "grad_norm": 1.126626337283734, "learning_rate": 1.1131946949370467e-06, "loss": 0.0327, "step": 4718 }, { "epoch": 1.0737201365187714, "grad_norm": 0.65003673022336, "learning_rate": 1.113138899734964e-06, "loss": 0.0162, "step": 4719 }, { "epoch": 1.0739476678043232, "grad_norm": 1.6524784863925304, "learning_rate": 1.1130830945562165e-06, "loss": 0.046, "step": 4720 }, { "epoch": 1.074175199089875, "grad_norm": 0.9704754115890377, "learning_rate": 1.1130272794019442e-06, "loss": 0.0165, "step": 4721 }, { "epoch": 1.0744027303754267, "grad_norm": 0.841189725514391, "learning_rate": 1.1129714542732882e-06, "loss": 0.0209, "step": 4722 }, { "epoch": 1.0746302616609784, "grad_norm": 0.6874778292132357, "learning_rate": 1.1129156191713893e-06, "loss": 0.0121, "step": 4723 }, { "epoch": 1.0748577929465302, "grad_norm": 0.9632218181611477, "learning_rate": 1.1128597740973886e-06, "loss": 0.0388, "step": 4724 }, { "epoch": 1.075085324232082, "grad_norm": 0.7703489214733116, "learning_rate": 1.1128039190524278e-06, "loss": 0.0059, "step": 4725 }, { "epoch": 1.0753128555176337, "grad_norm": 1.9572066317541847, "learning_rate": 1.112748054037648e-06, "loss": 0.0262, "step": 4726 }, { "epoch": 1.0755403868031854, "grad_norm": 1.0824319203641397, "learning_rate": 1.1126921790541915e-06, "loss": 0.0149, "step": 4727 }, { "epoch": 1.0757679180887372, "grad_norm": 0.670440042251469, "learning_rate": 1.1126362941032e-06, "loss": 0.0063, "step": 4728 }, { "epoch": 1.075995449374289, "grad_norm": 0.7800421077003349, "learning_rate": 1.1125803991858156e-06, "loss": 0.0277, "step": 4729 }, { "epoch": 1.0762229806598407, "grad_norm": 1.106352016812619, "learning_rate": 1.1125244943031809e-06, "loss": 0.0123, "step": 4730 }, { "epoch": 1.0764505119453924, "grad_norm": 1.1161991172358912, "learning_rate": 1.1124685794564383e-06, "loss": 0.026, "step": 4731 }, { "epoch": 1.0766780432309442, "grad_norm": 1.7881963402264416, "learning_rate": 1.1124126546467307e-06, "loss": 0.0385, "step": 4732 }, { "epoch": 1.076905574516496, "grad_norm": 1.2868577345652183, "learning_rate": 1.1123567198752012e-06, "loss": 0.0273, "step": 4733 }, { "epoch": 1.0771331058020477, "grad_norm": 2.0351098504278085, "learning_rate": 1.1123007751429928e-06, "loss": 0.0568, "step": 4734 }, { "epoch": 1.0773606370875997, "grad_norm": 0.9002868646294088, "learning_rate": 1.1122448204512493e-06, "loss": 0.0253, "step": 4735 }, { "epoch": 1.0775881683731514, "grad_norm": 0.9063013425685674, "learning_rate": 1.1121888558011136e-06, "loss": 0.0117, "step": 4736 }, { "epoch": 1.0778156996587032, "grad_norm": 0.5859089097561659, "learning_rate": 1.11213288119373e-06, "loss": 0.0074, "step": 4737 }, { "epoch": 1.078043230944255, "grad_norm": 1.3479752927519844, "learning_rate": 1.1120768966302424e-06, "loss": 0.0249, "step": 4738 }, { "epoch": 1.0782707622298067, "grad_norm": 0.7352263154143484, "learning_rate": 1.1120209021117953e-06, "loss": 0.0173, "step": 4739 }, { "epoch": 1.0784982935153584, "grad_norm": 0.7374112860235839, "learning_rate": 1.111964897639533e-06, "loss": 0.0174, "step": 4740 }, { "epoch": 1.0787258248009102, "grad_norm": 1.287591128879902, "learning_rate": 1.1119088832145999e-06, "loss": 0.0326, "step": 4741 }, { "epoch": 1.078953356086462, "grad_norm": 1.0787165555211229, "learning_rate": 1.1118528588381408e-06, "loss": 0.0192, "step": 4742 }, { "epoch": 1.0791808873720137, "grad_norm": 1.0815099906932875, "learning_rate": 1.111796824511301e-06, "loss": 0.0216, "step": 4743 }, { "epoch": 1.0794084186575654, "grad_norm": 0.8664538220627443, "learning_rate": 1.1117407802352257e-06, "loss": 0.0127, "step": 4744 }, { "epoch": 1.0796359499431172, "grad_norm": 1.33972724465675, "learning_rate": 1.11168472601106e-06, "loss": 0.0335, "step": 4745 }, { "epoch": 1.079863481228669, "grad_norm": 1.4666132789919955, "learning_rate": 1.1116286618399502e-06, "loss": 0.0457, "step": 4746 }, { "epoch": 1.0800910125142207, "grad_norm": 0.6790281378613789, "learning_rate": 1.1115725877230416e-06, "loss": 0.0102, "step": 4747 }, { "epoch": 1.0803185437997724, "grad_norm": 0.9620611501361773, "learning_rate": 1.1115165036614803e-06, "loss": 0.0261, "step": 4748 }, { "epoch": 1.0805460750853242, "grad_norm": 1.1237272287467874, "learning_rate": 1.1114604096564128e-06, "loss": 0.0316, "step": 4749 }, { "epoch": 1.080773606370876, "grad_norm": 1.0088779610949172, "learning_rate": 1.1114043057089855e-06, "loss": 0.0225, "step": 4750 }, { "epoch": 1.0810011376564277, "grad_norm": 1.497609697625705, "learning_rate": 1.1113481918203447e-06, "loss": 0.0442, "step": 4751 }, { "epoch": 1.0812286689419794, "grad_norm": 0.8914252374944144, "learning_rate": 1.111292067991638e-06, "loss": 0.0165, "step": 4752 }, { "epoch": 1.0814562002275312, "grad_norm": 1.276532043770101, "learning_rate": 1.1112359342240118e-06, "loss": 0.0387, "step": 4753 }, { "epoch": 1.081683731513083, "grad_norm": 0.9077763034087386, "learning_rate": 1.1111797905186137e-06, "loss": 0.0141, "step": 4754 }, { "epoch": 1.0819112627986347, "grad_norm": 1.6466072809663417, "learning_rate": 1.111123636876591e-06, "loss": 0.0349, "step": 4755 }, { "epoch": 1.0821387940841867, "grad_norm": 0.9430919880459814, "learning_rate": 1.1110674732990915e-06, "loss": 0.016, "step": 4756 }, { "epoch": 1.0823663253697384, "grad_norm": 1.6404185778699842, "learning_rate": 1.1110112997872627e-06, "loss": 0.0309, "step": 4757 }, { "epoch": 1.0825938566552902, "grad_norm": 0.9479110834953683, "learning_rate": 1.1109551163422535e-06, "loss": 0.0189, "step": 4758 }, { "epoch": 1.082821387940842, "grad_norm": 0.9870118719493604, "learning_rate": 1.1108989229652115e-06, "loss": 0.0221, "step": 4759 }, { "epoch": 1.0830489192263937, "grad_norm": 1.7588111531610175, "learning_rate": 1.1108427196572854e-06, "loss": 0.0327, "step": 4760 }, { "epoch": 1.0832764505119454, "grad_norm": 0.9021346736655393, "learning_rate": 1.110786506419624e-06, "loss": 0.036, "step": 4761 }, { "epoch": 1.0835039817974972, "grad_norm": 0.8266880922651803, "learning_rate": 1.110730283253376e-06, "loss": 0.0279, "step": 4762 }, { "epoch": 1.083731513083049, "grad_norm": 0.8011724413374426, "learning_rate": 1.1106740501596904e-06, "loss": 0.0195, "step": 4763 }, { "epoch": 1.0839590443686007, "grad_norm": 0.9073688021992705, "learning_rate": 1.110617807139717e-06, "loss": 0.023, "step": 4764 }, { "epoch": 1.0841865756541524, "grad_norm": 1.022169451105952, "learning_rate": 1.1105615541946049e-06, "loss": 0.0209, "step": 4765 }, { "epoch": 1.0844141069397042, "grad_norm": 0.43405837742396564, "learning_rate": 1.1105052913255038e-06, "loss": 0.0044, "step": 4766 }, { "epoch": 1.084641638225256, "grad_norm": 1.383667776402452, "learning_rate": 1.1104490185335638e-06, "loss": 0.0287, "step": 4767 }, { "epoch": 1.0848691695108077, "grad_norm": 0.9054919782471123, "learning_rate": 1.1103927358199349e-06, "loss": 0.0191, "step": 4768 }, { "epoch": 1.0850967007963594, "grad_norm": 1.2358115777885788, "learning_rate": 1.1103364431857672e-06, "loss": 0.0311, "step": 4769 }, { "epoch": 1.0853242320819112, "grad_norm": 0.7048379529354732, "learning_rate": 1.1102801406322118e-06, "loss": 0.0104, "step": 4770 }, { "epoch": 1.085551763367463, "grad_norm": 1.1457330667513694, "learning_rate": 1.110223828160419e-06, "loss": 0.0212, "step": 4771 }, { "epoch": 1.0857792946530147, "grad_norm": 0.7058751162839924, "learning_rate": 1.1101675057715396e-06, "loss": 0.0092, "step": 4772 }, { "epoch": 1.0860068259385665, "grad_norm": 1.1868874800053821, "learning_rate": 1.110111173466725e-06, "loss": 0.0416, "step": 4773 }, { "epoch": 1.0862343572241184, "grad_norm": 1.1919649645556245, "learning_rate": 1.1100548312471266e-06, "loss": 0.0287, "step": 4774 }, { "epoch": 1.0864618885096702, "grad_norm": 1.0824756647963032, "learning_rate": 1.1099984791138957e-06, "loss": 0.0152, "step": 4775 }, { "epoch": 1.086689419795222, "grad_norm": 1.4691023279738935, "learning_rate": 1.109942117068184e-06, "loss": 0.0201, "step": 4776 }, { "epoch": 1.0869169510807737, "grad_norm": 0.8472040472970495, "learning_rate": 1.1098857451111437e-06, "loss": 0.0142, "step": 4777 }, { "epoch": 1.0871444823663254, "grad_norm": 0.8480336544082094, "learning_rate": 1.1098293632439267e-06, "loss": 0.009, "step": 4778 }, { "epoch": 1.0873720136518772, "grad_norm": 1.1145190027001157, "learning_rate": 1.1097729714676855e-06, "loss": 0.0143, "step": 4779 }, { "epoch": 1.087599544937429, "grad_norm": 1.04224113160413, "learning_rate": 1.1097165697835726e-06, "loss": 0.024, "step": 4780 }, { "epoch": 1.0878270762229807, "grad_norm": 1.0620838647799002, "learning_rate": 1.1096601581927407e-06, "loss": 0.0178, "step": 4781 }, { "epoch": 1.0880546075085324, "grad_norm": 4.011373250629747, "learning_rate": 1.109603736696343e-06, "loss": 0.1504, "step": 4782 }, { "epoch": 1.0882821387940842, "grad_norm": 1.5174512485770184, "learning_rate": 1.1095473052955322e-06, "loss": 0.0405, "step": 4783 }, { "epoch": 1.088509670079636, "grad_norm": 0.884113509373173, "learning_rate": 1.1094908639914617e-06, "loss": 0.0224, "step": 4784 }, { "epoch": 1.0887372013651877, "grad_norm": 1.3619635032370507, "learning_rate": 1.1094344127852855e-06, "loss": 0.037, "step": 4785 }, { "epoch": 1.0889647326507395, "grad_norm": 1.9047416105199788, "learning_rate": 1.1093779516781571e-06, "loss": 0.0361, "step": 4786 }, { "epoch": 1.0891922639362912, "grad_norm": 1.1728707520462738, "learning_rate": 1.1093214806712305e-06, "loss": 0.0389, "step": 4787 }, { "epoch": 1.089419795221843, "grad_norm": 1.3254160466529121, "learning_rate": 1.1092649997656597e-06, "loss": 0.0232, "step": 4788 }, { "epoch": 1.0896473265073947, "grad_norm": 1.0725037617108157, "learning_rate": 1.1092085089625992e-06, "loss": 0.0164, "step": 4789 }, { "epoch": 1.0898748577929465, "grad_norm": 0.9758736007788956, "learning_rate": 1.1091520082632037e-06, "loss": 0.0218, "step": 4790 }, { "epoch": 1.0901023890784982, "grad_norm": 0.8477305521502946, "learning_rate": 1.1090954976686277e-06, "loss": 0.0245, "step": 4791 }, { "epoch": 1.09032992036405, "grad_norm": 0.9962558192273867, "learning_rate": 1.1090389771800264e-06, "loss": 0.0287, "step": 4792 }, { "epoch": 1.0905574516496017, "grad_norm": 1.2473669308596411, "learning_rate": 1.1089824467985549e-06, "loss": 0.0229, "step": 4793 }, { "epoch": 1.0907849829351535, "grad_norm": 2.510749402051243, "learning_rate": 1.1089259065253684e-06, "loss": 0.0259, "step": 4794 }, { "epoch": 1.0910125142207054, "grad_norm": 1.149992705234752, "learning_rate": 1.1088693563616226e-06, "loss": 0.0247, "step": 4795 }, { "epoch": 1.0912400455062572, "grad_norm": 0.5940460865164179, "learning_rate": 1.1088127963084736e-06, "loss": 0.0171, "step": 4796 }, { "epoch": 1.091467576791809, "grad_norm": 0.9164786766368149, "learning_rate": 1.108756226367077e-06, "loss": 0.0317, "step": 4797 }, { "epoch": 1.0916951080773607, "grad_norm": 1.1244091740172437, "learning_rate": 1.108699646538589e-06, "loss": 0.0255, "step": 4798 }, { "epoch": 1.0919226393629125, "grad_norm": 1.437903004135215, "learning_rate": 1.108643056824166e-06, "loss": 0.0464, "step": 4799 }, { "epoch": 1.0921501706484642, "grad_norm": 0.9254577109068766, "learning_rate": 1.108586457224965e-06, "loss": 0.0194, "step": 4800 }, { "epoch": 1.092377701934016, "grad_norm": 1.1931575439362083, "learning_rate": 1.1085298477421421e-06, "loss": 0.015, "step": 4801 }, { "epoch": 1.0926052332195677, "grad_norm": 0.6083574023703965, "learning_rate": 1.1084732283768548e-06, "loss": 0.0057, "step": 4802 }, { "epoch": 1.0928327645051195, "grad_norm": 0.8597843041320873, "learning_rate": 1.1084165991302601e-06, "loss": 0.0117, "step": 4803 }, { "epoch": 1.0930602957906712, "grad_norm": 1.4390855321709966, "learning_rate": 1.1083599600035155e-06, "loss": 0.0712, "step": 4804 }, { "epoch": 1.093287827076223, "grad_norm": 0.9189557088411734, "learning_rate": 1.1083033109977787e-06, "loss": 0.0248, "step": 4805 }, { "epoch": 1.0935153583617747, "grad_norm": 0.7847663302953185, "learning_rate": 1.1082466521142072e-06, "loss": 0.0138, "step": 4806 }, { "epoch": 1.0937428896473265, "grad_norm": 1.0427794259157794, "learning_rate": 1.1081899833539592e-06, "loss": 0.022, "step": 4807 }, { "epoch": 1.0939704209328782, "grad_norm": 0.7963208995055476, "learning_rate": 1.1081333047181928e-06, "loss": 0.0148, "step": 4808 }, { "epoch": 1.09419795221843, "grad_norm": 1.2861710540407572, "learning_rate": 1.1080766162080664e-06, "loss": 0.044, "step": 4809 }, { "epoch": 1.0944254835039817, "grad_norm": 1.0606108464391806, "learning_rate": 1.1080199178247388e-06, "loss": 0.0227, "step": 4810 }, { "epoch": 1.0946530147895335, "grad_norm": 1.0178256141989168, "learning_rate": 1.1079632095693688e-06, "loss": 0.0217, "step": 4811 }, { "epoch": 1.0948805460750852, "grad_norm": 0.3733490330289167, "learning_rate": 1.107906491443115e-06, "loss": 0.0041, "step": 4812 }, { "epoch": 1.0951080773606372, "grad_norm": 0.970660447630168, "learning_rate": 1.1078497634471373e-06, "loss": 0.0191, "step": 4813 }, { "epoch": 1.095335608646189, "grad_norm": 0.9571317568580685, "learning_rate": 1.1077930255825944e-06, "loss": 0.0146, "step": 4814 }, { "epoch": 1.0955631399317407, "grad_norm": 1.4227698048199817, "learning_rate": 1.1077362778506464e-06, "loss": 0.0259, "step": 4815 }, { "epoch": 1.0957906712172925, "grad_norm": 0.7937432954740787, "learning_rate": 1.107679520252453e-06, "loss": 0.0215, "step": 4816 }, { "epoch": 1.0960182025028442, "grad_norm": 1.2433291885534135, "learning_rate": 1.107622752789174e-06, "loss": 0.0271, "step": 4817 }, { "epoch": 1.096245733788396, "grad_norm": 0.5775769553380794, "learning_rate": 1.10756597546197e-06, "loss": 0.0067, "step": 4818 }, { "epoch": 1.0964732650739477, "grad_norm": 0.43052018257915775, "learning_rate": 1.1075091882720012e-06, "loss": 0.0047, "step": 4819 }, { "epoch": 1.0967007963594995, "grad_norm": 0.6965681550864847, "learning_rate": 1.1074523912204282e-06, "loss": 0.0069, "step": 4820 }, { "epoch": 1.0969283276450512, "grad_norm": 1.950972202735377, "learning_rate": 1.107395584308412e-06, "loss": 0.0384, "step": 4821 }, { "epoch": 1.097155858930603, "grad_norm": 1.2755184486485458, "learning_rate": 1.1073387675371134e-06, "loss": 0.021, "step": 4822 }, { "epoch": 1.0973833902161547, "grad_norm": 0.9195102681671121, "learning_rate": 1.1072819409076937e-06, "loss": 0.0233, "step": 4823 }, { "epoch": 1.0976109215017065, "grad_norm": 1.4467096094976202, "learning_rate": 1.1072251044213146e-06, "loss": 0.063, "step": 4824 }, { "epoch": 1.0978384527872582, "grad_norm": 0.9276103854462384, "learning_rate": 1.1071682580791375e-06, "loss": 0.0186, "step": 4825 }, { "epoch": 1.09806598407281, "grad_norm": 0.6788301923178457, "learning_rate": 1.107111401882324e-06, "loss": 0.0178, "step": 4826 }, { "epoch": 1.0982935153583617, "grad_norm": 0.8922254579510575, "learning_rate": 1.1070545358320367e-06, "loss": 0.0205, "step": 4827 }, { "epoch": 1.0985210466439135, "grad_norm": 1.5932312378336655, "learning_rate": 1.1069976599294374e-06, "loss": 0.0296, "step": 4828 }, { "epoch": 1.0987485779294652, "grad_norm": 1.3474193896215136, "learning_rate": 1.1069407741756884e-06, "loss": 0.0108, "step": 4829 }, { "epoch": 1.098976109215017, "grad_norm": 1.286192731643991, "learning_rate": 1.106883878571953e-06, "loss": 0.0306, "step": 4830 }, { "epoch": 1.0992036405005687, "grad_norm": 1.7859019215489673, "learning_rate": 1.1068269731193936e-06, "loss": 0.0467, "step": 4831 }, { "epoch": 1.0994311717861205, "grad_norm": 1.0673033994327072, "learning_rate": 1.106770057819173e-06, "loss": 0.011, "step": 4832 }, { "epoch": 1.0996587030716722, "grad_norm": 1.1919269451119046, "learning_rate": 1.1067131326724551e-06, "loss": 0.0275, "step": 4833 }, { "epoch": 1.0998862343572242, "grad_norm": 1.0610839759729118, "learning_rate": 1.106656197680403e-06, "loss": 0.0175, "step": 4834 }, { "epoch": 1.100113765642776, "grad_norm": 0.9398630402901391, "learning_rate": 1.10659925284418e-06, "loss": 0.028, "step": 4835 }, { "epoch": 1.1003412969283277, "grad_norm": 1.0905561404271749, "learning_rate": 1.1065422981649506e-06, "loss": 0.0408, "step": 4836 }, { "epoch": 1.1005688282138795, "grad_norm": 1.1760138038912833, "learning_rate": 1.1064853336438782e-06, "loss": 0.0491, "step": 4837 }, { "epoch": 1.1007963594994312, "grad_norm": 0.8244510688827178, "learning_rate": 1.1064283592821276e-06, "loss": 0.0197, "step": 4838 }, { "epoch": 1.101023890784983, "grad_norm": 0.747177088030229, "learning_rate": 1.106371375080863e-06, "loss": 0.017, "step": 4839 }, { "epoch": 1.1012514220705347, "grad_norm": 1.1336438441896446, "learning_rate": 1.106314381041249e-06, "loss": 0.0253, "step": 4840 }, { "epoch": 1.1014789533560865, "grad_norm": 0.5114058401483557, "learning_rate": 1.1062573771644506e-06, "loss": 0.004, "step": 4841 }, { "epoch": 1.1017064846416382, "grad_norm": 0.5746769321004888, "learning_rate": 1.106200363451633e-06, "loss": 0.0094, "step": 4842 }, { "epoch": 1.10193401592719, "grad_norm": 0.7680543679306212, "learning_rate": 1.1061433399039608e-06, "loss": 0.029, "step": 4843 }, { "epoch": 1.1021615472127417, "grad_norm": 0.7493587776816238, "learning_rate": 1.1060863065226002e-06, "loss": 0.0091, "step": 4844 }, { "epoch": 1.1023890784982935, "grad_norm": 1.3476928607388319, "learning_rate": 1.1060292633087167e-06, "loss": 0.0436, "step": 4845 }, { "epoch": 1.1026166097838452, "grad_norm": 1.087918078217182, "learning_rate": 1.1059722102634756e-06, "loss": 0.0201, "step": 4846 }, { "epoch": 1.102844141069397, "grad_norm": 1.0929486099530539, "learning_rate": 1.1059151473880439e-06, "loss": 0.0328, "step": 4847 }, { "epoch": 1.1030716723549487, "grad_norm": 1.3189660985869427, "learning_rate": 1.105858074683587e-06, "loss": 0.0142, "step": 4848 }, { "epoch": 1.1032992036405005, "grad_norm": 1.052151916181327, "learning_rate": 1.1058009921512717e-06, "loss": 0.0195, "step": 4849 }, { "epoch": 1.1035267349260522, "grad_norm": 1.0009913456254373, "learning_rate": 1.1057438997922648e-06, "loss": 0.022, "step": 4850 }, { "epoch": 1.103754266211604, "grad_norm": 0.9615349865705245, "learning_rate": 1.105686797607733e-06, "loss": 0.0111, "step": 4851 }, { "epoch": 1.103981797497156, "grad_norm": 1.3130785181889084, "learning_rate": 1.1056296855988432e-06, "loss": 0.0235, "step": 4852 }, { "epoch": 1.1042093287827077, "grad_norm": 0.8672033139838674, "learning_rate": 1.105572563766763e-06, "loss": 0.0243, "step": 4853 }, { "epoch": 1.1044368600682595, "grad_norm": 1.0127688429987634, "learning_rate": 1.1055154321126597e-06, "loss": 0.0153, "step": 4854 }, { "epoch": 1.1046643913538112, "grad_norm": 1.2643260607838762, "learning_rate": 1.105458290637701e-06, "loss": 0.04, "step": 4855 }, { "epoch": 1.104891922639363, "grad_norm": 0.6193207760495852, "learning_rate": 1.105401139343055e-06, "loss": 0.0095, "step": 4856 }, { "epoch": 1.1051194539249147, "grad_norm": 0.9678581892125716, "learning_rate": 1.105343978229889e-06, "loss": 0.0137, "step": 4857 }, { "epoch": 1.1053469852104665, "grad_norm": 1.2738862816924261, "learning_rate": 1.1052868072993723e-06, "loss": 0.0324, "step": 4858 }, { "epoch": 1.1055745164960182, "grad_norm": 2.111284557158375, "learning_rate": 1.1052296265526726e-06, "loss": 0.0436, "step": 4859 }, { "epoch": 1.10580204778157, "grad_norm": 1.7291659207116172, "learning_rate": 1.105172435990959e-06, "loss": 0.0443, "step": 4860 }, { "epoch": 1.1060295790671217, "grad_norm": 1.4583333294669347, "learning_rate": 1.1051152356154e-06, "loss": 0.0316, "step": 4861 }, { "epoch": 1.1062571103526735, "grad_norm": 0.9218920816273283, "learning_rate": 1.105058025427165e-06, "loss": 0.017, "step": 4862 }, { "epoch": 1.1064846416382252, "grad_norm": 1.2813414812852448, "learning_rate": 1.105000805427423e-06, "loss": 0.0262, "step": 4863 }, { "epoch": 1.106712172923777, "grad_norm": 1.6993734853125684, "learning_rate": 1.1049435756173439e-06, "loss": 0.0219, "step": 4864 }, { "epoch": 1.1069397042093287, "grad_norm": 0.8219715997662334, "learning_rate": 1.104886335998097e-06, "loss": 0.0166, "step": 4865 }, { "epoch": 1.1071672354948805, "grad_norm": 1.4898463042222714, "learning_rate": 1.104829086570852e-06, "loss": 0.0244, "step": 4866 }, { "epoch": 1.1073947667804322, "grad_norm": 0.8450834919239356, "learning_rate": 1.1047718273367794e-06, "loss": 0.0281, "step": 4867 }, { "epoch": 1.107622298065984, "grad_norm": 0.6448103593188148, "learning_rate": 1.1047145582970494e-06, "loss": 0.0086, "step": 4868 }, { "epoch": 1.1078498293515358, "grad_norm": 1.1979257342162584, "learning_rate": 1.1046572794528324e-06, "loss": 0.0295, "step": 4869 }, { "epoch": 1.1080773606370875, "grad_norm": 0.704626187571558, "learning_rate": 1.104599990805299e-06, "loss": 0.0141, "step": 4870 }, { "epoch": 1.1083048919226393, "grad_norm": 1.2920126084899004, "learning_rate": 1.1045426923556198e-06, "loss": 0.0226, "step": 4871 }, { "epoch": 1.108532423208191, "grad_norm": 0.9357951370023873, "learning_rate": 1.1044853841049668e-06, "loss": 0.0146, "step": 4872 }, { "epoch": 1.108759954493743, "grad_norm": 0.8290368347528208, "learning_rate": 1.1044280660545103e-06, "loss": 0.0098, "step": 4873 }, { "epoch": 1.1089874857792947, "grad_norm": 1.067183885716191, "learning_rate": 1.1043707382054223e-06, "loss": 0.0206, "step": 4874 }, { "epoch": 1.1092150170648465, "grad_norm": 0.9196445315136, "learning_rate": 1.1043134005588743e-06, "loss": 0.0196, "step": 4875 }, { "epoch": 1.1094425483503982, "grad_norm": 1.5830957213473278, "learning_rate": 1.1042560531160381e-06, "loss": 0.0426, "step": 4876 }, { "epoch": 1.10967007963595, "grad_norm": 1.0495445338165712, "learning_rate": 1.104198695878086e-06, "loss": 0.0223, "step": 4877 }, { "epoch": 1.1098976109215017, "grad_norm": 0.5938411642446392, "learning_rate": 1.1041413288461903e-06, "loss": 0.0118, "step": 4878 }, { "epoch": 1.1101251422070535, "grad_norm": 1.286777509620217, "learning_rate": 1.1040839520215233e-06, "loss": 0.0333, "step": 4879 }, { "epoch": 1.1103526734926052, "grad_norm": 0.9877268530404503, "learning_rate": 1.1040265654052575e-06, "loss": 0.0223, "step": 4880 }, { "epoch": 1.110580204778157, "grad_norm": 0.6647674925503221, "learning_rate": 1.103969168998566e-06, "loss": 0.0332, "step": 4881 }, { "epoch": 1.1108077360637088, "grad_norm": 1.2643667840897899, "learning_rate": 1.1039117628026222e-06, "loss": 0.0458, "step": 4882 }, { "epoch": 1.1110352673492605, "grad_norm": 0.9162239690822223, "learning_rate": 1.1038543468185988e-06, "loss": 0.0169, "step": 4883 }, { "epoch": 1.1112627986348123, "grad_norm": 0.8043833710259668, "learning_rate": 1.1037969210476696e-06, "loss": 0.013, "step": 4884 }, { "epoch": 1.111490329920364, "grad_norm": 0.9360911016853952, "learning_rate": 1.1037394854910082e-06, "loss": 0.0299, "step": 4885 }, { "epoch": 1.1117178612059158, "grad_norm": 0.8478138590871992, "learning_rate": 1.1036820401497884e-06, "loss": 0.0104, "step": 4886 }, { "epoch": 1.1119453924914675, "grad_norm": 0.7885738829466076, "learning_rate": 1.1036245850251844e-06, "loss": 0.0119, "step": 4887 }, { "epoch": 1.1121729237770193, "grad_norm": 0.711711942639679, "learning_rate": 1.1035671201183706e-06, "loss": 0.0167, "step": 4888 }, { "epoch": 1.112400455062571, "grad_norm": 0.6664367950082724, "learning_rate": 1.1035096454305213e-06, "loss": 0.0146, "step": 4889 }, { "epoch": 1.1126279863481228, "grad_norm": 1.5180095449935846, "learning_rate": 1.103452160962811e-06, "loss": 0.0206, "step": 4890 }, { "epoch": 1.1128555176336747, "grad_norm": 0.9314557397364934, "learning_rate": 1.103394666716415e-06, "loss": 0.0399, "step": 4891 }, { "epoch": 1.1130830489192265, "grad_norm": 0.7240617420566345, "learning_rate": 1.1033371626925079e-06, "loss": 0.0104, "step": 4892 }, { "epoch": 1.1133105802047782, "grad_norm": 1.000892668316659, "learning_rate": 1.1032796488922653e-06, "loss": 0.0186, "step": 4893 }, { "epoch": 1.11353811149033, "grad_norm": 1.0694112940902762, "learning_rate": 1.103222125316863e-06, "loss": 0.0151, "step": 4894 }, { "epoch": 1.1137656427758817, "grad_norm": 0.9676066050033223, "learning_rate": 1.1031645919674758e-06, "loss": 0.0139, "step": 4895 }, { "epoch": 1.1139931740614335, "grad_norm": 1.0428454696146314, "learning_rate": 1.10310704884528e-06, "loss": 0.0306, "step": 4896 }, { "epoch": 1.1142207053469853, "grad_norm": 1.2326650915996091, "learning_rate": 1.1030494959514521e-06, "loss": 0.0296, "step": 4897 }, { "epoch": 1.114448236632537, "grad_norm": 1.23258544073363, "learning_rate": 1.1029919332871678e-06, "loss": 0.0365, "step": 4898 }, { "epoch": 1.1146757679180888, "grad_norm": 0.9035657907747793, "learning_rate": 1.102934360853604e-06, "loss": 0.0128, "step": 4899 }, { "epoch": 1.1149032992036405, "grad_norm": 1.1444479344404774, "learning_rate": 1.1028767786519368e-06, "loss": 0.0141, "step": 4900 }, { "epoch": 1.1151308304891923, "grad_norm": 1.2429676441421305, "learning_rate": 1.1028191866833438e-06, "loss": 0.0285, "step": 4901 }, { "epoch": 1.115358361774744, "grad_norm": 0.9590079950756828, "learning_rate": 1.1027615849490014e-06, "loss": 0.0112, "step": 4902 }, { "epoch": 1.1155858930602958, "grad_norm": 1.572926999023847, "learning_rate": 1.1027039734500872e-06, "loss": 0.0369, "step": 4903 }, { "epoch": 1.1158134243458475, "grad_norm": 1.1352227453212334, "learning_rate": 1.102646352187779e-06, "loss": 0.0359, "step": 4904 }, { "epoch": 1.1160409556313993, "grad_norm": 1.1716125851334267, "learning_rate": 1.1025887211632538e-06, "loss": 0.02, "step": 4905 }, { "epoch": 1.116268486916951, "grad_norm": 1.1832800935639822, "learning_rate": 1.1025310803776898e-06, "loss": 0.0415, "step": 4906 }, { "epoch": 1.1164960182025028, "grad_norm": 1.1136610636887923, "learning_rate": 1.1024734298322655e-06, "loss": 0.0184, "step": 4907 }, { "epoch": 1.1167235494880545, "grad_norm": 1.1211312468158372, "learning_rate": 1.1024157695281582e-06, "loss": 0.029, "step": 4908 }, { "epoch": 1.1169510807736063, "grad_norm": 0.9972384903553551, "learning_rate": 1.1023580994665472e-06, "loss": 0.0201, "step": 4909 }, { "epoch": 1.117178612059158, "grad_norm": 1.7707402126553615, "learning_rate": 1.1023004196486108e-06, "loss": 0.043, "step": 4910 }, { "epoch": 1.11740614334471, "grad_norm": 1.134530398414991, "learning_rate": 1.102242730075528e-06, "loss": 0.0225, "step": 4911 }, { "epoch": 1.1176336746302618, "grad_norm": 1.627488244438378, "learning_rate": 1.1021850307484776e-06, "loss": 0.02, "step": 4912 }, { "epoch": 1.1178612059158135, "grad_norm": 1.1040646049948346, "learning_rate": 1.1021273216686397e-06, "loss": 0.0173, "step": 4913 }, { "epoch": 1.1180887372013653, "grad_norm": 1.5838051197154315, "learning_rate": 1.1020696028371926e-06, "loss": 0.024, "step": 4914 }, { "epoch": 1.118316268486917, "grad_norm": 0.8992624282859318, "learning_rate": 1.1020118742553166e-06, "loss": 0.0226, "step": 4915 }, { "epoch": 1.1185437997724688, "grad_norm": 1.3853418643898612, "learning_rate": 1.1019541359241917e-06, "loss": 0.0277, "step": 4916 }, { "epoch": 1.1187713310580205, "grad_norm": 1.0102231219601017, "learning_rate": 1.1018963878449976e-06, "loss": 0.013, "step": 4917 }, { "epoch": 1.1189988623435723, "grad_norm": 1.1790884765143759, "learning_rate": 1.1018386300189148e-06, "loss": 0.0176, "step": 4918 }, { "epoch": 1.119226393629124, "grad_norm": 0.7433554447296712, "learning_rate": 1.1017808624471237e-06, "loss": 0.0196, "step": 4919 }, { "epoch": 1.1194539249146758, "grad_norm": 0.9334325084247519, "learning_rate": 1.101723085130805e-06, "loss": 0.0241, "step": 4920 }, { "epoch": 1.1196814562002275, "grad_norm": 0.7643706032076052, "learning_rate": 1.1016652980711392e-06, "loss": 0.0138, "step": 4921 }, { "epoch": 1.1199089874857793, "grad_norm": 0.7474429513186569, "learning_rate": 1.1016075012693082e-06, "loss": 0.0266, "step": 4922 }, { "epoch": 1.120136518771331, "grad_norm": 1.1711233382681683, "learning_rate": 1.1015496947264923e-06, "loss": 0.0324, "step": 4923 }, { "epoch": 1.1203640500568828, "grad_norm": 2.534130515487931, "learning_rate": 1.1014918784438736e-06, "loss": 0.0624, "step": 4924 }, { "epoch": 1.1205915813424345, "grad_norm": 1.4712142976504, "learning_rate": 1.1014340524226337e-06, "loss": 0.0425, "step": 4925 }, { "epoch": 1.1208191126279863, "grad_norm": 0.8897734161115947, "learning_rate": 1.101376216663954e-06, "loss": 0.0146, "step": 4926 }, { "epoch": 1.121046643913538, "grad_norm": 1.6323492107077655, "learning_rate": 1.101318371169017e-06, "loss": 0.0446, "step": 4927 }, { "epoch": 1.1212741751990898, "grad_norm": 0.9646427394649012, "learning_rate": 1.1012605159390048e-06, "loss": 0.0312, "step": 4928 }, { "epoch": 1.1215017064846415, "grad_norm": 0.7920927385811001, "learning_rate": 1.1012026509751e-06, "loss": 0.0194, "step": 4929 }, { "epoch": 1.1217292377701935, "grad_norm": 1.1574113409003755, "learning_rate": 1.1011447762784849e-06, "loss": 0.0237, "step": 4930 }, { "epoch": 1.1219567690557453, "grad_norm": 1.2517329969278503, "learning_rate": 1.1010868918503429e-06, "loss": 0.0207, "step": 4931 }, { "epoch": 1.122184300341297, "grad_norm": 0.8071040660470671, "learning_rate": 1.1010289976918565e-06, "loss": 0.0207, "step": 4932 }, { "epoch": 1.1224118316268488, "grad_norm": 0.9373790639087559, "learning_rate": 1.1009710938042093e-06, "loss": 0.0153, "step": 4933 }, { "epoch": 1.1226393629124005, "grad_norm": 1.2887447933260439, "learning_rate": 1.1009131801885848e-06, "loss": 0.0239, "step": 4934 }, { "epoch": 1.1228668941979523, "grad_norm": 1.40441656204075, "learning_rate": 1.100855256846166e-06, "loss": 0.0343, "step": 4935 }, { "epoch": 1.123094425483504, "grad_norm": 0.689175568925837, "learning_rate": 1.1007973237781377e-06, "loss": 0.0148, "step": 4936 }, { "epoch": 1.1233219567690558, "grad_norm": 1.1787781392551855, "learning_rate": 1.1007393809856834e-06, "loss": 0.0286, "step": 4937 }, { "epoch": 1.1235494880546075, "grad_norm": 1.0271974875036538, "learning_rate": 1.1006814284699873e-06, "loss": 0.0373, "step": 4938 }, { "epoch": 1.1237770193401593, "grad_norm": 0.8297711019866251, "learning_rate": 1.100623466232234e-06, "loss": 0.0261, "step": 4939 }, { "epoch": 1.124004550625711, "grad_norm": 1.0858267777295185, "learning_rate": 1.1005654942736082e-06, "loss": 0.0261, "step": 4940 }, { "epoch": 1.1242320819112628, "grad_norm": 0.6790255085030746, "learning_rate": 1.1005075125952946e-06, "loss": 0.0156, "step": 4941 }, { "epoch": 1.1244596131968145, "grad_norm": 1.0737443834894482, "learning_rate": 1.1004495211984783e-06, "loss": 0.0342, "step": 4942 }, { "epoch": 1.1246871444823663, "grad_norm": 1.1759005758316916, "learning_rate": 1.1003915200843446e-06, "loss": 0.0296, "step": 4943 }, { "epoch": 1.124914675767918, "grad_norm": 1.4502960478357807, "learning_rate": 1.1003335092540787e-06, "loss": 0.0338, "step": 4944 }, { "epoch": 1.1251422070534698, "grad_norm": 1.0232031902807623, "learning_rate": 1.1002754887088665e-06, "loss": 0.0275, "step": 4945 }, { "epoch": 1.1253697383390215, "grad_norm": 1.1276836760259192, "learning_rate": 1.1002174584498938e-06, "loss": 0.018, "step": 4946 }, { "epoch": 1.1255972696245733, "grad_norm": 1.121912066969083, "learning_rate": 1.1001594184783464e-06, "loss": 0.0224, "step": 4947 }, { "epoch": 1.125824800910125, "grad_norm": 1.1026078266089805, "learning_rate": 1.1001013687954109e-06, "loss": 0.0265, "step": 4948 }, { "epoch": 1.1260523321956768, "grad_norm": 0.6996142941788891, "learning_rate": 1.1000433094022735e-06, "loss": 0.0204, "step": 4949 }, { "epoch": 1.1262798634812285, "grad_norm": 1.022975678784142, "learning_rate": 1.0999852403001208e-06, "loss": 0.0189, "step": 4950 }, { "epoch": 1.1265073947667805, "grad_norm": 1.2687033993187262, "learning_rate": 1.0999271614901396e-06, "loss": 0.0141, "step": 4951 }, { "epoch": 1.1267349260523323, "grad_norm": 1.799956791253596, "learning_rate": 1.099869072973517e-06, "loss": 0.0146, "step": 4952 }, { "epoch": 1.126962457337884, "grad_norm": 1.5492585986994671, "learning_rate": 1.0998109747514404e-06, "loss": 0.0447, "step": 4953 }, { "epoch": 1.1271899886234358, "grad_norm": 0.7598908512501374, "learning_rate": 1.099752866825097e-06, "loss": 0.0166, "step": 4954 }, { "epoch": 1.1274175199089875, "grad_norm": 0.6362839552889372, "learning_rate": 1.0996947491956745e-06, "loss": 0.0113, "step": 4955 }, { "epoch": 1.1276450511945393, "grad_norm": 1.0336058491900288, "learning_rate": 1.0996366218643607e-06, "loss": 0.0156, "step": 4956 }, { "epoch": 1.127872582480091, "grad_norm": 0.7457309100980003, "learning_rate": 1.0995784848323434e-06, "loss": 0.0151, "step": 4957 }, { "epoch": 1.1281001137656428, "grad_norm": 1.649804543332714, "learning_rate": 1.0995203381008112e-06, "loss": 0.0347, "step": 4958 }, { "epoch": 1.1283276450511945, "grad_norm": 1.2533333259409183, "learning_rate": 1.099462181670952e-06, "loss": 0.0288, "step": 4959 }, { "epoch": 1.1285551763367463, "grad_norm": 1.1428064969183531, "learning_rate": 1.0994040155439553e-06, "loss": 0.0263, "step": 4960 }, { "epoch": 1.128782707622298, "grad_norm": 0.7012991923657113, "learning_rate": 1.0993458397210092e-06, "loss": 0.0108, "step": 4961 }, { "epoch": 1.1290102389078498, "grad_norm": 1.319347853084701, "learning_rate": 1.0992876542033026e-06, "loss": 0.0232, "step": 4962 }, { "epoch": 1.1292377701934015, "grad_norm": 0.8983600851477538, "learning_rate": 1.0992294589920252e-06, "loss": 0.015, "step": 4963 }, { "epoch": 1.1294653014789533, "grad_norm": 0.6582052508958808, "learning_rate": 1.099171254088366e-06, "loss": 0.0129, "step": 4964 }, { "epoch": 1.129692832764505, "grad_norm": 2.0776942729185306, "learning_rate": 1.0991130394935148e-06, "loss": 0.0128, "step": 4965 }, { "epoch": 1.1299203640500568, "grad_norm": 1.3980552891281348, "learning_rate": 1.0990548152086616e-06, "loss": 0.0158, "step": 4966 }, { "epoch": 1.1301478953356086, "grad_norm": 1.399217668963092, "learning_rate": 1.098996581234996e-06, "loss": 0.0214, "step": 4967 }, { "epoch": 1.1303754266211605, "grad_norm": 1.0723453956148017, "learning_rate": 1.0989383375737081e-06, "loss": 0.0124, "step": 4968 }, { "epoch": 1.1306029579067123, "grad_norm": 1.1545644366726804, "learning_rate": 1.098880084225989e-06, "loss": 0.0412, "step": 4969 }, { "epoch": 1.130830489192264, "grad_norm": 0.942557146311076, "learning_rate": 1.0988218211930285e-06, "loss": 0.0201, "step": 4970 }, { "epoch": 1.1310580204778158, "grad_norm": 1.0043398319888264, "learning_rate": 1.0987635484760178e-06, "loss": 0.0198, "step": 4971 }, { "epoch": 1.1312855517633675, "grad_norm": 1.5698009312279446, "learning_rate": 1.098705266076148e-06, "loss": 0.0258, "step": 4972 }, { "epoch": 1.1315130830489193, "grad_norm": 1.4423264895380257, "learning_rate": 1.0986469739946102e-06, "loss": 0.0325, "step": 4973 }, { "epoch": 1.131740614334471, "grad_norm": 1.2136974558872335, "learning_rate": 1.0985886722325954e-06, "loss": 0.0217, "step": 4974 }, { "epoch": 1.1319681456200228, "grad_norm": 1.0091129092645157, "learning_rate": 1.0985303607912956e-06, "loss": 0.0134, "step": 4975 }, { "epoch": 1.1321956769055745, "grad_norm": 0.9544088452985304, "learning_rate": 1.0984720396719024e-06, "loss": 0.0243, "step": 4976 }, { "epoch": 1.1324232081911263, "grad_norm": 1.03400426394427, "learning_rate": 1.098413708875608e-06, "loss": 0.0165, "step": 4977 }, { "epoch": 1.132650739476678, "grad_norm": 0.9584699961425377, "learning_rate": 1.098355368403604e-06, "loss": 0.0159, "step": 4978 }, { "epoch": 1.1328782707622298, "grad_norm": 1.1633397537719241, "learning_rate": 1.0982970182570837e-06, "loss": 0.027, "step": 4979 }, { "epoch": 1.1331058020477816, "grad_norm": 0.6150701132014431, "learning_rate": 1.098238658437239e-06, "loss": 0.0134, "step": 4980 }, { "epoch": 1.1333333333333333, "grad_norm": 0.9628332044868487, "learning_rate": 1.0981802889452627e-06, "loss": 0.0244, "step": 4981 }, { "epoch": 1.133560864618885, "grad_norm": 0.8522049310117307, "learning_rate": 1.0981219097823479e-06, "loss": 0.0106, "step": 4982 }, { "epoch": 1.1337883959044368, "grad_norm": 0.9473163604101656, "learning_rate": 1.0980635209496878e-06, "loss": 0.024, "step": 4983 }, { "epoch": 1.1340159271899886, "grad_norm": 0.8002876838010067, "learning_rate": 1.0980051224484756e-06, "loss": 0.0083, "step": 4984 }, { "epoch": 1.1342434584755403, "grad_norm": 1.275340108026526, "learning_rate": 1.0979467142799052e-06, "loss": 0.0425, "step": 4985 }, { "epoch": 1.134470989761092, "grad_norm": 1.1242771585253066, "learning_rate": 1.0978882964451698e-06, "loss": 0.0235, "step": 4986 }, { "epoch": 1.1346985210466438, "grad_norm": 0.8316998784778787, "learning_rate": 1.097829868945464e-06, "loss": 0.0138, "step": 4987 }, { "epoch": 1.1349260523321956, "grad_norm": 1.2826664766461733, "learning_rate": 1.0977714317819812e-06, "loss": 0.0148, "step": 4988 }, { "epoch": 1.1351535836177473, "grad_norm": 1.7211760416844804, "learning_rate": 1.0977129849559165e-06, "loss": 0.0336, "step": 4989 }, { "epoch": 1.1353811149032993, "grad_norm": 0.8143435710252414, "learning_rate": 1.0976545284684642e-06, "loss": 0.0066, "step": 4990 }, { "epoch": 1.135608646188851, "grad_norm": 1.0342548252924428, "learning_rate": 1.0975960623208188e-06, "loss": 0.0162, "step": 4991 }, { "epoch": 1.1358361774744028, "grad_norm": 1.7590039011675342, "learning_rate": 1.0975375865141753e-06, "loss": 0.0362, "step": 4992 }, { "epoch": 1.1360637087599545, "grad_norm": 1.6822978128433002, "learning_rate": 1.097479101049729e-06, "loss": 0.0185, "step": 4993 }, { "epoch": 1.1362912400455063, "grad_norm": 1.2344613037091736, "learning_rate": 1.0974206059286752e-06, "loss": 0.0374, "step": 4994 }, { "epoch": 1.136518771331058, "grad_norm": 0.5280046364865614, "learning_rate": 1.0973621011522096e-06, "loss": 0.0084, "step": 4995 }, { "epoch": 1.1367463026166098, "grad_norm": 1.9687955658616516, "learning_rate": 1.0973035867215276e-06, "loss": 0.0336, "step": 4996 }, { "epoch": 1.1369738339021616, "grad_norm": 1.022624248487459, "learning_rate": 1.0972450626378254e-06, "loss": 0.0225, "step": 4997 }, { "epoch": 1.1372013651877133, "grad_norm": 0.48108692573634865, "learning_rate": 1.0971865289022988e-06, "loss": 0.008, "step": 4998 }, { "epoch": 1.137428896473265, "grad_norm": 0.9799413189160909, "learning_rate": 1.0971279855161442e-06, "loss": 0.0394, "step": 4999 }, { "epoch": 1.1376564277588168, "grad_norm": 1.466751462689922, "learning_rate": 1.0970694324805586e-06, "loss": 0.0167, "step": 5000 }, { "epoch": 1.1378839590443686, "grad_norm": 1.5693112940714422, "learning_rate": 1.0970108697967382e-06, "loss": 0.0325, "step": 5001 }, { "epoch": 1.1381114903299203, "grad_norm": 0.9255284204543452, "learning_rate": 1.09695229746588e-06, "loss": 0.0362, "step": 5002 }, { "epoch": 1.138339021615472, "grad_norm": 1.7087407221932875, "learning_rate": 1.0968937154891812e-06, "loss": 0.052, "step": 5003 }, { "epoch": 1.1385665529010238, "grad_norm": 2.428840915012362, "learning_rate": 1.096835123867839e-06, "loss": 0.0366, "step": 5004 }, { "epoch": 1.1387940841865756, "grad_norm": 0.6595273191874941, "learning_rate": 1.0967765226030512e-06, "loss": 0.0215, "step": 5005 }, { "epoch": 1.1390216154721273, "grad_norm": 1.2073236201929451, "learning_rate": 1.0967179116960153e-06, "loss": 0.0431, "step": 5006 }, { "epoch": 1.1392491467576793, "grad_norm": 1.517298131220002, "learning_rate": 1.096659291147929e-06, "loss": 0.0493, "step": 5007 }, { "epoch": 1.139476678043231, "grad_norm": 0.8459583474402816, "learning_rate": 1.0966006609599908e-06, "loss": 0.0161, "step": 5008 }, { "epoch": 1.1397042093287828, "grad_norm": 0.9402719162645292, "learning_rate": 1.0965420211333984e-06, "loss": 0.0199, "step": 5009 }, { "epoch": 1.1399317406143346, "grad_norm": 0.8824336019611823, "learning_rate": 1.0964833716693512e-06, "loss": 0.0236, "step": 5010 }, { "epoch": 1.1401592718998863, "grad_norm": 0.9061765730464424, "learning_rate": 1.096424712569047e-06, "loss": 0.0144, "step": 5011 }, { "epoch": 1.140386803185438, "grad_norm": 1.1463154922222984, "learning_rate": 1.0963660438336851e-06, "loss": 0.0405, "step": 5012 }, { "epoch": 1.1406143344709898, "grad_norm": 0.6924366133128478, "learning_rate": 1.0963073654644645e-06, "loss": 0.0139, "step": 5013 }, { "epoch": 1.1408418657565416, "grad_norm": 0.9228757161234781, "learning_rate": 1.0962486774625847e-06, "loss": 0.0227, "step": 5014 }, { "epoch": 1.1410693970420933, "grad_norm": 0.9665478631542946, "learning_rate": 1.096189979829245e-06, "loss": 0.0273, "step": 5015 }, { "epoch": 1.141296928327645, "grad_norm": 0.9370551975836519, "learning_rate": 1.096131272565645e-06, "loss": 0.0201, "step": 5016 }, { "epoch": 1.1415244596131968, "grad_norm": 1.0359990365775988, "learning_rate": 1.0960725556729845e-06, "loss": 0.0188, "step": 5017 }, { "epoch": 1.1417519908987486, "grad_norm": 0.6081258142385076, "learning_rate": 1.0960138291524637e-06, "loss": 0.0123, "step": 5018 }, { "epoch": 1.1419795221843003, "grad_norm": 0.8151694196582907, "learning_rate": 1.095955093005283e-06, "loss": 0.0178, "step": 5019 }, { "epoch": 1.142207053469852, "grad_norm": 0.972811628969212, "learning_rate": 1.0958963472326426e-06, "loss": 0.0114, "step": 5020 }, { "epoch": 1.1424345847554038, "grad_norm": 0.7206858563005965, "learning_rate": 1.0958375918357433e-06, "loss": 0.011, "step": 5021 }, { "epoch": 1.1426621160409556, "grad_norm": 0.8388894087062686, "learning_rate": 1.095778826815786e-06, "loss": 0.0214, "step": 5022 }, { "epoch": 1.1428896473265073, "grad_norm": 1.4288807810887136, "learning_rate": 1.0957200521739715e-06, "loss": 0.0315, "step": 5023 }, { "epoch": 1.143117178612059, "grad_norm": 0.9634756984934681, "learning_rate": 1.0956612679115012e-06, "loss": 0.0338, "step": 5024 }, { "epoch": 1.1433447098976108, "grad_norm": 1.2246259764288705, "learning_rate": 1.0956024740295767e-06, "loss": 0.0339, "step": 5025 }, { "epoch": 1.1435722411831626, "grad_norm": 0.9727990066152786, "learning_rate": 1.0955436705293996e-06, "loss": 0.0186, "step": 5026 }, { "epoch": 1.1437997724687143, "grad_norm": 1.4271212460831064, "learning_rate": 1.0954848574121715e-06, "loss": 0.0407, "step": 5027 }, { "epoch": 1.144027303754266, "grad_norm": 1.1642512101338942, "learning_rate": 1.0954260346790944e-06, "loss": 0.0305, "step": 5028 }, { "epoch": 1.144254835039818, "grad_norm": 3.382254274133764, "learning_rate": 1.0953672023313709e-06, "loss": 0.0462, "step": 5029 }, { "epoch": 1.1444823663253698, "grad_norm": 1.3043659225114033, "learning_rate": 1.0953083603702031e-06, "loss": 0.0269, "step": 5030 }, { "epoch": 1.1447098976109216, "grad_norm": 0.8941373741234482, "learning_rate": 1.0952495087967939e-06, "loss": 0.01, "step": 5031 }, { "epoch": 1.1449374288964733, "grad_norm": 1.0345429364832142, "learning_rate": 1.095190647612346e-06, "loss": 0.0313, "step": 5032 }, { "epoch": 1.145164960182025, "grad_norm": 0.9469945046545952, "learning_rate": 1.0951317768180623e-06, "loss": 0.0185, "step": 5033 }, { "epoch": 1.1453924914675768, "grad_norm": 1.5417022710277841, "learning_rate": 1.0950728964151457e-06, "loss": 0.0477, "step": 5034 }, { "epoch": 1.1456200227531286, "grad_norm": 1.2090565974916883, "learning_rate": 1.0950140064048005e-06, "loss": 0.0304, "step": 5035 }, { "epoch": 1.1458475540386803, "grad_norm": 1.6222929147315774, "learning_rate": 1.0949551067882297e-06, "loss": 0.0449, "step": 5036 }, { "epoch": 1.146075085324232, "grad_norm": 0.8458047187793855, "learning_rate": 1.094896197566637e-06, "loss": 0.0177, "step": 5037 }, { "epoch": 1.1463026166097838, "grad_norm": 0.8642635965418713, "learning_rate": 1.0948372787412267e-06, "loss": 0.019, "step": 5038 }, { "epoch": 1.1465301478953356, "grad_norm": 1.6962052412561806, "learning_rate": 1.094778350313203e-06, "loss": 0.0388, "step": 5039 }, { "epoch": 1.1467576791808873, "grad_norm": 1.2082274113676141, "learning_rate": 1.09471941228377e-06, "loss": 0.0443, "step": 5040 }, { "epoch": 1.146985210466439, "grad_norm": 1.551649129101969, "learning_rate": 1.0946604646541327e-06, "loss": 0.0458, "step": 5041 }, { "epoch": 1.1472127417519908, "grad_norm": 0.6322582451330618, "learning_rate": 1.0946015074254957e-06, "loss": 0.0063, "step": 5042 }, { "epoch": 1.1474402730375426, "grad_norm": 1.2390695869307191, "learning_rate": 1.0945425405990636e-06, "loss": 0.0438, "step": 5043 }, { "epoch": 1.1476678043230943, "grad_norm": 1.171113561669916, "learning_rate": 1.094483564176042e-06, "loss": 0.0304, "step": 5044 }, { "epoch": 1.147895335608646, "grad_norm": 1.111705225220342, "learning_rate": 1.0944245781576363e-06, "loss": 0.0199, "step": 5045 }, { "epoch": 1.148122866894198, "grad_norm": 0.7788906673524727, "learning_rate": 1.0943655825450517e-06, "loss": 0.017, "step": 5046 }, { "epoch": 1.1483503981797498, "grad_norm": 1.37808766490078, "learning_rate": 1.0943065773394943e-06, "loss": 0.027, "step": 5047 }, { "epoch": 1.1485779294653016, "grad_norm": 1.0016121975434131, "learning_rate": 1.0942475625421701e-06, "loss": 0.0217, "step": 5048 }, { "epoch": 1.1488054607508533, "grad_norm": 1.5673100085633027, "learning_rate": 1.094188538154285e-06, "loss": 0.039, "step": 5049 }, { "epoch": 1.149032992036405, "grad_norm": 1.1060010064378178, "learning_rate": 1.0941295041770453e-06, "loss": 0.0158, "step": 5050 }, { "epoch": 1.1492605233219568, "grad_norm": 1.1961653202374867, "learning_rate": 1.0940704606116578e-06, "loss": 0.0197, "step": 5051 }, { "epoch": 1.1494880546075086, "grad_norm": 0.9641941727834898, "learning_rate": 1.0940114074593292e-06, "loss": 0.0141, "step": 5052 }, { "epoch": 1.1497155858930603, "grad_norm": 0.6461548194155666, "learning_rate": 1.0939523447212665e-06, "loss": 0.0074, "step": 5053 }, { "epoch": 1.149943117178612, "grad_norm": 1.125395408330258, "learning_rate": 1.0938932723986766e-06, "loss": 0.0316, "step": 5054 }, { "epoch": 1.1501706484641638, "grad_norm": 0.6999270740525377, "learning_rate": 1.0938341904927669e-06, "loss": 0.0115, "step": 5055 }, { "epoch": 1.1503981797497156, "grad_norm": 0.824514611548941, "learning_rate": 1.093775099004745e-06, "loss": 0.0146, "step": 5056 }, { "epoch": 1.1506257110352673, "grad_norm": 1.2958988293911842, "learning_rate": 1.0937159979358186e-06, "loss": 0.0285, "step": 5057 }, { "epoch": 1.150853242320819, "grad_norm": 0.5661057032643344, "learning_rate": 1.0936568872871958e-06, "loss": 0.0085, "step": 5058 }, { "epoch": 1.1510807736063708, "grad_norm": 0.8911202539112528, "learning_rate": 1.0935977670600843e-06, "loss": 0.0249, "step": 5059 }, { "epoch": 1.1513083048919226, "grad_norm": 0.6119358774484015, "learning_rate": 1.0935386372556928e-06, "loss": 0.0081, "step": 5060 }, { "epoch": 1.1515358361774743, "grad_norm": 1.1732745638400495, "learning_rate": 1.0934794978752295e-06, "loss": 0.0304, "step": 5061 }, { "epoch": 1.151763367463026, "grad_norm": 1.4610976896610823, "learning_rate": 1.0934203489199033e-06, "loss": 0.026, "step": 5062 }, { "epoch": 1.1519908987485779, "grad_norm": 0.8751176409958491, "learning_rate": 1.093361190390923e-06, "loss": 0.0129, "step": 5063 }, { "epoch": 1.1522184300341296, "grad_norm": 0.9095379255951723, "learning_rate": 1.0933020222894978e-06, "loss": 0.0189, "step": 5064 }, { "epoch": 1.1524459613196814, "grad_norm": 0.886509655802644, "learning_rate": 1.0932428446168369e-06, "loss": 0.0094, "step": 5065 }, { "epoch": 1.152673492605233, "grad_norm": 1.0165308626390206, "learning_rate": 1.0931836573741498e-06, "loss": 0.0194, "step": 5066 }, { "epoch": 1.1529010238907849, "grad_norm": 2.2862767542657307, "learning_rate": 1.093124460562646e-06, "loss": 0.03, "step": 5067 }, { "epoch": 1.1531285551763368, "grad_norm": 3.0841316980515, "learning_rate": 1.0930652541835357e-06, "loss": 0.1031, "step": 5068 }, { "epoch": 1.1533560864618886, "grad_norm": 2.057276950609037, "learning_rate": 1.093006038238029e-06, "loss": 0.0498, "step": 5069 }, { "epoch": 1.1535836177474403, "grad_norm": 0.8535534714067085, "learning_rate": 1.0929468127273357e-06, "loss": 0.0262, "step": 5070 }, { "epoch": 1.153811149032992, "grad_norm": 0.7487264216032792, "learning_rate": 1.0928875776526667e-06, "loss": 0.0187, "step": 5071 }, { "epoch": 1.1540386803185438, "grad_norm": 1.011442354116883, "learning_rate": 1.0928283330152325e-06, "loss": 0.0237, "step": 5072 }, { "epoch": 1.1542662116040956, "grad_norm": 1.2385558290850083, "learning_rate": 1.092769078816244e-06, "loss": 0.0576, "step": 5073 }, { "epoch": 1.1544937428896473, "grad_norm": 1.2917165551359526, "learning_rate": 1.092709815056912e-06, "loss": 0.0211, "step": 5074 }, { "epoch": 1.154721274175199, "grad_norm": 1.0590341552017861, "learning_rate": 1.0926505417384482e-06, "loss": 0.0303, "step": 5075 }, { "epoch": 1.1549488054607508, "grad_norm": 0.9189460734693743, "learning_rate": 1.0925912588620637e-06, "loss": 0.0147, "step": 5076 }, { "epoch": 1.1551763367463026, "grad_norm": 0.998558466002832, "learning_rate": 1.0925319664289703e-06, "loss": 0.0219, "step": 5077 }, { "epoch": 1.1554038680318544, "grad_norm": 0.7945299298414331, "learning_rate": 1.0924726644403797e-06, "loss": 0.0176, "step": 5078 }, { "epoch": 1.155631399317406, "grad_norm": 1.3566259925414048, "learning_rate": 1.0924133528975039e-06, "loss": 0.0295, "step": 5079 }, { "epoch": 1.1558589306029579, "grad_norm": 0.8074114789352509, "learning_rate": 1.0923540318015552e-06, "loss": 0.022, "step": 5080 }, { "epoch": 1.1560864618885096, "grad_norm": 1.3411051591390433, "learning_rate": 1.092294701153746e-06, "loss": 0.029, "step": 5081 }, { "epoch": 1.1563139931740614, "grad_norm": 1.423080211058415, "learning_rate": 1.092235360955289e-06, "loss": 0.0503, "step": 5082 }, { "epoch": 1.1565415244596131, "grad_norm": 0.8213852817117168, "learning_rate": 1.092176011207397e-06, "loss": 0.021, "step": 5083 }, { "epoch": 1.1567690557451649, "grad_norm": 1.2933428126076256, "learning_rate": 1.0921166519112828e-06, "loss": 0.0442, "step": 5084 }, { "epoch": 1.1569965870307168, "grad_norm": 0.5857327857396418, "learning_rate": 1.0920572830681597e-06, "loss": 0.0107, "step": 5085 }, { "epoch": 1.1572241183162686, "grad_norm": 1.0152274364913478, "learning_rate": 1.0919979046792411e-06, "loss": 0.021, "step": 5086 }, { "epoch": 1.1574516496018203, "grad_norm": 1.6197058508435338, "learning_rate": 1.0919385167457408e-06, "loss": 0.0317, "step": 5087 }, { "epoch": 1.157679180887372, "grad_norm": 0.8321181279990257, "learning_rate": 1.0918791192688722e-06, "loss": 0.0083, "step": 5088 }, { "epoch": 1.1579067121729238, "grad_norm": 1.1078885245620338, "learning_rate": 1.0918197122498495e-06, "loss": 0.0417, "step": 5089 }, { "epoch": 1.1581342434584756, "grad_norm": 0.8666450018016355, "learning_rate": 1.0917602956898867e-06, "loss": 0.0235, "step": 5090 }, { "epoch": 1.1583617747440274, "grad_norm": 0.9389403634525508, "learning_rate": 1.0917008695901985e-06, "loss": 0.0161, "step": 5091 }, { "epoch": 1.158589306029579, "grad_norm": 1.6343340981795507, "learning_rate": 1.091641433951999e-06, "loss": 0.0589, "step": 5092 }, { "epoch": 1.1588168373151309, "grad_norm": 1.0221675248406095, "learning_rate": 1.0915819887765034e-06, "loss": 0.0209, "step": 5093 }, { "epoch": 1.1590443686006826, "grad_norm": 1.2783954239375135, "learning_rate": 1.0915225340649264e-06, "loss": 0.0314, "step": 5094 }, { "epoch": 1.1592718998862344, "grad_norm": 1.0876136046711122, "learning_rate": 1.091463069818483e-06, "loss": 0.0183, "step": 5095 }, { "epoch": 1.159499431171786, "grad_norm": 0.92935249083099, "learning_rate": 1.091403596038389e-06, "loss": 0.0284, "step": 5096 }, { "epoch": 1.1597269624573379, "grad_norm": 1.532892418760994, "learning_rate": 1.0913441127258596e-06, "loss": 0.0229, "step": 5097 }, { "epoch": 1.1599544937428896, "grad_norm": 1.0034643002001107, "learning_rate": 1.0912846198821105e-06, "loss": 0.0176, "step": 5098 }, { "epoch": 1.1601820250284414, "grad_norm": 1.1190616568728453, "learning_rate": 1.091225117508358e-06, "loss": 0.0201, "step": 5099 }, { "epoch": 1.1604095563139931, "grad_norm": 0.7869200780957125, "learning_rate": 1.0911656056058175e-06, "loss": 0.0085, "step": 5100 }, { "epoch": 1.1606370875995449, "grad_norm": 0.7075180696463311, "learning_rate": 1.0911060841757063e-06, "loss": 0.0143, "step": 5101 }, { "epoch": 1.1608646188850966, "grad_norm": 0.6587063658622069, "learning_rate": 1.09104655321924e-06, "loss": 0.0174, "step": 5102 }, { "epoch": 1.1610921501706484, "grad_norm": 1.2851325080822802, "learning_rate": 1.0909870127376358e-06, "loss": 0.0347, "step": 5103 }, { "epoch": 1.1613196814562001, "grad_norm": 1.4645102454670038, "learning_rate": 1.0909274627321106e-06, "loss": 0.0412, "step": 5104 }, { "epoch": 1.1615472127417519, "grad_norm": 1.983767386413822, "learning_rate": 1.090867903203881e-06, "loss": 0.0224, "step": 5105 }, { "epoch": 1.1617747440273036, "grad_norm": 1.2382132521135707, "learning_rate": 1.090808334154165e-06, "loss": 0.0172, "step": 5106 }, { "epoch": 1.1620022753128556, "grad_norm": 0.8691675756314506, "learning_rate": 1.0907487555841797e-06, "loss": 0.0158, "step": 5107 }, { "epoch": 1.1622298065984074, "grad_norm": 2.1283896982776853, "learning_rate": 1.0906891674951426e-06, "loss": 0.0345, "step": 5108 }, { "epoch": 1.162457337883959, "grad_norm": 1.6543688130648284, "learning_rate": 1.090629569888272e-06, "loss": 0.0491, "step": 5109 }, { "epoch": 1.1626848691695109, "grad_norm": 0.6293933164268644, "learning_rate": 1.0905699627647857e-06, "loss": 0.0094, "step": 5110 }, { "epoch": 1.1629124004550626, "grad_norm": 1.6953242717121608, "learning_rate": 1.090510346125902e-06, "loss": 0.0238, "step": 5111 }, { "epoch": 1.1631399317406144, "grad_norm": 1.159765223167702, "learning_rate": 1.0904507199728392e-06, "loss": 0.0406, "step": 5112 }, { "epoch": 1.1633674630261661, "grad_norm": 1.3015729382676753, "learning_rate": 1.0903910843068163e-06, "loss": 0.0318, "step": 5113 }, { "epoch": 1.1635949943117179, "grad_norm": 0.8775703929826588, "learning_rate": 1.090331439129052e-06, "loss": 0.0087, "step": 5114 }, { "epoch": 1.1638225255972696, "grad_norm": 2.0908466705122963, "learning_rate": 1.0902717844407651e-06, "loss": 0.0187, "step": 5115 }, { "epoch": 1.1640500568828214, "grad_norm": 0.8211627405295623, "learning_rate": 1.0902121202431754e-06, "loss": 0.0214, "step": 5116 }, { "epoch": 1.1642775881683731, "grad_norm": 1.0607426301957161, "learning_rate": 1.0901524465375015e-06, "loss": 0.0305, "step": 5117 }, { "epoch": 1.1645051194539249, "grad_norm": 1.0553962833230413, "learning_rate": 1.0900927633249638e-06, "loss": 0.0217, "step": 5118 }, { "epoch": 1.1647326507394766, "grad_norm": 1.0244204786724485, "learning_rate": 1.0900330706067818e-06, "loss": 0.0264, "step": 5119 }, { "epoch": 1.1649601820250284, "grad_norm": 0.9707054088367129, "learning_rate": 1.0899733683841753e-06, "loss": 0.0162, "step": 5120 }, { "epoch": 1.1651877133105801, "grad_norm": 1.1311434603614883, "learning_rate": 1.0899136566583647e-06, "loss": 0.028, "step": 5121 }, { "epoch": 1.1654152445961319, "grad_norm": 0.6912191457949579, "learning_rate": 1.0898539354305706e-06, "loss": 0.0099, "step": 5122 }, { "epoch": 1.1656427758816836, "grad_norm": 0.7902212704972374, "learning_rate": 1.0897942047020131e-06, "loss": 0.0128, "step": 5123 }, { "epoch": 1.1658703071672356, "grad_norm": 0.5967448578657041, "learning_rate": 1.0897344644739139e-06, "loss": 0.0081, "step": 5124 }, { "epoch": 1.1660978384527874, "grad_norm": 0.804672182956438, "learning_rate": 1.089674714747493e-06, "loss": 0.0121, "step": 5125 }, { "epoch": 1.1663253697383391, "grad_norm": 1.289356948925363, "learning_rate": 1.0896149555239717e-06, "loss": 0.0322, "step": 5126 }, { "epoch": 1.1665529010238909, "grad_norm": 1.8000751184542518, "learning_rate": 1.0895551868045718e-06, "loss": 0.055, "step": 5127 }, { "epoch": 1.1667804323094426, "grad_norm": 0.7403604907569553, "learning_rate": 1.0894954085905147e-06, "loss": 0.0213, "step": 5128 }, { "epoch": 1.1670079635949944, "grad_norm": 0.7673485205599404, "learning_rate": 1.0894356208830223e-06, "loss": 0.0146, "step": 5129 }, { "epoch": 1.1672354948805461, "grad_norm": 0.8057347260016878, "learning_rate": 1.089375823683316e-06, "loss": 0.0114, "step": 5130 }, { "epoch": 1.1674630261660979, "grad_norm": 1.031201971745674, "learning_rate": 1.0893160169926186e-06, "loss": 0.0224, "step": 5131 }, { "epoch": 1.1676905574516496, "grad_norm": 1.1471763380869269, "learning_rate": 1.0892562008121522e-06, "loss": 0.0199, "step": 5132 }, { "epoch": 1.1679180887372014, "grad_norm": 1.1194487112088174, "learning_rate": 1.0891963751431392e-06, "loss": 0.0202, "step": 5133 }, { "epoch": 1.1681456200227531, "grad_norm": 0.9431094841741555, "learning_rate": 1.0891365399868022e-06, "loss": 0.0242, "step": 5134 }, { "epoch": 1.1683731513083049, "grad_norm": 0.8152241328521926, "learning_rate": 1.0890766953443646e-06, "loss": 0.0135, "step": 5135 }, { "epoch": 1.1686006825938566, "grad_norm": 1.111189670935272, "learning_rate": 1.0890168412170493e-06, "loss": 0.0313, "step": 5136 }, { "epoch": 1.1688282138794084, "grad_norm": 1.0125526520638173, "learning_rate": 1.0889569776060796e-06, "loss": 0.0219, "step": 5137 }, { "epoch": 1.1690557451649601, "grad_norm": 1.1702719514469648, "learning_rate": 1.088897104512679e-06, "loss": 0.0349, "step": 5138 }, { "epoch": 1.1692832764505119, "grad_norm": 0.8014688872464257, "learning_rate": 1.0888372219380709e-06, "loss": 0.0134, "step": 5139 }, { "epoch": 1.1695108077360636, "grad_norm": 1.2120196426423329, "learning_rate": 1.0887773298834798e-06, "loss": 0.0218, "step": 5140 }, { "epoch": 1.1697383390216154, "grad_norm": 0.5093518570185542, "learning_rate": 1.0887174283501293e-06, "loss": 0.0069, "step": 5141 }, { "epoch": 1.1699658703071671, "grad_norm": 1.0586184070871325, "learning_rate": 1.0886575173392435e-06, "loss": 0.0212, "step": 5142 }, { "epoch": 1.170193401592719, "grad_norm": 0.9121062929597813, "learning_rate": 1.0885975968520476e-06, "loss": 0.01, "step": 5143 }, { "epoch": 1.1704209328782706, "grad_norm": 1.1192151515949293, "learning_rate": 1.0885376668897656e-06, "loss": 0.0297, "step": 5144 }, { "epoch": 1.1706484641638226, "grad_norm": 1.1083415447915677, "learning_rate": 1.0884777274536228e-06, "loss": 0.0288, "step": 5145 }, { "epoch": 1.1708759954493744, "grad_norm": 1.1157775165463915, "learning_rate": 1.0884177785448441e-06, "loss": 0.0179, "step": 5146 }, { "epoch": 1.1711035267349261, "grad_norm": 0.6942923206091653, "learning_rate": 1.0883578201646546e-06, "loss": 0.0186, "step": 5147 }, { "epoch": 1.1713310580204779, "grad_norm": 1.0660205113669436, "learning_rate": 1.08829785231428e-06, "loss": 0.0154, "step": 5148 }, { "epoch": 1.1715585893060296, "grad_norm": 1.2226468081458537, "learning_rate": 1.0882378749949456e-06, "loss": 0.027, "step": 5149 }, { "epoch": 1.1717861205915814, "grad_norm": 0.6736525742954345, "learning_rate": 1.0881778882078774e-06, "loss": 0.0104, "step": 5150 }, { "epoch": 1.1720136518771331, "grad_norm": 1.0175690285786059, "learning_rate": 1.0881178919543016e-06, "loss": 0.0209, "step": 5151 }, { "epoch": 1.1722411831626849, "grad_norm": 1.0274399291271918, "learning_rate": 1.0880578862354444e-06, "loss": 0.0169, "step": 5152 }, { "epoch": 1.1724687144482366, "grad_norm": 1.174579618024413, "learning_rate": 1.087997871052532e-06, "loss": 0.023, "step": 5153 }, { "epoch": 1.1726962457337884, "grad_norm": 1.0784512457128186, "learning_rate": 1.0879378464067906e-06, "loss": 0.0505, "step": 5154 }, { "epoch": 1.1729237770193401, "grad_norm": 1.2684504792555313, "learning_rate": 1.0878778122994477e-06, "loss": 0.0199, "step": 5155 }, { "epoch": 1.173151308304892, "grad_norm": 1.451789483548959, "learning_rate": 1.0878177687317302e-06, "loss": 0.0433, "step": 5156 }, { "epoch": 1.1733788395904436, "grad_norm": 0.9973415681599368, "learning_rate": 1.0877577157048648e-06, "loss": 0.0205, "step": 5157 }, { "epoch": 1.1736063708759954, "grad_norm": 2.814855357951941, "learning_rate": 1.0876976532200797e-06, "loss": 0.0309, "step": 5158 }, { "epoch": 1.1738339021615471, "grad_norm": 0.8916291744362826, "learning_rate": 1.0876375812786017e-06, "loss": 0.0207, "step": 5159 }, { "epoch": 1.174061433447099, "grad_norm": 0.7454883697223361, "learning_rate": 1.0875774998816586e-06, "loss": 0.0181, "step": 5160 }, { "epoch": 1.1742889647326507, "grad_norm": 1.5976521141289088, "learning_rate": 1.087517409030479e-06, "loss": 0.0352, "step": 5161 }, { "epoch": 1.1745164960182026, "grad_norm": 0.9023925994855438, "learning_rate": 1.0874573087262902e-06, "loss": 0.0189, "step": 5162 }, { "epoch": 1.1747440273037544, "grad_norm": 1.021608393538654, "learning_rate": 1.087397198970321e-06, "loss": 0.0248, "step": 5163 }, { "epoch": 1.1749715585893061, "grad_norm": 1.5179996755409662, "learning_rate": 1.0873370797638002e-06, "loss": 0.0346, "step": 5164 }, { "epoch": 1.1751990898748579, "grad_norm": 1.388608360949784, "learning_rate": 1.0872769511079561e-06, "loss": 0.0371, "step": 5165 }, { "epoch": 1.1754266211604096, "grad_norm": 0.9944177229048421, "learning_rate": 1.0872168130040175e-06, "loss": 0.0255, "step": 5166 }, { "epoch": 1.1756541524459614, "grad_norm": 1.460644807474802, "learning_rate": 1.087156665453214e-06, "loss": 0.0267, "step": 5167 }, { "epoch": 1.1758816837315131, "grad_norm": 1.201538811332985, "learning_rate": 1.0870965084567748e-06, "loss": 0.0322, "step": 5168 }, { "epoch": 1.176109215017065, "grad_norm": 0.9737685044159669, "learning_rate": 1.087036342015929e-06, "loss": 0.0238, "step": 5169 }, { "epoch": 1.1763367463026166, "grad_norm": 1.1123560277419813, "learning_rate": 1.0869761661319064e-06, "loss": 0.0323, "step": 5170 }, { "epoch": 1.1765642775881684, "grad_norm": 1.0917701556058614, "learning_rate": 1.0869159808059373e-06, "loss": 0.0239, "step": 5171 }, { "epoch": 1.1767918088737201, "grad_norm": 0.9254801632517604, "learning_rate": 1.0868557860392516e-06, "loss": 0.0225, "step": 5172 }, { "epoch": 1.177019340159272, "grad_norm": 1.1647756504926867, "learning_rate": 1.0867955818330792e-06, "loss": 0.0202, "step": 5173 }, { "epoch": 1.1772468714448237, "grad_norm": 0.9089361069774722, "learning_rate": 1.086735368188651e-06, "loss": 0.0167, "step": 5174 }, { "epoch": 1.1774744027303754, "grad_norm": 1.5111973761987758, "learning_rate": 1.0866751451071974e-06, "loss": 0.0295, "step": 5175 }, { "epoch": 1.1777019340159272, "grad_norm": 0.6596228779983846, "learning_rate": 1.0866149125899495e-06, "loss": 0.019, "step": 5176 }, { "epoch": 1.177929465301479, "grad_norm": 1.6565664804110074, "learning_rate": 1.086554670638138e-06, "loss": 0.031, "step": 5177 }, { "epoch": 1.1781569965870307, "grad_norm": 0.7431382614926502, "learning_rate": 1.0864944192529946e-06, "loss": 0.0125, "step": 5178 }, { "epoch": 1.1783845278725824, "grad_norm": 1.2886691602870353, "learning_rate": 1.08643415843575e-06, "loss": 0.0132, "step": 5179 }, { "epoch": 1.1786120591581342, "grad_norm": 1.2945110536777915, "learning_rate": 1.0863738881876369e-06, "loss": 0.0453, "step": 5180 }, { "epoch": 1.178839590443686, "grad_norm": 1.6944088405196147, "learning_rate": 1.0863136085098862e-06, "loss": 0.028, "step": 5181 }, { "epoch": 1.1790671217292377, "grad_norm": 0.9678714964974754, "learning_rate": 1.08625331940373e-06, "loss": 0.0186, "step": 5182 }, { "epoch": 1.1792946530147894, "grad_norm": 0.8164594441461164, "learning_rate": 1.086193020870401e-06, "loss": 0.0197, "step": 5183 }, { "epoch": 1.1795221843003414, "grad_norm": 0.847422876421066, "learning_rate": 1.0861327129111313e-06, "loss": 0.014, "step": 5184 }, { "epoch": 1.1797497155858931, "grad_norm": 0.8408136915565151, "learning_rate": 1.0860723955271533e-06, "loss": 0.0141, "step": 5185 }, { "epoch": 1.179977246871445, "grad_norm": 1.4185159384093218, "learning_rate": 1.0860120687196998e-06, "loss": 0.0174, "step": 5186 }, { "epoch": 1.1802047781569966, "grad_norm": 1.3653351030007033, "learning_rate": 1.0859517324900042e-06, "loss": 0.0318, "step": 5187 }, { "epoch": 1.1804323094425484, "grad_norm": 0.9616143252233853, "learning_rate": 1.0858913868392993e-06, "loss": 0.0324, "step": 5188 }, { "epoch": 1.1806598407281002, "grad_norm": 0.8826104173449131, "learning_rate": 1.0858310317688184e-06, "loss": 0.0194, "step": 5189 }, { "epoch": 1.180887372013652, "grad_norm": 0.573005849494334, "learning_rate": 1.0857706672797954e-06, "loss": 0.007, "step": 5190 }, { "epoch": 1.1811149032992037, "grad_norm": 0.792841731677897, "learning_rate": 1.0857102933734636e-06, "loss": 0.0228, "step": 5191 }, { "epoch": 1.1813424345847554, "grad_norm": 1.1198194956030572, "learning_rate": 1.0856499100510575e-06, "loss": 0.0204, "step": 5192 }, { "epoch": 1.1815699658703072, "grad_norm": 1.4060671628835655, "learning_rate": 1.0855895173138107e-06, "loss": 0.0335, "step": 5193 }, { "epoch": 1.181797497155859, "grad_norm": 1.0332252844504048, "learning_rate": 1.0855291151629576e-06, "loss": 0.0248, "step": 5194 }, { "epoch": 1.1820250284414107, "grad_norm": 1.3939283564653595, "learning_rate": 1.0854687035997329e-06, "loss": 0.0503, "step": 5195 }, { "epoch": 1.1822525597269624, "grad_norm": 1.389414773323681, "learning_rate": 1.0854082826253712e-06, "loss": 0.0179, "step": 5196 }, { "epoch": 1.1824800910125142, "grad_norm": 1.3843557347025581, "learning_rate": 1.0853478522411072e-06, "loss": 0.0235, "step": 5197 }, { "epoch": 1.182707622298066, "grad_norm": 3.1968855275158368, "learning_rate": 1.0852874124481764e-06, "loss": 0.0411, "step": 5198 }, { "epoch": 1.1829351535836177, "grad_norm": 1.4480239507963797, "learning_rate": 1.0852269632478138e-06, "loss": 0.0363, "step": 5199 }, { "epoch": 1.1831626848691694, "grad_norm": 1.9126854541087823, "learning_rate": 1.085166504641255e-06, "loss": 0.0181, "step": 5200 }, { "epoch": 1.1833902161547214, "grad_norm": 1.1213805046183494, "learning_rate": 1.0851060366297356e-06, "loss": 0.0353, "step": 5201 }, { "epoch": 1.1836177474402731, "grad_norm": 2.0688040255631126, "learning_rate": 1.0850455592144916e-06, "loss": 0.0451, "step": 5202 }, { "epoch": 1.183845278725825, "grad_norm": 1.7510399141137207, "learning_rate": 1.0849850723967585e-06, "loss": 0.0109, "step": 5203 }, { "epoch": 1.1840728100113767, "grad_norm": 6.664020652459381, "learning_rate": 1.0849245761777733e-06, "loss": 0.0381, "step": 5204 }, { "epoch": 1.1843003412969284, "grad_norm": 2.621359696329597, "learning_rate": 1.0848640705587718e-06, "loss": 0.0216, "step": 5205 }, { "epoch": 1.1845278725824802, "grad_norm": 1.8030419214813977, "learning_rate": 1.0848035555409911e-06, "loss": 0.0303, "step": 5206 }, { "epoch": 1.184755403868032, "grad_norm": 1.186220362313326, "learning_rate": 1.0847430311256676e-06, "loss": 0.0197, "step": 5207 }, { "epoch": 1.1849829351535837, "grad_norm": 1.5048336995036211, "learning_rate": 1.0846824973140388e-06, "loss": 0.0185, "step": 5208 }, { "epoch": 1.1852104664391354, "grad_norm": 1.8320739469192913, "learning_rate": 1.0846219541073417e-06, "loss": 0.0217, "step": 5209 }, { "epoch": 1.1854379977246872, "grad_norm": 1.6975307868867775, "learning_rate": 1.084561401506813e-06, "loss": 0.0491, "step": 5210 }, { "epoch": 1.185665529010239, "grad_norm": 2.1632818389721153, "learning_rate": 1.0845008395136915e-06, "loss": 0.057, "step": 5211 }, { "epoch": 1.1858930602957907, "grad_norm": 1.271479589500172, "learning_rate": 1.0844402681292144e-06, "loss": 0.0369, "step": 5212 }, { "epoch": 1.1861205915813424, "grad_norm": 1.0411295086764045, "learning_rate": 1.084379687354619e-06, "loss": 0.0097, "step": 5213 }, { "epoch": 1.1863481228668942, "grad_norm": 2.6559141117447225, "learning_rate": 1.0843190971911447e-06, "loss": 0.0228, "step": 5214 }, { "epoch": 1.186575654152446, "grad_norm": 1.5202925528678082, "learning_rate": 1.0842584976400292e-06, "loss": 0.0338, "step": 5215 }, { "epoch": 1.1868031854379977, "grad_norm": 1.1093380023536548, "learning_rate": 1.084197888702511e-06, "loss": 0.0168, "step": 5216 }, { "epoch": 1.1870307167235494, "grad_norm": 1.5307787654530016, "learning_rate": 1.0841372703798289e-06, "loss": 0.0278, "step": 5217 }, { "epoch": 1.1872582480091012, "grad_norm": 1.3322857454820025, "learning_rate": 1.0840766426732219e-06, "loss": 0.0347, "step": 5218 }, { "epoch": 1.187485779294653, "grad_norm": 0.8608991873903781, "learning_rate": 1.0840160055839291e-06, "loss": 0.0238, "step": 5219 }, { "epoch": 1.1877133105802047, "grad_norm": 1.119129138655594, "learning_rate": 1.08395535911319e-06, "loss": 0.0131, "step": 5220 }, { "epoch": 1.1879408418657564, "grad_norm": 0.9520127980932701, "learning_rate": 1.0838947032622436e-06, "loss": 0.0187, "step": 5221 }, { "epoch": 1.1881683731513082, "grad_norm": 1.0013994149687084, "learning_rate": 1.0838340380323297e-06, "loss": 0.0217, "step": 5222 }, { "epoch": 1.1883959044368602, "grad_norm": 2.3034636462642166, "learning_rate": 1.083773363424689e-06, "loss": 0.0253, "step": 5223 }, { "epoch": 1.188623435722412, "grad_norm": 0.9604142125712335, "learning_rate": 1.0837126794405603e-06, "loss": 0.0304, "step": 5224 }, { "epoch": 1.1888509670079637, "grad_norm": 0.9680611296826206, "learning_rate": 1.083651986081185e-06, "loss": 0.0233, "step": 5225 }, { "epoch": 1.1890784982935154, "grad_norm": 1.1564232624147677, "learning_rate": 1.0835912833478029e-06, "loss": 0.0514, "step": 5226 }, { "epoch": 1.1893060295790672, "grad_norm": 1.0708217476839645, "learning_rate": 1.0835305712416546e-06, "loss": 0.0244, "step": 5227 }, { "epoch": 1.189533560864619, "grad_norm": 1.1356456632073215, "learning_rate": 1.0834698497639817e-06, "loss": 0.0453, "step": 5228 }, { "epoch": 1.1897610921501707, "grad_norm": 0.9643109850754426, "learning_rate": 1.0834091189160243e-06, "loss": 0.0148, "step": 5229 }, { "epoch": 1.1899886234357224, "grad_norm": 1.4647639680120725, "learning_rate": 1.0833483786990243e-06, "loss": 0.0398, "step": 5230 }, { "epoch": 1.1902161547212742, "grad_norm": 0.6967268185621851, "learning_rate": 1.0832876291142228e-06, "loss": 0.0153, "step": 5231 }, { "epoch": 1.190443686006826, "grad_norm": 1.5226277581599168, "learning_rate": 1.0832268701628613e-06, "loss": 0.0387, "step": 5232 }, { "epoch": 1.1906712172923777, "grad_norm": 1.5725234513516795, "learning_rate": 1.083166101846182e-06, "loss": 0.059, "step": 5233 }, { "epoch": 1.1908987485779294, "grad_norm": 1.0092630282850967, "learning_rate": 1.0831053241654265e-06, "loss": 0.019, "step": 5234 }, { "epoch": 1.1911262798634812, "grad_norm": 1.1102622210079196, "learning_rate": 1.0830445371218374e-06, "loss": 0.0234, "step": 5235 }, { "epoch": 1.191353811149033, "grad_norm": 1.2854110941773407, "learning_rate": 1.0829837407166565e-06, "loss": 0.0243, "step": 5236 }, { "epoch": 1.1915813424345847, "grad_norm": 0.38016640183110134, "learning_rate": 1.082922934951127e-06, "loss": 0.0134, "step": 5237 }, { "epoch": 1.1918088737201364, "grad_norm": 1.168241176946775, "learning_rate": 1.082862119826491e-06, "loss": 0.031, "step": 5238 }, { "epoch": 1.1920364050056882, "grad_norm": 1.1960841025032027, "learning_rate": 1.0828012953439921e-06, "loss": 0.052, "step": 5239 }, { "epoch": 1.1922639362912402, "grad_norm": 0.7158528800285552, "learning_rate": 1.082740461504873e-06, "loss": 0.011, "step": 5240 }, { "epoch": 1.192491467576792, "grad_norm": 1.1123900753579041, "learning_rate": 1.0826796183103774e-06, "loss": 0.0442, "step": 5241 }, { "epoch": 1.1927189988623437, "grad_norm": 1.3167322420939016, "learning_rate": 1.0826187657617484e-06, "loss": 0.0351, "step": 5242 }, { "epoch": 1.1929465301478954, "grad_norm": 1.8256977932069058, "learning_rate": 1.0825579038602298e-06, "loss": 0.0228, "step": 5243 }, { "epoch": 1.1931740614334472, "grad_norm": 0.8153271667842001, "learning_rate": 1.082497032607066e-06, "loss": 0.0218, "step": 5244 }, { "epoch": 1.193401592718999, "grad_norm": 1.13052228865416, "learning_rate": 1.0824361520035004e-06, "loss": 0.0269, "step": 5245 }, { "epoch": 1.1936291240045507, "grad_norm": 0.8769614800574006, "learning_rate": 1.0823752620507778e-06, "loss": 0.0196, "step": 5246 }, { "epoch": 1.1938566552901024, "grad_norm": 2.442105507702053, "learning_rate": 1.0823143627501423e-06, "loss": 0.0223, "step": 5247 }, { "epoch": 1.1940841865756542, "grad_norm": 0.9690120767439246, "learning_rate": 1.082253454102839e-06, "loss": 0.0293, "step": 5248 }, { "epoch": 1.194311717861206, "grad_norm": 0.9656302532397552, "learning_rate": 1.0821925361101124e-06, "loss": 0.0194, "step": 5249 }, { "epoch": 1.1945392491467577, "grad_norm": 1.2220527685129399, "learning_rate": 1.0821316087732075e-06, "loss": 0.0361, "step": 5250 }, { "epoch": 1.1947667804323094, "grad_norm": 1.1131169147903066, "learning_rate": 1.0820706720933698e-06, "loss": 0.0282, "step": 5251 }, { "epoch": 1.1949943117178612, "grad_norm": 1.1331333681292557, "learning_rate": 1.0820097260718448e-06, "loss": 0.0117, "step": 5252 }, { "epoch": 1.195221843003413, "grad_norm": 1.126019194986974, "learning_rate": 1.081948770709878e-06, "loss": 0.0142, "step": 5253 }, { "epoch": 1.1954493742889647, "grad_norm": 1.9300596010130329, "learning_rate": 1.0818878060087151e-06, "loss": 0.0305, "step": 5254 }, { "epoch": 1.1956769055745164, "grad_norm": 3.8484100820913096, "learning_rate": 1.081826831969602e-06, "loss": 0.0175, "step": 5255 }, { "epoch": 1.1959044368600682, "grad_norm": 2.6177072747321493, "learning_rate": 1.0817658485937855e-06, "loss": 0.0356, "step": 5256 }, { "epoch": 1.19613196814562, "grad_norm": 1.8647005779081272, "learning_rate": 1.0817048558825114e-06, "loss": 0.0416, "step": 5257 }, { "epoch": 1.1963594994311717, "grad_norm": 1.8201241368258048, "learning_rate": 1.0816438538370262e-06, "loss": 0.03, "step": 5258 }, { "epoch": 1.1965870307167235, "grad_norm": 1.309838446791344, "learning_rate": 1.0815828424585772e-06, "loss": 0.0221, "step": 5259 }, { "epoch": 1.1968145620022752, "grad_norm": 2.2126363422898736, "learning_rate": 1.081521821748411e-06, "loss": 0.01, "step": 5260 }, { "epoch": 1.197042093287827, "grad_norm": 1.5225649498436484, "learning_rate": 1.081460791707775e-06, "loss": 0.0106, "step": 5261 }, { "epoch": 1.197269624573379, "grad_norm": 2.926124794782369, "learning_rate": 1.0813997523379163e-06, "loss": 0.0545, "step": 5262 }, { "epoch": 1.1974971558589307, "grad_norm": 3.121781275137615, "learning_rate": 1.0813387036400825e-06, "loss": 0.039, "step": 5263 }, { "epoch": 1.1977246871444824, "grad_norm": 2.991132032943111, "learning_rate": 1.081277645615521e-06, "loss": 0.0217, "step": 5264 }, { "epoch": 1.1979522184300342, "grad_norm": 0.7510540265715727, "learning_rate": 1.0812165782654806e-06, "loss": 0.0113, "step": 5265 }, { "epoch": 1.198179749715586, "grad_norm": 0.9442454590864651, "learning_rate": 1.0811555015912086e-06, "loss": 0.0128, "step": 5266 }, { "epoch": 1.1984072810011377, "grad_norm": 1.0369867334237874, "learning_rate": 1.0810944155939536e-06, "loss": 0.0289, "step": 5267 }, { "epoch": 1.1986348122866894, "grad_norm": 1.2893237225891867, "learning_rate": 1.081033320274964e-06, "loss": 0.0225, "step": 5268 }, { "epoch": 1.1988623435722412, "grad_norm": 1.4348798625399937, "learning_rate": 1.0809722156354884e-06, "loss": 0.0196, "step": 5269 }, { "epoch": 1.199089874857793, "grad_norm": 0.7783433954066992, "learning_rate": 1.0809111016767762e-06, "loss": 0.0132, "step": 5270 }, { "epoch": 1.1993174061433447, "grad_norm": 2.29125273839501, "learning_rate": 1.0808499784000756e-06, "loss": 0.0155, "step": 5271 }, { "epoch": 1.1995449374288965, "grad_norm": 1.8766245713656982, "learning_rate": 1.0807888458066364e-06, "loss": 0.0405, "step": 5272 }, { "epoch": 1.1997724687144482, "grad_norm": 2.0497369482648997, "learning_rate": 1.0807277038977083e-06, "loss": 0.0483, "step": 5273 }, { "epoch": 1.2, "grad_norm": 1.3799009247077467, "learning_rate": 1.0806665526745403e-06, "loss": 0.024, "step": 5274 }, { "epoch": 1.2002275312855517, "grad_norm": 0.9219092351160844, "learning_rate": 1.0806053921383823e-06, "loss": 0.0206, "step": 5275 }, { "epoch": 1.2004550625711035, "grad_norm": 0.7062087076877579, "learning_rate": 1.0805442222904846e-06, "loss": 0.0197, "step": 5276 }, { "epoch": 1.2006825938566552, "grad_norm": 1.1815501920868743, "learning_rate": 1.0804830431320972e-06, "loss": 0.0311, "step": 5277 }, { "epoch": 1.200910125142207, "grad_norm": 1.4194267958205675, "learning_rate": 1.0804218546644708e-06, "loss": 0.0269, "step": 5278 }, { "epoch": 1.201137656427759, "grad_norm": 1.5374413742413802, "learning_rate": 1.0803606568888557e-06, "loss": 0.0371, "step": 5279 }, { "epoch": 1.2013651877133107, "grad_norm": 1.4162269983170825, "learning_rate": 1.0802994498065027e-06, "loss": 0.0433, "step": 5280 }, { "epoch": 1.2015927189988624, "grad_norm": 1.3982466009914463, "learning_rate": 1.0802382334186627e-06, "loss": 0.02, "step": 5281 }, { "epoch": 1.2018202502844142, "grad_norm": 1.0353374922029346, "learning_rate": 1.080177007726587e-06, "loss": 0.0314, "step": 5282 }, { "epoch": 1.202047781569966, "grad_norm": 1.4144577926145716, "learning_rate": 1.080115772731527e-06, "loss": 0.0237, "step": 5283 }, { "epoch": 1.2022753128555177, "grad_norm": 1.270246359088111, "learning_rate": 1.0800545284347338e-06, "loss": 0.0278, "step": 5284 }, { "epoch": 1.2025028441410694, "grad_norm": 1.7811867988957832, "learning_rate": 1.0799932748374598e-06, "loss": 0.0351, "step": 5285 }, { "epoch": 1.2027303754266212, "grad_norm": 0.811331257305328, "learning_rate": 1.0799320119409562e-06, "loss": 0.0154, "step": 5286 }, { "epoch": 1.202957906712173, "grad_norm": 1.326967549928065, "learning_rate": 1.0798707397464756e-06, "loss": 0.0251, "step": 5287 }, { "epoch": 1.2031854379977247, "grad_norm": 1.2668064811271582, "learning_rate": 1.0798094582552703e-06, "loss": 0.0214, "step": 5288 }, { "epoch": 1.2034129692832765, "grad_norm": 1.7316590095255162, "learning_rate": 1.0797481674685925e-06, "loss": 0.0595, "step": 5289 }, { "epoch": 1.2036405005688282, "grad_norm": 0.8626668121425645, "learning_rate": 1.0796868673876947e-06, "loss": 0.0138, "step": 5290 }, { "epoch": 1.20386803185438, "grad_norm": 1.688909270364021, "learning_rate": 1.0796255580138303e-06, "loss": 0.0376, "step": 5291 }, { "epoch": 1.2040955631399317, "grad_norm": 1.3043440282984107, "learning_rate": 1.0795642393482523e-06, "loss": 0.0511, "step": 5292 }, { "epoch": 1.2043230944254835, "grad_norm": 1.2730397806370202, "learning_rate": 1.0795029113922136e-06, "loss": 0.0232, "step": 5293 }, { "epoch": 1.2045506257110352, "grad_norm": 0.5816881949668542, "learning_rate": 1.0794415741469677e-06, "loss": 0.0086, "step": 5294 }, { "epoch": 1.204778156996587, "grad_norm": 1.0835756813832158, "learning_rate": 1.0793802276137683e-06, "loss": 0.0204, "step": 5295 }, { "epoch": 1.2050056882821387, "grad_norm": 1.8276533196679292, "learning_rate": 1.0793188717938693e-06, "loss": 0.0304, "step": 5296 }, { "epoch": 1.2052332195676905, "grad_norm": 2.167032474707277, "learning_rate": 1.0792575066885245e-06, "loss": 0.0331, "step": 5297 }, { "epoch": 1.2054607508532422, "grad_norm": 0.7493889811458411, "learning_rate": 1.0791961322989882e-06, "loss": 0.0147, "step": 5298 }, { "epoch": 1.205688282138794, "grad_norm": 1.329356172948289, "learning_rate": 1.0791347486265147e-06, "loss": 0.0353, "step": 5299 }, { "epoch": 1.2059158134243457, "grad_norm": 0.8797921570409813, "learning_rate": 1.0790733556723589e-06, "loss": 0.0242, "step": 5300 }, { "epoch": 1.2061433447098977, "grad_norm": 0.8299000303047913, "learning_rate": 1.079011953437775e-06, "loss": 0.0131, "step": 5301 }, { "epoch": 1.2063708759954495, "grad_norm": 0.5695113688949743, "learning_rate": 1.0789505419240185e-06, "loss": 0.0091, "step": 5302 }, { "epoch": 1.2065984072810012, "grad_norm": 1.007416588849143, "learning_rate": 1.0788891211323442e-06, "loss": 0.0211, "step": 5303 }, { "epoch": 1.206825938566553, "grad_norm": 1.0207685534429214, "learning_rate": 1.0788276910640074e-06, "loss": 0.0167, "step": 5304 }, { "epoch": 1.2070534698521047, "grad_norm": 0.9531066995071166, "learning_rate": 1.0787662517202641e-06, "loss": 0.0264, "step": 5305 }, { "epoch": 1.2072810011376565, "grad_norm": 0.8593609085890438, "learning_rate": 1.0787048031023693e-06, "loss": 0.0192, "step": 5306 }, { "epoch": 1.2075085324232082, "grad_norm": 0.7570873924698162, "learning_rate": 1.0786433452115794e-06, "loss": 0.0107, "step": 5307 }, { "epoch": 1.20773606370876, "grad_norm": 0.6192632652453488, "learning_rate": 1.0785818780491502e-06, "loss": 0.01, "step": 5308 }, { "epoch": 1.2079635949943117, "grad_norm": 1.1762597971964037, "learning_rate": 1.0785204016163384e-06, "loss": 0.017, "step": 5309 }, { "epoch": 1.2081911262798635, "grad_norm": 0.8264299611729337, "learning_rate": 1.0784589159143999e-06, "loss": 0.0137, "step": 5310 }, { "epoch": 1.2084186575654152, "grad_norm": 0.5022797081392271, "learning_rate": 1.0783974209445915e-06, "loss": 0.0069, "step": 5311 }, { "epoch": 1.208646188850967, "grad_norm": 0.6561114668173221, "learning_rate": 1.0783359167081705e-06, "loss": 0.0178, "step": 5312 }, { "epoch": 1.2088737201365187, "grad_norm": 1.860198594863872, "learning_rate": 1.0782744032063935e-06, "loss": 0.0395, "step": 5313 }, { "epoch": 1.2091012514220705, "grad_norm": 1.2557407515136827, "learning_rate": 1.078212880440518e-06, "loss": 0.0198, "step": 5314 }, { "epoch": 1.2093287827076222, "grad_norm": 0.9774237973356178, "learning_rate": 1.0781513484118008e-06, "loss": 0.033, "step": 5315 }, { "epoch": 1.209556313993174, "grad_norm": 1.1404558362365051, "learning_rate": 1.0780898071215004e-06, "loss": 0.0153, "step": 5316 }, { "epoch": 1.2097838452787257, "grad_norm": 1.2711927135952792, "learning_rate": 1.0780282565708738e-06, "loss": 0.0452, "step": 5317 }, { "epoch": 1.2100113765642777, "grad_norm": 1.572981203889357, "learning_rate": 1.0779666967611796e-06, "loss": 0.0366, "step": 5318 }, { "epoch": 1.2102389078498295, "grad_norm": 1.0432053268711592, "learning_rate": 1.0779051276936755e-06, "loss": 0.0193, "step": 5319 }, { "epoch": 1.2104664391353812, "grad_norm": 1.108281271730141, "learning_rate": 1.0778435493696202e-06, "loss": 0.0514, "step": 5320 }, { "epoch": 1.210693970420933, "grad_norm": 1.0307341066968732, "learning_rate": 1.0777819617902718e-06, "loss": 0.0232, "step": 5321 }, { "epoch": 1.2109215017064847, "grad_norm": 1.1269375808316393, "learning_rate": 1.0777203649568896e-06, "loss": 0.0111, "step": 5322 }, { "epoch": 1.2111490329920365, "grad_norm": 0.8681106412081948, "learning_rate": 1.077658758870732e-06, "loss": 0.0134, "step": 5323 }, { "epoch": 1.2113765642775882, "grad_norm": 1.6310011066128904, "learning_rate": 1.0775971435330588e-06, "loss": 0.0443, "step": 5324 }, { "epoch": 1.21160409556314, "grad_norm": 1.9800926111893868, "learning_rate": 1.0775355189451286e-06, "loss": 0.0491, "step": 5325 }, { "epoch": 1.2118316268486917, "grad_norm": 1.0735676656079027, "learning_rate": 1.0774738851082011e-06, "loss": 0.0137, "step": 5326 }, { "epoch": 1.2120591581342435, "grad_norm": 1.4112445459881595, "learning_rate": 1.0774122420235363e-06, "loss": 0.0267, "step": 5327 }, { "epoch": 1.2122866894197952, "grad_norm": 1.6264263710804632, "learning_rate": 1.0773505896923936e-06, "loss": 0.0601, "step": 5328 }, { "epoch": 1.212514220705347, "grad_norm": 1.4435922348969275, "learning_rate": 1.0772889281160335e-06, "loss": 0.0487, "step": 5329 }, { "epoch": 1.2127417519908987, "grad_norm": 1.6845252736050549, "learning_rate": 1.0772272572957158e-06, "loss": 0.0384, "step": 5330 }, { "epoch": 1.2129692832764505, "grad_norm": 0.620039095627728, "learning_rate": 1.0771655772327013e-06, "loss": 0.0087, "step": 5331 }, { "epoch": 1.2131968145620022, "grad_norm": 1.0155486269794136, "learning_rate": 1.0771038879282505e-06, "loss": 0.0316, "step": 5332 }, { "epoch": 1.213424345847554, "grad_norm": 1.0720369582435914, "learning_rate": 1.0770421893836243e-06, "loss": 0.0297, "step": 5333 }, { "epoch": 1.2136518771331057, "grad_norm": 1.0641000248930415, "learning_rate": 1.0769804816000835e-06, "loss": 0.0228, "step": 5334 }, { "epoch": 1.2138794084186575, "grad_norm": 1.3185310795364764, "learning_rate": 1.0769187645788895e-06, "loss": 0.0325, "step": 5335 }, { "epoch": 1.2141069397042092, "grad_norm": 0.8019919288175986, "learning_rate": 1.0768570383213035e-06, "loss": 0.0108, "step": 5336 }, { "epoch": 1.214334470989761, "grad_norm": 1.0171180200964656, "learning_rate": 1.0767953028285872e-06, "loss": 0.0162, "step": 5337 }, { "epoch": 1.2145620022753127, "grad_norm": 0.6898326225080136, "learning_rate": 1.0767335581020024e-06, "loss": 0.0105, "step": 5338 }, { "epoch": 1.2147895335608645, "grad_norm": 0.5713039580718985, "learning_rate": 1.076671804142811e-06, "loss": 0.0077, "step": 5339 }, { "epoch": 1.2150170648464165, "grad_norm": 1.2459220754175229, "learning_rate": 1.076610040952275e-06, "loss": 0.0332, "step": 5340 }, { "epoch": 1.2152445961319682, "grad_norm": 1.1187776697921044, "learning_rate": 1.076548268531657e-06, "loss": 0.0191, "step": 5341 }, { "epoch": 1.21547212741752, "grad_norm": 2.0317760107761704, "learning_rate": 1.0764864868822194e-06, "loss": 0.0516, "step": 5342 }, { "epoch": 1.2156996587030717, "grad_norm": 1.9233690667339316, "learning_rate": 1.0764246960052247e-06, "loss": 0.0134, "step": 5343 }, { "epoch": 1.2159271899886235, "grad_norm": 1.6064839097939867, "learning_rate": 1.0763628959019359e-06, "loss": 0.0209, "step": 5344 }, { "epoch": 1.2161547212741752, "grad_norm": 1.2879343782626302, "learning_rate": 1.076301086573616e-06, "loss": 0.016, "step": 5345 }, { "epoch": 1.216382252559727, "grad_norm": 1.1521103656454588, "learning_rate": 1.076239268021529e-06, "loss": 0.0294, "step": 5346 }, { "epoch": 1.2166097838452787, "grad_norm": 0.7645894367138184, "learning_rate": 1.0761774402469375e-06, "loss": 0.0146, "step": 5347 }, { "epoch": 1.2168373151308305, "grad_norm": 1.0900116364078585, "learning_rate": 1.0761156032511052e-06, "loss": 0.0148, "step": 5348 }, { "epoch": 1.2170648464163822, "grad_norm": 1.0426468341641788, "learning_rate": 1.0760537570352963e-06, "loss": 0.0205, "step": 5349 }, { "epoch": 1.217292377701934, "grad_norm": 0.968560865785141, "learning_rate": 1.0759919016007747e-06, "loss": 0.028, "step": 5350 }, { "epoch": 1.2175199089874857, "grad_norm": 1.3908329783187319, "learning_rate": 1.0759300369488046e-06, "loss": 0.0311, "step": 5351 }, { "epoch": 1.2177474402730375, "grad_norm": 0.9336674383856225, "learning_rate": 1.0758681630806502e-06, "loss": 0.0126, "step": 5352 }, { "epoch": 1.2179749715585892, "grad_norm": 0.9822161525178283, "learning_rate": 1.0758062799975765e-06, "loss": 0.0328, "step": 5353 }, { "epoch": 1.218202502844141, "grad_norm": 1.56822129701628, "learning_rate": 1.075744387700848e-06, "loss": 0.0543, "step": 5354 }, { "epoch": 1.2184300341296928, "grad_norm": 0.8508512043785544, "learning_rate": 1.0756824861917294e-06, "loss": 0.0186, "step": 5355 }, { "epoch": 1.2186575654152445, "grad_norm": 1.6471064921915508, "learning_rate": 1.0756205754714866e-06, "loss": 0.0419, "step": 5356 }, { "epoch": 1.2188850967007965, "grad_norm": 0.9710323188808565, "learning_rate": 1.0755586555413845e-06, "loss": 0.0165, "step": 5357 }, { "epoch": 1.2191126279863482, "grad_norm": 1.1740838398504168, "learning_rate": 1.0754967264026882e-06, "loss": 0.0171, "step": 5358 }, { "epoch": 1.2193401592719, "grad_norm": 0.821398345986984, "learning_rate": 1.0754347880566643e-06, "loss": 0.0144, "step": 5359 }, { "epoch": 1.2195676905574517, "grad_norm": 0.737155613985261, "learning_rate": 1.075372840504578e-06, "loss": 0.0215, "step": 5360 }, { "epoch": 1.2197952218430035, "grad_norm": 1.2248403884170755, "learning_rate": 1.0753108837476958e-06, "loss": 0.023, "step": 5361 }, { "epoch": 1.2200227531285552, "grad_norm": 1.5213992441300637, "learning_rate": 1.0752489177872839e-06, "loss": 0.0211, "step": 5362 }, { "epoch": 1.220250284414107, "grad_norm": 0.9276211966706847, "learning_rate": 1.0751869426246086e-06, "loss": 0.0248, "step": 5363 }, { "epoch": 1.2204778156996587, "grad_norm": 0.8339386449394393, "learning_rate": 1.0751249582609368e-06, "loss": 0.0121, "step": 5364 }, { "epoch": 1.2207053469852105, "grad_norm": 0.814490781430545, "learning_rate": 1.075062964697535e-06, "loss": 0.0169, "step": 5365 }, { "epoch": 1.2209328782707622, "grad_norm": 1.368386097389221, "learning_rate": 1.0750009619356706e-06, "loss": 0.0406, "step": 5366 }, { "epoch": 1.221160409556314, "grad_norm": 1.0996742222363038, "learning_rate": 1.0749389499766106e-06, "loss": 0.0259, "step": 5367 }, { "epoch": 1.2213879408418657, "grad_norm": 1.468334437179015, "learning_rate": 1.0748769288216226e-06, "loss": 0.0272, "step": 5368 }, { "epoch": 1.2216154721274175, "grad_norm": 1.3908865445482075, "learning_rate": 1.074814898471974e-06, "loss": 0.0461, "step": 5369 }, { "epoch": 1.2218430034129693, "grad_norm": 1.3737903263713396, "learning_rate": 1.0747528589289327e-06, "loss": 0.0109, "step": 5370 }, { "epoch": 1.222070534698521, "grad_norm": 0.9334616229837778, "learning_rate": 1.0746908101937666e-06, "loss": 0.0225, "step": 5371 }, { "epoch": 1.2222980659840728, "grad_norm": 3.4821828449919656, "learning_rate": 1.0746287522677439e-06, "loss": 0.1442, "step": 5372 }, { "epoch": 1.2225255972696245, "grad_norm": 1.1612571872192434, "learning_rate": 1.074566685152133e-06, "loss": 0.024, "step": 5373 }, { "epoch": 1.2227531285551763, "grad_norm": 0.6082887587191537, "learning_rate": 1.0745046088482025e-06, "loss": 0.0091, "step": 5374 }, { "epoch": 1.222980659840728, "grad_norm": 0.8947279579511637, "learning_rate": 1.074442523357221e-06, "loss": 0.018, "step": 5375 }, { "epoch": 1.2232081911262798, "grad_norm": 0.8578464895598875, "learning_rate": 1.0743804286804573e-06, "loss": 0.0083, "step": 5376 }, { "epoch": 1.2234357224118315, "grad_norm": 0.9832599316516126, "learning_rate": 1.0743183248191806e-06, "loss": 0.0112, "step": 5377 }, { "epoch": 1.2236632536973833, "grad_norm": 0.9942384197604868, "learning_rate": 1.0742562117746604e-06, "loss": 0.0266, "step": 5378 }, { "epoch": 1.2238907849829352, "grad_norm": 0.907525459976151, "learning_rate": 1.074194089548166e-06, "loss": 0.0182, "step": 5379 }, { "epoch": 1.224118316268487, "grad_norm": 0.6044100806377944, "learning_rate": 1.0741319581409667e-06, "loss": 0.0132, "step": 5380 }, { "epoch": 1.2243458475540387, "grad_norm": 0.7851876560740085, "learning_rate": 1.0740698175543332e-06, "loss": 0.008, "step": 5381 }, { "epoch": 1.2245733788395905, "grad_norm": 1.1184528122514146, "learning_rate": 1.0740076677895348e-06, "loss": 0.0295, "step": 5382 }, { "epoch": 1.2248009101251423, "grad_norm": 1.0110121442428808, "learning_rate": 1.0739455088478422e-06, "loss": 0.0246, "step": 5383 }, { "epoch": 1.225028441410694, "grad_norm": 1.3430003631618936, "learning_rate": 1.0738833407305254e-06, "loss": 0.0311, "step": 5384 }, { "epoch": 1.2252559726962458, "grad_norm": 0.714109028797083, "learning_rate": 1.0738211634388554e-06, "loss": 0.0137, "step": 5385 }, { "epoch": 1.2254835039817975, "grad_norm": 1.7977112803389215, "learning_rate": 1.0737589769741025e-06, "loss": 0.0311, "step": 5386 }, { "epoch": 1.2257110352673493, "grad_norm": 1.595010039888779, "learning_rate": 1.0736967813375382e-06, "loss": 0.0264, "step": 5387 }, { "epoch": 1.225938566552901, "grad_norm": 0.9599396412918755, "learning_rate": 1.0736345765304335e-06, "loss": 0.0183, "step": 5388 }, { "epoch": 1.2261660978384528, "grad_norm": 1.4002183993615684, "learning_rate": 1.0735723625540596e-06, "loss": 0.0584, "step": 5389 }, { "epoch": 1.2263936291240045, "grad_norm": 0.5289624966838866, "learning_rate": 1.073510139409688e-06, "loss": 0.0101, "step": 5390 }, { "epoch": 1.2266211604095563, "grad_norm": 1.057508406840753, "learning_rate": 1.0734479070985908e-06, "loss": 0.0142, "step": 5391 }, { "epoch": 1.226848691695108, "grad_norm": 0.7921993020441022, "learning_rate": 1.0733856656220396e-06, "loss": 0.0111, "step": 5392 }, { "epoch": 1.2270762229806598, "grad_norm": 0.7881195760466057, "learning_rate": 1.0733234149813065e-06, "loss": 0.0251, "step": 5393 }, { "epoch": 1.2273037542662115, "grad_norm": 0.7174133775125424, "learning_rate": 1.0732611551776639e-06, "loss": 0.0123, "step": 5394 }, { "epoch": 1.2275312855517633, "grad_norm": 1.2047358857096602, "learning_rate": 1.0731988862123841e-06, "loss": 0.012, "step": 5395 }, { "epoch": 1.2277588168373152, "grad_norm": 0.8067610993565308, "learning_rate": 1.07313660808674e-06, "loss": 0.0093, "step": 5396 }, { "epoch": 1.227986348122867, "grad_norm": 0.9414182769464757, "learning_rate": 1.0730743208020044e-06, "loss": 0.0138, "step": 5397 }, { "epoch": 1.2282138794084188, "grad_norm": 1.0848537565553817, "learning_rate": 1.0730120243594504e-06, "loss": 0.0235, "step": 5398 }, { "epoch": 1.2284414106939705, "grad_norm": 1.0189138352280012, "learning_rate": 1.0729497187603508e-06, "loss": 0.0264, "step": 5399 }, { "epoch": 1.2286689419795223, "grad_norm": 1.0924034497075088, "learning_rate": 1.0728874040059798e-06, "loss": 0.0115, "step": 5400 }, { "epoch": 1.228896473265074, "grad_norm": 0.6433664647544001, "learning_rate": 1.07282508009761e-06, "loss": 0.01, "step": 5401 }, { "epoch": 1.2291240045506258, "grad_norm": 0.7843372574416333, "learning_rate": 1.072762747036516e-06, "loss": 0.0215, "step": 5402 }, { "epoch": 1.2293515358361775, "grad_norm": 1.0890903190836254, "learning_rate": 1.0727004048239715e-06, "loss": 0.0272, "step": 5403 }, { "epoch": 1.2295790671217293, "grad_norm": 0.8996000171907133, "learning_rate": 1.0726380534612507e-06, "loss": 0.0171, "step": 5404 }, { "epoch": 1.229806598407281, "grad_norm": 0.8250699810825667, "learning_rate": 1.0725756929496277e-06, "loss": 0.007, "step": 5405 }, { "epoch": 1.2300341296928328, "grad_norm": 1.0264353557572934, "learning_rate": 1.0725133232903773e-06, "loss": 0.0345, "step": 5406 }, { "epoch": 1.2302616609783845, "grad_norm": 1.137616173296977, "learning_rate": 1.0724509444847741e-06, "loss": 0.036, "step": 5407 }, { "epoch": 1.2304891922639363, "grad_norm": 1.3664447826228183, "learning_rate": 1.0723885565340933e-06, "loss": 0.0285, "step": 5408 }, { "epoch": 1.230716723549488, "grad_norm": 1.1602041449374105, "learning_rate": 1.0723261594396095e-06, "loss": 0.0332, "step": 5409 }, { "epoch": 1.2309442548350398, "grad_norm": 1.090010681276093, "learning_rate": 1.0722637532025984e-06, "loss": 0.0218, "step": 5410 }, { "epoch": 1.2311717861205915, "grad_norm": 1.4413107732179307, "learning_rate": 1.0722013378243354e-06, "loss": 0.0321, "step": 5411 }, { "epoch": 1.2313993174061433, "grad_norm": 1.8979249234317928, "learning_rate": 1.0721389133060958e-06, "loss": 0.0266, "step": 5412 }, { "epoch": 1.231626848691695, "grad_norm": 1.7124518471345664, "learning_rate": 1.0720764796491559e-06, "loss": 0.0281, "step": 5413 }, { "epoch": 1.2318543799772468, "grad_norm": 1.466076682039334, "learning_rate": 1.0720140368547915e-06, "loss": 0.0445, "step": 5414 }, { "epoch": 1.2320819112627985, "grad_norm": 1.307580416194323, "learning_rate": 1.0719515849242787e-06, "loss": 0.0233, "step": 5415 }, { "epoch": 1.2323094425483503, "grad_norm": 0.8318083081384708, "learning_rate": 1.0718891238588943e-06, "loss": 0.022, "step": 5416 }, { "epoch": 1.232536973833902, "grad_norm": 1.3996049895553466, "learning_rate": 1.0718266536599145e-06, "loss": 0.046, "step": 5417 }, { "epoch": 1.232764505119454, "grad_norm": 1.4470303584385065, "learning_rate": 1.0717641743286163e-06, "loss": 0.0315, "step": 5418 }, { "epoch": 1.2329920364050058, "grad_norm": 1.251825573353438, "learning_rate": 1.0717016858662766e-06, "loss": 0.0187, "step": 5419 }, { "epoch": 1.2332195676905575, "grad_norm": 1.2025353214638472, "learning_rate": 1.0716391882741722e-06, "loss": 0.0512, "step": 5420 }, { "epoch": 1.2334470989761093, "grad_norm": 1.1125647041962026, "learning_rate": 1.0715766815535812e-06, "loss": 0.0191, "step": 5421 }, { "epoch": 1.233674630261661, "grad_norm": 1.0140086609338004, "learning_rate": 1.0715141657057805e-06, "loss": 0.0286, "step": 5422 }, { "epoch": 1.2339021615472128, "grad_norm": 1.6413125694512114, "learning_rate": 1.0714516407320482e-06, "loss": 0.0277, "step": 5423 }, { "epoch": 1.2341296928327645, "grad_norm": 1.113912252435204, "learning_rate": 1.0713891066336619e-06, "loss": 0.0459, "step": 5424 }, { "epoch": 1.2343572241183163, "grad_norm": 1.4211151899069139, "learning_rate": 1.0713265634118998e-06, "loss": 0.03, "step": 5425 }, { "epoch": 1.234584755403868, "grad_norm": 0.5439788203180792, "learning_rate": 1.0712640110680398e-06, "loss": 0.0051, "step": 5426 }, { "epoch": 1.2348122866894198, "grad_norm": 1.1066572512164194, "learning_rate": 1.071201449603361e-06, "loss": 0.031, "step": 5427 }, { "epoch": 1.2350398179749715, "grad_norm": 1.0734688953953, "learning_rate": 1.0711388790191418e-06, "loss": 0.0159, "step": 5428 }, { "epoch": 1.2352673492605233, "grad_norm": 0.9674963008176792, "learning_rate": 1.071076299316661e-06, "loss": 0.0292, "step": 5429 }, { "epoch": 1.235494880546075, "grad_norm": 0.8648315211359087, "learning_rate": 1.0710137104971973e-06, "loss": 0.015, "step": 5430 }, { "epoch": 1.2357224118316268, "grad_norm": 1.0177696413333797, "learning_rate": 1.0709511125620306e-06, "loss": 0.0187, "step": 5431 }, { "epoch": 1.2359499431171785, "grad_norm": 1.080479875370983, "learning_rate": 1.0708885055124396e-06, "loss": 0.0202, "step": 5432 }, { "epoch": 1.2361774744027303, "grad_norm": 1.2213741714117063, "learning_rate": 1.0708258893497043e-06, "loss": 0.0347, "step": 5433 }, { "epoch": 1.236405005688282, "grad_norm": 0.6102850699242401, "learning_rate": 1.0707632640751042e-06, "loss": 0.011, "step": 5434 }, { "epoch": 1.236632536973834, "grad_norm": 0.7162928033199788, "learning_rate": 1.0707006296899194e-06, "loss": 0.0104, "step": 5435 }, { "epoch": 1.2368600682593858, "grad_norm": 1.6643057799440983, "learning_rate": 1.0706379861954299e-06, "loss": 0.0474, "step": 5436 }, { "epoch": 1.2370875995449375, "grad_norm": 1.0636970768259353, "learning_rate": 1.0705753335929162e-06, "loss": 0.0441, "step": 5437 }, { "epoch": 1.2373151308304893, "grad_norm": 0.5899981023336623, "learning_rate": 1.070512671883659e-06, "loss": 0.0081, "step": 5438 }, { "epoch": 1.237542662116041, "grad_norm": 1.1358807681666672, "learning_rate": 1.0704500010689383e-06, "loss": 0.0231, "step": 5439 }, { "epoch": 1.2377701934015928, "grad_norm": 1.483924786030886, "learning_rate": 1.0703873211500356e-06, "loss": 0.0434, "step": 5440 }, { "epoch": 1.2379977246871445, "grad_norm": 0.5564175099477741, "learning_rate": 1.0703246321282316e-06, "loss": 0.0111, "step": 5441 }, { "epoch": 1.2382252559726963, "grad_norm": 0.6919558166199928, "learning_rate": 1.0702619340048077e-06, "loss": 0.0107, "step": 5442 }, { "epoch": 1.238452787258248, "grad_norm": 0.916701734430617, "learning_rate": 1.0701992267810454e-06, "loss": 0.0095, "step": 5443 }, { "epoch": 1.2386803185437998, "grad_norm": 0.6806190525241188, "learning_rate": 1.0701365104582262e-06, "loss": 0.0166, "step": 5444 }, { "epoch": 1.2389078498293515, "grad_norm": 0.9812678457663916, "learning_rate": 1.070073785037632e-06, "loss": 0.0179, "step": 5445 }, { "epoch": 1.2391353811149033, "grad_norm": 1.2184326624890787, "learning_rate": 1.0700110505205447e-06, "loss": 0.0285, "step": 5446 }, { "epoch": 1.239362912400455, "grad_norm": 0.9122394182634046, "learning_rate": 1.0699483069082468e-06, "loss": 0.0206, "step": 5447 }, { "epoch": 1.2395904436860068, "grad_norm": 1.776304008174226, "learning_rate": 1.0698855542020201e-06, "loss": 0.0406, "step": 5448 }, { "epoch": 1.2398179749715585, "grad_norm": 2.144224863401225, "learning_rate": 1.0698227924031474e-06, "loss": 0.0269, "step": 5449 }, { "epoch": 1.2400455062571103, "grad_norm": 0.8197781138940033, "learning_rate": 1.0697600215129113e-06, "loss": 0.0132, "step": 5450 }, { "epoch": 1.240273037542662, "grad_norm": 1.112356089568793, "learning_rate": 1.0696972415325954e-06, "loss": 0.0188, "step": 5451 }, { "epoch": 1.2405005688282138, "grad_norm": 1.5519612957572413, "learning_rate": 1.069634452463482e-06, "loss": 0.0649, "step": 5452 }, { "epoch": 1.2407281001137656, "grad_norm": 1.7090640826753272, "learning_rate": 1.0695716543068548e-06, "loss": 0.021, "step": 5453 }, { "epoch": 1.2409556313993173, "grad_norm": 0.9894882585365135, "learning_rate": 1.0695088470639973e-06, "loss": 0.031, "step": 5454 }, { "epoch": 1.241183162684869, "grad_norm": 1.2111142205989058, "learning_rate": 1.069446030736193e-06, "loss": 0.0328, "step": 5455 }, { "epoch": 1.2414106939704208, "grad_norm": 0.8409351330327227, "learning_rate": 1.0693832053247256e-06, "loss": 0.0147, "step": 5456 }, { "epoch": 1.2416382252559728, "grad_norm": 1.2297382097518612, "learning_rate": 1.0693203708308792e-06, "loss": 0.018, "step": 5457 }, { "epoch": 1.2418657565415245, "grad_norm": 0.7856520845586189, "learning_rate": 1.0692575272559385e-06, "loss": 0.0132, "step": 5458 }, { "epoch": 1.2420932878270763, "grad_norm": 2.9495226963416092, "learning_rate": 1.0691946746011874e-06, "loss": 0.0753, "step": 5459 }, { "epoch": 1.242320819112628, "grad_norm": 1.1311124384842985, "learning_rate": 1.0691318128679107e-06, "loss": 0.0282, "step": 5460 }, { "epoch": 1.2425483503981798, "grad_norm": 1.2641663291780139, "learning_rate": 1.0690689420573933e-06, "loss": 0.0359, "step": 5461 }, { "epoch": 1.2427758816837315, "grad_norm": 1.5841963404042003, "learning_rate": 1.0690060621709198e-06, "loss": 0.0334, "step": 5462 }, { "epoch": 1.2430034129692833, "grad_norm": 0.7353352974419007, "learning_rate": 1.0689431732097754e-06, "loss": 0.0272, "step": 5463 }, { "epoch": 1.243230944254835, "grad_norm": 1.0500642122401156, "learning_rate": 1.0688802751752458e-06, "loss": 0.0275, "step": 5464 }, { "epoch": 1.2434584755403868, "grad_norm": 0.7175451483028665, "learning_rate": 1.0688173680686164e-06, "loss": 0.0114, "step": 5465 }, { "epoch": 1.2436860068259386, "grad_norm": 0.5867289910981424, "learning_rate": 1.0687544518911726e-06, "loss": 0.0111, "step": 5466 }, { "epoch": 1.2439135381114903, "grad_norm": 0.6712196944383615, "learning_rate": 1.0686915266442005e-06, "loss": 0.0162, "step": 5467 }, { "epoch": 1.244141069397042, "grad_norm": 1.2463644416881146, "learning_rate": 1.0686285923289863e-06, "loss": 0.0442, "step": 5468 }, { "epoch": 1.2443686006825938, "grad_norm": 0.8847672705631197, "learning_rate": 1.0685656489468161e-06, "loss": 0.0137, "step": 5469 }, { "epoch": 1.2445961319681456, "grad_norm": 0.8604702504051126, "learning_rate": 1.0685026964989764e-06, "loss": 0.0168, "step": 5470 }, { "epoch": 1.2448236632536973, "grad_norm": 1.418689898486085, "learning_rate": 1.0684397349867537e-06, "loss": 0.0292, "step": 5471 }, { "epoch": 1.245051194539249, "grad_norm": 1.846189023146859, "learning_rate": 1.068376764411435e-06, "loss": 0.0448, "step": 5472 }, { "epoch": 1.2452787258248008, "grad_norm": 1.252366571384877, "learning_rate": 1.0683137847743076e-06, "loss": 0.0301, "step": 5473 }, { "epoch": 1.2455062571103528, "grad_norm": 1.2815205403700491, "learning_rate": 1.0682507960766578e-06, "loss": 0.0317, "step": 5474 }, { "epoch": 1.2457337883959045, "grad_norm": 1.5712499828336253, "learning_rate": 1.0681877983197738e-06, "loss": 0.0511, "step": 5475 }, { "epoch": 1.2459613196814563, "grad_norm": 1.3120502665157674, "learning_rate": 1.0681247915049428e-06, "loss": 0.0239, "step": 5476 }, { "epoch": 1.246188850967008, "grad_norm": 1.2372734878283551, "learning_rate": 1.0680617756334527e-06, "loss": 0.025, "step": 5477 }, { "epoch": 1.2464163822525598, "grad_norm": 1.1291827306006323, "learning_rate": 1.0679987507065912e-06, "loss": 0.0155, "step": 5478 }, { "epoch": 1.2466439135381115, "grad_norm": 1.1166618335200997, "learning_rate": 1.0679357167256465e-06, "loss": 0.015, "step": 5479 }, { "epoch": 1.2468714448236633, "grad_norm": 0.9525747638788947, "learning_rate": 1.067872673691907e-06, "loss": 0.0379, "step": 5480 }, { "epoch": 1.247098976109215, "grad_norm": 0.9015026474953439, "learning_rate": 1.0678096216066611e-06, "loss": 0.0171, "step": 5481 }, { "epoch": 1.2473265073947668, "grad_norm": 0.7251387961734896, "learning_rate": 1.0677465604711975e-06, "loss": 0.0183, "step": 5482 }, { "epoch": 1.2475540386803186, "grad_norm": 1.1041778984913362, "learning_rate": 1.0676834902868051e-06, "loss": 0.0449, "step": 5483 }, { "epoch": 1.2477815699658703, "grad_norm": 1.5486690407695638, "learning_rate": 1.067620411054773e-06, "loss": 0.0559, "step": 5484 }, { "epoch": 1.248009101251422, "grad_norm": 1.0936433230281484, "learning_rate": 1.0675573227763903e-06, "loss": 0.041, "step": 5485 }, { "epoch": 1.2482366325369738, "grad_norm": 0.7723592633898496, "learning_rate": 1.0674942254529463e-06, "loss": 0.0196, "step": 5486 }, { "epoch": 1.2484641638225256, "grad_norm": 1.3114855555869747, "learning_rate": 1.0674311190857308e-06, "loss": 0.0231, "step": 5487 }, { "epoch": 1.2486916951080773, "grad_norm": 1.781546684980227, "learning_rate": 1.0673680036760333e-06, "loss": 0.0336, "step": 5488 }, { "epoch": 1.248919226393629, "grad_norm": 1.4863914973995318, "learning_rate": 1.0673048792251443e-06, "loss": 0.0284, "step": 5489 }, { "epoch": 1.2491467576791808, "grad_norm": 0.7255040029884252, "learning_rate": 1.0672417457343535e-06, "loss": 0.0119, "step": 5490 }, { "epoch": 1.2493742889647326, "grad_norm": 1.1732515422757923, "learning_rate": 1.0671786032049512e-06, "loss": 0.0247, "step": 5491 }, { "epoch": 1.2496018202502843, "grad_norm": 0.8454940508394108, "learning_rate": 1.0671154516382283e-06, "loss": 0.0126, "step": 5492 }, { "epoch": 1.249829351535836, "grad_norm": 1.7717426170006738, "learning_rate": 1.067052291035475e-06, "loss": 0.0288, "step": 5493 }, { "epoch": 1.2500568828213878, "grad_norm": 0.9733708679345942, "learning_rate": 1.0669891213979826e-06, "loss": 0.0424, "step": 5494 }, { "epoch": 1.2502844141069396, "grad_norm": 0.8099462019509774, "learning_rate": 1.066925942727042e-06, "loss": 0.0135, "step": 5495 }, { "epoch": 1.2505119453924913, "grad_norm": 1.0927120951454266, "learning_rate": 1.0668627550239444e-06, "loss": 0.02, "step": 5496 }, { "epoch": 1.2507394766780433, "grad_norm": 0.9823491330955677, "learning_rate": 1.0667995582899815e-06, "loss": 0.0117, "step": 5497 }, { "epoch": 1.250967007963595, "grad_norm": 1.3444249454765562, "learning_rate": 1.0667363525264446e-06, "loss": 0.0272, "step": 5498 }, { "epoch": 1.2511945392491468, "grad_norm": 1.5794185821366846, "learning_rate": 1.0666731377346257e-06, "loss": 0.0429, "step": 5499 }, { "epoch": 1.2514220705346986, "grad_norm": 0.827330081267108, "learning_rate": 1.0666099139158168e-06, "loss": 0.0133, "step": 5500 }, { "epoch": 1.2516496018202503, "grad_norm": 1.5562286915608332, "learning_rate": 1.06654668107131e-06, "loss": 0.037, "step": 5501 }, { "epoch": 1.251877133105802, "grad_norm": 1.8342841703579758, "learning_rate": 1.0664834392023975e-06, "loss": 0.0369, "step": 5502 }, { "epoch": 1.2521046643913538, "grad_norm": 1.9754022342502022, "learning_rate": 1.0664201883103722e-06, "loss": 0.0576, "step": 5503 }, { "epoch": 1.2523321956769056, "grad_norm": 1.704003080347443, "learning_rate": 1.066356928396527e-06, "loss": 0.041, "step": 5504 }, { "epoch": 1.2525597269624573, "grad_norm": 0.8152446360977106, "learning_rate": 1.066293659462154e-06, "loss": 0.0092, "step": 5505 }, { "epoch": 1.252787258248009, "grad_norm": 1.187665186303167, "learning_rate": 1.066230381508547e-06, "loss": 0.03, "step": 5506 }, { "epoch": 1.2530147895335608, "grad_norm": 1.515571401940605, "learning_rate": 1.0661670945369991e-06, "loss": 0.0658, "step": 5507 }, { "epoch": 1.2532423208191126, "grad_norm": 0.7829970004154015, "learning_rate": 1.0661037985488037e-06, "loss": 0.0221, "step": 5508 }, { "epoch": 1.2534698521046643, "grad_norm": 0.9344601178136276, "learning_rate": 1.0660404935452545e-06, "loss": 0.0176, "step": 5509 }, { "epoch": 1.253697383390216, "grad_norm": 1.1741631037543796, "learning_rate": 1.0659771795276451e-06, "loss": 0.0383, "step": 5510 }, { "epoch": 1.253924914675768, "grad_norm": 0.6952087144812199, "learning_rate": 1.06591385649727e-06, "loss": 0.0166, "step": 5511 }, { "epoch": 1.2541524459613198, "grad_norm": 1.148195580887031, "learning_rate": 1.0658505244554233e-06, "loss": 0.0336, "step": 5512 }, { "epoch": 1.2543799772468716, "grad_norm": 0.9686864698151374, "learning_rate": 1.0657871834033992e-06, "loss": 0.017, "step": 5513 }, { "epoch": 1.2546075085324233, "grad_norm": 0.6541574267823076, "learning_rate": 1.0657238333424922e-06, "loss": 0.0149, "step": 5514 }, { "epoch": 1.254835039817975, "grad_norm": 0.682956550720555, "learning_rate": 1.0656604742739974e-06, "loss": 0.0106, "step": 5515 }, { "epoch": 1.2550625711035268, "grad_norm": 1.6100348965042812, "learning_rate": 1.0655971061992093e-06, "loss": 0.0445, "step": 5516 }, { "epoch": 1.2552901023890786, "grad_norm": 1.1979735909844647, "learning_rate": 1.0655337291194235e-06, "loss": 0.0369, "step": 5517 }, { "epoch": 1.2555176336746303, "grad_norm": 0.7815830769361026, "learning_rate": 1.0654703430359348e-06, "loss": 0.0231, "step": 5518 }, { "epoch": 1.255745164960182, "grad_norm": 0.6929286637595476, "learning_rate": 1.065406947950039e-06, "loss": 0.0106, "step": 5519 }, { "epoch": 1.2559726962457338, "grad_norm": 1.1236746085503946, "learning_rate": 1.065343543863032e-06, "loss": 0.0344, "step": 5520 }, { "epoch": 1.2562002275312856, "grad_norm": 0.570375538462011, "learning_rate": 1.0652801307762093e-06, "loss": 0.0154, "step": 5521 }, { "epoch": 1.2564277588168373, "grad_norm": 1.182265579469843, "learning_rate": 1.065216708690867e-06, "loss": 0.0251, "step": 5522 }, { "epoch": 1.256655290102389, "grad_norm": 1.0748877350333434, "learning_rate": 1.0651532776083014e-06, "loss": 0.0252, "step": 5523 }, { "epoch": 1.2568828213879408, "grad_norm": 0.6801381732217795, "learning_rate": 1.0650898375298088e-06, "loss": 0.0175, "step": 5524 }, { "epoch": 1.2571103526734926, "grad_norm": 0.8236115723758932, "learning_rate": 1.0650263884566863e-06, "loss": 0.0122, "step": 5525 }, { "epoch": 1.2573378839590443, "grad_norm": 1.2072865539422744, "learning_rate": 1.06496293039023e-06, "loss": 0.0307, "step": 5526 }, { "epoch": 1.257565415244596, "grad_norm": 0.8638078889384445, "learning_rate": 1.0648994633317373e-06, "loss": 0.0338, "step": 5527 }, { "epoch": 1.2577929465301478, "grad_norm": 1.3903600138449066, "learning_rate": 1.064835987282505e-06, "loss": 0.0427, "step": 5528 }, { "epoch": 1.2580204778156996, "grad_norm": 1.2546291636301692, "learning_rate": 1.0647725022438307e-06, "loss": 0.0159, "step": 5529 }, { "epoch": 1.2582480091012513, "grad_norm": 0.756663737422429, "learning_rate": 1.0647090082170118e-06, "loss": 0.013, "step": 5530 }, { "epoch": 1.258475540386803, "grad_norm": 0.8566876022289612, "learning_rate": 1.0646455052033463e-06, "loss": 0.0201, "step": 5531 }, { "epoch": 1.2587030716723548, "grad_norm": 1.1638253388866646, "learning_rate": 1.0645819932041317e-06, "loss": 0.0166, "step": 5532 }, { "epoch": 1.2589306029579066, "grad_norm": 1.1215067967587442, "learning_rate": 1.064518472220666e-06, "loss": 0.0138, "step": 5533 }, { "epoch": 1.2591581342434583, "grad_norm": 1.2121221098665633, "learning_rate": 1.064454942254248e-06, "loss": 0.0214, "step": 5534 }, { "epoch": 1.25938566552901, "grad_norm": 0.7454316468560616, "learning_rate": 1.0643914033061757e-06, "loss": 0.0129, "step": 5535 }, { "epoch": 1.259613196814562, "grad_norm": 1.7146758293381863, "learning_rate": 1.0643278553777477e-06, "loss": 0.0243, "step": 5536 }, { "epoch": 1.2598407281001138, "grad_norm": 32.10042100778171, "learning_rate": 1.0642642984702632e-06, "loss": 0.4248, "step": 5537 }, { "epoch": 1.2600682593856656, "grad_norm": 1.0484198163247316, "learning_rate": 1.0642007325850207e-06, "loss": 0.0287, "step": 5538 }, { "epoch": 1.2602957906712173, "grad_norm": 2.3948890958568487, "learning_rate": 1.0641371577233197e-06, "loss": 0.0669, "step": 5539 }, { "epoch": 1.260523321956769, "grad_norm": 0.9801146567578284, "learning_rate": 1.0640735738864594e-06, "loss": 0.0162, "step": 5540 }, { "epoch": 1.2607508532423208, "grad_norm": 0.9922579714313251, "learning_rate": 1.0640099810757394e-06, "loss": 0.0181, "step": 5541 }, { "epoch": 1.2609783845278726, "grad_norm": 0.8815528286158882, "learning_rate": 1.0639463792924592e-06, "loss": 0.0217, "step": 5542 }, { "epoch": 1.2612059158134243, "grad_norm": 0.8238976020134005, "learning_rate": 1.0638827685379191e-06, "loss": 0.0145, "step": 5543 }, { "epoch": 1.261433447098976, "grad_norm": 1.1437101469135258, "learning_rate": 1.063819148813419e-06, "loss": 0.0248, "step": 5544 }, { "epoch": 1.2616609783845278, "grad_norm": 2.5729371322737475, "learning_rate": 1.063755520120259e-06, "loss": 0.0719, "step": 5545 }, { "epoch": 1.2618885096700796, "grad_norm": 0.742178098402897, "learning_rate": 1.0636918824597397e-06, "loss": 0.011, "step": 5546 }, { "epoch": 1.2621160409556313, "grad_norm": 1.2993072879022185, "learning_rate": 1.063628235833162e-06, "loss": 0.0127, "step": 5547 }, { "epoch": 1.262343572241183, "grad_norm": 1.050947969366834, "learning_rate": 1.0635645802418263e-06, "loss": 0.0203, "step": 5548 }, { "epoch": 1.2625711035267349, "grad_norm": 1.0789000058560003, "learning_rate": 1.0635009156870338e-06, "loss": 0.0292, "step": 5549 }, { "epoch": 1.2627986348122868, "grad_norm": 0.7771290887161867, "learning_rate": 1.0634372421700858e-06, "loss": 0.0081, "step": 5550 }, { "epoch": 1.2630261660978386, "grad_norm": 1.1448879442832878, "learning_rate": 1.0633735596922834e-06, "loss": 0.0215, "step": 5551 }, { "epoch": 1.2632536973833903, "grad_norm": 0.5229559835200444, "learning_rate": 1.0633098682549282e-06, "loss": 0.0104, "step": 5552 }, { "epoch": 1.263481228668942, "grad_norm": 0.9799373836364904, "learning_rate": 1.063246167859322e-06, "loss": 0.0282, "step": 5553 }, { "epoch": 1.2637087599544938, "grad_norm": 1.5057299070563284, "learning_rate": 1.0631824585067668e-06, "loss": 0.0223, "step": 5554 }, { "epoch": 1.2639362912400456, "grad_norm": 0.8319291390882159, "learning_rate": 1.0631187401985647e-06, "loss": 0.028, "step": 5555 }, { "epoch": 1.2641638225255973, "grad_norm": 1.0598953809242282, "learning_rate": 1.0630550129360179e-06, "loss": 0.0215, "step": 5556 }, { "epoch": 1.264391353811149, "grad_norm": 0.9828596775853349, "learning_rate": 1.062991276720429e-06, "loss": 0.0207, "step": 5557 }, { "epoch": 1.2646188850967008, "grad_norm": 0.7615549939570654, "learning_rate": 1.0629275315531005e-06, "loss": 0.0103, "step": 5558 }, { "epoch": 1.2648464163822526, "grad_norm": 1.4943347768705173, "learning_rate": 1.0628637774353351e-06, "loss": 0.0393, "step": 5559 }, { "epoch": 1.2650739476678043, "grad_norm": 0.8670381526948084, "learning_rate": 1.062800014368436e-06, "loss": 0.0151, "step": 5560 }, { "epoch": 1.265301478953356, "grad_norm": 0.9076348935605973, "learning_rate": 1.0627362423537065e-06, "loss": 0.023, "step": 5561 }, { "epoch": 1.2655290102389078, "grad_norm": 2.452173267393995, "learning_rate": 1.06267246139245e-06, "loss": 0.0468, "step": 5562 }, { "epoch": 1.2657565415244596, "grad_norm": 2.285019099369935, "learning_rate": 1.06260867148597e-06, "loss": 0.0157, "step": 5563 }, { "epoch": 1.2659840728100114, "grad_norm": 1.5281975862148434, "learning_rate": 1.06254487263557e-06, "loss": 0.032, "step": 5564 }, { "epoch": 1.266211604095563, "grad_norm": 1.3368814016585056, "learning_rate": 1.062481064842554e-06, "loss": 0.0187, "step": 5565 }, { "epoch": 1.2664391353811149, "grad_norm": 1.4457536111569913, "learning_rate": 1.0624172481082265e-06, "loss": 0.0314, "step": 5566 }, { "epoch": 1.2666666666666666, "grad_norm": 0.8433976153186795, "learning_rate": 1.0623534224338916e-06, "loss": 0.0221, "step": 5567 }, { "epoch": 1.2668941979522184, "grad_norm": 0.7106818851766153, "learning_rate": 1.0622895878208535e-06, "loss": 0.0105, "step": 5568 }, { "epoch": 1.2671217292377701, "grad_norm": 1.7306335155218464, "learning_rate": 1.0622257442704174e-06, "loss": 0.0545, "step": 5569 }, { "epoch": 1.2673492605233219, "grad_norm": 1.2056459642628792, "learning_rate": 1.0621618917838874e-06, "loss": 0.0328, "step": 5570 }, { "epoch": 1.2675767918088736, "grad_norm": 1.0152701077745823, "learning_rate": 1.062098030362569e-06, "loss": 0.0206, "step": 5571 }, { "epoch": 1.2678043230944254, "grad_norm": 1.3569352741344352, "learning_rate": 1.0620341600077675e-06, "loss": 0.0202, "step": 5572 }, { "epoch": 1.2680318543799771, "grad_norm": 1.2714138176681336, "learning_rate": 1.0619702807207881e-06, "loss": 0.0223, "step": 5573 }, { "epoch": 1.268259385665529, "grad_norm": 1.1902953540344818, "learning_rate": 1.0619063925029367e-06, "loss": 0.0184, "step": 5574 }, { "epoch": 1.2684869169510808, "grad_norm": 0.8099951263697308, "learning_rate": 1.0618424953555186e-06, "loss": 0.0262, "step": 5575 }, { "epoch": 1.2687144482366326, "grad_norm": 0.7670050924473597, "learning_rate": 1.0617785892798399e-06, "loss": 0.0179, "step": 5576 }, { "epoch": 1.2689419795221843, "grad_norm": 1.2166230545046535, "learning_rate": 1.061714674277207e-06, "loss": 0.0272, "step": 5577 }, { "epoch": 1.269169510807736, "grad_norm": 1.4328861976608087, "learning_rate": 1.0616507503489255e-06, "loss": 0.0381, "step": 5578 }, { "epoch": 1.2693970420932879, "grad_norm": 1.346759860874677, "learning_rate": 1.0615868174963025e-06, "loss": 0.0239, "step": 5579 }, { "epoch": 1.2696245733788396, "grad_norm": 1.0046272104433696, "learning_rate": 1.0615228757206448e-06, "loss": 0.0192, "step": 5580 }, { "epoch": 1.2698521046643914, "grad_norm": 1.3021385069582194, "learning_rate": 1.0614589250232588e-06, "loss": 0.036, "step": 5581 }, { "epoch": 1.270079635949943, "grad_norm": 1.032731442532869, "learning_rate": 1.0613949654054518e-06, "loss": 0.0166, "step": 5582 }, { "epoch": 1.2703071672354949, "grad_norm": 1.2618108062315003, "learning_rate": 1.0613309968685308e-06, "loss": 0.0239, "step": 5583 }, { "epoch": 1.2705346985210466, "grad_norm": 0.6758570490387608, "learning_rate": 1.0612670194138033e-06, "loss": 0.0098, "step": 5584 }, { "epoch": 1.2707622298065984, "grad_norm": 1.114733408671628, "learning_rate": 1.061203033042577e-06, "loss": 0.0234, "step": 5585 }, { "epoch": 1.2709897610921501, "grad_norm": 1.2312044813034186, "learning_rate": 1.0611390377561596e-06, "loss": 0.0259, "step": 5586 }, { "epoch": 1.2712172923777019, "grad_norm": 0.9957995441313681, "learning_rate": 1.0610750335558589e-06, "loss": 0.0211, "step": 5587 }, { "epoch": 1.2714448236632536, "grad_norm": 0.8395539672302367, "learning_rate": 1.0610110204429832e-06, "loss": 0.0297, "step": 5588 }, { "epoch": 1.2716723549488056, "grad_norm": 1.3385709006255209, "learning_rate": 1.060946998418841e-06, "loss": 0.0416, "step": 5589 }, { "epoch": 1.2718998862343573, "grad_norm": 0.7572806427176023, "learning_rate": 1.06088296748474e-06, "loss": 0.0125, "step": 5590 }, { "epoch": 1.272127417519909, "grad_norm": 0.8812091759388678, "learning_rate": 1.0608189276419898e-06, "loss": 0.0181, "step": 5591 }, { "epoch": 1.2723549488054609, "grad_norm": 1.0677368834859697, "learning_rate": 1.0607548788918989e-06, "loss": 0.0203, "step": 5592 }, { "epoch": 1.2725824800910126, "grad_norm": 1.2209088913629613, "learning_rate": 1.0606908212357764e-06, "loss": 0.0225, "step": 5593 }, { "epoch": 1.2728100113765644, "grad_norm": 0.7838470533440405, "learning_rate": 1.0606267546749312e-06, "loss": 0.0148, "step": 5594 }, { "epoch": 1.273037542662116, "grad_norm": 0.7993976759347979, "learning_rate": 1.0605626792106729e-06, "loss": 0.0127, "step": 5595 }, { "epoch": 1.2732650739476679, "grad_norm": 1.0529954663721204, "learning_rate": 1.0604985948443115e-06, "loss": 0.0214, "step": 5596 }, { "epoch": 1.2734926052332196, "grad_norm": 1.3398092166838957, "learning_rate": 1.0604345015771561e-06, "loss": 0.0268, "step": 5597 }, { "epoch": 1.2737201365187714, "grad_norm": 1.0634782276914523, "learning_rate": 1.060370399410517e-06, "loss": 0.0231, "step": 5598 }, { "epoch": 1.2739476678043231, "grad_norm": 0.989167972365425, "learning_rate": 1.0603062883457044e-06, "loss": 0.0224, "step": 5599 }, { "epoch": 1.2741751990898749, "grad_norm": 0.9634331289782628, "learning_rate": 1.0602421683840283e-06, "loss": 0.0132, "step": 5600 }, { "epoch": 1.2744027303754266, "grad_norm": 9.022054267189066, "learning_rate": 1.0601780395267997e-06, "loss": 0.2103, "step": 5601 }, { "epoch": 1.2746302616609784, "grad_norm": 1.1619135610982998, "learning_rate": 1.0601139017753286e-06, "loss": 0.029, "step": 5602 }, { "epoch": 1.2748577929465301, "grad_norm": 1.4434474276395861, "learning_rate": 1.0600497551309263e-06, "loss": 0.033, "step": 5603 }, { "epoch": 1.2750853242320819, "grad_norm": 1.1497890055189814, "learning_rate": 1.0599855995949038e-06, "loss": 0.0391, "step": 5604 }, { "epoch": 1.2753128555176336, "grad_norm": 1.6021652391632355, "learning_rate": 1.0599214351685724e-06, "loss": 0.0319, "step": 5605 }, { "epoch": 1.2755403868031854, "grad_norm": 1.1732970121103377, "learning_rate": 1.0598572618532433e-06, "loss": 0.0181, "step": 5606 }, { "epoch": 1.2757679180887371, "grad_norm": 1.3340500937166788, "learning_rate": 1.0597930796502282e-06, "loss": 0.0256, "step": 5607 }, { "epoch": 1.2759954493742889, "grad_norm": 1.2579949420508676, "learning_rate": 1.059728888560839e-06, "loss": 0.0319, "step": 5608 }, { "epoch": 1.2762229806598406, "grad_norm": 1.0747003018755463, "learning_rate": 1.059664688586387e-06, "loss": 0.0215, "step": 5609 }, { "epoch": 1.2764505119453924, "grad_norm": 1.2386685548422143, "learning_rate": 1.0596004797281853e-06, "loss": 0.0392, "step": 5610 }, { "epoch": 1.2766780432309441, "grad_norm": 1.5312108773003696, "learning_rate": 1.0595362619875455e-06, "loss": 0.0187, "step": 5611 }, { "epoch": 1.276905574516496, "grad_norm": 1.027132364578428, "learning_rate": 1.0594720353657802e-06, "loss": 0.0254, "step": 5612 }, { "epoch": 1.2771331058020479, "grad_norm": 1.942710900166514, "learning_rate": 1.0594077998642025e-06, "loss": 0.0309, "step": 5613 }, { "epoch": 1.2773606370875996, "grad_norm": 2.5894507431035714, "learning_rate": 1.0593435554841247e-06, "loss": 0.0629, "step": 5614 }, { "epoch": 1.2775881683731514, "grad_norm": 0.8039189924018694, "learning_rate": 1.0592793022268603e-06, "loss": 0.0142, "step": 5615 }, { "epoch": 1.2778156996587031, "grad_norm": 1.109060979102289, "learning_rate": 1.0592150400937222e-06, "loss": 0.0216, "step": 5616 }, { "epoch": 1.2780432309442549, "grad_norm": 1.909595005287003, "learning_rate": 1.059150769086024e-06, "loss": 0.0948, "step": 5617 }, { "epoch": 1.2782707622298066, "grad_norm": 1.7646662291191562, "learning_rate": 1.059086489205079e-06, "loss": 0.0768, "step": 5618 }, { "epoch": 1.2784982935153584, "grad_norm": 0.6261291448525245, "learning_rate": 1.0590222004522012e-06, "loss": 0.0147, "step": 5619 }, { "epoch": 1.2787258248009101, "grad_norm": 1.257814887901369, "learning_rate": 1.0589579028287045e-06, "loss": 0.0327, "step": 5620 }, { "epoch": 1.2789533560864619, "grad_norm": 1.118224185059013, "learning_rate": 1.0588935963359032e-06, "loss": 0.0334, "step": 5621 }, { "epoch": 1.2791808873720136, "grad_norm": 1.524708364397306, "learning_rate": 1.0588292809751112e-06, "loss": 0.0404, "step": 5622 }, { "epoch": 1.2794084186575654, "grad_norm": 0.8685923465972103, "learning_rate": 1.0587649567476434e-06, "loss": 0.0194, "step": 5623 }, { "epoch": 1.2796359499431171, "grad_norm": 0.9025577846970224, "learning_rate": 1.0587006236548142e-06, "loss": 0.0159, "step": 5624 }, { "epoch": 1.2798634812286689, "grad_norm": 0.9060583601498521, "learning_rate": 1.0586362816979383e-06, "loss": 0.0174, "step": 5625 }, { "epoch": 1.2800910125142206, "grad_norm": 0.6398548695145103, "learning_rate": 1.0585719308783316e-06, "loss": 0.0116, "step": 5626 }, { "epoch": 1.2803185437997724, "grad_norm": 0.9028145222778184, "learning_rate": 1.058507571197308e-06, "loss": 0.0066, "step": 5627 }, { "epoch": 1.2805460750853244, "grad_norm": 0.7127546236721658, "learning_rate": 1.058443202656184e-06, "loss": 0.0112, "step": 5628 }, { "epoch": 1.2807736063708761, "grad_norm": 1.1810732350923896, "learning_rate": 1.0583788252562745e-06, "loss": 0.0388, "step": 5629 }, { "epoch": 1.2810011376564279, "grad_norm": 1.3548807980738902, "learning_rate": 1.058314438998896e-06, "loss": 0.068, "step": 5630 }, { "epoch": 1.2812286689419796, "grad_norm": 0.8405440766301588, "learning_rate": 1.0582500438853631e-06, "loss": 0.0217, "step": 5631 }, { "epoch": 1.2814562002275314, "grad_norm": 1.1902880934548667, "learning_rate": 1.0581856399169934e-06, "loss": 0.0264, "step": 5632 }, { "epoch": 1.2816837315130831, "grad_norm": 0.9685340307904585, "learning_rate": 1.0581212270951024e-06, "loss": 0.0135, "step": 5633 }, { "epoch": 1.2819112627986349, "grad_norm": 0.7297977851768284, "learning_rate": 1.058056805421007e-06, "loss": 0.0209, "step": 5634 }, { "epoch": 1.2821387940841866, "grad_norm": 1.0753232968315138, "learning_rate": 1.057992374896023e-06, "loss": 0.0237, "step": 5635 }, { "epoch": 1.2823663253697384, "grad_norm": 1.4454879483916625, "learning_rate": 1.0579279355214683e-06, "loss": 0.026, "step": 5636 }, { "epoch": 1.2825938566552901, "grad_norm": 1.260224633378464, "learning_rate": 1.0578634872986592e-06, "loss": 0.0184, "step": 5637 }, { "epoch": 1.2828213879408419, "grad_norm": 1.9252940374710767, "learning_rate": 1.0577990302289136e-06, "loss": 0.0293, "step": 5638 }, { "epoch": 1.2830489192263936, "grad_norm": 1.4343768366824854, "learning_rate": 1.0577345643135482e-06, "loss": 0.0408, "step": 5639 }, { "epoch": 1.2832764505119454, "grad_norm": 0.8768828863677058, "learning_rate": 1.0576700895538809e-06, "loss": 0.0213, "step": 5640 }, { "epoch": 1.2835039817974971, "grad_norm": 0.8309215455146005, "learning_rate": 1.0576056059512292e-06, "loss": 0.0205, "step": 5641 }, { "epoch": 1.283731513083049, "grad_norm": 0.95278530564801, "learning_rate": 1.057541113506911e-06, "loss": 0.0229, "step": 5642 }, { "epoch": 1.2839590443686006, "grad_norm": 2.512244215865245, "learning_rate": 1.057476612222245e-06, "loss": 0.0373, "step": 5643 }, { "epoch": 1.2841865756541524, "grad_norm": 1.2294843329469738, "learning_rate": 1.057412102098549e-06, "loss": 0.0384, "step": 5644 }, { "epoch": 1.2844141069397041, "grad_norm": 2.1898724117931843, "learning_rate": 1.0573475831371416e-06, "loss": 0.0251, "step": 5645 }, { "epoch": 1.284641638225256, "grad_norm": 1.5651747311949955, "learning_rate": 1.0572830553393412e-06, "loss": 0.0298, "step": 5646 }, { "epoch": 1.2848691695108077, "grad_norm": 1.0523784768363587, "learning_rate": 1.057218518706467e-06, "loss": 0.0201, "step": 5647 }, { "epoch": 1.2850967007963594, "grad_norm": 1.8703075631421116, "learning_rate": 1.0571539732398378e-06, "loss": 0.0195, "step": 5648 }, { "epoch": 1.2853242320819112, "grad_norm": 2.013096542848063, "learning_rate": 1.057089418940773e-06, "loss": 0.0299, "step": 5649 }, { "epoch": 1.285551763367463, "grad_norm": 1.3017641104414657, "learning_rate": 1.0570248558105915e-06, "loss": 0.0278, "step": 5650 }, { "epoch": 1.2857792946530147, "grad_norm": 0.688625948886133, "learning_rate": 1.0569602838506136e-06, "loss": 0.0088, "step": 5651 }, { "epoch": 1.2860068259385666, "grad_norm": 1.0503675437019973, "learning_rate": 1.0568957030621582e-06, "loss": 0.0135, "step": 5652 }, { "epoch": 1.2862343572241184, "grad_norm": 1.1035321913801368, "learning_rate": 1.0568311134465457e-06, "loss": 0.0322, "step": 5653 }, { "epoch": 1.2864618885096701, "grad_norm": 0.8543649037890663, "learning_rate": 1.056766515005096e-06, "loss": 0.0124, "step": 5654 }, { "epoch": 1.286689419795222, "grad_norm": 1.1122886475152938, "learning_rate": 1.0567019077391296e-06, "loss": 0.0235, "step": 5655 }, { "epoch": 1.2869169510807736, "grad_norm": 1.1079256097467238, "learning_rate": 1.056637291649967e-06, "loss": 0.0191, "step": 5656 }, { "epoch": 1.2871444823663254, "grad_norm": 2.5082757586881863, "learning_rate": 1.0565726667389284e-06, "loss": 0.0147, "step": 5657 }, { "epoch": 1.2873720136518771, "grad_norm": 0.6994602690994648, "learning_rate": 1.056508033007335e-06, "loss": 0.0146, "step": 5658 }, { "epoch": 1.287599544937429, "grad_norm": 1.156297278545635, "learning_rate": 1.0564433904565078e-06, "loss": 0.0206, "step": 5659 }, { "epoch": 1.2878270762229806, "grad_norm": 1.0348771757586008, "learning_rate": 1.0563787390877677e-06, "loss": 0.0304, "step": 5660 }, { "epoch": 1.2880546075085324, "grad_norm": 1.6172923570680333, "learning_rate": 1.0563140789024363e-06, "loss": 0.0393, "step": 5661 }, { "epoch": 1.2882821387940842, "grad_norm": 0.7384196511271945, "learning_rate": 1.0562494099018346e-06, "loss": 0.0238, "step": 5662 }, { "epoch": 1.288509670079636, "grad_norm": 0.8883280220843028, "learning_rate": 1.0561847320872853e-06, "loss": 0.0184, "step": 5663 }, { "epoch": 1.2887372013651877, "grad_norm": 1.7381905580936334, "learning_rate": 1.0561200454601097e-06, "loss": 0.0362, "step": 5664 }, { "epoch": 1.2889647326507394, "grad_norm": 1.187480383757449, "learning_rate": 1.0560553500216298e-06, "loss": 0.0352, "step": 5665 }, { "epoch": 1.2891922639362912, "grad_norm": 1.2836678409259843, "learning_rate": 1.0559906457731678e-06, "loss": 0.0234, "step": 5666 }, { "epoch": 1.2894197952218431, "grad_norm": 0.9869735102302846, "learning_rate": 1.0559259327160465e-06, "loss": 0.0176, "step": 5667 }, { "epoch": 1.2896473265073949, "grad_norm": 0.5781054590113305, "learning_rate": 1.0558612108515883e-06, "loss": 0.0103, "step": 5668 }, { "epoch": 1.2898748577929466, "grad_norm": 0.7247426227910199, "learning_rate": 1.0557964801811162e-06, "loss": 0.0148, "step": 5669 }, { "epoch": 1.2901023890784984, "grad_norm": 1.5311127998553455, "learning_rate": 1.0557317407059529e-06, "loss": 0.009, "step": 5670 }, { "epoch": 1.2903299203640501, "grad_norm": 1.0196457464528732, "learning_rate": 1.0556669924274217e-06, "loss": 0.0154, "step": 5671 }, { "epoch": 1.290557451649602, "grad_norm": 1.3612360034540798, "learning_rate": 1.0556022353468459e-06, "loss": 0.0158, "step": 5672 }, { "epoch": 1.2907849829351536, "grad_norm": 1.0790279937441036, "learning_rate": 1.055537469465549e-06, "loss": 0.0166, "step": 5673 }, { "epoch": 1.2910125142207054, "grad_norm": 1.1840170080924126, "learning_rate": 1.0554726947848545e-06, "loss": 0.0198, "step": 5674 }, { "epoch": 1.2912400455062572, "grad_norm": 0.9790455049633395, "learning_rate": 1.0554079113060869e-06, "loss": 0.014, "step": 5675 }, { "epoch": 1.291467576791809, "grad_norm": 0.9375698560069781, "learning_rate": 1.0553431190305695e-06, "loss": 0.0193, "step": 5676 }, { "epoch": 1.2916951080773607, "grad_norm": 0.9460244225991181, "learning_rate": 1.055278317959627e-06, "loss": 0.022, "step": 5677 }, { "epoch": 1.2919226393629124, "grad_norm": 37.51370385688921, "learning_rate": 1.0552135080945839e-06, "loss": 0.0196, "step": 5678 }, { "epoch": 1.2921501706484642, "grad_norm": 0.7870557071298617, "learning_rate": 1.0551486894367643e-06, "loss": 0.0292, "step": 5679 }, { "epoch": 1.292377701934016, "grad_norm": 1.005395594404911, "learning_rate": 1.0550838619874933e-06, "loss": 0.0186, "step": 5680 }, { "epoch": 1.2926052332195677, "grad_norm": 1.3329240605685746, "learning_rate": 1.055019025748096e-06, "loss": 0.0303, "step": 5681 }, { "epoch": 1.2928327645051194, "grad_norm": 0.8592212919369031, "learning_rate": 1.0549541807198974e-06, "loss": 0.0169, "step": 5682 }, { "epoch": 1.2930602957906712, "grad_norm": 1.0894816397080542, "learning_rate": 1.0548893269042226e-06, "loss": 0.0147, "step": 5683 }, { "epoch": 1.293287827076223, "grad_norm": 1.2756290606689868, "learning_rate": 1.0548244643023972e-06, "loss": 0.0376, "step": 5684 }, { "epoch": 1.2935153583617747, "grad_norm": 0.7933354902103573, "learning_rate": 1.054759592915747e-06, "loss": 0.0204, "step": 5685 }, { "epoch": 1.2937428896473264, "grad_norm": 0.9717459762266182, "learning_rate": 1.054694712745598e-06, "loss": 0.0217, "step": 5686 }, { "epoch": 1.2939704209328782, "grad_norm": 1.4817148974153636, "learning_rate": 1.0546298237932757e-06, "loss": 0.0395, "step": 5687 }, { "epoch": 1.29419795221843, "grad_norm": 0.7570513970170758, "learning_rate": 1.0545649260601068e-06, "loss": 0.0177, "step": 5688 }, { "epoch": 1.2944254835039817, "grad_norm": 0.8213620969513302, "learning_rate": 1.0545000195474175e-06, "loss": 0.0123, "step": 5689 }, { "epoch": 1.2946530147895334, "grad_norm": 2.570113325707381, "learning_rate": 1.0544351042565344e-06, "loss": 0.1389, "step": 5690 }, { "epoch": 1.2948805460750854, "grad_norm": 0.94694153472363, "learning_rate": 1.0543701801887842e-06, "loss": 0.0129, "step": 5691 }, { "epoch": 1.2951080773606372, "grad_norm": 1.1658997120682517, "learning_rate": 1.054305247345494e-06, "loss": 0.0247, "step": 5692 }, { "epoch": 1.295335608646189, "grad_norm": 0.6388376726663733, "learning_rate": 1.0542403057279907e-06, "loss": 0.0079, "step": 5693 }, { "epoch": 1.2955631399317407, "grad_norm": 1.5533598170833833, "learning_rate": 1.0541753553376016e-06, "loss": 0.0552, "step": 5694 }, { "epoch": 1.2957906712172924, "grad_norm": 1.3900915621379943, "learning_rate": 1.0541103961756543e-06, "loss": 0.0276, "step": 5695 }, { "epoch": 1.2960182025028442, "grad_norm": 1.3170728163032424, "learning_rate": 1.0540454282434765e-06, "loss": 0.0292, "step": 5696 }, { "epoch": 1.296245733788396, "grad_norm": 0.9274424986111619, "learning_rate": 1.0539804515423955e-06, "loss": 0.0248, "step": 5697 }, { "epoch": 1.2964732650739477, "grad_norm": 1.002255995914125, "learning_rate": 1.0539154660737401e-06, "loss": 0.0283, "step": 5698 }, { "epoch": 1.2967007963594994, "grad_norm": 1.0473043573590481, "learning_rate": 1.053850471838838e-06, "loss": 0.014, "step": 5699 }, { "epoch": 1.2969283276450512, "grad_norm": 0.9807190528337413, "learning_rate": 1.0537854688390175e-06, "loss": 0.0247, "step": 5700 }, { "epoch": 1.297155858930603, "grad_norm": 0.6854049395195209, "learning_rate": 1.0537204570756076e-06, "loss": 0.0153, "step": 5701 }, { "epoch": 1.2973833902161547, "grad_norm": 1.0640768424401603, "learning_rate": 1.0536554365499367e-06, "loss": 0.0268, "step": 5702 }, { "epoch": 1.2976109215017064, "grad_norm": 1.4228991728545564, "learning_rate": 1.0535904072633334e-06, "loss": 0.0211, "step": 5703 }, { "epoch": 1.2978384527872582, "grad_norm": 1.0465889798908117, "learning_rate": 1.0535253692171273e-06, "loss": 0.0208, "step": 5704 }, { "epoch": 1.29806598407281, "grad_norm": 1.1180867272354698, "learning_rate": 1.0534603224126474e-06, "loss": 0.0426, "step": 5705 }, { "epoch": 1.298293515358362, "grad_norm": 1.505044776171652, "learning_rate": 1.0533952668512231e-06, "loss": 0.04, "step": 5706 }, { "epoch": 1.2985210466439137, "grad_norm": 0.6143108397733672, "learning_rate": 1.0533302025341843e-06, "loss": 0.0121, "step": 5707 }, { "epoch": 1.2987485779294654, "grad_norm": 1.127765980451607, "learning_rate": 1.0532651294628607e-06, "loss": 0.0316, "step": 5708 }, { "epoch": 1.2989761092150172, "grad_norm": 1.1519987874665927, "learning_rate": 1.053200047638582e-06, "loss": 0.0306, "step": 5709 }, { "epoch": 1.299203640500569, "grad_norm": 0.9637531006099895, "learning_rate": 1.0531349570626787e-06, "loss": 0.0209, "step": 5710 }, { "epoch": 1.2994311717861207, "grad_norm": 0.653830233371083, "learning_rate": 1.0530698577364807e-06, "loss": 0.011, "step": 5711 }, { "epoch": 1.2996587030716724, "grad_norm": 1.6778292566216655, "learning_rate": 1.053004749661319e-06, "loss": 0.0327, "step": 5712 }, { "epoch": 1.2998862343572242, "grad_norm": 0.8508161175719957, "learning_rate": 1.0529396328385238e-06, "loss": 0.0298, "step": 5713 }, { "epoch": 1.300113765642776, "grad_norm": 1.9250422937435294, "learning_rate": 1.0528745072694266e-06, "loss": 0.0616, "step": 5714 }, { "epoch": 1.3003412969283277, "grad_norm": 1.8275774370143938, "learning_rate": 1.052809372955358e-06, "loss": 0.0453, "step": 5715 }, { "epoch": 1.3005688282138794, "grad_norm": 0.7928558918855286, "learning_rate": 1.0527442298976492e-06, "loss": 0.0112, "step": 5716 }, { "epoch": 1.3007963594994312, "grad_norm": 1.6775736020855938, "learning_rate": 1.0526790780976318e-06, "loss": 0.0386, "step": 5717 }, { "epoch": 1.301023890784983, "grad_norm": 0.7524597201626481, "learning_rate": 1.052613917556637e-06, "loss": 0.0126, "step": 5718 }, { "epoch": 1.3012514220705347, "grad_norm": 1.7153199401873807, "learning_rate": 1.0525487482759975e-06, "loss": 0.0136, "step": 5719 }, { "epoch": 1.3014789533560864, "grad_norm": 1.0942059838392535, "learning_rate": 1.052483570257044e-06, "loss": 0.0365, "step": 5720 }, { "epoch": 1.3017064846416382, "grad_norm": 1.0739474507348372, "learning_rate": 1.0524183835011095e-06, "loss": 0.0147, "step": 5721 }, { "epoch": 1.30193401592719, "grad_norm": 1.1602347838561342, "learning_rate": 1.052353188009526e-06, "loss": 0.0438, "step": 5722 }, { "epoch": 1.3021615472127417, "grad_norm": 0.774345620272605, "learning_rate": 1.052287983783626e-06, "loss": 0.0129, "step": 5723 }, { "epoch": 1.3023890784982934, "grad_norm": 1.551981652235957, "learning_rate": 1.052222770824742e-06, "loss": 0.034, "step": 5724 }, { "epoch": 1.3026166097838452, "grad_norm": 1.073745426616395, "learning_rate": 1.0521575491342074e-06, "loss": 0.0151, "step": 5725 }, { "epoch": 1.302844141069397, "grad_norm": 1.2659073131244354, "learning_rate": 1.0520923187133544e-06, "loss": 0.0273, "step": 5726 }, { "epoch": 1.3030716723549487, "grad_norm": 1.559253636601821, "learning_rate": 1.0520270795635167e-06, "loss": 0.0433, "step": 5727 }, { "epoch": 1.3032992036405004, "grad_norm": 1.246986769425595, "learning_rate": 1.0519618316860274e-06, "loss": 0.0251, "step": 5728 }, { "epoch": 1.3035267349260522, "grad_norm": 1.013075136339921, "learning_rate": 1.0518965750822204e-06, "loss": 0.0248, "step": 5729 }, { "epoch": 1.3037542662116042, "grad_norm": 0.776622690041485, "learning_rate": 1.0518313097534292e-06, "loss": 0.0197, "step": 5730 }, { "epoch": 1.303981797497156, "grad_norm": 1.8936915924731421, "learning_rate": 1.0517660357009877e-06, "loss": 0.0323, "step": 5731 }, { "epoch": 1.3042093287827077, "grad_norm": 0.9731351344937886, "learning_rate": 1.0517007529262301e-06, "loss": 0.0201, "step": 5732 }, { "epoch": 1.3044368600682594, "grad_norm": 1.4132176366415885, "learning_rate": 1.0516354614304905e-06, "loss": 0.0448, "step": 5733 }, { "epoch": 1.3046643913538112, "grad_norm": 1.2312639755556094, "learning_rate": 1.0515701612151035e-06, "loss": 0.0363, "step": 5734 }, { "epoch": 1.304891922639363, "grad_norm": 0.9680235672394335, "learning_rate": 1.0515048522814034e-06, "loss": 0.0244, "step": 5735 }, { "epoch": 1.3051194539249147, "grad_norm": 0.8942191857528213, "learning_rate": 1.0514395346307254e-06, "loss": 0.0241, "step": 5736 }, { "epoch": 1.3053469852104664, "grad_norm": 1.4700138740863975, "learning_rate": 1.0513742082644043e-06, "loss": 0.0346, "step": 5737 }, { "epoch": 1.3055745164960182, "grad_norm": 1.1078337385555872, "learning_rate": 1.0513088731837753e-06, "loss": 0.0305, "step": 5738 }, { "epoch": 1.30580204778157, "grad_norm": 1.2748420988398181, "learning_rate": 1.0512435293901737e-06, "loss": 0.0285, "step": 5739 }, { "epoch": 1.3060295790671217, "grad_norm": 1.2790988355743922, "learning_rate": 1.051178176884935e-06, "loss": 0.0492, "step": 5740 }, { "epoch": 1.3062571103526734, "grad_norm": 1.1980282713090238, "learning_rate": 1.0511128156693947e-06, "loss": 0.0216, "step": 5741 }, { "epoch": 1.3064846416382252, "grad_norm": 0.8208046025134933, "learning_rate": 1.0510474457448888e-06, "loss": 0.0178, "step": 5742 }, { "epoch": 1.306712172923777, "grad_norm": 1.8069229595272986, "learning_rate": 1.0509820671127535e-06, "loss": 0.0199, "step": 5743 }, { "epoch": 1.3069397042093287, "grad_norm": 1.6865296714089357, "learning_rate": 1.050916679774325e-06, "loss": 0.0202, "step": 5744 }, { "epoch": 1.3071672354948807, "grad_norm": 1.0275125368796856, "learning_rate": 1.0508512837309394e-06, "loss": 0.0154, "step": 5745 }, { "epoch": 1.3073947667804324, "grad_norm": 0.6133934114428197, "learning_rate": 1.0507858789839336e-06, "loss": 0.0107, "step": 5746 }, { "epoch": 1.3076222980659842, "grad_norm": 1.080087958680723, "learning_rate": 1.0507204655346442e-06, "loss": 0.0314, "step": 5747 }, { "epoch": 1.307849829351536, "grad_norm": 1.3749276553621517, "learning_rate": 1.050655043384408e-06, "loss": 0.0318, "step": 5748 }, { "epoch": 1.3080773606370877, "grad_norm": 1.2280486437236124, "learning_rate": 1.0505896125345624e-06, "loss": 0.0199, "step": 5749 }, { "epoch": 1.3083048919226394, "grad_norm": 1.465755912941539, "learning_rate": 1.0505241729864446e-06, "loss": 0.0517, "step": 5750 }, { "epoch": 1.3085324232081912, "grad_norm": 0.839754511989302, "learning_rate": 1.050458724741392e-06, "loss": 0.0138, "step": 5751 }, { "epoch": 1.308759954493743, "grad_norm": 1.3071289617497805, "learning_rate": 1.0503932678007423e-06, "loss": 0.0268, "step": 5752 }, { "epoch": 1.3089874857792947, "grad_norm": 1.5584387532798485, "learning_rate": 1.0503278021658331e-06, "loss": 0.0279, "step": 5753 }, { "epoch": 1.3092150170648464, "grad_norm": 1.3103126621131902, "learning_rate": 1.0502623278380025e-06, "loss": 0.0229, "step": 5754 }, { "epoch": 1.3094425483503982, "grad_norm": 1.112498635945538, "learning_rate": 1.050196844818589e-06, "loss": 0.0234, "step": 5755 }, { "epoch": 1.30967007963595, "grad_norm": 0.9046518747362639, "learning_rate": 1.0501313531089306e-06, "loss": 0.0099, "step": 5756 }, { "epoch": 1.3098976109215017, "grad_norm": 0.7326140936427108, "learning_rate": 1.050065852710366e-06, "loss": 0.0117, "step": 5757 }, { "epoch": 1.3101251422070535, "grad_norm": 1.1004333953253604, "learning_rate": 1.0500003436242338e-06, "loss": 0.0323, "step": 5758 }, { "epoch": 1.3103526734926052, "grad_norm": 1.4635856283030002, "learning_rate": 1.049934825851873e-06, "loss": 0.0357, "step": 5759 }, { "epoch": 1.310580204778157, "grad_norm": 1.448988151398696, "learning_rate": 1.0498692993946225e-06, "loss": 0.0298, "step": 5760 }, { "epoch": 1.3108077360637087, "grad_norm": 1.0352196860191685, "learning_rate": 1.0498037642538215e-06, "loss": 0.0146, "step": 5761 }, { "epoch": 1.3110352673492605, "grad_norm": 1.2609870327772956, "learning_rate": 1.0497382204308099e-06, "loss": 0.0207, "step": 5762 }, { "epoch": 1.3112627986348122, "grad_norm": 1.2395935801958684, "learning_rate": 1.0496726679269265e-06, "loss": 0.042, "step": 5763 }, { "epoch": 1.311490329920364, "grad_norm": 1.0739173139510374, "learning_rate": 1.0496071067435118e-06, "loss": 0.0394, "step": 5764 }, { "epoch": 1.3117178612059157, "grad_norm": 2.604100179137865, "learning_rate": 1.0495415368819057e-06, "loss": 0.1258, "step": 5765 }, { "epoch": 1.3119453924914675, "grad_norm": 1.393030141056355, "learning_rate": 1.0494759583434478e-06, "loss": 0.0233, "step": 5766 }, { "epoch": 1.3121729237770192, "grad_norm": 1.0166107127362798, "learning_rate": 1.0494103711294786e-06, "loss": 0.0172, "step": 5767 }, { "epoch": 1.312400455062571, "grad_norm": 1.4792549020627224, "learning_rate": 1.049344775241339e-06, "loss": 0.0415, "step": 5768 }, { "epoch": 1.312627986348123, "grad_norm": 1.743954914946822, "learning_rate": 1.049279170680369e-06, "loss": 0.0559, "step": 5769 }, { "epoch": 1.3128555176336747, "grad_norm": 1.8129298367616118, "learning_rate": 1.0492135574479097e-06, "loss": 0.0416, "step": 5770 }, { "epoch": 1.3130830489192264, "grad_norm": 1.253009038271569, "learning_rate": 1.0491479355453026e-06, "loss": 0.0265, "step": 5771 }, { "epoch": 1.3133105802047782, "grad_norm": 0.9975790029687652, "learning_rate": 1.0490823049738884e-06, "loss": 0.0205, "step": 5772 }, { "epoch": 1.31353811149033, "grad_norm": 0.698949514657323, "learning_rate": 1.0490166657350084e-06, "loss": 0.0211, "step": 5773 }, { "epoch": 1.3137656427758817, "grad_norm": 1.2257870676619147, "learning_rate": 1.0489510178300043e-06, "loss": 0.0315, "step": 5774 }, { "epoch": 1.3139931740614335, "grad_norm": 1.0706430296176082, "learning_rate": 1.0488853612602178e-06, "loss": 0.0165, "step": 5775 }, { "epoch": 1.3142207053469852, "grad_norm": 1.1156453691438755, "learning_rate": 1.0488196960269912e-06, "loss": 0.0282, "step": 5776 }, { "epoch": 1.314448236632537, "grad_norm": 1.2538263082028707, "learning_rate": 1.0487540221316659e-06, "loss": 0.0265, "step": 5777 }, { "epoch": 1.3146757679180887, "grad_norm": 1.0773176692864428, "learning_rate": 1.0486883395755845e-06, "loss": 0.0395, "step": 5778 }, { "epoch": 1.3149032992036405, "grad_norm": 1.5883851817408943, "learning_rate": 1.0486226483600894e-06, "loss": 0.0766, "step": 5779 }, { "epoch": 1.3151308304891922, "grad_norm": 1.9633238080883928, "learning_rate": 1.0485569484865231e-06, "loss": 0.0495, "step": 5780 }, { "epoch": 1.315358361774744, "grad_norm": 0.9192459981660208, "learning_rate": 1.0484912399562285e-06, "loss": 0.0152, "step": 5781 }, { "epoch": 1.3155858930602957, "grad_norm": 1.0996145724289152, "learning_rate": 1.0484255227705487e-06, "loss": 0.0197, "step": 5782 }, { "epoch": 1.3158134243458475, "grad_norm": 0.9854990061171917, "learning_rate": 1.0483597969308266e-06, "loss": 0.0138, "step": 5783 }, { "epoch": 1.3160409556313994, "grad_norm": 1.8517240238943, "learning_rate": 1.0482940624384054e-06, "loss": 0.0352, "step": 5784 }, { "epoch": 1.3162684869169512, "grad_norm": 1.225130304804711, "learning_rate": 1.048228319294629e-06, "loss": 0.0431, "step": 5785 }, { "epoch": 1.316496018202503, "grad_norm": 1.0183330800820867, "learning_rate": 1.0481625675008409e-06, "loss": 0.0286, "step": 5786 }, { "epoch": 1.3167235494880547, "grad_norm": 0.9507764029567124, "learning_rate": 1.048096807058385e-06, "loss": 0.0185, "step": 5787 }, { "epoch": 1.3169510807736065, "grad_norm": 0.8930023522137188, "learning_rate": 1.0480310379686048e-06, "loss": 0.0155, "step": 5788 }, { "epoch": 1.3171786120591582, "grad_norm": 1.2745535497453038, "learning_rate": 1.0479652602328453e-06, "loss": 0.0189, "step": 5789 }, { "epoch": 1.31740614334471, "grad_norm": 0.9488946384445988, "learning_rate": 1.0478994738524504e-06, "loss": 0.0233, "step": 5790 }, { "epoch": 1.3176336746302617, "grad_norm": 1.2465521747877613, "learning_rate": 1.047833678828765e-06, "loss": 0.0246, "step": 5791 }, { "epoch": 1.3178612059158135, "grad_norm": 1.0606853469341604, "learning_rate": 1.0477678751631332e-06, "loss": 0.0249, "step": 5792 }, { "epoch": 1.3180887372013652, "grad_norm": 0.7931700678264818, "learning_rate": 1.0477020628569005e-06, "loss": 0.0155, "step": 5793 }, { "epoch": 1.318316268486917, "grad_norm": 1.1055038224148857, "learning_rate": 1.0476362419114117e-06, "loss": 0.0554, "step": 5794 }, { "epoch": 1.3185437997724687, "grad_norm": 1.14334987480646, "learning_rate": 1.047570412328012e-06, "loss": 0.0233, "step": 5795 }, { "epoch": 1.3187713310580205, "grad_norm": 1.0946218965974854, "learning_rate": 1.0475045741080473e-06, "loss": 0.0493, "step": 5796 }, { "epoch": 1.3189988623435722, "grad_norm": 0.5654984905711095, "learning_rate": 1.0474387272528627e-06, "loss": 0.0139, "step": 5797 }, { "epoch": 1.319226393629124, "grad_norm": 1.1814496866598772, "learning_rate": 1.047372871763804e-06, "loss": 0.0267, "step": 5798 }, { "epoch": 1.3194539249146757, "grad_norm": 0.7187378148607076, "learning_rate": 1.0473070076422176e-06, "loss": 0.0177, "step": 5799 }, { "epoch": 1.3196814562002275, "grad_norm": 1.1593289596572192, "learning_rate": 1.0472411348894492e-06, "loss": 0.0283, "step": 5800 }, { "epoch": 1.3199089874857792, "grad_norm": 1.0028279682721826, "learning_rate": 1.0471752535068455e-06, "loss": 0.0195, "step": 5801 }, { "epoch": 1.320136518771331, "grad_norm": 0.8421345274979064, "learning_rate": 1.0471093634957528e-06, "loss": 0.0161, "step": 5802 }, { "epoch": 1.3203640500568827, "grad_norm": 1.42589130831637, "learning_rate": 1.0470434648575175e-06, "loss": 0.0239, "step": 5803 }, { "epoch": 1.3205915813424345, "grad_norm": 1.0307165726126566, "learning_rate": 1.046977557593487e-06, "loss": 0.0296, "step": 5804 }, { "epoch": 1.3208191126279862, "grad_norm": 1.434881555216667, "learning_rate": 1.0469116417050078e-06, "loss": 0.0287, "step": 5805 }, { "epoch": 1.321046643913538, "grad_norm": 1.3073002962401254, "learning_rate": 1.0468457171934276e-06, "loss": 0.0385, "step": 5806 }, { "epoch": 1.3212741751990897, "grad_norm": 1.359726380040625, "learning_rate": 1.0467797840600934e-06, "loss": 0.0558, "step": 5807 }, { "epoch": 1.3215017064846417, "grad_norm": 1.1817435732611496, "learning_rate": 1.0467138423063529e-06, "loss": 0.0308, "step": 5808 }, { "epoch": 1.3217292377701935, "grad_norm": 1.1727355771812176, "learning_rate": 1.0466478919335538e-06, "loss": 0.0126, "step": 5809 }, { "epoch": 1.3219567690557452, "grad_norm": 0.9282827475194554, "learning_rate": 1.0465819329430439e-06, "loss": 0.043, "step": 5810 }, { "epoch": 1.322184300341297, "grad_norm": 1.0289989606262906, "learning_rate": 1.0465159653361716e-06, "loss": 0.0323, "step": 5811 }, { "epoch": 1.3224118316268487, "grad_norm": 0.8011760159128629, "learning_rate": 1.0464499891142847e-06, "loss": 0.019, "step": 5812 }, { "epoch": 1.3226393629124005, "grad_norm": 0.926556955845452, "learning_rate": 1.046384004278732e-06, "loss": 0.0167, "step": 5813 }, { "epoch": 1.3228668941979522, "grad_norm": 1.0337574749197451, "learning_rate": 1.0463180108308615e-06, "loss": 0.0149, "step": 5814 }, { "epoch": 1.323094425483504, "grad_norm": 0.7687290002927235, "learning_rate": 1.0462520087720231e-06, "loss": 0.0195, "step": 5815 }, { "epoch": 1.3233219567690557, "grad_norm": 0.793358906518525, "learning_rate": 1.0461859981035649e-06, "loss": 0.0234, "step": 5816 }, { "epoch": 1.3235494880546075, "grad_norm": 1.29769101133269, "learning_rate": 1.0461199788268364e-06, "loss": 0.0315, "step": 5817 }, { "epoch": 1.3237770193401592, "grad_norm": 1.3102647580631441, "learning_rate": 1.0460539509431865e-06, "loss": 0.0243, "step": 5818 }, { "epoch": 1.324004550625711, "grad_norm": 1.072047311381633, "learning_rate": 1.045987914453965e-06, "loss": 0.0273, "step": 5819 }, { "epoch": 1.3242320819112627, "grad_norm": 0.6326140291009906, "learning_rate": 1.0459218693605216e-06, "loss": 0.0135, "step": 5820 }, { "epoch": 1.3244596131968145, "grad_norm": 0.7804292447629503, "learning_rate": 1.0458558156642063e-06, "loss": 0.0192, "step": 5821 }, { "epoch": 1.3246871444823665, "grad_norm": 1.2322347552231876, "learning_rate": 1.0457897533663686e-06, "loss": 0.029, "step": 5822 }, { "epoch": 1.3249146757679182, "grad_norm": 1.0345091680985516, "learning_rate": 1.0457236824683592e-06, "loss": 0.0297, "step": 5823 }, { "epoch": 1.32514220705347, "grad_norm": 1.2808097927518385, "learning_rate": 1.045657602971528e-06, "loss": 0.0247, "step": 5824 }, { "epoch": 1.3253697383390217, "grad_norm": 0.7749625600644442, "learning_rate": 1.0455915148772262e-06, "loss": 0.0182, "step": 5825 }, { "epoch": 1.3255972696245735, "grad_norm": 0.8652793933619614, "learning_rate": 1.0455254181868037e-06, "loss": 0.0151, "step": 5826 }, { "epoch": 1.3258248009101252, "grad_norm": 1.237413731510675, "learning_rate": 1.0454593129016121e-06, "loss": 0.0172, "step": 5827 }, { "epoch": 1.326052332195677, "grad_norm": 1.00775831243054, "learning_rate": 1.045393199023002e-06, "loss": 0.0355, "step": 5828 }, { "epoch": 1.3262798634812287, "grad_norm": 0.8408054705160839, "learning_rate": 1.0453270765523247e-06, "loss": 0.0206, "step": 5829 }, { "epoch": 1.3265073947667805, "grad_norm": 0.7148322270057228, "learning_rate": 1.045260945490932e-06, "loss": 0.0183, "step": 5830 }, { "epoch": 1.3267349260523322, "grad_norm": 0.8189570497153293, "learning_rate": 1.045194805840175e-06, "loss": 0.015, "step": 5831 }, { "epoch": 1.326962457337884, "grad_norm": 1.4734645621134759, "learning_rate": 1.045128657601406e-06, "loss": 0.0406, "step": 5832 }, { "epoch": 1.3271899886234357, "grad_norm": 0.6441526578319029, "learning_rate": 1.0450625007759765e-06, "loss": 0.0135, "step": 5833 }, { "epoch": 1.3274175199089875, "grad_norm": 0.5585116452445247, "learning_rate": 1.044996335365239e-06, "loss": 0.0089, "step": 5834 }, { "epoch": 1.3276450511945392, "grad_norm": 1.2564939600468337, "learning_rate": 1.0449301613705453e-06, "loss": 0.0312, "step": 5835 }, { "epoch": 1.327872582480091, "grad_norm": 0.7995562758285696, "learning_rate": 1.0448639787932482e-06, "loss": 0.0185, "step": 5836 }, { "epoch": 1.3281001137656427, "grad_norm": 0.6950191248274259, "learning_rate": 1.0447977876347005e-06, "loss": 0.0166, "step": 5837 }, { "epoch": 1.3283276450511945, "grad_norm": 1.1554827500771103, "learning_rate": 1.0447315878962547e-06, "loss": 0.0158, "step": 5838 }, { "epoch": 1.3285551763367462, "grad_norm": 0.7747805069607538, "learning_rate": 1.044665379579264e-06, "loss": 0.0129, "step": 5839 }, { "epoch": 1.328782707622298, "grad_norm": 0.80632772075669, "learning_rate": 1.0445991626850816e-06, "loss": 0.0206, "step": 5840 }, { "epoch": 1.3290102389078498, "grad_norm": 1.5896873106614287, "learning_rate": 1.0445329372150607e-06, "loss": 0.0389, "step": 5841 }, { "epoch": 1.3292377701934015, "grad_norm": 1.072021072181124, "learning_rate": 1.0444667031705549e-06, "loss": 0.0303, "step": 5842 }, { "epoch": 1.3294653014789533, "grad_norm": 2.222905444464611, "learning_rate": 1.0444004605529178e-06, "loss": 0.0631, "step": 5843 }, { "epoch": 1.329692832764505, "grad_norm": 0.8281594466779323, "learning_rate": 1.0443342093635036e-06, "loss": 0.022, "step": 5844 }, { "epoch": 1.3299203640500568, "grad_norm": 1.6272382185157073, "learning_rate": 1.044267949603666e-06, "loss": 0.0546, "step": 5845 }, { "epoch": 1.3301478953356085, "grad_norm": 0.9091509980029293, "learning_rate": 1.0442016812747594e-06, "loss": 0.0236, "step": 5846 }, { "epoch": 1.3303754266211605, "grad_norm": 0.85308617988147, "learning_rate": 1.0441354043781381e-06, "loss": 0.0136, "step": 5847 }, { "epoch": 1.3306029579067122, "grad_norm": 0.44808170737486047, "learning_rate": 1.0440691189151567e-06, "loss": 0.007, "step": 5848 }, { "epoch": 1.330830489192264, "grad_norm": 0.8973824993734406, "learning_rate": 1.0440028248871702e-06, "loss": 0.0177, "step": 5849 }, { "epoch": 1.3310580204778157, "grad_norm": 1.7619238754120394, "learning_rate": 1.0439365222955332e-06, "loss": 0.0331, "step": 5850 }, { "epoch": 1.3312855517633675, "grad_norm": 0.8705762797622655, "learning_rate": 1.043870211141601e-06, "loss": 0.0172, "step": 5851 }, { "epoch": 1.3315130830489192, "grad_norm": 1.096308879196295, "learning_rate": 1.0438038914267287e-06, "loss": 0.0274, "step": 5852 }, { "epoch": 1.331740614334471, "grad_norm": 0.8197895561671952, "learning_rate": 1.043737563152272e-06, "loss": 0.0172, "step": 5853 }, { "epoch": 1.3319681456200227, "grad_norm": 1.4719340019230323, "learning_rate": 1.0436712263195862e-06, "loss": 0.0141, "step": 5854 }, { "epoch": 1.3321956769055745, "grad_norm": 0.9648112032732078, "learning_rate": 1.0436048809300273e-06, "loss": 0.0276, "step": 5855 }, { "epoch": 1.3324232081911263, "grad_norm": 0.852726738751836, "learning_rate": 1.0435385269849515e-06, "loss": 0.0288, "step": 5856 }, { "epoch": 1.332650739476678, "grad_norm": 1.721578698981377, "learning_rate": 1.0434721644857146e-06, "loss": 0.043, "step": 5857 }, { "epoch": 1.3328782707622298, "grad_norm": 0.623574972886459, "learning_rate": 1.043405793433673e-06, "loss": 0.0162, "step": 5858 }, { "epoch": 1.3331058020477815, "grad_norm": 0.7840847487395295, "learning_rate": 1.0433394138301835e-06, "loss": 0.0168, "step": 5859 }, { "epoch": 1.3333333333333333, "grad_norm": 1.1889353006124617, "learning_rate": 1.0432730256766022e-06, "loss": 0.0291, "step": 5860 }, { "epoch": 1.3335608646188852, "grad_norm": 1.5874275317235012, "learning_rate": 1.0432066289742864e-06, "loss": 0.0482, "step": 5861 }, { "epoch": 1.333788395904437, "grad_norm": 0.8058160492544123, "learning_rate": 1.0431402237245932e-06, "loss": 0.0111, "step": 5862 }, { "epoch": 1.3340159271899887, "grad_norm": 0.6561988259057422, "learning_rate": 1.0430738099288794e-06, "loss": 0.0112, "step": 5863 }, { "epoch": 1.3342434584755405, "grad_norm": 1.0674110824492682, "learning_rate": 1.0430073875885026e-06, "loss": 0.0305, "step": 5864 }, { "epoch": 1.3344709897610922, "grad_norm": 0.6325379384484306, "learning_rate": 1.0429409567048205e-06, "loss": 0.0088, "step": 5865 }, { "epoch": 1.334698521046644, "grad_norm": 1.0168781752483684, "learning_rate": 1.0428745172791905e-06, "loss": 0.0387, "step": 5866 }, { "epoch": 1.3349260523321957, "grad_norm": 1.7297475905815625, "learning_rate": 1.0428080693129708e-06, "loss": 0.0589, "step": 5867 }, { "epoch": 1.3351535836177475, "grad_norm": 1.1628295813004157, "learning_rate": 1.0427416128075192e-06, "loss": 0.0193, "step": 5868 }, { "epoch": 1.3353811149032992, "grad_norm": 1.2631903831165703, "learning_rate": 1.0426751477641941e-06, "loss": 0.0324, "step": 5869 }, { "epoch": 1.335608646188851, "grad_norm": 1.4329045968752339, "learning_rate": 1.042608674184354e-06, "loss": 0.0255, "step": 5870 }, { "epoch": 1.3358361774744028, "grad_norm": 2.479731151758897, "learning_rate": 1.0425421920693575e-06, "loss": 0.0585, "step": 5871 }, { "epoch": 1.3360637087599545, "grad_norm": 0.9244796148915133, "learning_rate": 1.042475701420563e-06, "loss": 0.0195, "step": 5872 }, { "epoch": 1.3362912400455063, "grad_norm": 0.8951775033309297, "learning_rate": 1.04240920223933e-06, "loss": 0.0214, "step": 5873 }, { "epoch": 1.336518771331058, "grad_norm": 0.7274004184798262, "learning_rate": 1.0423426945270174e-06, "loss": 0.0219, "step": 5874 }, { "epoch": 1.3367463026166098, "grad_norm": 0.9074676685298566, "learning_rate": 1.0422761782849842e-06, "loss": 0.0315, "step": 5875 }, { "epoch": 1.3369738339021615, "grad_norm": 1.9629567837065391, "learning_rate": 1.0422096535145902e-06, "loss": 0.0489, "step": 5876 }, { "epoch": 1.3372013651877133, "grad_norm": 1.1684048882515632, "learning_rate": 1.042143120217195e-06, "loss": 0.0293, "step": 5877 }, { "epoch": 1.337428896473265, "grad_norm": 1.364293734448395, "learning_rate": 1.0420765783941586e-06, "loss": 0.0375, "step": 5878 }, { "epoch": 1.3376564277588168, "grad_norm": 1.039811324655209, "learning_rate": 1.0420100280468404e-06, "loss": 0.0265, "step": 5879 }, { "epoch": 1.3378839590443685, "grad_norm": 0.8625486316278609, "learning_rate": 1.0419434691766012e-06, "loss": 0.0268, "step": 5880 }, { "epoch": 1.3381114903299203, "grad_norm": 0.7261697725405176, "learning_rate": 1.041876901784801e-06, "loss": 0.0147, "step": 5881 }, { "epoch": 1.338339021615472, "grad_norm": 0.8914798651369713, "learning_rate": 1.0418103258728001e-06, "loss": 0.0233, "step": 5882 }, { "epoch": 1.3385665529010238, "grad_norm": 2.1255378013198962, "learning_rate": 1.04174374144196e-06, "loss": 0.0277, "step": 5883 }, { "epoch": 1.3387940841865755, "grad_norm": 1.1620134309649157, "learning_rate": 1.0416771484936409e-06, "loss": 0.0206, "step": 5884 }, { "epoch": 1.3390216154721273, "grad_norm": 1.787592523953072, "learning_rate": 1.041610547029204e-06, "loss": 0.0594, "step": 5885 }, { "epoch": 1.3392491467576793, "grad_norm": 1.3432064804257042, "learning_rate": 1.0415439370500104e-06, "loss": 0.0271, "step": 5886 }, { "epoch": 1.339476678043231, "grad_norm": 1.4695495810180548, "learning_rate": 1.0414773185574214e-06, "loss": 0.0632, "step": 5887 }, { "epoch": 1.3397042093287828, "grad_norm": 1.002580161934096, "learning_rate": 1.0414106915527991e-06, "loss": 0.034, "step": 5888 }, { "epoch": 1.3399317406143345, "grad_norm": 1.2230261450463973, "learning_rate": 1.041344056037505e-06, "loss": 0.0527, "step": 5889 }, { "epoch": 1.3401592718998863, "grad_norm": 1.5508468505953605, "learning_rate": 1.0412774120129004e-06, "loss": 0.0364, "step": 5890 }, { "epoch": 1.340386803185438, "grad_norm": 1.1182067053381388, "learning_rate": 1.0412107594803484e-06, "loss": 0.0224, "step": 5891 }, { "epoch": 1.3406143344709898, "grad_norm": 1.334755248496988, "learning_rate": 1.0411440984412103e-06, "loss": 0.0306, "step": 5892 }, { "epoch": 1.3408418657565415, "grad_norm": 1.0312638416250242, "learning_rate": 1.0410774288968492e-06, "loss": 0.0273, "step": 5893 }, { "epoch": 1.3410693970420933, "grad_norm": 0.8814569099212579, "learning_rate": 1.0410107508486272e-06, "loss": 0.0175, "step": 5894 }, { "epoch": 1.341296928327645, "grad_norm": 0.9202861833902302, "learning_rate": 1.0409440642979077e-06, "loss": 0.0258, "step": 5895 }, { "epoch": 1.3415244596131968, "grad_norm": 1.0927206734816597, "learning_rate": 1.040877369246053e-06, "loss": 0.0429, "step": 5896 }, { "epoch": 1.3417519908987485, "grad_norm": 1.3413817549764846, "learning_rate": 1.0408106656944267e-06, "loss": 0.0248, "step": 5897 }, { "epoch": 1.3419795221843003, "grad_norm": 0.8153253460902535, "learning_rate": 1.0407439536443919e-06, "loss": 0.0208, "step": 5898 }, { "epoch": 1.342207053469852, "grad_norm": 0.7094173678000857, "learning_rate": 1.040677233097312e-06, "loss": 0.0238, "step": 5899 }, { "epoch": 1.342434584755404, "grad_norm": 0.6444151744345981, "learning_rate": 1.0406105040545509e-06, "loss": 0.0161, "step": 5900 }, { "epoch": 1.3426621160409558, "grad_norm": 1.379128100384456, "learning_rate": 1.0405437665174719e-06, "loss": 0.0388, "step": 5901 }, { "epoch": 1.3428896473265075, "grad_norm": 0.8946926394570293, "learning_rate": 1.0404770204874396e-06, "loss": 0.0267, "step": 5902 }, { "epoch": 1.3431171786120593, "grad_norm": 2.042177852704746, "learning_rate": 1.040410265965818e-06, "loss": 0.0404, "step": 5903 }, { "epoch": 1.343344709897611, "grad_norm": 1.3077906262962142, "learning_rate": 1.0403435029539711e-06, "loss": 0.0308, "step": 5904 }, { "epoch": 1.3435722411831628, "grad_norm": 1.0324100339589763, "learning_rate": 1.0402767314532638e-06, "loss": 0.0206, "step": 5905 }, { "epoch": 1.3437997724687145, "grad_norm": 1.0265663580127615, "learning_rate": 1.0402099514650607e-06, "loss": 0.0324, "step": 5906 }, { "epoch": 1.3440273037542663, "grad_norm": 0.7549200836426128, "learning_rate": 1.0401431629907267e-06, "loss": 0.0121, "step": 5907 }, { "epoch": 1.344254835039818, "grad_norm": 0.7290621969799262, "learning_rate": 1.0400763660316265e-06, "loss": 0.0131, "step": 5908 }, { "epoch": 1.3444823663253698, "grad_norm": 1.119125346963153, "learning_rate": 1.0400095605891258e-06, "loss": 0.0423, "step": 5909 }, { "epoch": 1.3447098976109215, "grad_norm": 1.4041545259060262, "learning_rate": 1.0399427466645895e-06, "loss": 0.0256, "step": 5910 }, { "epoch": 1.3449374288964733, "grad_norm": 1.0699960135340252, "learning_rate": 1.0398759242593834e-06, "loss": 0.0193, "step": 5911 }, { "epoch": 1.345164960182025, "grad_norm": 1.4394734119461763, "learning_rate": 1.0398090933748733e-06, "loss": 0.0451, "step": 5912 }, { "epoch": 1.3453924914675768, "grad_norm": 0.7328296507836, "learning_rate": 1.039742254012425e-06, "loss": 0.0117, "step": 5913 }, { "epoch": 1.3456200227531285, "grad_norm": 0.9315877067634672, "learning_rate": 1.0396754061734047e-06, "loss": 0.0291, "step": 5914 }, { "epoch": 1.3458475540386803, "grad_norm": 1.1230101632788236, "learning_rate": 1.0396085498591785e-06, "loss": 0.0134, "step": 5915 }, { "epoch": 1.346075085324232, "grad_norm": 1.0983618909120694, "learning_rate": 1.0395416850711127e-06, "loss": 0.02, "step": 5916 }, { "epoch": 1.3463026166097838, "grad_norm": 0.6035953597253712, "learning_rate": 1.0394748118105743e-06, "loss": 0.0092, "step": 5917 }, { "epoch": 1.3465301478953355, "grad_norm": 0.5057823769699702, "learning_rate": 1.0394079300789296e-06, "loss": 0.006, "step": 5918 }, { "epoch": 1.3467576791808873, "grad_norm": 1.2058457720453324, "learning_rate": 1.0393410398775459e-06, "loss": 0.0381, "step": 5919 }, { "epoch": 1.346985210466439, "grad_norm": 1.0689739159271736, "learning_rate": 1.03927414120779e-06, "loss": 0.0128, "step": 5920 }, { "epoch": 1.3472127417519908, "grad_norm": 1.3055479840732933, "learning_rate": 1.0392072340710296e-06, "loss": 0.0441, "step": 5921 }, { "epoch": 1.3474402730375425, "grad_norm": 1.387509390007852, "learning_rate": 1.0391403184686318e-06, "loss": 0.0302, "step": 5922 }, { "epoch": 1.3476678043230943, "grad_norm": 1.1536233789260337, "learning_rate": 1.0390733944019645e-06, "loss": 0.0202, "step": 5923 }, { "epoch": 1.347895335608646, "grad_norm": 0.9911221130460622, "learning_rate": 1.0390064618723952e-06, "loss": 0.0153, "step": 5924 }, { "epoch": 1.348122866894198, "grad_norm": 0.9561197103111988, "learning_rate": 1.038939520881292e-06, "loss": 0.0114, "step": 5925 }, { "epoch": 1.3483503981797498, "grad_norm": 1.102900309683922, "learning_rate": 1.038872571430023e-06, "loss": 0.0337, "step": 5926 }, { "epoch": 1.3485779294653015, "grad_norm": 1.2902418265691835, "learning_rate": 1.038805613519957e-06, "loss": 0.04, "step": 5927 }, { "epoch": 1.3488054607508533, "grad_norm": 1.420709768769003, "learning_rate": 1.038738647152462e-06, "loss": 0.0199, "step": 5928 }, { "epoch": 1.349032992036405, "grad_norm": 0.8364236936432923, "learning_rate": 1.0386716723289063e-06, "loss": 0.0222, "step": 5929 }, { "epoch": 1.3492605233219568, "grad_norm": 1.0505205577839964, "learning_rate": 1.0386046890506596e-06, "loss": 0.0248, "step": 5930 }, { "epoch": 1.3494880546075085, "grad_norm": 1.401571027676419, "learning_rate": 1.0385376973190906e-06, "loss": 0.0399, "step": 5931 }, { "epoch": 1.3497155858930603, "grad_norm": 1.1032877927835119, "learning_rate": 1.0384706971355683e-06, "loss": 0.0273, "step": 5932 }, { "epoch": 1.349943117178612, "grad_norm": 0.7007490772962891, "learning_rate": 1.038403688501462e-06, "loss": 0.0102, "step": 5933 }, { "epoch": 1.3501706484641638, "grad_norm": 0.8472863824911138, "learning_rate": 1.0383366714181419e-06, "loss": 0.011, "step": 5934 }, { "epoch": 1.3503981797497155, "grad_norm": 0.8977164627217846, "learning_rate": 1.038269645886977e-06, "loss": 0.0167, "step": 5935 }, { "epoch": 1.3506257110352673, "grad_norm": 1.2746342659674816, "learning_rate": 1.0382026119093372e-06, "loss": 0.0272, "step": 5936 }, { "epoch": 1.350853242320819, "grad_norm": 1.5516990871708691, "learning_rate": 1.038135569486593e-06, "loss": 0.0292, "step": 5937 }, { "epoch": 1.3510807736063708, "grad_norm": 1.9165286699452406, "learning_rate": 1.0380685186201143e-06, "loss": 0.0594, "step": 5938 }, { "epoch": 1.3513083048919228, "grad_norm": 1.3297283101826065, "learning_rate": 1.0380014593112714e-06, "loss": 0.0568, "step": 5939 }, { "epoch": 1.3515358361774745, "grad_norm": 1.2661268564639319, "learning_rate": 1.0379343915614354e-06, "loss": 0.0433, "step": 5940 }, { "epoch": 1.3517633674630263, "grad_norm": 1.0448046867479912, "learning_rate": 1.0378673153719764e-06, "loss": 0.0288, "step": 5941 }, { "epoch": 1.351990898748578, "grad_norm": 0.6263965338196285, "learning_rate": 1.0378002307442659e-06, "loss": 0.0067, "step": 5942 }, { "epoch": 1.3522184300341298, "grad_norm": 0.9186544964954529, "learning_rate": 1.0377331376796745e-06, "loss": 0.034, "step": 5943 }, { "epoch": 1.3524459613196815, "grad_norm": 1.3175528661105624, "learning_rate": 1.0376660361795738e-06, "loss": 0.0577, "step": 5944 }, { "epoch": 1.3526734926052333, "grad_norm": 1.4599781406050027, "learning_rate": 1.0375989262453348e-06, "loss": 0.0447, "step": 5945 }, { "epoch": 1.352901023890785, "grad_norm": 1.1156489974661725, "learning_rate": 1.0375318078783294e-06, "loss": 0.0511, "step": 5946 }, { "epoch": 1.3531285551763368, "grad_norm": 0.9483213458533444, "learning_rate": 1.0374646810799297e-06, "loss": 0.0238, "step": 5947 }, { "epoch": 1.3533560864618885, "grad_norm": 1.2661883390274016, "learning_rate": 1.037397545851507e-06, "loss": 0.0429, "step": 5948 }, { "epoch": 1.3535836177474403, "grad_norm": 1.0018616531819395, "learning_rate": 1.0373304021944338e-06, "loss": 0.0221, "step": 5949 }, { "epoch": 1.353811149032992, "grad_norm": 0.7808472720700552, "learning_rate": 1.0372632501100826e-06, "loss": 0.0091, "step": 5950 }, { "epoch": 1.3540386803185438, "grad_norm": 0.8140478794271282, "learning_rate": 1.0371960895998252e-06, "loss": 0.0145, "step": 5951 }, { "epoch": 1.3542662116040955, "grad_norm": 0.9117276219302244, "learning_rate": 1.0371289206650349e-06, "loss": 0.0201, "step": 5952 }, { "epoch": 1.3544937428896473, "grad_norm": 1.2061163633959278, "learning_rate": 1.0370617433070842e-06, "loss": 0.0322, "step": 5953 }, { "epoch": 1.354721274175199, "grad_norm": 0.7876722921495687, "learning_rate": 1.036994557527346e-06, "loss": 0.0144, "step": 5954 }, { "epoch": 1.3549488054607508, "grad_norm": 0.6613445307925959, "learning_rate": 1.0369273633271936e-06, "loss": 0.0069, "step": 5955 }, { "epoch": 1.3551763367463026, "grad_norm": 0.6345409248116708, "learning_rate": 1.0368601607080004e-06, "loss": 0.0155, "step": 5956 }, { "epoch": 1.3554038680318543, "grad_norm": 1.8147704904537438, "learning_rate": 1.0367929496711397e-06, "loss": 0.039, "step": 5957 }, { "epoch": 1.355631399317406, "grad_norm": 0.833931276693109, "learning_rate": 1.0367257302179853e-06, "loss": 0.0146, "step": 5958 }, { "epoch": 1.3558589306029578, "grad_norm": 0.7692458341928677, "learning_rate": 1.036658502349911e-06, "loss": 0.0125, "step": 5959 }, { "epoch": 1.3560864618885096, "grad_norm": 0.8091203823295923, "learning_rate": 1.0365912660682908e-06, "loss": 0.0216, "step": 5960 }, { "epoch": 1.3563139931740613, "grad_norm": 2.503585891485645, "learning_rate": 1.036524021374499e-06, "loss": 0.0419, "step": 5961 }, { "epoch": 1.356541524459613, "grad_norm": 1.0464032831404197, "learning_rate": 1.0364567682699098e-06, "loss": 0.0279, "step": 5962 }, { "epoch": 1.356769055745165, "grad_norm": 1.0400014436949596, "learning_rate": 1.036389506755898e-06, "loss": 0.0202, "step": 5963 }, { "epoch": 1.3569965870307168, "grad_norm": 0.9813071406643438, "learning_rate": 1.036322236833838e-06, "loss": 0.0235, "step": 5964 }, { "epoch": 1.3572241183162685, "grad_norm": 1.118519599019973, "learning_rate": 1.0362549585051046e-06, "loss": 0.0308, "step": 5965 }, { "epoch": 1.3574516496018203, "grad_norm": 0.5898336323005567, "learning_rate": 1.0361876717710731e-06, "loss": 0.013, "step": 5966 }, { "epoch": 1.357679180887372, "grad_norm": 1.361363746809614, "learning_rate": 1.0361203766331187e-06, "loss": 0.034, "step": 5967 }, { "epoch": 1.3579067121729238, "grad_norm": 0.6464652035747823, "learning_rate": 1.036053073092617e-06, "loss": 0.0129, "step": 5968 }, { "epoch": 1.3581342434584756, "grad_norm": 1.8759837911851749, "learning_rate": 1.0359857611509428e-06, "loss": 0.0321, "step": 5969 }, { "epoch": 1.3583617747440273, "grad_norm": 1.619283823736038, "learning_rate": 1.0359184408094726e-06, "loss": 0.0301, "step": 5970 }, { "epoch": 1.358589306029579, "grad_norm": 2.0895320562539723, "learning_rate": 1.0358511120695819e-06, "loss": 0.0571, "step": 5971 }, { "epoch": 1.3588168373151308, "grad_norm": 1.1559191193621738, "learning_rate": 1.0357837749326471e-06, "loss": 0.0305, "step": 5972 }, { "epoch": 1.3590443686006826, "grad_norm": 1.1597343434931056, "learning_rate": 1.0357164294000442e-06, "loss": 0.0301, "step": 5973 }, { "epoch": 1.3592718998862343, "grad_norm": 1.1311805121187837, "learning_rate": 1.0356490754731496e-06, "loss": 0.022, "step": 5974 }, { "epoch": 1.359499431171786, "grad_norm": 0.8235807140315661, "learning_rate": 1.03558171315334e-06, "loss": 0.0125, "step": 5975 }, { "epoch": 1.3597269624573378, "grad_norm": 0.8840629724459017, "learning_rate": 1.0355143424419922e-06, "loss": 0.0124, "step": 5976 }, { "epoch": 1.3599544937428896, "grad_norm": 1.674273107123145, "learning_rate": 1.035446963340483e-06, "loss": 0.0611, "step": 5977 }, { "epoch": 1.3601820250284415, "grad_norm": 1.0994944800187416, "learning_rate": 1.0353795758501894e-06, "loss": 0.0184, "step": 5978 }, { "epoch": 1.3604095563139933, "grad_norm": 1.0588312713917787, "learning_rate": 1.0353121799724892e-06, "loss": 0.0194, "step": 5979 }, { "epoch": 1.360637087599545, "grad_norm": 1.3104261203754106, "learning_rate": 1.0352447757087592e-06, "loss": 0.0296, "step": 5980 }, { "epoch": 1.3608646188850968, "grad_norm": 1.3004787031070375, "learning_rate": 1.0351773630603774e-06, "loss": 0.0204, "step": 5981 }, { "epoch": 1.3610921501706486, "grad_norm": 1.0659064927726833, "learning_rate": 1.0351099420287213e-06, "loss": 0.036, "step": 5982 }, { "epoch": 1.3613196814562003, "grad_norm": 1.1812994279020221, "learning_rate": 1.0350425126151694e-06, "loss": 0.0248, "step": 5983 }, { "epoch": 1.361547212741752, "grad_norm": 0.9622198048005316, "learning_rate": 1.0349750748210994e-06, "loss": 0.0132, "step": 5984 }, { "epoch": 1.3617747440273038, "grad_norm": 0.7133679642974851, "learning_rate": 1.0349076286478897e-06, "loss": 0.0086, "step": 5985 }, { "epoch": 1.3620022753128556, "grad_norm": 1.3396910291108761, "learning_rate": 1.0348401740969188e-06, "loss": 0.0457, "step": 5986 }, { "epoch": 1.3622298065984073, "grad_norm": 0.976016551467127, "learning_rate": 1.0347727111695652e-06, "loss": 0.0182, "step": 5987 }, { "epoch": 1.362457337883959, "grad_norm": 1.2706452532311896, "learning_rate": 1.0347052398672079e-06, "loss": 0.0337, "step": 5988 }, { "epoch": 1.3626848691695108, "grad_norm": 1.1241759656924852, "learning_rate": 1.034637760191226e-06, "loss": 0.0451, "step": 5989 }, { "epoch": 1.3629124004550626, "grad_norm": 0.9870356931300208, "learning_rate": 1.0345702721429982e-06, "loss": 0.0292, "step": 5990 }, { "epoch": 1.3631399317406143, "grad_norm": 0.7055169330755962, "learning_rate": 1.0345027757239044e-06, "loss": 0.0146, "step": 5991 }, { "epoch": 1.363367463026166, "grad_norm": 1.4129407289578744, "learning_rate": 1.0344352709353237e-06, "loss": 0.049, "step": 5992 }, { "epoch": 1.3635949943117178, "grad_norm": 0.6615348229173598, "learning_rate": 1.034367757778636e-06, "loss": 0.0161, "step": 5993 }, { "epoch": 1.3638225255972696, "grad_norm": 1.3319609442015357, "learning_rate": 1.034300236255221e-06, "loss": 0.0142, "step": 5994 }, { "epoch": 1.3640500568828213, "grad_norm": 1.101573823862616, "learning_rate": 1.0342327063664587e-06, "loss": 0.0252, "step": 5995 }, { "epoch": 1.364277588168373, "grad_norm": 0.49851149150245927, "learning_rate": 1.0341651681137293e-06, "loss": 0.0058, "step": 5996 }, { "epoch": 1.3645051194539248, "grad_norm": 0.7916106342215998, "learning_rate": 1.0340976214984136e-06, "loss": 0.012, "step": 5997 }, { "epoch": 1.3647326507394766, "grad_norm": 1.3033775616866645, "learning_rate": 1.0340300665218913e-06, "loss": 0.0224, "step": 5998 }, { "epoch": 1.3649601820250283, "grad_norm": 1.1530071908333883, "learning_rate": 1.0339625031855438e-06, "loss": 0.0264, "step": 5999 }, { "epoch": 1.36518771331058, "grad_norm": 1.7119840122096044, "learning_rate": 1.0338949314907515e-06, "loss": 0.0542, "step": 6000 }, { "epoch": 1.3654152445961318, "grad_norm": 1.0229925474062507, "learning_rate": 1.0338273514388958e-06, "loss": 0.0439, "step": 6001 }, { "epoch": 1.3656427758816838, "grad_norm": 1.2452426043381837, "learning_rate": 1.0337597630313578e-06, "loss": 0.015, "step": 6002 }, { "epoch": 1.3658703071672356, "grad_norm": 1.8653310977298578, "learning_rate": 1.0336921662695188e-06, "loss": 0.0488, "step": 6003 }, { "epoch": 1.3660978384527873, "grad_norm": 0.6741864416257192, "learning_rate": 1.0336245611547605e-06, "loss": 0.0103, "step": 6004 }, { "epoch": 1.366325369738339, "grad_norm": 0.7051952693815983, "learning_rate": 1.0335569476884643e-06, "loss": 0.014, "step": 6005 }, { "epoch": 1.3665529010238908, "grad_norm": 0.9708525738875694, "learning_rate": 1.0334893258720124e-06, "loss": 0.0151, "step": 6006 }, { "epoch": 1.3667804323094426, "grad_norm": 1.109837397667383, "learning_rate": 1.033421695706787e-06, "loss": 0.0233, "step": 6007 }, { "epoch": 1.3670079635949943, "grad_norm": 0.9083533358364363, "learning_rate": 1.03335405719417e-06, "loss": 0.0135, "step": 6008 }, { "epoch": 1.367235494880546, "grad_norm": 1.1720567751480195, "learning_rate": 1.0332864103355438e-06, "loss": 0.015, "step": 6009 }, { "epoch": 1.3674630261660978, "grad_norm": 0.7630354357142868, "learning_rate": 1.0332187551322914e-06, "loss": 0.0128, "step": 6010 }, { "epoch": 1.3676905574516496, "grad_norm": 0.6765984321238594, "learning_rate": 1.0331510915857951e-06, "loss": 0.0138, "step": 6011 }, { "epoch": 1.3679180887372013, "grad_norm": 0.528880382934291, "learning_rate": 1.0330834196974378e-06, "loss": 0.006, "step": 6012 }, { "epoch": 1.368145620022753, "grad_norm": 0.9749392678640607, "learning_rate": 1.0330157394686031e-06, "loss": 0.0291, "step": 6013 }, { "epoch": 1.3683731513083048, "grad_norm": 1.6667345822917836, "learning_rate": 1.0329480509006738e-06, "loss": 0.0284, "step": 6014 }, { "epoch": 1.3686006825938566, "grad_norm": 0.9719828123924177, "learning_rate": 1.0328803539950332e-06, "loss": 0.0161, "step": 6015 }, { "epoch": 1.3688282138794083, "grad_norm": 0.6687579179263169, "learning_rate": 1.0328126487530657e-06, "loss": 0.0171, "step": 6016 }, { "epoch": 1.3690557451649603, "grad_norm": 0.8484600929868883, "learning_rate": 1.0327449351761542e-06, "loss": 0.0234, "step": 6017 }, { "epoch": 1.369283276450512, "grad_norm": 0.9980151540533014, "learning_rate": 1.0326772132656828e-06, "loss": 0.0228, "step": 6018 }, { "epoch": 1.3695108077360638, "grad_norm": 1.432111946362771, "learning_rate": 1.0326094830230362e-06, "loss": 0.0265, "step": 6019 }, { "epoch": 1.3697383390216156, "grad_norm": 3.205956674239623, "learning_rate": 1.032541744449598e-06, "loss": 0.0793, "step": 6020 }, { "epoch": 1.3699658703071673, "grad_norm": 1.033249260757496, "learning_rate": 1.0324739975467529e-06, "loss": 0.042, "step": 6021 }, { "epoch": 1.370193401592719, "grad_norm": 0.9157158705373819, "learning_rate": 1.0324062423158857e-06, "loss": 0.0294, "step": 6022 }, { "epoch": 1.3704209328782708, "grad_norm": 0.9368902048923686, "learning_rate": 1.0323384787583809e-06, "loss": 0.0244, "step": 6023 }, { "epoch": 1.3706484641638226, "grad_norm": 1.4416896631494844, "learning_rate": 1.0322707068756238e-06, "loss": 0.0353, "step": 6024 }, { "epoch": 1.3708759954493743, "grad_norm": 1.2550874861994528, "learning_rate": 1.0322029266689992e-06, "loss": 0.058, "step": 6025 }, { "epoch": 1.371103526734926, "grad_norm": 1.3124105011895348, "learning_rate": 1.0321351381398926e-06, "loss": 0.0274, "step": 6026 }, { "epoch": 1.3713310580204778, "grad_norm": 0.8889050886123729, "learning_rate": 1.0320673412896891e-06, "loss": 0.0268, "step": 6027 }, { "epoch": 1.3715585893060296, "grad_norm": 1.1494781940988754, "learning_rate": 1.0319995361197752e-06, "loss": 0.025, "step": 6028 }, { "epoch": 1.3717861205915813, "grad_norm": 1.3212407031910525, "learning_rate": 1.0319317226315358e-06, "loss": 0.0298, "step": 6029 }, { "epoch": 1.372013651877133, "grad_norm": 1.286243454157273, "learning_rate": 1.0318639008263572e-06, "loss": 0.0342, "step": 6030 }, { "epoch": 1.3722411831626848, "grad_norm": 0.8914612174450923, "learning_rate": 1.0317960707056256e-06, "loss": 0.0126, "step": 6031 }, { "epoch": 1.3724687144482366, "grad_norm": 0.7573600841393602, "learning_rate": 1.0317282322707275e-06, "loss": 0.0197, "step": 6032 }, { "epoch": 1.3726962457337883, "grad_norm": 0.5345772069608944, "learning_rate": 1.0316603855230492e-06, "loss": 0.0111, "step": 6033 }, { "epoch": 1.37292377701934, "grad_norm": 0.8338590952425786, "learning_rate": 1.0315925304639773e-06, "loss": 0.0179, "step": 6034 }, { "epoch": 1.3731513083048918, "grad_norm": 0.9724921448714786, "learning_rate": 1.0315246670948988e-06, "loss": 0.0169, "step": 6035 }, { "epoch": 1.3733788395904436, "grad_norm": 0.873659813292738, "learning_rate": 1.0314567954172006e-06, "loss": 0.0277, "step": 6036 }, { "epoch": 1.3736063708759954, "grad_norm": 0.9265881375432587, "learning_rate": 1.03138891543227e-06, "loss": 0.0364, "step": 6037 }, { "epoch": 1.373833902161547, "grad_norm": 1.1953217652768424, "learning_rate": 1.031321027141494e-06, "loss": 0.0279, "step": 6038 }, { "epoch": 1.3740614334470989, "grad_norm": 1.6682562329608188, "learning_rate": 1.0312531305462607e-06, "loss": 0.0443, "step": 6039 }, { "epoch": 1.3742889647326506, "grad_norm": 1.1077668460967074, "learning_rate": 1.031185225647957e-06, "loss": 0.0163, "step": 6040 }, { "epoch": 1.3745164960182026, "grad_norm": 0.648925246369876, "learning_rate": 1.0311173124479715e-06, "loss": 0.0091, "step": 6041 }, { "epoch": 1.3747440273037543, "grad_norm": 0.8886275125074955, "learning_rate": 1.0310493909476916e-06, "loss": 0.0224, "step": 6042 }, { "epoch": 1.374971558589306, "grad_norm": 0.8524740196914463, "learning_rate": 1.0309814611485062e-06, "loss": 0.0259, "step": 6043 }, { "epoch": 1.3751990898748578, "grad_norm": 0.9742799558119335, "learning_rate": 1.0309135230518028e-06, "loss": 0.0223, "step": 6044 }, { "epoch": 1.3754266211604096, "grad_norm": 1.383114536961926, "learning_rate": 1.0308455766589706e-06, "loss": 0.0319, "step": 6045 }, { "epoch": 1.3756541524459613, "grad_norm": 1.3723012718452987, "learning_rate": 1.030777621971398e-06, "loss": 0.0293, "step": 6046 }, { "epoch": 1.375881683731513, "grad_norm": 2.4276688201868044, "learning_rate": 1.0307096589904742e-06, "loss": 0.0765, "step": 6047 }, { "epoch": 1.3761092150170648, "grad_norm": 1.3054528485981858, "learning_rate": 1.030641687717588e-06, "loss": 0.0282, "step": 6048 }, { "epoch": 1.3763367463026166, "grad_norm": 1.3658620692254781, "learning_rate": 1.0305737081541283e-06, "loss": 0.019, "step": 6049 }, { "epoch": 1.3765642775881684, "grad_norm": 1.2988678220894119, "learning_rate": 1.0305057203014848e-06, "loss": 0.0457, "step": 6050 }, { "epoch": 1.37679180887372, "grad_norm": 1.8263728967860204, "learning_rate": 1.0304377241610472e-06, "loss": 0.0681, "step": 6051 }, { "epoch": 1.3770193401592719, "grad_norm": 0.48009564319798714, "learning_rate": 1.030369719734205e-06, "loss": 0.0078, "step": 6052 }, { "epoch": 1.3772468714448236, "grad_norm": 1.5065012818800154, "learning_rate": 1.0303017070223482e-06, "loss": 0.0227, "step": 6053 }, { "epoch": 1.3774744027303754, "grad_norm": 0.5740072329076822, "learning_rate": 1.0302336860268667e-06, "loss": 0.0198, "step": 6054 }, { "epoch": 1.377701934015927, "grad_norm": 0.9008705562256887, "learning_rate": 1.0301656567491507e-06, "loss": 0.0209, "step": 6055 }, { "epoch": 1.377929465301479, "grad_norm": 0.9052594350778648, "learning_rate": 1.0300976191905907e-06, "loss": 0.0134, "step": 6056 }, { "epoch": 1.3781569965870308, "grad_norm": 1.008952514422943, "learning_rate": 1.0300295733525774e-06, "loss": 0.026, "step": 6057 }, { "epoch": 1.3783845278725826, "grad_norm": 0.8655700871947668, "learning_rate": 1.0299615192365015e-06, "loss": 0.0169, "step": 6058 }, { "epoch": 1.3786120591581343, "grad_norm": 1.0614823863183105, "learning_rate": 1.0298934568437535e-06, "loss": 0.025, "step": 6059 }, { "epoch": 1.378839590443686, "grad_norm": 1.2671893505632115, "learning_rate": 1.029825386175725e-06, "loss": 0.032, "step": 6060 }, { "epoch": 1.3790671217292378, "grad_norm": 0.8414566963740582, "learning_rate": 1.029757307233807e-06, "loss": 0.029, "step": 6061 }, { "epoch": 1.3792946530147896, "grad_norm": 1.4410574086008923, "learning_rate": 1.0296892200193908e-06, "loss": 0.0407, "step": 6062 }, { "epoch": 1.3795221843003413, "grad_norm": 1.160321097652087, "learning_rate": 1.029621124533868e-06, "loss": 0.0177, "step": 6063 }, { "epoch": 1.379749715585893, "grad_norm": 1.0069056688464664, "learning_rate": 1.0295530207786307e-06, "loss": 0.0153, "step": 6064 }, { "epoch": 1.3799772468714449, "grad_norm": 0.9343986607113232, "learning_rate": 1.0294849087550703e-06, "loss": 0.0236, "step": 6065 }, { "epoch": 1.3802047781569966, "grad_norm": 1.154725705666541, "learning_rate": 1.0294167884645795e-06, "loss": 0.0223, "step": 6066 }, { "epoch": 1.3804323094425484, "grad_norm": 1.106739391483581, "learning_rate": 1.02934865990855e-06, "loss": 0.0261, "step": 6067 }, { "epoch": 1.3806598407281, "grad_norm": 0.48989173855238655, "learning_rate": 1.0292805230883743e-06, "loss": 0.0084, "step": 6068 }, { "epoch": 1.3808873720136519, "grad_norm": 1.146572789777464, "learning_rate": 1.0292123780054452e-06, "loss": 0.0149, "step": 6069 }, { "epoch": 1.3811149032992036, "grad_norm": 1.508059128952438, "learning_rate": 1.0291442246611555e-06, "loss": 0.0348, "step": 6070 }, { "epoch": 1.3813424345847554, "grad_norm": 0.7179341697610161, "learning_rate": 1.029076063056898e-06, "loss": 0.0163, "step": 6071 }, { "epoch": 1.3815699658703071, "grad_norm": 1.6491912488062737, "learning_rate": 1.0290078931940656e-06, "loss": 0.0648, "step": 6072 }, { "epoch": 1.3817974971558589, "grad_norm": 0.7870360955520767, "learning_rate": 1.028939715074052e-06, "loss": 0.0152, "step": 6073 }, { "epoch": 1.3820250284414106, "grad_norm": 0.8138866022930463, "learning_rate": 1.0288715286982504e-06, "loss": 0.0189, "step": 6074 }, { "epoch": 1.3822525597269624, "grad_norm": 0.5554949851167332, "learning_rate": 1.0288033340680543e-06, "loss": 0.0088, "step": 6075 }, { "epoch": 1.3824800910125141, "grad_norm": 1.3233523342981628, "learning_rate": 1.0287351311848574e-06, "loss": 0.0433, "step": 6076 }, { "epoch": 1.3827076222980659, "grad_norm": 0.6777854674940008, "learning_rate": 1.028666920050054e-06, "loss": 0.0084, "step": 6077 }, { "epoch": 1.3829351535836176, "grad_norm": 1.2431302079486768, "learning_rate": 1.0285987006650381e-06, "loss": 0.0264, "step": 6078 }, { "epoch": 1.3831626848691694, "grad_norm": 1.0244884125500628, "learning_rate": 1.028530473031204e-06, "loss": 0.0354, "step": 6079 }, { "epoch": 1.3833902161547214, "grad_norm": 0.7987683729662631, "learning_rate": 1.0284622371499457e-06, "loss": 0.0082, "step": 6080 }, { "epoch": 1.383617747440273, "grad_norm": 1.1972261120154417, "learning_rate": 1.0283939930226584e-06, "loss": 0.0416, "step": 6081 }, { "epoch": 1.3838452787258249, "grad_norm": 0.7965597712940404, "learning_rate": 1.0283257406507366e-06, "loss": 0.0168, "step": 6082 }, { "epoch": 1.3840728100113766, "grad_norm": 1.678085519895521, "learning_rate": 1.0282574800355755e-06, "loss": 0.0396, "step": 6083 }, { "epoch": 1.3843003412969284, "grad_norm": 1.1602726953842701, "learning_rate": 1.0281892111785699e-06, "loss": 0.03, "step": 6084 }, { "epoch": 1.3845278725824801, "grad_norm": 0.5738281114784806, "learning_rate": 1.0281209340811151e-06, "loss": 0.0128, "step": 6085 }, { "epoch": 1.3847554038680319, "grad_norm": 1.2307920377037582, "learning_rate": 1.0280526487446069e-06, "loss": 0.0298, "step": 6086 }, { "epoch": 1.3849829351535836, "grad_norm": 1.0906060725259301, "learning_rate": 1.0279843551704409e-06, "loss": 0.0102, "step": 6087 }, { "epoch": 1.3852104664391354, "grad_norm": 1.1283393095113754, "learning_rate": 1.0279160533600121e-06, "loss": 0.0367, "step": 6088 }, { "epoch": 1.3854379977246871, "grad_norm": 1.1681818545410683, "learning_rate": 1.0278477433147176e-06, "loss": 0.0416, "step": 6089 }, { "epoch": 1.3856655290102389, "grad_norm": 1.1415077838638938, "learning_rate": 1.0277794250359529e-06, "loss": 0.0271, "step": 6090 }, { "epoch": 1.3858930602957906, "grad_norm": 1.5760408905808865, "learning_rate": 1.0277110985251142e-06, "loss": 0.0389, "step": 6091 }, { "epoch": 1.3861205915813424, "grad_norm": 2.721888747642395, "learning_rate": 1.0276427637835984e-06, "loss": 0.0733, "step": 6092 }, { "epoch": 1.3863481228668941, "grad_norm": 0.6394797109060689, "learning_rate": 1.0275744208128019e-06, "loss": 0.0085, "step": 6093 }, { "epoch": 1.3865756541524459, "grad_norm": 0.8175458312812267, "learning_rate": 1.0275060696141215e-06, "loss": 0.0158, "step": 6094 }, { "epoch": 1.3868031854379979, "grad_norm": 0.8768873105674826, "learning_rate": 1.027437710188954e-06, "loss": 0.0252, "step": 6095 }, { "epoch": 1.3870307167235496, "grad_norm": 1.1764335482126738, "learning_rate": 1.027369342538697e-06, "loss": 0.0383, "step": 6096 }, { "epoch": 1.3872582480091014, "grad_norm": 0.9523241021653495, "learning_rate": 1.0273009666647472e-06, "loss": 0.0148, "step": 6097 }, { "epoch": 1.387485779294653, "grad_norm": 1.1658646040112732, "learning_rate": 1.0272325825685028e-06, "loss": 0.0245, "step": 6098 }, { "epoch": 1.3877133105802049, "grad_norm": 0.7975610013435775, "learning_rate": 1.027164190251361e-06, "loss": 0.0165, "step": 6099 }, { "epoch": 1.3879408418657566, "grad_norm": 1.2045558237977083, "learning_rate": 1.0270957897147196e-06, "loss": 0.034, "step": 6100 }, { "epoch": 1.3881683731513084, "grad_norm": 1.1965276205251034, "learning_rate": 1.0270273809599764e-06, "loss": 0.0311, "step": 6101 }, { "epoch": 1.3883959044368601, "grad_norm": 0.851268325678731, "learning_rate": 1.0269589639885302e-06, "loss": 0.0167, "step": 6102 }, { "epoch": 1.3886234357224119, "grad_norm": 1.3240030725330403, "learning_rate": 1.0268905388017788e-06, "loss": 0.0269, "step": 6103 }, { "epoch": 1.3888509670079636, "grad_norm": 1.0202558011027276, "learning_rate": 1.0268221054011208e-06, "loss": 0.027, "step": 6104 }, { "epoch": 1.3890784982935154, "grad_norm": 1.2434564945488167, "learning_rate": 1.026753663787955e-06, "loss": 0.0185, "step": 6105 }, { "epoch": 1.3893060295790671, "grad_norm": 1.186497952145426, "learning_rate": 1.0266852139636799e-06, "loss": 0.0225, "step": 6106 }, { "epoch": 1.3895335608646189, "grad_norm": 1.6183733781772422, "learning_rate": 1.0266167559296946e-06, "loss": 0.0483, "step": 6107 }, { "epoch": 1.3897610921501706, "grad_norm": 2.1320506255788056, "learning_rate": 1.0265482896873986e-06, "loss": 0.0398, "step": 6108 }, { "epoch": 1.3899886234357224, "grad_norm": 0.7471768184650087, "learning_rate": 1.0264798152381907e-06, "loss": 0.0144, "step": 6109 }, { "epoch": 1.3902161547212741, "grad_norm": 1.1509914195740398, "learning_rate": 1.026411332583471e-06, "loss": 0.0267, "step": 6110 }, { "epoch": 1.3904436860068259, "grad_norm": 1.6574823707279855, "learning_rate": 1.0263428417246385e-06, "loss": 0.0395, "step": 6111 }, { "epoch": 1.3906712172923776, "grad_norm": 1.097456018217718, "learning_rate": 1.0262743426630935e-06, "loss": 0.0306, "step": 6112 }, { "epoch": 1.3908987485779294, "grad_norm": 0.8275409605053028, "learning_rate": 1.0262058354002357e-06, "loss": 0.0253, "step": 6113 }, { "epoch": 1.3911262798634811, "grad_norm": 0.741933960938598, "learning_rate": 1.0261373199374655e-06, "loss": 0.0117, "step": 6114 }, { "epoch": 1.391353811149033, "grad_norm": 0.806245283558632, "learning_rate": 1.026068796276183e-06, "loss": 0.0163, "step": 6115 }, { "epoch": 1.3915813424345846, "grad_norm": 1.2256667129702425, "learning_rate": 1.0260002644177892e-06, "loss": 0.0335, "step": 6116 }, { "epoch": 1.3918088737201364, "grad_norm": 1.433487886403296, "learning_rate": 1.025931724363684e-06, "loss": 0.0302, "step": 6117 }, { "epoch": 1.3920364050056881, "grad_norm": 1.2323013972098553, "learning_rate": 1.0258631761152687e-06, "loss": 0.0423, "step": 6118 }, { "epoch": 1.3922639362912401, "grad_norm": 1.844647486739168, "learning_rate": 1.0257946196739444e-06, "loss": 0.063, "step": 6119 }, { "epoch": 1.3924914675767919, "grad_norm": 0.8259588697399817, "learning_rate": 1.025726055041112e-06, "loss": 0.0201, "step": 6120 }, { "epoch": 1.3927189988623436, "grad_norm": 0.4898924275811768, "learning_rate": 1.0256574822181727e-06, "loss": 0.0174, "step": 6121 }, { "epoch": 1.3929465301478954, "grad_norm": 0.8058598212354585, "learning_rate": 1.0255889012065285e-06, "loss": 0.0172, "step": 6122 }, { "epoch": 1.3931740614334471, "grad_norm": 1.4437392885866793, "learning_rate": 1.025520312007581e-06, "loss": 0.0302, "step": 6123 }, { "epoch": 1.3934015927189989, "grad_norm": 1.1533771806604551, "learning_rate": 1.0254517146227314e-06, "loss": 0.0303, "step": 6124 }, { "epoch": 1.3936291240045506, "grad_norm": 1.7256511601711935, "learning_rate": 1.0253831090533823e-06, "loss": 0.0466, "step": 6125 }, { "epoch": 1.3938566552901024, "grad_norm": 1.6375877890419952, "learning_rate": 1.0253144953009357e-06, "loss": 0.0416, "step": 6126 }, { "epoch": 1.3940841865756541, "grad_norm": 1.210907244464415, "learning_rate": 1.025245873366794e-06, "loss": 0.0202, "step": 6127 }, { "epoch": 1.394311717861206, "grad_norm": 1.7819981672345377, "learning_rate": 1.0251772432523596e-06, "loss": 0.0485, "step": 6128 }, { "epoch": 1.3945392491467576, "grad_norm": 2.224440960495445, "learning_rate": 1.0251086049590355e-06, "loss": 0.0656, "step": 6129 }, { "epoch": 1.3947667804323094, "grad_norm": 1.0114089903725545, "learning_rate": 1.0250399584882239e-06, "loss": 0.0204, "step": 6130 }, { "epoch": 1.3949943117178611, "grad_norm": 1.0071772232385818, "learning_rate": 1.0249713038413285e-06, "loss": 0.0168, "step": 6131 }, { "epoch": 1.395221843003413, "grad_norm": 1.0545053650829948, "learning_rate": 1.024902641019752e-06, "loss": 0.0256, "step": 6132 }, { "epoch": 1.3954493742889647, "grad_norm": 0.901290123486691, "learning_rate": 1.024833970024898e-06, "loss": 0.0197, "step": 6133 }, { "epoch": 1.3956769055745166, "grad_norm": 1.3496311004496413, "learning_rate": 1.0247652908581697e-06, "loss": 0.0306, "step": 6134 }, { "epoch": 1.3959044368600684, "grad_norm": 1.3181866205349257, "learning_rate": 1.0246966035209712e-06, "loss": 0.0274, "step": 6135 }, { "epoch": 1.3961319681456201, "grad_norm": 1.1655739118413535, "learning_rate": 1.024627908014706e-06, "loss": 0.0272, "step": 6136 }, { "epoch": 1.3963594994311719, "grad_norm": 0.6907363078845099, "learning_rate": 1.0245592043407784e-06, "loss": 0.0159, "step": 6137 }, { "epoch": 1.3965870307167236, "grad_norm": 0.7193043243021984, "learning_rate": 1.0244904925005924e-06, "loss": 0.0096, "step": 6138 }, { "epoch": 1.3968145620022754, "grad_norm": 1.10393335812822, "learning_rate": 1.0244217724955523e-06, "loss": 0.0364, "step": 6139 }, { "epoch": 1.3970420932878271, "grad_norm": 1.0603654771057396, "learning_rate": 1.0243530443270627e-06, "loss": 0.0183, "step": 6140 }, { "epoch": 1.3972696245733789, "grad_norm": 0.7222265706967784, "learning_rate": 1.0242843079965281e-06, "loss": 0.0101, "step": 6141 }, { "epoch": 1.3974971558589306, "grad_norm": 1.6811181869493026, "learning_rate": 1.024215563505354e-06, "loss": 0.0257, "step": 6142 }, { "epoch": 1.3977246871444824, "grad_norm": 0.6300121361838538, "learning_rate": 1.0241468108549443e-06, "loss": 0.0109, "step": 6143 }, { "epoch": 1.3979522184300341, "grad_norm": 0.942491659732893, "learning_rate": 1.024078050046705e-06, "loss": 0.0307, "step": 6144 }, { "epoch": 1.398179749715586, "grad_norm": 1.455940520193875, "learning_rate": 1.0240092810820412e-06, "loss": 0.0234, "step": 6145 }, { "epoch": 1.3984072810011376, "grad_norm": 1.8583374809051696, "learning_rate": 1.0239405039623585e-06, "loss": 0.0429, "step": 6146 }, { "epoch": 1.3986348122866894, "grad_norm": 0.9130814882867986, "learning_rate": 1.0238717186890625e-06, "loss": 0.018, "step": 6147 }, { "epoch": 1.3988623435722412, "grad_norm": 1.1845147398658868, "learning_rate": 1.0238029252635591e-06, "loss": 0.0247, "step": 6148 }, { "epoch": 1.399089874857793, "grad_norm": 0.839565054462471, "learning_rate": 1.0237341236872544e-06, "loss": 0.0117, "step": 6149 }, { "epoch": 1.3993174061433447, "grad_norm": 0.7511833754662133, "learning_rate": 1.0236653139615542e-06, "loss": 0.0142, "step": 6150 }, { "epoch": 1.3995449374288964, "grad_norm": 0.7794622692038276, "learning_rate": 1.0235964960878655e-06, "loss": 0.0179, "step": 6151 }, { "epoch": 1.3997724687144482, "grad_norm": 0.9503219335351416, "learning_rate": 1.023527670067594e-06, "loss": 0.0213, "step": 6152 }, { "epoch": 1.4, "grad_norm": 1.10300561276624, "learning_rate": 1.023458835902147e-06, "loss": 0.0364, "step": 6153 }, { "epoch": 1.4002275312855517, "grad_norm": 1.28977779817073, "learning_rate": 1.0233899935929311e-06, "loss": 0.0373, "step": 6154 }, { "epoch": 1.4004550625711034, "grad_norm": 11.609520950996195, "learning_rate": 1.0233211431413534e-06, "loss": 0.0171, "step": 6155 }, { "epoch": 1.4006825938566552, "grad_norm": 0.8504313269319569, "learning_rate": 1.023252284548821e-06, "loss": 0.0224, "step": 6156 }, { "epoch": 1.400910125142207, "grad_norm": 1.4928827347926052, "learning_rate": 1.0231834178167412e-06, "loss": 0.04, "step": 6157 }, { "epoch": 1.401137656427759, "grad_norm": 0.8946676834128838, "learning_rate": 1.0231145429465216e-06, "loss": 0.0179, "step": 6158 }, { "epoch": 1.4013651877133106, "grad_norm": 1.1255364229538867, "learning_rate": 1.02304565993957e-06, "loss": 0.0204, "step": 6159 }, { "epoch": 1.4015927189988624, "grad_norm": 0.9409196567271754, "learning_rate": 1.022976768797294e-06, "loss": 0.0135, "step": 6160 }, { "epoch": 1.4018202502844141, "grad_norm": 1.6268367112917375, "learning_rate": 1.0229078695211015e-06, "loss": 0.0306, "step": 6161 }, { "epoch": 1.402047781569966, "grad_norm": 1.3902005456097397, "learning_rate": 1.0228389621124011e-06, "loss": 0.0304, "step": 6162 }, { "epoch": 1.4022753128555177, "grad_norm": 1.5439359170326747, "learning_rate": 1.022770046572601e-06, "loss": 0.0418, "step": 6163 }, { "epoch": 1.4025028441410694, "grad_norm": 1.0405833401494775, "learning_rate": 1.0227011229031095e-06, "loss": 0.027, "step": 6164 }, { "epoch": 1.4027303754266212, "grad_norm": 1.0531666297415323, "learning_rate": 1.0226321911053353e-06, "loss": 0.0154, "step": 6165 }, { "epoch": 1.402957906712173, "grad_norm": 1.1548106181399105, "learning_rate": 1.0225632511806873e-06, "loss": 0.0195, "step": 6166 }, { "epoch": 1.4031854379977247, "grad_norm": 0.8440928382130881, "learning_rate": 1.0224943031305747e-06, "loss": 0.0149, "step": 6167 }, { "epoch": 1.4034129692832764, "grad_norm": 1.262449747166055, "learning_rate": 1.0224253469564067e-06, "loss": 0.029, "step": 6168 }, { "epoch": 1.4036405005688282, "grad_norm": 1.4654938363999002, "learning_rate": 1.0223563826595923e-06, "loss": 0.0461, "step": 6169 }, { "epoch": 1.40386803185438, "grad_norm": 1.980710283153395, "learning_rate": 1.0222874102415412e-06, "loss": 0.0344, "step": 6170 }, { "epoch": 1.4040955631399317, "grad_norm": 1.5348477981590474, "learning_rate": 1.0222184297036628e-06, "loss": 0.0411, "step": 6171 }, { "epoch": 1.4043230944254836, "grad_norm": 1.4468014368091555, "learning_rate": 1.0221494410473674e-06, "loss": 0.0326, "step": 6172 }, { "epoch": 1.4045506257110354, "grad_norm": 1.0219615062149323, "learning_rate": 1.0220804442740648e-06, "loss": 0.0211, "step": 6173 }, { "epoch": 1.4047781569965871, "grad_norm": 1.0935211989568192, "learning_rate": 1.022011439385165e-06, "loss": 0.0193, "step": 6174 }, { "epoch": 1.405005688282139, "grad_norm": 1.2466607977301363, "learning_rate": 1.0219424263820784e-06, "loss": 0.0313, "step": 6175 }, { "epoch": 1.4052332195676907, "grad_norm": 1.3325638727923352, "learning_rate": 1.0218734052662158e-06, "loss": 0.0285, "step": 6176 }, { "epoch": 1.4054607508532424, "grad_norm": 1.2957750834848885, "learning_rate": 1.0218043760389875e-06, "loss": 0.04, "step": 6177 }, { "epoch": 1.4056882821387942, "grad_norm": 0.836332776641381, "learning_rate": 1.0217353387018045e-06, "loss": 0.0126, "step": 6178 }, { "epoch": 1.405915813424346, "grad_norm": 1.0248086467082256, "learning_rate": 1.0216662932560779e-06, "loss": 0.0201, "step": 6179 }, { "epoch": 1.4061433447098977, "grad_norm": 1.1101913879706153, "learning_rate": 1.0215972397032185e-06, "loss": 0.0195, "step": 6180 }, { "epoch": 1.4063708759954494, "grad_norm": 1.314224111106247, "learning_rate": 1.0215281780446378e-06, "loss": 0.0369, "step": 6181 }, { "epoch": 1.4065984072810012, "grad_norm": 1.4531464271881769, "learning_rate": 1.0214591082817477e-06, "loss": 0.0473, "step": 6182 }, { "epoch": 1.406825938566553, "grad_norm": 1.420628464660094, "learning_rate": 1.0213900304159592e-06, "loss": 0.0244, "step": 6183 }, { "epoch": 1.4070534698521047, "grad_norm": 1.039601484802879, "learning_rate": 1.0213209444486844e-06, "loss": 0.0141, "step": 6184 }, { "epoch": 1.4072810011376564, "grad_norm": 1.4560729122411875, "learning_rate": 1.0212518503813356e-06, "loss": 0.0354, "step": 6185 }, { "epoch": 1.4075085324232082, "grad_norm": 1.1373268608112097, "learning_rate": 1.0211827482153244e-06, "loss": 0.0256, "step": 6186 }, { "epoch": 1.40773606370876, "grad_norm": 1.9894618121379821, "learning_rate": 1.0211136379520636e-06, "loss": 0.0619, "step": 6187 }, { "epoch": 1.4079635949943117, "grad_norm": 1.0161488340364868, "learning_rate": 1.0210445195929653e-06, "loss": 0.0115, "step": 6188 }, { "epoch": 1.4081911262798634, "grad_norm": 1.209292084474128, "learning_rate": 1.0209753931394426e-06, "loss": 0.0385, "step": 6189 }, { "epoch": 1.4084186575654152, "grad_norm": 0.5428346750526308, "learning_rate": 1.0209062585929077e-06, "loss": 0.0073, "step": 6190 }, { "epoch": 1.408646188850967, "grad_norm": 1.264679875063447, "learning_rate": 1.0208371159547742e-06, "loss": 0.0157, "step": 6191 }, { "epoch": 1.4088737201365187, "grad_norm": 0.8975530088077646, "learning_rate": 1.020767965226455e-06, "loss": 0.0226, "step": 6192 }, { "epoch": 1.4091012514220704, "grad_norm": 1.2170421461893541, "learning_rate": 1.0206988064093633e-06, "loss": 0.0229, "step": 6193 }, { "epoch": 1.4093287827076222, "grad_norm": 1.4713267946729547, "learning_rate": 1.0206296395049128e-06, "loss": 0.026, "step": 6194 }, { "epoch": 1.409556313993174, "grad_norm": 0.799539608207469, "learning_rate": 1.020560464514517e-06, "loss": 0.0083, "step": 6195 }, { "epoch": 1.4097838452787257, "grad_norm": 1.2012299824313988, "learning_rate": 1.0204912814395898e-06, "loss": 0.0313, "step": 6196 }, { "epoch": 1.4100113765642777, "grad_norm": 0.6653702932749492, "learning_rate": 1.020422090281545e-06, "loss": 0.0091, "step": 6197 }, { "epoch": 1.4102389078498294, "grad_norm": 0.9721621164097385, "learning_rate": 1.0203528910417967e-06, "loss": 0.028, "step": 6198 }, { "epoch": 1.4104664391353812, "grad_norm": 1.0457933906877754, "learning_rate": 1.0202836837217597e-06, "loss": 0.0191, "step": 6199 }, { "epoch": 1.410693970420933, "grad_norm": 0.8908414799276027, "learning_rate": 1.0202144683228478e-06, "loss": 0.0208, "step": 6200 }, { "epoch": 1.4109215017064847, "grad_norm": 1.0085943408848066, "learning_rate": 1.0201452448464762e-06, "loss": 0.0195, "step": 6201 }, { "epoch": 1.4111490329920364, "grad_norm": 4.061894671138372, "learning_rate": 1.0200760132940597e-06, "loss": 0.0959, "step": 6202 }, { "epoch": 1.4113765642775882, "grad_norm": 0.681487143918558, "learning_rate": 1.0200067736670125e-06, "loss": 0.0199, "step": 6203 }, { "epoch": 1.41160409556314, "grad_norm": 0.9015094053704671, "learning_rate": 1.0199375259667505e-06, "loss": 0.0361, "step": 6204 }, { "epoch": 1.4118316268486917, "grad_norm": 1.3272825952215426, "learning_rate": 1.0198682701946889e-06, "loss": 0.0534, "step": 6205 }, { "epoch": 1.4120591581342434, "grad_norm": 1.132521458300331, "learning_rate": 1.0197990063522428e-06, "loss": 0.0212, "step": 6206 }, { "epoch": 1.4122866894197952, "grad_norm": 0.8792620678025737, "learning_rate": 1.0197297344408284e-06, "loss": 0.0347, "step": 6207 }, { "epoch": 1.412514220705347, "grad_norm": 1.4367192638283304, "learning_rate": 1.0196604544618607e-06, "loss": 0.0303, "step": 6208 }, { "epoch": 1.4127417519908987, "grad_norm": 1.0722086308564236, "learning_rate": 1.0195911664167562e-06, "loss": 0.0212, "step": 6209 }, { "epoch": 1.4129692832764504, "grad_norm": 2.002458559768931, "learning_rate": 1.0195218703069311e-06, "loss": 0.0213, "step": 6210 }, { "epoch": 1.4131968145620024, "grad_norm": 1.304428975916366, "learning_rate": 1.0194525661338014e-06, "loss": 0.0279, "step": 6211 }, { "epoch": 1.4134243458475542, "grad_norm": 1.0699552185731178, "learning_rate": 1.0193832538987838e-06, "loss": 0.0169, "step": 6212 }, { "epoch": 1.413651877133106, "grad_norm": 0.7360253534935643, "learning_rate": 1.0193139336032945e-06, "loss": 0.0152, "step": 6213 }, { "epoch": 1.4138794084186577, "grad_norm": 0.7178080008393654, "learning_rate": 1.0192446052487505e-06, "loss": 0.028, "step": 6214 }, { "epoch": 1.4141069397042094, "grad_norm": 1.0835136894120514, "learning_rate": 1.0191752688365691e-06, "loss": 0.045, "step": 6215 }, { "epoch": 1.4143344709897612, "grad_norm": 1.1029027025275766, "learning_rate": 1.019105924368167e-06, "loss": 0.0259, "step": 6216 }, { "epoch": 1.414562002275313, "grad_norm": 0.8923013888405551, "learning_rate": 1.0190365718449616e-06, "loss": 0.0116, "step": 6217 }, { "epoch": 1.4147895335608647, "grad_norm": 1.2539584305545126, "learning_rate": 1.0189672112683704e-06, "loss": 0.0195, "step": 6218 }, { "epoch": 1.4150170648464164, "grad_norm": 1.3274709438566543, "learning_rate": 1.0188978426398107e-06, "loss": 0.036, "step": 6219 }, { "epoch": 1.4152445961319682, "grad_norm": 1.0618852132476584, "learning_rate": 1.0188284659607007e-06, "loss": 0.0281, "step": 6220 }, { "epoch": 1.41547212741752, "grad_norm": 0.9193492374472514, "learning_rate": 1.018759081232458e-06, "loss": 0.0374, "step": 6221 }, { "epoch": 1.4156996587030717, "grad_norm": 0.5586627911962394, "learning_rate": 1.0186896884565005e-06, "loss": 0.0096, "step": 6222 }, { "epoch": 1.4159271899886234, "grad_norm": 0.6214003540618175, "learning_rate": 1.0186202876342473e-06, "loss": 0.0107, "step": 6223 }, { "epoch": 1.4161547212741752, "grad_norm": 0.9172538558057997, "learning_rate": 1.0185508787671162e-06, "loss": 0.0298, "step": 6224 }, { "epoch": 1.416382252559727, "grad_norm": 1.2046274625206315, "learning_rate": 1.0184814618565257e-06, "loss": 0.0355, "step": 6225 }, { "epoch": 1.4166097838452787, "grad_norm": 1.0097184663278695, "learning_rate": 1.0184120369038948e-06, "loss": 0.0296, "step": 6226 }, { "epoch": 1.4168373151308304, "grad_norm": 1.6454059603970002, "learning_rate": 1.0183426039106425e-06, "loss": 0.0514, "step": 6227 }, { "epoch": 1.4170648464163822, "grad_norm": 1.8322270655293305, "learning_rate": 1.0182731628781876e-06, "loss": 0.0295, "step": 6228 }, { "epoch": 1.417292377701934, "grad_norm": 0.9245095966214673, "learning_rate": 1.0182037138079494e-06, "loss": 0.0261, "step": 6229 }, { "epoch": 1.4175199089874857, "grad_norm": 1.9220130390307328, "learning_rate": 1.0181342567013477e-06, "loss": 0.0398, "step": 6230 }, { "epoch": 1.4177474402730375, "grad_norm": 0.8271648914590224, "learning_rate": 1.0180647915598017e-06, "loss": 0.0261, "step": 6231 }, { "epoch": 1.4179749715585892, "grad_norm": 1.0929333293095134, "learning_rate": 1.017995318384731e-06, "loss": 0.0413, "step": 6232 }, { "epoch": 1.418202502844141, "grad_norm": 1.0356716624792288, "learning_rate": 1.017925837177556e-06, "loss": 0.0248, "step": 6233 }, { "epoch": 1.4184300341296927, "grad_norm": 1.0477341375465965, "learning_rate": 1.0178563479396964e-06, "loss": 0.0291, "step": 6234 }, { "epoch": 1.4186575654152445, "grad_norm": 5.47070249270738, "learning_rate": 1.0177868506725725e-06, "loss": 0.0942, "step": 6235 }, { "epoch": 1.4188850967007964, "grad_norm": 1.4224582455243118, "learning_rate": 1.0177173453776047e-06, "loss": 0.0342, "step": 6236 }, { "epoch": 1.4191126279863482, "grad_norm": 1.9233599739151919, "learning_rate": 1.0176478320562136e-06, "loss": 0.056, "step": 6237 }, { "epoch": 1.4193401592719, "grad_norm": 0.9663548468064327, "learning_rate": 1.01757831070982e-06, "loss": 0.0276, "step": 6238 }, { "epoch": 1.4195676905574517, "grad_norm": 1.357744989256782, "learning_rate": 1.0175087813398446e-06, "loss": 0.0514, "step": 6239 }, { "epoch": 1.4197952218430034, "grad_norm": 0.7221163854356452, "learning_rate": 1.0174392439477087e-06, "loss": 0.0149, "step": 6240 }, { "epoch": 1.4200227531285552, "grad_norm": 1.587061634386172, "learning_rate": 1.0173696985348333e-06, "loss": 0.0596, "step": 6241 }, { "epoch": 1.420250284414107, "grad_norm": 0.7502705383802173, "learning_rate": 1.0173001451026396e-06, "loss": 0.0093, "step": 6242 }, { "epoch": 1.4204778156996587, "grad_norm": 0.7101344663498327, "learning_rate": 1.0172305836525498e-06, "loss": 0.0205, "step": 6243 }, { "epoch": 1.4207053469852104, "grad_norm": 0.754211158471996, "learning_rate": 1.017161014185985e-06, "loss": 0.0143, "step": 6244 }, { "epoch": 1.4209328782707622, "grad_norm": 0.9800518919994446, "learning_rate": 1.0170914367043672e-06, "loss": 0.0254, "step": 6245 }, { "epoch": 1.421160409556314, "grad_norm": 0.8703204775778092, "learning_rate": 1.0170218512091188e-06, "loss": 0.0154, "step": 6246 }, { "epoch": 1.4213879408418657, "grad_norm": 0.844078965636516, "learning_rate": 1.0169522577016614e-06, "loss": 0.022, "step": 6247 }, { "epoch": 1.4216154721274175, "grad_norm": 0.675422468791866, "learning_rate": 1.016882656183418e-06, "loss": 0.0082, "step": 6248 }, { "epoch": 1.4218430034129692, "grad_norm": 1.1004804368030876, "learning_rate": 1.0168130466558106e-06, "loss": 0.0125, "step": 6249 }, { "epoch": 1.4220705346985212, "grad_norm": 0.6727724492875137, "learning_rate": 1.0167434291202622e-06, "loss": 0.0126, "step": 6250 }, { "epoch": 1.422298065984073, "grad_norm": 0.9283584425159678, "learning_rate": 1.0166738035781954e-06, "loss": 0.0304, "step": 6251 }, { "epoch": 1.4225255972696247, "grad_norm": 0.9319135535277618, "learning_rate": 1.0166041700310334e-06, "loss": 0.0154, "step": 6252 }, { "epoch": 1.4227531285551764, "grad_norm": 0.9010921406225174, "learning_rate": 1.0165345284801995e-06, "loss": 0.0253, "step": 6253 }, { "epoch": 1.4229806598407282, "grad_norm": 1.2295648463971003, "learning_rate": 1.0164648789271167e-06, "loss": 0.0501, "step": 6254 }, { "epoch": 1.42320819112628, "grad_norm": 1.2697785438424416, "learning_rate": 1.016395221373209e-06, "loss": 0.046, "step": 6255 }, { "epoch": 1.4234357224118317, "grad_norm": 1.1004200253409915, "learning_rate": 1.0163255558198995e-06, "loss": 0.0248, "step": 6256 }, { "epoch": 1.4236632536973834, "grad_norm": 1.0079675426588957, "learning_rate": 1.0162558822686123e-06, "loss": 0.0323, "step": 6257 }, { "epoch": 1.4238907849829352, "grad_norm": 1.5356918423524708, "learning_rate": 1.0161862007207715e-06, "loss": 0.0522, "step": 6258 }, { "epoch": 1.424118316268487, "grad_norm": 0.753426366532939, "learning_rate": 1.0161165111778013e-06, "loss": 0.0105, "step": 6259 }, { "epoch": 1.4243458475540387, "grad_norm": 0.7423159215816296, "learning_rate": 1.0160468136411258e-06, "loss": 0.017, "step": 6260 }, { "epoch": 1.4245733788395905, "grad_norm": 1.5638797678798362, "learning_rate": 1.0159771081121697e-06, "loss": 0.041, "step": 6261 }, { "epoch": 1.4248009101251422, "grad_norm": 1.3149626939167967, "learning_rate": 1.0159073945923575e-06, "loss": 0.0332, "step": 6262 }, { "epoch": 1.425028441410694, "grad_norm": 1.573177812301038, "learning_rate": 1.015837673083114e-06, "loss": 0.0519, "step": 6263 }, { "epoch": 1.4252559726962457, "grad_norm": 1.0751260698885305, "learning_rate": 1.0157679435858643e-06, "loss": 0.0305, "step": 6264 }, { "epoch": 1.4254835039817975, "grad_norm": 0.9230599364770781, "learning_rate": 1.0156982061020335e-06, "loss": 0.0179, "step": 6265 }, { "epoch": 1.4257110352673492, "grad_norm": 1.4181072269182498, "learning_rate": 1.0156284606330468e-06, "loss": 0.0491, "step": 6266 }, { "epoch": 1.425938566552901, "grad_norm": 0.9090067280998548, "learning_rate": 1.0155587071803298e-06, "loss": 0.0232, "step": 6267 }, { "epoch": 1.4261660978384527, "grad_norm": 1.383666121761416, "learning_rate": 1.0154889457453082e-06, "loss": 0.0241, "step": 6268 }, { "epoch": 1.4263936291240045, "grad_norm": 0.7621859675263093, "learning_rate": 1.0154191763294077e-06, "loss": 0.0235, "step": 6269 }, { "epoch": 1.4266211604095562, "grad_norm": 1.2108840735457889, "learning_rate": 1.015349398934054e-06, "loss": 0.0401, "step": 6270 }, { "epoch": 1.426848691695108, "grad_norm": 0.7260177397917599, "learning_rate": 1.0152796135606739e-06, "loss": 0.011, "step": 6271 }, { "epoch": 1.4270762229806597, "grad_norm": 0.7749106615077918, "learning_rate": 1.015209820210693e-06, "loss": 0.0223, "step": 6272 }, { "epoch": 1.4273037542662115, "grad_norm": 1.0764688037172783, "learning_rate": 1.015140018885538e-06, "loss": 0.0347, "step": 6273 }, { "epoch": 1.4275312855517632, "grad_norm": 0.7315552904513073, "learning_rate": 1.0150702095866354e-06, "loss": 0.0146, "step": 6274 }, { "epoch": 1.4277588168373152, "grad_norm": 1.4725847050231393, "learning_rate": 1.0150003923154124e-06, "loss": 0.0286, "step": 6275 }, { "epoch": 1.427986348122867, "grad_norm": 0.8172080214482219, "learning_rate": 1.0149305670732953e-06, "loss": 0.0172, "step": 6276 }, { "epoch": 1.4282138794084187, "grad_norm": 0.9065092467190615, "learning_rate": 1.0148607338617118e-06, "loss": 0.026, "step": 6277 }, { "epoch": 1.4284414106939705, "grad_norm": 1.055574219901434, "learning_rate": 1.0147908926820887e-06, "loss": 0.0325, "step": 6278 }, { "epoch": 1.4286689419795222, "grad_norm": 1.3895263245690272, "learning_rate": 1.014721043535854e-06, "loss": 0.0309, "step": 6279 }, { "epoch": 1.428896473265074, "grad_norm": 1.3548353082702096, "learning_rate": 1.0146511864244344e-06, "loss": 0.02, "step": 6280 }, { "epoch": 1.4291240045506257, "grad_norm": 0.996001028629038, "learning_rate": 1.0145813213492587e-06, "loss": 0.0206, "step": 6281 }, { "epoch": 1.4293515358361775, "grad_norm": 1.5083691859247366, "learning_rate": 1.0145114483117539e-06, "loss": 0.0256, "step": 6282 }, { "epoch": 1.4295790671217292, "grad_norm": 0.8549655620512744, "learning_rate": 1.0144415673133485e-06, "loss": 0.0239, "step": 6283 }, { "epoch": 1.429806598407281, "grad_norm": 1.3514935766543403, "learning_rate": 1.0143716783554709e-06, "loss": 0.021, "step": 6284 }, { "epoch": 1.4300341296928327, "grad_norm": 1.1741487580376555, "learning_rate": 1.0143017814395489e-06, "loss": 0.0162, "step": 6285 }, { "epoch": 1.4302616609783845, "grad_norm": 0.9905348741277206, "learning_rate": 1.0142318765670117e-06, "loss": 0.0204, "step": 6286 }, { "epoch": 1.4304891922639362, "grad_norm": 1.5929745991128101, "learning_rate": 1.0141619637392878e-06, "loss": 0.0469, "step": 6287 }, { "epoch": 1.430716723549488, "grad_norm": 3.2298820198178886, "learning_rate": 1.0140920429578061e-06, "loss": 0.1133, "step": 6288 }, { "epoch": 1.43094425483504, "grad_norm": 1.4247664034317506, "learning_rate": 1.0140221142239957e-06, "loss": 0.028, "step": 6289 }, { "epoch": 1.4311717861205917, "grad_norm": 0.7515197976704004, "learning_rate": 1.0139521775392856e-06, "loss": 0.0304, "step": 6290 }, { "epoch": 1.4313993174061435, "grad_norm": 0.9354685764612675, "learning_rate": 1.0138822329051052e-06, "loss": 0.0182, "step": 6291 }, { "epoch": 1.4316268486916952, "grad_norm": 1.0152548776003998, "learning_rate": 1.0138122803228843e-06, "loss": 0.0161, "step": 6292 }, { "epoch": 1.431854379977247, "grad_norm": 1.2022208108630814, "learning_rate": 1.0137423197940527e-06, "loss": 0.0154, "step": 6293 }, { "epoch": 1.4320819112627987, "grad_norm": 1.095549117230315, "learning_rate": 1.0136723513200396e-06, "loss": 0.0291, "step": 6294 }, { "epoch": 1.4323094425483505, "grad_norm": 1.5045560124052015, "learning_rate": 1.0136023749022759e-06, "loss": 0.0491, "step": 6295 }, { "epoch": 1.4325369738339022, "grad_norm": 1.6455974388894132, "learning_rate": 1.013532390542191e-06, "loss": 0.0169, "step": 6296 }, { "epoch": 1.432764505119454, "grad_norm": 1.3646221962527803, "learning_rate": 1.0134623982412156e-06, "loss": 0.0343, "step": 6297 }, { "epoch": 1.4329920364050057, "grad_norm": 1.7956652104090614, "learning_rate": 1.0133923980007804e-06, "loss": 0.0231, "step": 6298 }, { "epoch": 1.4332195676905575, "grad_norm": 0.9926711065918172, "learning_rate": 1.013322389822316e-06, "loss": 0.0149, "step": 6299 }, { "epoch": 1.4334470989761092, "grad_norm": 1.3656153995620692, "learning_rate": 1.0132523737072528e-06, "loss": 0.0342, "step": 6300 }, { "epoch": 1.433674630261661, "grad_norm": 0.7242163481709295, "learning_rate": 1.0131823496570222e-06, "loss": 0.0118, "step": 6301 }, { "epoch": 1.4339021615472127, "grad_norm": 0.8670547748875898, "learning_rate": 1.0131123176730555e-06, "loss": 0.0221, "step": 6302 }, { "epoch": 1.4341296928327645, "grad_norm": 1.5995659761831407, "learning_rate": 1.0130422777567837e-06, "loss": 0.0308, "step": 6303 }, { "epoch": 1.4343572241183162, "grad_norm": 1.0654736743914723, "learning_rate": 1.0129722299096385e-06, "loss": 0.0221, "step": 6304 }, { "epoch": 1.434584755403868, "grad_norm": 0.7322779348375932, "learning_rate": 1.0129021741330514e-06, "loss": 0.017, "step": 6305 }, { "epoch": 1.4348122866894197, "grad_norm": 1.371952571606394, "learning_rate": 1.0128321104284541e-06, "loss": 0.0339, "step": 6306 }, { "epoch": 1.4350398179749715, "grad_norm": 1.6200877785866907, "learning_rate": 1.0127620387972789e-06, "loss": 0.0389, "step": 6307 }, { "epoch": 1.4352673492605232, "grad_norm": 1.3440385321706123, "learning_rate": 1.0126919592409578e-06, "loss": 0.0194, "step": 6308 }, { "epoch": 1.435494880546075, "grad_norm": 1.5916470330075667, "learning_rate": 1.012621871760923e-06, "loss": 0.0454, "step": 6309 }, { "epoch": 1.4357224118316267, "grad_norm": 0.9723442385422495, "learning_rate": 1.012551776358607e-06, "loss": 0.0207, "step": 6310 }, { "epoch": 1.4359499431171785, "grad_norm": 1.327551352285537, "learning_rate": 1.0124816730354428e-06, "loss": 0.0317, "step": 6311 }, { "epoch": 1.4361774744027302, "grad_norm": 1.1035989928859236, "learning_rate": 1.0124115617928626e-06, "loss": 0.0205, "step": 6312 }, { "epoch": 1.4364050056882822, "grad_norm": 0.752935991153831, "learning_rate": 1.0123414426322995e-06, "loss": 0.0168, "step": 6313 }, { "epoch": 1.436632536973834, "grad_norm": 1.5986259540246315, "learning_rate": 1.0122713155551867e-06, "loss": 0.0655, "step": 6314 }, { "epoch": 1.4368600682593857, "grad_norm": 1.251656443778033, "learning_rate": 1.0122011805629574e-06, "loss": 0.0381, "step": 6315 }, { "epoch": 1.4370875995449375, "grad_norm": 1.0440643194056838, "learning_rate": 1.0121310376570454e-06, "loss": 0.024, "step": 6316 }, { "epoch": 1.4373151308304892, "grad_norm": 2.2417861503091276, "learning_rate": 1.0120608868388837e-06, "loss": 0.045, "step": 6317 }, { "epoch": 1.437542662116041, "grad_norm": 1.1700571736014582, "learning_rate": 1.0119907281099066e-06, "loss": 0.0401, "step": 6318 }, { "epoch": 1.4377701934015927, "grad_norm": 0.9069348186314079, "learning_rate": 1.0119205614715476e-06, "loss": 0.0176, "step": 6319 }, { "epoch": 1.4379977246871445, "grad_norm": 1.9355113829065758, "learning_rate": 1.011850386925241e-06, "loss": 0.0294, "step": 6320 }, { "epoch": 1.4382252559726962, "grad_norm": 1.4052995825330266, "learning_rate": 1.011780204472421e-06, "loss": 0.0155, "step": 6321 }, { "epoch": 1.438452787258248, "grad_norm": 1.1704734179841323, "learning_rate": 1.0117100141145217e-06, "loss": 0.0319, "step": 6322 }, { "epoch": 1.4386803185437997, "grad_norm": 1.3977711378669946, "learning_rate": 1.0116398158529782e-06, "loss": 0.0358, "step": 6323 }, { "epoch": 1.4389078498293515, "grad_norm": 1.0414067986088198, "learning_rate": 1.0115696096892247e-06, "loss": 0.0293, "step": 6324 }, { "epoch": 1.4391353811149032, "grad_norm": 1.081914953392961, "learning_rate": 1.0114993956246968e-06, "loss": 0.0227, "step": 6325 }, { "epoch": 1.439362912400455, "grad_norm": 0.9696704441650997, "learning_rate": 1.0114291736608289e-06, "loss": 0.0269, "step": 6326 }, { "epoch": 1.4395904436860067, "grad_norm": 1.0191772217507478, "learning_rate": 1.0113589437990562e-06, "loss": 0.0266, "step": 6327 }, { "epoch": 1.4398179749715587, "grad_norm": 0.9746405255609321, "learning_rate": 1.0112887060408144e-06, "loss": 0.0301, "step": 6328 }, { "epoch": 1.4400455062571105, "grad_norm": 0.871919751178116, "learning_rate": 1.0112184603875387e-06, "loss": 0.0153, "step": 6329 }, { "epoch": 1.4402730375426622, "grad_norm": 1.052877314511932, "learning_rate": 1.0111482068406655e-06, "loss": 0.0118, "step": 6330 }, { "epoch": 1.440500568828214, "grad_norm": 1.418051417100814, "learning_rate": 1.0110779454016299e-06, "loss": 0.0269, "step": 6331 }, { "epoch": 1.4407281001137657, "grad_norm": 0.7054078297819081, "learning_rate": 1.011007676071868e-06, "loss": 0.0128, "step": 6332 }, { "epoch": 1.4409556313993175, "grad_norm": 0.9151449048408781, "learning_rate": 1.0109373988528161e-06, "loss": 0.0188, "step": 6333 }, { "epoch": 1.4411831626848692, "grad_norm": 0.6914366508726025, "learning_rate": 1.010867113745911e-06, "loss": 0.0152, "step": 6334 }, { "epoch": 1.441410693970421, "grad_norm": 0.8898176154301342, "learning_rate": 1.0107968207525884e-06, "loss": 0.0178, "step": 6335 }, { "epoch": 1.4416382252559727, "grad_norm": 1.24440607526392, "learning_rate": 1.0107265198742855e-06, "loss": 0.0426, "step": 6336 }, { "epoch": 1.4418657565415245, "grad_norm": 1.195011653706321, "learning_rate": 1.010656211112439e-06, "loss": 0.0279, "step": 6337 }, { "epoch": 1.4420932878270762, "grad_norm": 1.4854065146041637, "learning_rate": 1.0105858944684856e-06, "loss": 0.023, "step": 6338 }, { "epoch": 1.442320819112628, "grad_norm": 0.5511163443762787, "learning_rate": 1.010515569943863e-06, "loss": 0.0116, "step": 6339 }, { "epoch": 1.4425483503981797, "grad_norm": 1.2043446207357231, "learning_rate": 1.0104452375400078e-06, "loss": 0.0202, "step": 6340 }, { "epoch": 1.4427758816837315, "grad_norm": 1.3000326710853358, "learning_rate": 1.0103748972583582e-06, "loss": 0.0465, "step": 6341 }, { "epoch": 1.4430034129692833, "grad_norm": 1.3675162595675465, "learning_rate": 1.0103045491003514e-06, "loss": 0.0323, "step": 6342 }, { "epoch": 1.443230944254835, "grad_norm": 0.7420030229566207, "learning_rate": 1.0102341930674252e-06, "loss": 0.0237, "step": 6343 }, { "epoch": 1.4434584755403868, "grad_norm": 1.122028425409587, "learning_rate": 1.0101638291610176e-06, "loss": 0.0437, "step": 6344 }, { "epoch": 1.4436860068259385, "grad_norm": 2.271929983664725, "learning_rate": 1.0100934573825668e-06, "loss": 0.0384, "step": 6345 }, { "epoch": 1.4439135381114903, "grad_norm": 2.0024195181198214, "learning_rate": 1.010023077733511e-06, "loss": 0.0459, "step": 6346 }, { "epoch": 1.444141069397042, "grad_norm": 1.2513837267760826, "learning_rate": 1.0099526902152886e-06, "loss": 0.0245, "step": 6347 }, { "epoch": 1.4443686006825938, "grad_norm": 0.8662028583353463, "learning_rate": 1.0098822948293382e-06, "loss": 0.0222, "step": 6348 }, { "epoch": 1.4445961319681455, "grad_norm": 1.3336701254562313, "learning_rate": 1.0098118915770985e-06, "loss": 0.0467, "step": 6349 }, { "epoch": 1.4448236632536973, "grad_norm": 1.301840631614471, "learning_rate": 1.0097414804600087e-06, "loss": 0.036, "step": 6350 }, { "epoch": 1.445051194539249, "grad_norm": 0.7933102665199757, "learning_rate": 1.0096710614795077e-06, "loss": 0.0134, "step": 6351 }, { "epoch": 1.445278725824801, "grad_norm": 1.6967569868348495, "learning_rate": 1.0096006346370344e-06, "loss": 0.0453, "step": 6352 }, { "epoch": 1.4455062571103527, "grad_norm": 1.9726304694822052, "learning_rate": 1.0095301999340285e-06, "loss": 0.0373, "step": 6353 }, { "epoch": 1.4457337883959045, "grad_norm": 1.118511972338071, "learning_rate": 1.0094597573719297e-06, "loss": 0.0306, "step": 6354 }, { "epoch": 1.4459613196814562, "grad_norm": 1.3932750265447595, "learning_rate": 1.0093893069521777e-06, "loss": 0.0254, "step": 6355 }, { "epoch": 1.446188850967008, "grad_norm": 0.8939501658395617, "learning_rate": 1.009318848676212e-06, "loss": 0.0154, "step": 6356 }, { "epoch": 1.4464163822525598, "grad_norm": 0.8973575337225248, "learning_rate": 1.0092483825454729e-06, "loss": 0.0333, "step": 6357 }, { "epoch": 1.4466439135381115, "grad_norm": 0.8404430334263568, "learning_rate": 1.0091779085614006e-06, "loss": 0.0114, "step": 6358 }, { "epoch": 1.4468714448236633, "grad_norm": 0.554800530533077, "learning_rate": 1.0091074267254355e-06, "loss": 0.0072, "step": 6359 }, { "epoch": 1.447098976109215, "grad_norm": 1.3794649246980744, "learning_rate": 1.009036937039018e-06, "loss": 0.0345, "step": 6360 }, { "epoch": 1.4473265073947668, "grad_norm": 0.9682097442548065, "learning_rate": 1.008966439503589e-06, "loss": 0.0387, "step": 6361 }, { "epoch": 1.4475540386803185, "grad_norm": 0.9402002903723223, "learning_rate": 1.008895934120589e-06, "loss": 0.0147, "step": 6362 }, { "epoch": 1.4477815699658703, "grad_norm": 1.1075685325307638, "learning_rate": 1.0088254208914593e-06, "loss": 0.0223, "step": 6363 }, { "epoch": 1.448009101251422, "grad_norm": 1.2528562350335635, "learning_rate": 1.0087548998176409e-06, "loss": 0.0224, "step": 6364 }, { "epoch": 1.4482366325369738, "grad_norm": 1.0342471484913283, "learning_rate": 1.008684370900575e-06, "loss": 0.0391, "step": 6365 }, { "epoch": 1.4484641638225255, "grad_norm": 0.541143297382043, "learning_rate": 1.0086138341417035e-06, "loss": 0.01, "step": 6366 }, { "epoch": 1.4486916951080775, "grad_norm": 1.6811565885259934, "learning_rate": 1.0085432895424674e-06, "loss": 0.0642, "step": 6367 }, { "epoch": 1.4489192263936292, "grad_norm": 0.7476243414143443, "learning_rate": 1.0084727371043094e-06, "loss": 0.0168, "step": 6368 }, { "epoch": 1.449146757679181, "grad_norm": 1.1464144466295128, "learning_rate": 1.0084021768286706e-06, "loss": 0.0245, "step": 6369 }, { "epoch": 1.4493742889647327, "grad_norm": 1.359249500318607, "learning_rate": 1.0083316087169935e-06, "loss": 0.0627, "step": 6370 }, { "epoch": 1.4496018202502845, "grad_norm": 2.075043421424735, "learning_rate": 1.0082610327707204e-06, "loss": 0.0234, "step": 6371 }, { "epoch": 1.4498293515358363, "grad_norm": 1.7107015538291868, "learning_rate": 1.0081904489912937e-06, "loss": 0.0327, "step": 6372 }, { "epoch": 1.450056882821388, "grad_norm": 1.1424090301708094, "learning_rate": 1.008119857380156e-06, "loss": 0.0227, "step": 6373 }, { "epoch": 1.4502844141069398, "grad_norm": 1.8234753567513349, "learning_rate": 1.0080492579387503e-06, "loss": 0.0455, "step": 6374 }, { "epoch": 1.4505119453924915, "grad_norm": 0.7117928424230715, "learning_rate": 1.007978650668519e-06, "loss": 0.0081, "step": 6375 }, { "epoch": 1.4507394766780433, "grad_norm": 0.9207913101740711, "learning_rate": 1.0079080355709057e-06, "loss": 0.0194, "step": 6376 }, { "epoch": 1.450967007963595, "grad_norm": 1.8055497591635554, "learning_rate": 1.007837412647353e-06, "loss": 0.0275, "step": 6377 }, { "epoch": 1.4511945392491468, "grad_norm": 1.0900059173773669, "learning_rate": 1.007766781899305e-06, "loss": 0.0196, "step": 6378 }, { "epoch": 1.4514220705346985, "grad_norm": 0.7549560575688554, "learning_rate": 1.0076961433282051e-06, "loss": 0.0102, "step": 6379 }, { "epoch": 1.4516496018202503, "grad_norm": 1.0184772578883805, "learning_rate": 1.0076254969354967e-06, "loss": 0.0188, "step": 6380 }, { "epoch": 1.451877133105802, "grad_norm": 0.941415605985606, "learning_rate": 1.0075548427226241e-06, "loss": 0.0349, "step": 6381 }, { "epoch": 1.4521046643913538, "grad_norm": 0.8934463803567299, "learning_rate": 1.007484180691031e-06, "loss": 0.0147, "step": 6382 }, { "epoch": 1.4523321956769055, "grad_norm": 1.6447864640806058, "learning_rate": 1.0074135108421616e-06, "loss": 0.0414, "step": 6383 }, { "epoch": 1.4525597269624573, "grad_norm": 0.7619214840074895, "learning_rate": 1.0073428331774605e-06, "loss": 0.0194, "step": 6384 }, { "epoch": 1.452787258248009, "grad_norm": 1.042967290966513, "learning_rate": 1.007272147698372e-06, "loss": 0.0216, "step": 6385 }, { "epoch": 1.4530147895335608, "grad_norm": 0.7609050337711548, "learning_rate": 1.0072014544063412e-06, "loss": 0.0108, "step": 6386 }, { "epoch": 1.4532423208191125, "grad_norm": 0.8698525287916742, "learning_rate": 1.0071307533028125e-06, "loss": 0.029, "step": 6387 }, { "epoch": 1.4534698521046643, "grad_norm": 0.8501257290692393, "learning_rate": 1.007060044389231e-06, "loss": 0.0345, "step": 6388 }, { "epoch": 1.453697383390216, "grad_norm": 0.9391000541726464, "learning_rate": 1.006989327667042e-06, "loss": 0.0256, "step": 6389 }, { "epoch": 1.4539249146757678, "grad_norm": 1.8647278292453908, "learning_rate": 1.0069186031376906e-06, "loss": 0.0392, "step": 6390 }, { "epoch": 1.4541524459613198, "grad_norm": 0.7819994697112215, "learning_rate": 1.0068478708026224e-06, "loss": 0.0122, "step": 6391 }, { "epoch": 1.4543799772468715, "grad_norm": 0.585110905309953, "learning_rate": 1.0067771306632832e-06, "loss": 0.0126, "step": 6392 }, { "epoch": 1.4546075085324233, "grad_norm": 0.9364890195145283, "learning_rate": 1.0067063827211184e-06, "loss": 0.0136, "step": 6393 }, { "epoch": 1.454835039817975, "grad_norm": 0.6498349687781143, "learning_rate": 1.0066356269775744e-06, "loss": 0.0103, "step": 6394 }, { "epoch": 1.4550625711035268, "grad_norm": 0.869864688376475, "learning_rate": 1.0065648634340971e-06, "loss": 0.0219, "step": 6395 }, { "epoch": 1.4552901023890785, "grad_norm": 1.1931980839721092, "learning_rate": 1.0064940920921328e-06, "loss": 0.0287, "step": 6396 }, { "epoch": 1.4555176336746303, "grad_norm": 1.3209455372197918, "learning_rate": 1.006423312953128e-06, "loss": 0.0482, "step": 6397 }, { "epoch": 1.455745164960182, "grad_norm": 1.5390872099106532, "learning_rate": 1.0063525260185288e-06, "loss": 0.0114, "step": 6398 }, { "epoch": 1.4559726962457338, "grad_norm": 0.7920775851368451, "learning_rate": 1.0062817312897826e-06, "loss": 0.0124, "step": 6399 }, { "epoch": 1.4562002275312855, "grad_norm": 0.9021573629870503, "learning_rate": 1.0062109287683364e-06, "loss": 0.0191, "step": 6400 }, { "epoch": 1.4564277588168373, "grad_norm": 0.9639937227508991, "learning_rate": 1.0061401184556366e-06, "loss": 0.0165, "step": 6401 }, { "epoch": 1.456655290102389, "grad_norm": 1.3012355962020263, "learning_rate": 1.006069300353131e-06, "loss": 0.0381, "step": 6402 }, { "epoch": 1.4568828213879408, "grad_norm": 1.421780962954957, "learning_rate": 1.005998474462267e-06, "loss": 0.0278, "step": 6403 }, { "epoch": 1.4571103526734925, "grad_norm": 0.7020431371817161, "learning_rate": 1.0059276407844915e-06, "loss": 0.0136, "step": 6404 }, { "epoch": 1.4573378839590443, "grad_norm": 0.610596243564309, "learning_rate": 1.0058567993212528e-06, "loss": 0.0105, "step": 6405 }, { "epoch": 1.4575654152445963, "grad_norm": 1.6868440616932545, "learning_rate": 1.005785950073999e-06, "loss": 0.0662, "step": 6406 }, { "epoch": 1.457792946530148, "grad_norm": 1.3076997172219658, "learning_rate": 1.0057150930441772e-06, "loss": 0.0494, "step": 6407 }, { "epoch": 1.4580204778156998, "grad_norm": 1.8180694190438493, "learning_rate": 1.0056442282332365e-06, "loss": 0.0256, "step": 6408 }, { "epoch": 1.4582480091012515, "grad_norm": 0.7591625918315809, "learning_rate": 1.0055733556426248e-06, "loss": 0.0251, "step": 6409 }, { "epoch": 1.4584755403868033, "grad_norm": 0.8738294046187783, "learning_rate": 1.0055024752737906e-06, "loss": 0.0264, "step": 6410 }, { "epoch": 1.458703071672355, "grad_norm": 0.9064382129589225, "learning_rate": 1.0054315871281828e-06, "loss": 0.0214, "step": 6411 }, { "epoch": 1.4589306029579068, "grad_norm": 1.6826022489740848, "learning_rate": 1.00536069120725e-06, "loss": 0.0397, "step": 6412 }, { "epoch": 1.4591581342434585, "grad_norm": 0.8081842544846835, "learning_rate": 1.0052897875124412e-06, "loss": 0.0179, "step": 6413 }, { "epoch": 1.4593856655290103, "grad_norm": 1.0489435189018754, "learning_rate": 1.005218876045206e-06, "loss": 0.0258, "step": 6414 }, { "epoch": 1.459613196814562, "grad_norm": 0.9107275649992217, "learning_rate": 1.0051479568069927e-06, "loss": 0.0159, "step": 6415 }, { "epoch": 1.4598407281001138, "grad_norm": 0.7882760746921257, "learning_rate": 1.0050770297992517e-06, "loss": 0.0188, "step": 6416 }, { "epoch": 1.4600682593856655, "grad_norm": 0.494600424451502, "learning_rate": 1.0050060950234324e-06, "loss": 0.0091, "step": 6417 }, { "epoch": 1.4602957906712173, "grad_norm": 0.8887210333396641, "learning_rate": 1.0049351524809842e-06, "loss": 0.0234, "step": 6418 }, { "epoch": 1.460523321956769, "grad_norm": 0.8818727125075025, "learning_rate": 1.0048642021733576e-06, "loss": 0.0203, "step": 6419 }, { "epoch": 1.4607508532423208, "grad_norm": 0.8286346226624387, "learning_rate": 1.0047932441020022e-06, "loss": 0.0141, "step": 6420 }, { "epoch": 1.4609783845278725, "grad_norm": 1.0017852943824, "learning_rate": 1.0047222782683686e-06, "loss": 0.0184, "step": 6421 }, { "epoch": 1.4612059158134243, "grad_norm": 1.0992796278491528, "learning_rate": 1.0046513046739069e-06, "loss": 0.0177, "step": 6422 }, { "epoch": 1.461433447098976, "grad_norm": 1.0966388790320833, "learning_rate": 1.0045803233200679e-06, "loss": 0.0229, "step": 6423 }, { "epoch": 1.4616609783845278, "grad_norm": 1.1017149602413754, "learning_rate": 1.0045093342083022e-06, "loss": 0.0191, "step": 6424 }, { "epoch": 1.4618885096700796, "grad_norm": 1.6564794597904529, "learning_rate": 1.0044383373400608e-06, "loss": 0.0371, "step": 6425 }, { "epoch": 1.4621160409556313, "grad_norm": 2.1788289964022813, "learning_rate": 1.0043673327167946e-06, "loss": 0.0461, "step": 6426 }, { "epoch": 1.462343572241183, "grad_norm": 0.9615176745408612, "learning_rate": 1.004296320339955e-06, "loss": 0.0163, "step": 6427 }, { "epoch": 1.4625711035267348, "grad_norm": 0.9958220543488695, "learning_rate": 1.0042253002109933e-06, "loss": 0.0214, "step": 6428 }, { "epoch": 1.4627986348122866, "grad_norm": 0.78557582039727, "learning_rate": 1.004154272331361e-06, "loss": 0.0136, "step": 6429 }, { "epoch": 1.4630261660978385, "grad_norm": 1.0936828441296087, "learning_rate": 1.0040832367025097e-06, "loss": 0.0219, "step": 6430 }, { "epoch": 1.4632536973833903, "grad_norm": 0.7390343812119207, "learning_rate": 1.0040121933258912e-06, "loss": 0.01, "step": 6431 }, { "epoch": 1.463481228668942, "grad_norm": 1.4582085989040046, "learning_rate": 1.0039411422029578e-06, "loss": 0.0313, "step": 6432 }, { "epoch": 1.4637087599544938, "grad_norm": 0.9523062629655219, "learning_rate": 1.0038700833351612e-06, "loss": 0.0245, "step": 6433 }, { "epoch": 1.4639362912400455, "grad_norm": 1.2205425053268553, "learning_rate": 1.0037990167239542e-06, "loss": 0.0243, "step": 6434 }, { "epoch": 1.4641638225255973, "grad_norm": 1.3769758384207615, "learning_rate": 1.0037279423707888e-06, "loss": 0.0284, "step": 6435 }, { "epoch": 1.464391353811149, "grad_norm": 1.6348768046013533, "learning_rate": 1.0036568602771179e-06, "loss": 0.0674, "step": 6436 }, { "epoch": 1.4646188850967008, "grad_norm": 0.7786100751808739, "learning_rate": 1.0035857704443944e-06, "loss": 0.0112, "step": 6437 }, { "epoch": 1.4648464163822525, "grad_norm": 1.2999313140929, "learning_rate": 1.0035146728740712e-06, "loss": 0.0364, "step": 6438 }, { "epoch": 1.4650739476678043, "grad_norm": 1.6927396967754325, "learning_rate": 1.0034435675676012e-06, "loss": 0.027, "step": 6439 }, { "epoch": 1.465301478953356, "grad_norm": 0.7196939694803933, "learning_rate": 1.0033724545264378e-06, "loss": 0.0061, "step": 6440 }, { "epoch": 1.4655290102389078, "grad_norm": 0.8716526278131789, "learning_rate": 1.0033013337520342e-06, "loss": 0.0146, "step": 6441 }, { "epoch": 1.4657565415244596, "grad_norm": 1.048978533991283, "learning_rate": 1.0032302052458443e-06, "loss": 0.0315, "step": 6442 }, { "epoch": 1.4659840728100113, "grad_norm": 1.0777036173381194, "learning_rate": 1.0031590690093214e-06, "loss": 0.0334, "step": 6443 }, { "epoch": 1.466211604095563, "grad_norm": 0.4736534501721457, "learning_rate": 1.0030879250439203e-06, "loss": 0.0114, "step": 6444 }, { "epoch": 1.466439135381115, "grad_norm": 0.6021310354060467, "learning_rate": 1.003016773351094e-06, "loss": 0.0097, "step": 6445 }, { "epoch": 1.4666666666666668, "grad_norm": 0.8649430411936431, "learning_rate": 1.0029456139322973e-06, "loss": 0.0162, "step": 6446 }, { "epoch": 1.4668941979522185, "grad_norm": 1.543095421141427, "learning_rate": 1.0028744467889845e-06, "loss": 0.0291, "step": 6447 }, { "epoch": 1.4671217292377703, "grad_norm": 1.5988742387248307, "learning_rate": 1.0028032719226097e-06, "loss": 0.0373, "step": 6448 }, { "epoch": 1.467349260523322, "grad_norm": 2.0643928637015563, "learning_rate": 1.0027320893346284e-06, "loss": 0.0459, "step": 6449 }, { "epoch": 1.4675767918088738, "grad_norm": 2.220711557926011, "learning_rate": 1.0026608990264946e-06, "loss": 0.0321, "step": 6450 }, { "epoch": 1.4678043230944255, "grad_norm": 0.9445356318647576, "learning_rate": 1.0025897009996639e-06, "loss": 0.0226, "step": 6451 }, { "epoch": 1.4680318543799773, "grad_norm": 0.6330305963301595, "learning_rate": 1.0025184952555911e-06, "loss": 0.0205, "step": 6452 }, { "epoch": 1.468259385665529, "grad_norm": 0.9293483419699585, "learning_rate": 1.0024472817957318e-06, "loss": 0.0085, "step": 6453 }, { "epoch": 1.4684869169510808, "grad_norm": 1.4315734788723715, "learning_rate": 1.0023760606215412e-06, "loss": 0.0367, "step": 6454 }, { "epoch": 1.4687144482366326, "grad_norm": 1.1555294375278542, "learning_rate": 1.0023048317344752e-06, "loss": 0.0213, "step": 6455 }, { "epoch": 1.4689419795221843, "grad_norm": 1.0518687824248476, "learning_rate": 1.0022335951359894e-06, "loss": 0.0175, "step": 6456 }, { "epoch": 1.469169510807736, "grad_norm": 0.951733146012008, "learning_rate": 1.0021623508275397e-06, "loss": 0.027, "step": 6457 }, { "epoch": 1.4693970420932878, "grad_norm": 1.9723448140457374, "learning_rate": 1.0020910988105822e-06, "loss": 0.0413, "step": 6458 }, { "epoch": 1.4696245733788396, "grad_norm": 1.1686953011150991, "learning_rate": 1.0020198390865735e-06, "loss": 0.032, "step": 6459 }, { "epoch": 1.4698521046643913, "grad_norm": 0.8845231672296715, "learning_rate": 1.0019485716569696e-06, "loss": 0.0192, "step": 6460 }, { "epoch": 1.470079635949943, "grad_norm": 1.34301819211037, "learning_rate": 1.0018772965232272e-06, "loss": 0.0272, "step": 6461 }, { "epoch": 1.4703071672354948, "grad_norm": 1.0900651393805174, "learning_rate": 1.0018060136868033e-06, "loss": 0.0398, "step": 6462 }, { "epoch": 1.4705346985210466, "grad_norm": 0.8738815245685796, "learning_rate": 1.0017347231491544e-06, "loss": 0.0088, "step": 6463 }, { "epoch": 1.4707622298065983, "grad_norm": 1.0394239050490557, "learning_rate": 1.0016634249117378e-06, "loss": 0.0181, "step": 6464 }, { "epoch": 1.47098976109215, "grad_norm": 1.684560812659319, "learning_rate": 1.0015921189760105e-06, "loss": 0.0782, "step": 6465 }, { "epoch": 1.4712172923777018, "grad_norm": 0.6453980486743178, "learning_rate": 1.00152080534343e-06, "loss": 0.0194, "step": 6466 }, { "epoch": 1.4714448236632536, "grad_norm": 0.7102456235440718, "learning_rate": 1.001449484015454e-06, "loss": 0.0173, "step": 6467 }, { "epoch": 1.4716723549488053, "grad_norm": 0.9401483471225579, "learning_rate": 1.0013781549935396e-06, "loss": 0.0304, "step": 6468 }, { "epoch": 1.4718998862343573, "grad_norm": 1.3395349325920776, "learning_rate": 1.0013068182791454e-06, "loss": 0.025, "step": 6469 }, { "epoch": 1.472127417519909, "grad_norm": 0.8791378015144556, "learning_rate": 1.0012354738737288e-06, "loss": 0.0331, "step": 6470 }, { "epoch": 1.4723549488054608, "grad_norm": 0.9918757720929136, "learning_rate": 1.0011641217787481e-06, "loss": 0.017, "step": 6471 }, { "epoch": 1.4725824800910126, "grad_norm": 1.0478775077595175, "learning_rate": 1.001092761995662e-06, "loss": 0.0391, "step": 6472 }, { "epoch": 1.4728100113765643, "grad_norm": 0.8503563945359769, "learning_rate": 1.0010213945259282e-06, "loss": 0.0202, "step": 6473 }, { "epoch": 1.473037542662116, "grad_norm": 1.222389885020765, "learning_rate": 1.000950019371006e-06, "loss": 0.0237, "step": 6474 }, { "epoch": 1.4732650739476678, "grad_norm": 0.8048051018799113, "learning_rate": 1.0008786365323539e-06, "loss": 0.0178, "step": 6475 }, { "epoch": 1.4734926052332196, "grad_norm": 1.5299229006902042, "learning_rate": 1.0008072460114308e-06, "loss": 0.0328, "step": 6476 }, { "epoch": 1.4737201365187713, "grad_norm": 1.6732137429792273, "learning_rate": 1.0007358478096959e-06, "loss": 0.0471, "step": 6477 }, { "epoch": 1.473947667804323, "grad_norm": 0.43763199665482255, "learning_rate": 1.0006644419286084e-06, "loss": 0.0098, "step": 6478 }, { "epoch": 1.4741751990898748, "grad_norm": 0.7028792539926481, "learning_rate": 1.0005930283696277e-06, "loss": 0.0085, "step": 6479 }, { "epoch": 1.4744027303754266, "grad_norm": 0.9331242445594948, "learning_rate": 1.0005216071342133e-06, "loss": 0.0197, "step": 6480 }, { "epoch": 1.4746302616609783, "grad_norm": 1.8900811534734059, "learning_rate": 1.000450178223825e-06, "loss": 0.0396, "step": 6481 }, { "epoch": 1.47485779294653, "grad_norm": 1.15876808023037, "learning_rate": 1.0003787416399226e-06, "loss": 0.0302, "step": 6482 }, { "epoch": 1.4750853242320818, "grad_norm": 1.3311084179933157, "learning_rate": 1.0003072973839665e-06, "loss": 0.0214, "step": 6483 }, { "epoch": 1.4753128555176338, "grad_norm": 0.7978453155730808, "learning_rate": 1.0002358454574163e-06, "loss": 0.0126, "step": 6484 }, { "epoch": 1.4755403868031856, "grad_norm": 1.3200907472733454, "learning_rate": 1.0001643858617326e-06, "loss": 0.0372, "step": 6485 }, { "epoch": 1.4757679180887373, "grad_norm": 0.7587699333301391, "learning_rate": 1.0000929185983762e-06, "loss": 0.0145, "step": 6486 }, { "epoch": 1.475995449374289, "grad_norm": 1.115889703946624, "learning_rate": 1.0000214436688074e-06, "loss": 0.0221, "step": 6487 }, { "epoch": 1.4762229806598408, "grad_norm": 1.7530265279400412, "learning_rate": 9.99949961074487e-07, "loss": 0.0517, "step": 6488 }, { "epoch": 1.4764505119453926, "grad_norm": 1.4986267673780238, "learning_rate": 9.998784708168762e-07, "loss": 0.0281, "step": 6489 }, { "epoch": 1.4766780432309443, "grad_norm": 1.1981264017896749, "learning_rate": 9.998069728974357e-07, "loss": 0.0351, "step": 6490 }, { "epoch": 1.476905574516496, "grad_norm": 0.8147906912659221, "learning_rate": 9.997354673176273e-07, "loss": 0.0211, "step": 6491 }, { "epoch": 1.4771331058020478, "grad_norm": 0.7325162068189855, "learning_rate": 9.996639540789124e-07, "loss": 0.015, "step": 6492 }, { "epoch": 1.4773606370875996, "grad_norm": 1.0318994529873236, "learning_rate": 9.995924331827521e-07, "loss": 0.0223, "step": 6493 }, { "epoch": 1.4775881683731513, "grad_norm": 1.0766661165747942, "learning_rate": 9.99520904630609e-07, "loss": 0.0217, "step": 6494 }, { "epoch": 1.477815699658703, "grad_norm": 1.052496175953845, "learning_rate": 9.994493684239443e-07, "loss": 0.0261, "step": 6495 }, { "epoch": 1.4780432309442548, "grad_norm": 0.740359492998119, "learning_rate": 9.993778245642202e-07, "loss": 0.0218, "step": 6496 }, { "epoch": 1.4782707622298066, "grad_norm": 1.1088535016409602, "learning_rate": 9.99306273052899e-07, "loss": 0.0158, "step": 6497 }, { "epoch": 1.4784982935153583, "grad_norm": 1.4976458680888785, "learning_rate": 9.99234713891443e-07, "loss": 0.021, "step": 6498 }, { "epoch": 1.47872582480091, "grad_norm": 0.6855826863773865, "learning_rate": 9.99163147081315e-07, "loss": 0.0099, "step": 6499 }, { "epoch": 1.4789533560864618, "grad_norm": 1.0945214632210196, "learning_rate": 9.990915726239774e-07, "loss": 0.0297, "step": 6500 }, { "epoch": 1.4791808873720136, "grad_norm": 1.0597968699238194, "learning_rate": 9.990199905208933e-07, "loss": 0.0391, "step": 6501 }, { "epoch": 1.4794084186575653, "grad_norm": 1.3770835022888792, "learning_rate": 9.989484007735256e-07, "loss": 0.0389, "step": 6502 }, { "epoch": 1.479635949943117, "grad_norm": 1.8160093879173487, "learning_rate": 9.988768033833374e-07, "loss": 0.0681, "step": 6503 }, { "epoch": 1.4798634812286688, "grad_norm": 1.7490337161767424, "learning_rate": 9.98805198351792e-07, "loss": 0.0662, "step": 6504 }, { "epoch": 1.4800910125142206, "grad_norm": 0.6476476571772248, "learning_rate": 9.98733585680353e-07, "loss": 0.0105, "step": 6505 }, { "epoch": 1.4803185437997723, "grad_norm": 0.7755957452558864, "learning_rate": 9.986619653704838e-07, "loss": 0.0139, "step": 6506 }, { "epoch": 1.480546075085324, "grad_norm": 0.633426255815866, "learning_rate": 9.985903374236487e-07, "loss": 0.017, "step": 6507 }, { "epoch": 1.480773606370876, "grad_norm": 0.8678772008391588, "learning_rate": 9.985187018413108e-07, "loss": 0.0257, "step": 6508 }, { "epoch": 1.4810011376564278, "grad_norm": 0.8694342307699764, "learning_rate": 9.98447058624935e-07, "loss": 0.0172, "step": 6509 }, { "epoch": 1.4812286689419796, "grad_norm": 1.2761367957965584, "learning_rate": 9.983754077759852e-07, "loss": 0.0369, "step": 6510 }, { "epoch": 1.4814562002275313, "grad_norm": 1.0645165105201808, "learning_rate": 9.98303749295926e-07, "loss": 0.03, "step": 6511 }, { "epoch": 1.481683731513083, "grad_norm": 1.1149897643794233, "learning_rate": 9.982320831862217e-07, "loss": 0.0244, "step": 6512 }, { "epoch": 1.4819112627986348, "grad_norm": 1.5255781412882516, "learning_rate": 9.981604094483374e-07, "loss": 0.0314, "step": 6513 }, { "epoch": 1.4821387940841866, "grad_norm": 1.43555128988365, "learning_rate": 9.980887280837377e-07, "loss": 0.0441, "step": 6514 }, { "epoch": 1.4823663253697383, "grad_norm": 1.16961605961412, "learning_rate": 9.980170390938873e-07, "loss": 0.0308, "step": 6515 }, { "epoch": 1.48259385665529, "grad_norm": 1.0707706605010894, "learning_rate": 9.979453424802522e-07, "loss": 0.0244, "step": 6516 }, { "epoch": 1.4828213879408418, "grad_norm": 1.2895035901019312, "learning_rate": 9.978736382442969e-07, "loss": 0.0494, "step": 6517 }, { "epoch": 1.4830489192263936, "grad_norm": 1.310843233111791, "learning_rate": 9.978019263874875e-07, "loss": 0.0301, "step": 6518 }, { "epoch": 1.4832764505119453, "grad_norm": 1.6244622239104036, "learning_rate": 9.977302069112896e-07, "loss": 0.0271, "step": 6519 }, { "epoch": 1.483503981797497, "grad_norm": 1.3621603406802865, "learning_rate": 9.97658479817169e-07, "loss": 0.0281, "step": 6520 }, { "epoch": 1.4837315130830488, "grad_norm": 0.7915875199989744, "learning_rate": 9.975867451065913e-07, "loss": 0.0114, "step": 6521 }, { "epoch": 1.4839590443686006, "grad_norm": 2.0905935656543404, "learning_rate": 9.97515002781023e-07, "loss": 0.0765, "step": 6522 }, { "epoch": 1.4841865756541526, "grad_norm": 1.2649643198155474, "learning_rate": 9.9744325284193e-07, "loss": 0.0327, "step": 6523 }, { "epoch": 1.4844141069397043, "grad_norm": 1.0390661565491186, "learning_rate": 9.973714952907792e-07, "loss": 0.027, "step": 6524 }, { "epoch": 1.484641638225256, "grad_norm": 1.5278981168386385, "learning_rate": 9.97299730129037e-07, "loss": 0.0397, "step": 6525 }, { "epoch": 1.4848691695108078, "grad_norm": 1.021782093064846, "learning_rate": 9.972279573581705e-07, "loss": 0.0194, "step": 6526 }, { "epoch": 1.4850967007963596, "grad_norm": 0.8266467944017118, "learning_rate": 9.97156176979646e-07, "loss": 0.022, "step": 6527 }, { "epoch": 1.4853242320819113, "grad_norm": 0.7868124068525434, "learning_rate": 9.970843889949305e-07, "loss": 0.011, "step": 6528 }, { "epoch": 1.485551763367463, "grad_norm": 1.189191061369537, "learning_rate": 9.97012593405492e-07, "loss": 0.0361, "step": 6529 }, { "epoch": 1.4857792946530148, "grad_norm": 1.2164226385849939, "learning_rate": 9.969407902127972e-07, "loss": 0.0171, "step": 6530 }, { "epoch": 1.4860068259385666, "grad_norm": 1.2574211803000732, "learning_rate": 9.968689794183137e-07, "loss": 0.0261, "step": 6531 }, { "epoch": 1.4862343572241183, "grad_norm": 1.3800274592660757, "learning_rate": 9.967971610235095e-07, "loss": 0.0298, "step": 6532 }, { "epoch": 1.48646188850967, "grad_norm": 2.596631249407389, "learning_rate": 9.967253350298522e-07, "loss": 0.0824, "step": 6533 }, { "epoch": 1.4866894197952218, "grad_norm": 1.2827191671719944, "learning_rate": 9.966535014388098e-07, "loss": 0.0133, "step": 6534 }, { "epoch": 1.4869169510807736, "grad_norm": 0.9864326301137617, "learning_rate": 9.965816602518505e-07, "loss": 0.0169, "step": 6535 }, { "epoch": 1.4871444823663253, "grad_norm": 1.3480233639513728, "learning_rate": 9.965098114704425e-07, "loss": 0.0469, "step": 6536 }, { "epoch": 1.487372013651877, "grad_norm": 0.685810034151907, "learning_rate": 9.964379550960544e-07, "loss": 0.0167, "step": 6537 }, { "epoch": 1.4875995449374289, "grad_norm": 0.9568981168441036, "learning_rate": 9.96366091130155e-07, "loss": 0.0314, "step": 6538 }, { "epoch": 1.4878270762229806, "grad_norm": 1.6341154779837714, "learning_rate": 9.962942195742125e-07, "loss": 0.0577, "step": 6539 }, { "epoch": 1.4880546075085324, "grad_norm": 8.811099545201115, "learning_rate": 9.96222340429696e-07, "loss": 0.1751, "step": 6540 }, { "epoch": 1.488282138794084, "grad_norm": 1.0114318005924805, "learning_rate": 9.96150453698075e-07, "loss": 0.029, "step": 6541 }, { "epoch": 1.4885096700796359, "grad_norm": 1.4441091534455175, "learning_rate": 9.960785593808187e-07, "loss": 0.0153, "step": 6542 }, { "epoch": 1.4887372013651876, "grad_norm": 48.46661837605496, "learning_rate": 9.960066574793959e-07, "loss": 0.488, "step": 6543 }, { "epoch": 1.4889647326507394, "grad_norm": 1.1015052782252979, "learning_rate": 9.959347479952764e-07, "loss": 0.0251, "step": 6544 }, { "epoch": 1.4891922639362911, "grad_norm": 1.708134539419919, "learning_rate": 9.958628309299303e-07, "loss": 0.0352, "step": 6545 }, { "epoch": 1.4894197952218429, "grad_norm": 1.1503598719279833, "learning_rate": 9.95790906284827e-07, "loss": 0.0181, "step": 6546 }, { "epoch": 1.4896473265073948, "grad_norm": 0.6162346862876698, "learning_rate": 9.957189740614364e-07, "loss": 0.0136, "step": 6547 }, { "epoch": 1.4898748577929466, "grad_norm": 0.9769826135275878, "learning_rate": 9.956470342612292e-07, "loss": 0.0321, "step": 6548 }, { "epoch": 1.4901023890784983, "grad_norm": 1.2898048218947735, "learning_rate": 9.955750868856753e-07, "loss": 0.0292, "step": 6549 }, { "epoch": 1.49032992036405, "grad_norm": 1.0495260184817348, "learning_rate": 9.955031319362455e-07, "loss": 0.0225, "step": 6550 }, { "epoch": 1.4905574516496019, "grad_norm": 0.9451273648772042, "learning_rate": 9.954311694144101e-07, "loss": 0.0365, "step": 6551 }, { "epoch": 1.4907849829351536, "grad_norm": 0.7141248917734317, "learning_rate": 9.9535919932164e-07, "loss": 0.0136, "step": 6552 }, { "epoch": 1.4910125142207054, "grad_norm": 1.5055029123037509, "learning_rate": 9.952872216594062e-07, "loss": 0.0327, "step": 6553 }, { "epoch": 1.491240045506257, "grad_norm": 1.2197166533456572, "learning_rate": 9.952152364291795e-07, "loss": 0.0182, "step": 6554 }, { "epoch": 1.4914675767918089, "grad_norm": 1.5013632322134673, "learning_rate": 9.951432436324317e-07, "loss": 0.058, "step": 6555 }, { "epoch": 1.4916951080773606, "grad_norm": 0.8229751970283513, "learning_rate": 9.950712432706338e-07, "loss": 0.0126, "step": 6556 }, { "epoch": 1.4919226393629124, "grad_norm": 0.9036383949717536, "learning_rate": 9.949992353452575e-07, "loss": 0.0185, "step": 6557 }, { "epoch": 1.4921501706484641, "grad_norm": 1.5314393854266743, "learning_rate": 9.949272198577741e-07, "loss": 0.0386, "step": 6558 }, { "epoch": 1.4923777019340159, "grad_norm": 1.2094828607200816, "learning_rate": 9.948551968096562e-07, "loss": 0.0248, "step": 6559 }, { "epoch": 1.4926052332195676, "grad_norm": 1.1532482925479424, "learning_rate": 9.947831662023751e-07, "loss": 0.0208, "step": 6560 }, { "epoch": 1.4928327645051196, "grad_norm": 0.9806602475157391, "learning_rate": 9.947111280374036e-07, "loss": 0.027, "step": 6561 }, { "epoch": 1.4930602957906713, "grad_norm": 1.60568299137254, "learning_rate": 9.946390823162136e-07, "loss": 0.0771, "step": 6562 }, { "epoch": 1.493287827076223, "grad_norm": 1.8385530312374776, "learning_rate": 9.945670290402778e-07, "loss": 0.027, "step": 6563 }, { "epoch": 1.4935153583617748, "grad_norm": 1.6642618859811353, "learning_rate": 9.944949682110689e-07, "loss": 0.0303, "step": 6564 }, { "epoch": 1.4937428896473266, "grad_norm": 1.6989309916692763, "learning_rate": 9.944228998300592e-07, "loss": 0.0306, "step": 6565 }, { "epoch": 1.4939704209328784, "grad_norm": 1.1418893235884928, "learning_rate": 9.943508238987223e-07, "loss": 0.0248, "step": 6566 }, { "epoch": 1.49419795221843, "grad_norm": 1.1063431497011869, "learning_rate": 9.942787404185307e-07, "loss": 0.021, "step": 6567 }, { "epoch": 1.4944254835039819, "grad_norm": 1.0900849834712658, "learning_rate": 9.942066493909582e-07, "loss": 0.039, "step": 6568 }, { "epoch": 1.4946530147895336, "grad_norm": 0.8812585966390157, "learning_rate": 9.941345508174778e-07, "loss": 0.0254, "step": 6569 }, { "epoch": 1.4948805460750854, "grad_norm": 0.9753213224487992, "learning_rate": 9.940624446995633e-07, "loss": 0.0231, "step": 6570 }, { "epoch": 1.4951080773606371, "grad_norm": 1.2089422433395995, "learning_rate": 9.939903310386883e-07, "loss": 0.0259, "step": 6571 }, { "epoch": 1.4953356086461889, "grad_norm": 0.5864095115048934, "learning_rate": 9.939182098363267e-07, "loss": 0.0128, "step": 6572 }, { "epoch": 1.4955631399317406, "grad_norm": 0.9001389442956854, "learning_rate": 9.938460810939526e-07, "loss": 0.0106, "step": 6573 }, { "epoch": 1.4957906712172924, "grad_norm": 1.0616564747090018, "learning_rate": 9.9377394481304e-07, "loss": 0.0293, "step": 6574 }, { "epoch": 1.4960182025028441, "grad_norm": 1.0975433183868526, "learning_rate": 9.937018009950637e-07, "loss": 0.0438, "step": 6575 }, { "epoch": 1.4962457337883959, "grad_norm": 0.8757355770920893, "learning_rate": 9.936296496414974e-07, "loss": 0.0155, "step": 6576 }, { "epoch": 1.4964732650739476, "grad_norm": 1.296977963478537, "learning_rate": 9.935574907538162e-07, "loss": 0.0299, "step": 6577 }, { "epoch": 1.4967007963594994, "grad_norm": 1.5770938202420768, "learning_rate": 9.93485324333495e-07, "loss": 0.0447, "step": 6578 }, { "epoch": 1.4969283276450511, "grad_norm": 1.8187781113713664, "learning_rate": 9.934131503820086e-07, "loss": 0.0376, "step": 6579 }, { "epoch": 1.4971558589306029, "grad_norm": 1.1921366812845895, "learning_rate": 9.933409689008323e-07, "loss": 0.021, "step": 6580 }, { "epoch": 1.4973833902161546, "grad_norm": 0.8690783796494854, "learning_rate": 9.932687798914408e-07, "loss": 0.0112, "step": 6581 }, { "epoch": 1.4976109215017064, "grad_norm": 0.7177586988388654, "learning_rate": 9.9319658335531e-07, "loss": 0.0203, "step": 6582 }, { "epoch": 1.4978384527872581, "grad_norm": 0.9905715701722341, "learning_rate": 9.931243792939157e-07, "loss": 0.0282, "step": 6583 }, { "epoch": 1.4980659840728099, "grad_norm": 0.5676357177462131, "learning_rate": 9.93052167708733e-07, "loss": 0.0154, "step": 6584 }, { "epoch": 1.4982935153583616, "grad_norm": 1.4780735039451949, "learning_rate": 9.929799486012381e-07, "loss": 0.0318, "step": 6585 }, { "epoch": 1.4985210466439136, "grad_norm": 0.8150823907413614, "learning_rate": 9.92907721972907e-07, "loss": 0.0158, "step": 6586 }, { "epoch": 1.4987485779294654, "grad_norm": 1.3145997284823179, "learning_rate": 9.928354878252156e-07, "loss": 0.0217, "step": 6587 }, { "epoch": 1.4989761092150171, "grad_norm": 1.3597840014937403, "learning_rate": 9.927632461596409e-07, "loss": 0.0229, "step": 6588 }, { "epoch": 1.4992036405005689, "grad_norm": 1.8450367592169252, "learning_rate": 9.926909969776588e-07, "loss": 0.0265, "step": 6589 }, { "epoch": 1.4994311717861206, "grad_norm": 1.0598284283095365, "learning_rate": 9.926187402807461e-07, "loss": 0.0196, "step": 6590 }, { "epoch": 1.4996587030716724, "grad_norm": 1.5414138432676403, "learning_rate": 9.925464760703796e-07, "loss": 0.0456, "step": 6591 }, { "epoch": 1.4998862343572241, "grad_norm": 1.3712705609079485, "learning_rate": 9.924742043480361e-07, "loss": 0.035, "step": 6592 }, { "epoch": 1.5001137656427759, "grad_norm": 0.9971920077226126, "learning_rate": 9.924019251151932e-07, "loss": 0.0123, "step": 6593 }, { "epoch": 1.5003412969283276, "grad_norm": 1.367423618774891, "learning_rate": 9.923296383733274e-07, "loss": 0.0332, "step": 6594 }, { "epoch": 1.5005688282138794, "grad_norm": 0.8103598457125527, "learning_rate": 9.92257344123917e-07, "loss": 0.015, "step": 6595 }, { "epoch": 1.5007963594994311, "grad_norm": 1.5852508733151163, "learning_rate": 9.921850423684387e-07, "loss": 0.0393, "step": 6596 }, { "epoch": 1.5010238907849829, "grad_norm": 0.7983856504921896, "learning_rate": 9.921127331083708e-07, "loss": 0.0155, "step": 6597 }, { "epoch": 1.5012514220705349, "grad_norm": 0.8110345399367997, "learning_rate": 9.92040416345191e-07, "loss": 0.0106, "step": 6598 }, { "epoch": 1.5014789533560866, "grad_norm": 1.057033281890757, "learning_rate": 9.91968092080377e-07, "loss": 0.0364, "step": 6599 }, { "epoch": 1.5017064846416384, "grad_norm": 0.763089743036924, "learning_rate": 9.918957603154076e-07, "loss": 0.0098, "step": 6600 }, { "epoch": 1.5019340159271901, "grad_norm": 1.349260836853775, "learning_rate": 9.918234210517606e-07, "loss": 0.0291, "step": 6601 }, { "epoch": 1.5021615472127419, "grad_norm": 0.9124150356814112, "learning_rate": 9.917510742909147e-07, "loss": 0.0359, "step": 6602 }, { "epoch": 1.5023890784982936, "grad_norm": 1.0789877329504083, "learning_rate": 9.916787200343487e-07, "loss": 0.0271, "step": 6603 }, { "epoch": 1.5026166097838454, "grad_norm": 0.8830240075896032, "learning_rate": 9.91606358283541e-07, "loss": 0.023, "step": 6604 }, { "epoch": 1.5028441410693971, "grad_norm": 0.9189612669050022, "learning_rate": 9.915339890399707e-07, "loss": 0.0187, "step": 6605 }, { "epoch": 1.5030716723549489, "grad_norm": 1.1223886637061031, "learning_rate": 9.914616123051172e-07, "loss": 0.0185, "step": 6606 }, { "epoch": 1.5032992036405006, "grad_norm": 1.1895451968922761, "learning_rate": 9.913892280804593e-07, "loss": 0.0219, "step": 6607 }, { "epoch": 1.5035267349260524, "grad_norm": 1.33425573815634, "learning_rate": 9.913168363674768e-07, "loss": 0.0273, "step": 6608 }, { "epoch": 1.5037542662116041, "grad_norm": 0.9253483092504505, "learning_rate": 9.912444371676489e-07, "loss": 0.0219, "step": 6609 }, { "epoch": 1.5039817974971559, "grad_norm": 1.1040945433765377, "learning_rate": 9.911720304824555e-07, "loss": 0.0206, "step": 6610 }, { "epoch": 1.5042093287827076, "grad_norm": 1.3224220455519788, "learning_rate": 9.910996163133762e-07, "loss": 0.0228, "step": 6611 }, { "epoch": 1.5044368600682594, "grad_norm": 1.4206586266356582, "learning_rate": 9.910271946618913e-07, "loss": 0.04, "step": 6612 }, { "epoch": 1.5046643913538111, "grad_norm": 2.5555274852028464, "learning_rate": 9.90954765529481e-07, "loss": 0.0588, "step": 6613 }, { "epoch": 1.5048919226393629, "grad_norm": 1.4075662347643996, "learning_rate": 9.908823289176255e-07, "loss": 0.0497, "step": 6614 }, { "epoch": 1.5051194539249146, "grad_norm": 1.8116951308916345, "learning_rate": 9.90809884827805e-07, "loss": 0.0628, "step": 6615 }, { "epoch": 1.5053469852104664, "grad_norm": 1.1027641178320662, "learning_rate": 9.907374332615007e-07, "loss": 0.0507, "step": 6616 }, { "epoch": 1.5055745164960181, "grad_norm": 0.6991004095047127, "learning_rate": 9.90664974220193e-07, "loss": 0.013, "step": 6617 }, { "epoch": 1.50580204778157, "grad_norm": 1.15443604824671, "learning_rate": 9.90592507705363e-07, "loss": 0.0198, "step": 6618 }, { "epoch": 1.5060295790671216, "grad_norm": 1.2034338936063906, "learning_rate": 9.905200337184915e-07, "loss": 0.0229, "step": 6619 }, { "epoch": 1.5062571103526734, "grad_norm": 1.4942841146862913, "learning_rate": 9.904475522610602e-07, "loss": 0.0322, "step": 6620 }, { "epoch": 1.5064846416382252, "grad_norm": 1.6969990633258083, "learning_rate": 9.9037506333455e-07, "loss": 0.0807, "step": 6621 }, { "epoch": 1.506712172923777, "grad_norm": 1.6210050209374949, "learning_rate": 9.90302566940443e-07, "loss": 0.0719, "step": 6622 }, { "epoch": 1.5069397042093287, "grad_norm": 1.2474940776817767, "learning_rate": 9.902300630802201e-07, "loss": 0.0399, "step": 6623 }, { "epoch": 1.5071672354948804, "grad_norm": 0.837642435435654, "learning_rate": 9.901575517553636e-07, "loss": 0.0157, "step": 6624 }, { "epoch": 1.5073947667804322, "grad_norm": 1.1560577549497977, "learning_rate": 9.900850329673559e-07, "loss": 0.0121, "step": 6625 }, { "epoch": 1.507622298065984, "grad_norm": 1.6091563866332808, "learning_rate": 9.900125067176782e-07, "loss": 0.0332, "step": 6626 }, { "epoch": 1.5078498293515359, "grad_norm": 0.5882706831462519, "learning_rate": 9.899399730078138e-07, "loss": 0.0221, "step": 6627 }, { "epoch": 1.5080773606370876, "grad_norm": 1.4498045295631503, "learning_rate": 9.898674318392446e-07, "loss": 0.0185, "step": 6628 }, { "epoch": 1.5083048919226394, "grad_norm": 1.4230136293074578, "learning_rate": 9.897948832134532e-07, "loss": 0.0249, "step": 6629 }, { "epoch": 1.5085324232081911, "grad_norm": 0.8420732883563372, "learning_rate": 9.897223271319227e-07, "loss": 0.0244, "step": 6630 }, { "epoch": 1.508759954493743, "grad_norm": 1.4719539102826928, "learning_rate": 9.896497635961357e-07, "loss": 0.0309, "step": 6631 }, { "epoch": 1.5089874857792946, "grad_norm": 0.8467326556394097, "learning_rate": 9.895771926075753e-07, "loss": 0.0252, "step": 6632 }, { "epoch": 1.5092150170648464, "grad_norm": 0.7904313040506917, "learning_rate": 9.895046141677248e-07, "loss": 0.0173, "step": 6633 }, { "epoch": 1.5094425483503981, "grad_norm": 1.141524501415206, "learning_rate": 9.894320282780675e-07, "loss": 0.0336, "step": 6634 }, { "epoch": 1.50967007963595, "grad_norm": 0.7509190456969517, "learning_rate": 9.89359434940087e-07, "loss": 0.0159, "step": 6635 }, { "epoch": 1.5098976109215017, "grad_norm": 1.8948621758043176, "learning_rate": 9.89286834155267e-07, "loss": 0.0376, "step": 6636 }, { "epoch": 1.5101251422070536, "grad_norm": 0.7530416159112719, "learning_rate": 9.89214225925091e-07, "loss": 0.0157, "step": 6637 }, { "epoch": 1.5103526734926054, "grad_norm": 0.7653184176286111, "learning_rate": 9.891416102510436e-07, "loss": 0.0186, "step": 6638 }, { "epoch": 1.5105802047781571, "grad_norm": 1.0986906061359811, "learning_rate": 9.890689871346084e-07, "loss": 0.0369, "step": 6639 }, { "epoch": 1.5108077360637089, "grad_norm": 1.0874136319612577, "learning_rate": 9.8899635657727e-07, "loss": 0.0265, "step": 6640 }, { "epoch": 1.5110352673492606, "grad_norm": 0.8527398550564912, "learning_rate": 9.889237185805126e-07, "loss": 0.0122, "step": 6641 }, { "epoch": 1.5112627986348124, "grad_norm": 0.6283273575316396, "learning_rate": 9.88851073145821e-07, "loss": 0.0199, "step": 6642 }, { "epoch": 1.5114903299203641, "grad_norm": 1.4497090224286908, "learning_rate": 9.887784202746797e-07, "loss": 0.0333, "step": 6643 }, { "epoch": 1.511717861205916, "grad_norm": 1.4938621283787956, "learning_rate": 9.887057599685735e-07, "loss": 0.0569, "step": 6644 }, { "epoch": 1.5119453924914676, "grad_norm": 1.6336417251582953, "learning_rate": 9.88633092228988e-07, "loss": 0.0462, "step": 6645 }, { "epoch": 1.5121729237770194, "grad_norm": 1.2241304248465237, "learning_rate": 9.885604170574081e-07, "loss": 0.0174, "step": 6646 }, { "epoch": 1.5124004550625711, "grad_norm": 1.2606080177082761, "learning_rate": 9.884877344553189e-07, "loss": 0.0174, "step": 6647 }, { "epoch": 1.512627986348123, "grad_norm": 1.8808732730138569, "learning_rate": 9.884150444242063e-07, "loss": 0.0638, "step": 6648 }, { "epoch": 1.5128555176336747, "grad_norm": 0.9107630601145861, "learning_rate": 9.883423469655553e-07, "loss": 0.0235, "step": 6649 }, { "epoch": 1.5130830489192264, "grad_norm": 0.7714393934298326, "learning_rate": 9.882696420808526e-07, "loss": 0.0195, "step": 6650 }, { "epoch": 1.5133105802047782, "grad_norm": 0.8951764186258134, "learning_rate": 9.881969297715836e-07, "loss": 0.0184, "step": 6651 }, { "epoch": 1.51353811149033, "grad_norm": 0.7339540735377944, "learning_rate": 9.881242100392346e-07, "loss": 0.0131, "step": 6652 }, { "epoch": 1.5137656427758817, "grad_norm": 0.7233697664885694, "learning_rate": 9.880514828852916e-07, "loss": 0.0131, "step": 6653 }, { "epoch": 1.5139931740614334, "grad_norm": 0.6553569496775881, "learning_rate": 9.879787483112413e-07, "loss": 0.019, "step": 6654 }, { "epoch": 1.5142207053469852, "grad_norm": 1.246700822413907, "learning_rate": 9.879060063185702e-07, "loss": 0.0255, "step": 6655 }, { "epoch": 1.514448236632537, "grad_norm": 0.8464872917857196, "learning_rate": 9.878332569087647e-07, "loss": 0.0211, "step": 6656 }, { "epoch": 1.5146757679180887, "grad_norm": 0.8410971019174226, "learning_rate": 9.877605000833122e-07, "loss": 0.0163, "step": 6657 }, { "epoch": 1.5149032992036404, "grad_norm": 1.256002488770331, "learning_rate": 9.876877358436996e-07, "loss": 0.0159, "step": 6658 }, { "epoch": 1.5151308304891922, "grad_norm": 0.9727260780233405, "learning_rate": 9.876149641914135e-07, "loss": 0.0224, "step": 6659 }, { "epoch": 1.515358361774744, "grad_norm": 1.3058738134780012, "learning_rate": 9.875421851279419e-07, "loss": 0.0336, "step": 6660 }, { "epoch": 1.5155858930602957, "grad_norm": 1.0425717492602375, "learning_rate": 9.874693986547717e-07, "loss": 0.0177, "step": 6661 }, { "epoch": 1.5158134243458474, "grad_norm": 1.6217091401895596, "learning_rate": 9.87396604773391e-07, "loss": 0.0591, "step": 6662 }, { "epoch": 1.5160409556313992, "grad_norm": 0.9444794944797769, "learning_rate": 9.873238034852875e-07, "loss": 0.0129, "step": 6663 }, { "epoch": 1.516268486916951, "grad_norm": 1.5545596261258932, "learning_rate": 9.872509947919489e-07, "loss": 0.0464, "step": 6664 }, { "epoch": 1.5164960182025027, "grad_norm": 1.2645221488362741, "learning_rate": 9.871781786948636e-07, "loss": 0.019, "step": 6665 }, { "epoch": 1.5167235494880547, "grad_norm": 1.1669774401635575, "learning_rate": 9.871053551955194e-07, "loss": 0.0259, "step": 6666 }, { "epoch": 1.5169510807736064, "grad_norm": 1.3872929854862293, "learning_rate": 9.87032524295405e-07, "loss": 0.0349, "step": 6667 }, { "epoch": 1.5171786120591582, "grad_norm": 0.7264415536162637, "learning_rate": 9.869596859960087e-07, "loss": 0.0104, "step": 6668 }, { "epoch": 1.51740614334471, "grad_norm": 1.4722269411341886, "learning_rate": 9.868868402988194e-07, "loss": 0.0355, "step": 6669 }, { "epoch": 1.5176336746302617, "grad_norm": 0.9845541443981577, "learning_rate": 9.86813987205326e-07, "loss": 0.0319, "step": 6670 }, { "epoch": 1.5178612059158134, "grad_norm": 1.5625166554766878, "learning_rate": 9.867411267170171e-07, "loss": 0.0492, "step": 6671 }, { "epoch": 1.5180887372013652, "grad_norm": 1.4742159712737606, "learning_rate": 9.866682588353823e-07, "loss": 0.0382, "step": 6672 }, { "epoch": 1.518316268486917, "grad_norm": 0.9155861801364314, "learning_rate": 9.865953835619105e-07, "loss": 0.0197, "step": 6673 }, { "epoch": 1.5185437997724687, "grad_norm": 0.8446641198873437, "learning_rate": 9.865225008980913e-07, "loss": 0.0195, "step": 6674 }, { "epoch": 1.5187713310580204, "grad_norm": 0.9541679354683429, "learning_rate": 9.864496108454142e-07, "loss": 0.0304, "step": 6675 }, { "epoch": 1.5189988623435724, "grad_norm": 0.7771524650823696, "learning_rate": 9.863767134053691e-07, "loss": 0.0228, "step": 6676 }, { "epoch": 1.5192263936291241, "grad_norm": 0.9791099549816529, "learning_rate": 9.86303808579446e-07, "loss": 0.0296, "step": 6677 }, { "epoch": 1.519453924914676, "grad_norm": 1.2308294541790132, "learning_rate": 9.862308963691344e-07, "loss": 0.0416, "step": 6678 }, { "epoch": 1.5196814562002277, "grad_norm": 1.2569665779197494, "learning_rate": 9.86157976775925e-07, "loss": 0.0529, "step": 6679 }, { "epoch": 1.5199089874857794, "grad_norm": 1.0520527959182502, "learning_rate": 9.86085049801308e-07, "loss": 0.023, "step": 6680 }, { "epoch": 1.5201365187713312, "grad_norm": 0.954887600544443, "learning_rate": 9.860121154467738e-07, "loss": 0.0151, "step": 6681 }, { "epoch": 1.520364050056883, "grad_norm": 1.533955827216088, "learning_rate": 9.859391737138132e-07, "loss": 0.0302, "step": 6682 }, { "epoch": 1.5205915813424347, "grad_norm": 1.0229766183656177, "learning_rate": 9.85866224603917e-07, "loss": 0.0228, "step": 6683 }, { "epoch": 1.5208191126279864, "grad_norm": 1.3237978242768538, "learning_rate": 9.85793268118576e-07, "loss": 0.0457, "step": 6684 }, { "epoch": 1.5210466439135382, "grad_norm": 1.321958954848268, "learning_rate": 9.857203042592813e-07, "loss": 0.0218, "step": 6685 }, { "epoch": 1.52127417519909, "grad_norm": 0.9025338290350718, "learning_rate": 9.856473330275243e-07, "loss": 0.0153, "step": 6686 }, { "epoch": 1.5215017064846417, "grad_norm": 1.5634553678197673, "learning_rate": 9.855743544247962e-07, "loss": 0.0504, "step": 6687 }, { "epoch": 1.5217292377701934, "grad_norm": 1.2829828788158382, "learning_rate": 9.855013684525888e-07, "loss": 0.026, "step": 6688 }, { "epoch": 1.5219567690557452, "grad_norm": 1.03731641686974, "learning_rate": 9.854283751123935e-07, "loss": 0.0182, "step": 6689 }, { "epoch": 1.522184300341297, "grad_norm": 0.983918255174861, "learning_rate": 9.853553744057023e-07, "loss": 0.0122, "step": 6690 }, { "epoch": 1.5224118316268487, "grad_norm": 1.016391471307343, "learning_rate": 9.852823663340074e-07, "loss": 0.02, "step": 6691 }, { "epoch": 1.5226393629124004, "grad_norm": 2.2858262315266753, "learning_rate": 9.852093508988006e-07, "loss": 0.0629, "step": 6692 }, { "epoch": 1.5228668941979522, "grad_norm": 1.0878900203208213, "learning_rate": 9.851363281015745e-07, "loss": 0.021, "step": 6693 }, { "epoch": 1.523094425483504, "grad_norm": 0.9233126923128159, "learning_rate": 9.850632979438211e-07, "loss": 0.0202, "step": 6694 }, { "epoch": 1.5233219567690557, "grad_norm": 1.4496178728052294, "learning_rate": 9.849902604270335e-07, "loss": 0.0378, "step": 6695 }, { "epoch": 1.5235494880546074, "grad_norm": 0.627804195846366, "learning_rate": 9.849172155527044e-07, "loss": 0.0096, "step": 6696 }, { "epoch": 1.5237770193401592, "grad_norm": 1.152818962293583, "learning_rate": 9.848441633223266e-07, "loss": 0.0178, "step": 6697 }, { "epoch": 1.524004550625711, "grad_norm": 1.6797152892325085, "learning_rate": 9.847711037373928e-07, "loss": 0.0463, "step": 6698 }, { "epoch": 1.5242320819112627, "grad_norm": 1.367812533566416, "learning_rate": 9.846980367993968e-07, "loss": 0.0389, "step": 6699 }, { "epoch": 1.5244596131968144, "grad_norm": 1.6336085740093287, "learning_rate": 9.846249625098317e-07, "loss": 0.0467, "step": 6700 }, { "epoch": 1.5246871444823662, "grad_norm": 1.581198385688688, "learning_rate": 9.845518808701906e-07, "loss": 0.0297, "step": 6701 }, { "epoch": 1.524914675767918, "grad_norm": 1.6561397145815502, "learning_rate": 9.84478791881968e-07, "loss": 0.0458, "step": 6702 }, { "epoch": 1.5251422070534697, "grad_norm": 1.1249706240685309, "learning_rate": 9.844056955466571e-07, "loss": 0.0189, "step": 6703 }, { "epoch": 1.5253697383390215, "grad_norm": 0.5097362940197786, "learning_rate": 9.84332591865752e-07, "loss": 0.0076, "step": 6704 }, { "epoch": 1.5255972696245734, "grad_norm": 1.4051486139237497, "learning_rate": 9.842594808407467e-07, "loss": 0.0363, "step": 6705 }, { "epoch": 1.5258248009101252, "grad_norm": 1.7908955623268843, "learning_rate": 9.841863624731358e-07, "loss": 0.0349, "step": 6706 }, { "epoch": 1.526052332195677, "grad_norm": 1.4826315493924698, "learning_rate": 9.841132367644133e-07, "loss": 0.0277, "step": 6707 }, { "epoch": 1.5262798634812287, "grad_norm": 0.971764725095612, "learning_rate": 9.840401037160737e-07, "loss": 0.0124, "step": 6708 }, { "epoch": 1.5265073947667804, "grad_norm": 1.19269631627323, "learning_rate": 9.839669633296122e-07, "loss": 0.0162, "step": 6709 }, { "epoch": 1.5267349260523322, "grad_norm": 1.282535697041421, "learning_rate": 9.838938156065236e-07, "loss": 0.0314, "step": 6710 }, { "epoch": 1.526962457337884, "grad_norm": 0.9152937675240786, "learning_rate": 9.838206605483024e-07, "loss": 0.0174, "step": 6711 }, { "epoch": 1.5271899886234357, "grad_norm": 0.911742361618213, "learning_rate": 9.83747498156444e-07, "loss": 0.0139, "step": 6712 }, { "epoch": 1.5274175199089874, "grad_norm": 1.4421627011208242, "learning_rate": 9.836743284324438e-07, "loss": 0.0547, "step": 6713 }, { "epoch": 1.5276450511945392, "grad_norm": 1.0048756243159007, "learning_rate": 9.836011513777975e-07, "loss": 0.0269, "step": 6714 }, { "epoch": 1.5278725824800912, "grad_norm": 0.9415494134871707, "learning_rate": 9.835279669940002e-07, "loss": 0.0247, "step": 6715 }, { "epoch": 1.528100113765643, "grad_norm": 1.2117322988897647, "learning_rate": 9.834547752825477e-07, "loss": 0.0136, "step": 6716 }, { "epoch": 1.5283276450511947, "grad_norm": 0.8858649899732157, "learning_rate": 9.833815762449364e-07, "loss": 0.0261, "step": 6717 }, { "epoch": 1.5285551763367464, "grad_norm": 1.7990398165637471, "learning_rate": 9.833083698826618e-07, "loss": 0.0425, "step": 6718 }, { "epoch": 1.5287827076222982, "grad_norm": 0.97024081947437, "learning_rate": 9.832351561972204e-07, "loss": 0.0208, "step": 6719 }, { "epoch": 1.52901023890785, "grad_norm": 1.6886694635546007, "learning_rate": 9.831619351901083e-07, "loss": 0.0345, "step": 6720 }, { "epoch": 1.5292377701934017, "grad_norm": 0.9587538553610925, "learning_rate": 9.830887068628223e-07, "loss": 0.0135, "step": 6721 }, { "epoch": 1.5294653014789534, "grad_norm": 1.1164415198948383, "learning_rate": 9.830154712168591e-07, "loss": 0.0344, "step": 6722 }, { "epoch": 1.5296928327645052, "grad_norm": 1.8672159588691146, "learning_rate": 9.829422282537152e-07, "loss": 0.0527, "step": 6723 }, { "epoch": 1.529920364050057, "grad_norm": 0.9012193013849213, "learning_rate": 9.828689779748877e-07, "loss": 0.0167, "step": 6724 }, { "epoch": 1.5301478953356087, "grad_norm": 1.3942555518548752, "learning_rate": 9.827957203818737e-07, "loss": 0.0302, "step": 6725 }, { "epoch": 1.5303754266211604, "grad_norm": 1.4460186732987468, "learning_rate": 9.827224554761705e-07, "loss": 0.0339, "step": 6726 }, { "epoch": 1.5306029579067122, "grad_norm": 1.1521131254229502, "learning_rate": 9.826491832592754e-07, "loss": 0.0398, "step": 6727 }, { "epoch": 1.530830489192264, "grad_norm": 1.2216298964138947, "learning_rate": 9.825759037326861e-07, "loss": 0.0341, "step": 6728 }, { "epoch": 1.5310580204778157, "grad_norm": 0.6507030554372302, "learning_rate": 9.825026168979001e-07, "loss": 0.01, "step": 6729 }, { "epoch": 1.5312855517633674, "grad_norm": 0.7749797892824488, "learning_rate": 9.824293227564154e-07, "loss": 0.0217, "step": 6730 }, { "epoch": 1.5315130830489192, "grad_norm": 1.597607520935028, "learning_rate": 9.8235602130973e-07, "loss": 0.0294, "step": 6731 }, { "epoch": 1.531740614334471, "grad_norm": 0.8423108293730993, "learning_rate": 9.822827125593417e-07, "loss": 0.013, "step": 6732 }, { "epoch": 1.5319681456200227, "grad_norm": 0.7967776857206855, "learning_rate": 9.822093965067492e-07, "loss": 0.0286, "step": 6733 }, { "epoch": 1.5321956769055745, "grad_norm": 1.1464494724303058, "learning_rate": 9.821360731534512e-07, "loss": 0.0194, "step": 6734 }, { "epoch": 1.5324232081911262, "grad_norm": 0.811112555766526, "learning_rate": 9.820627425009455e-07, "loss": 0.0136, "step": 6735 }, { "epoch": 1.532650739476678, "grad_norm": 1.2307368178100238, "learning_rate": 9.819894045507315e-07, "loss": 0.0355, "step": 6736 }, { "epoch": 1.5328782707622297, "grad_norm": 2.5108588990818093, "learning_rate": 9.819160593043076e-07, "loss": 0.0587, "step": 6737 }, { "epoch": 1.5331058020477815, "grad_norm": 1.136986286417985, "learning_rate": 9.818427067631733e-07, "loss": 0.029, "step": 6738 }, { "epoch": 1.5333333333333332, "grad_norm": 1.4896083735623669, "learning_rate": 9.817693469288276e-07, "loss": 0.0408, "step": 6739 }, { "epoch": 1.533560864618885, "grad_norm": 1.1158307587709726, "learning_rate": 9.8169597980277e-07, "loss": 0.0288, "step": 6740 }, { "epoch": 1.5337883959044367, "grad_norm": 1.5678967902284398, "learning_rate": 9.816226053864996e-07, "loss": 0.0423, "step": 6741 }, { "epoch": 1.5340159271899885, "grad_norm": 1.0092054577952496, "learning_rate": 9.815492236815163e-07, "loss": 0.0257, "step": 6742 }, { "epoch": 1.5342434584755402, "grad_norm": 1.6864118266108366, "learning_rate": 9.8147583468932e-07, "loss": 0.0533, "step": 6743 }, { "epoch": 1.5344709897610922, "grad_norm": 0.8285650933249512, "learning_rate": 9.814024384114102e-07, "loss": 0.0133, "step": 6744 }, { "epoch": 1.534698521046644, "grad_norm": 0.7990007997593029, "learning_rate": 9.813290348492874e-07, "loss": 0.0145, "step": 6745 }, { "epoch": 1.5349260523321957, "grad_norm": 1.1452431912291574, "learning_rate": 9.812556240044518e-07, "loss": 0.0278, "step": 6746 }, { "epoch": 1.5351535836177475, "grad_norm": 0.9830841833772611, "learning_rate": 9.811822058784038e-07, "loss": 0.033, "step": 6747 }, { "epoch": 1.5353811149032992, "grad_norm": 1.5315817385842814, "learning_rate": 9.811087804726436e-07, "loss": 0.0445, "step": 6748 }, { "epoch": 1.535608646188851, "grad_norm": 0.8680360060196064, "learning_rate": 9.810353477886722e-07, "loss": 0.0151, "step": 6749 }, { "epoch": 1.5358361774744027, "grad_norm": 0.7341865104680492, "learning_rate": 9.809619078279904e-07, "loss": 0.0126, "step": 6750 }, { "epoch": 1.5360637087599545, "grad_norm": 0.862170127361045, "learning_rate": 9.80888460592099e-07, "loss": 0.0184, "step": 6751 }, { "epoch": 1.5362912400455062, "grad_norm": 1.7419206219624046, "learning_rate": 9.808150060824995e-07, "loss": 0.057, "step": 6752 }, { "epoch": 1.5365187713310582, "grad_norm": 1.571383267693236, "learning_rate": 9.807415443006926e-07, "loss": 0.0463, "step": 6753 }, { "epoch": 1.53674630261661, "grad_norm": 0.6689110433682907, "learning_rate": 9.806680752481803e-07, "loss": 0.0128, "step": 6754 }, { "epoch": 1.5369738339021617, "grad_norm": 1.3775118149606538, "learning_rate": 9.805945989264638e-07, "loss": 0.0273, "step": 6755 }, { "epoch": 1.5372013651877134, "grad_norm": 0.9715191112955526, "learning_rate": 9.805211153370448e-07, "loss": 0.0214, "step": 6756 }, { "epoch": 1.5374288964732652, "grad_norm": 1.0205394119019446, "learning_rate": 9.804476244814253e-07, "loss": 0.0164, "step": 6757 }, { "epoch": 1.537656427758817, "grad_norm": 1.434249017393466, "learning_rate": 9.803741263611076e-07, "loss": 0.0577, "step": 6758 }, { "epoch": 1.5378839590443687, "grad_norm": 1.1853467808691651, "learning_rate": 9.803006209775936e-07, "loss": 0.031, "step": 6759 }, { "epoch": 1.5381114903299204, "grad_norm": 0.9049396065493746, "learning_rate": 9.802271083323855e-07, "loss": 0.0204, "step": 6760 }, { "epoch": 1.5383390216154722, "grad_norm": 1.3834697594871388, "learning_rate": 9.801535884269859e-07, "loss": 0.0368, "step": 6761 }, { "epoch": 1.538566552901024, "grad_norm": 1.2885752750926514, "learning_rate": 9.800800612628971e-07, "loss": 0.0415, "step": 6762 }, { "epoch": 1.5387940841865757, "grad_norm": 1.5134152583249854, "learning_rate": 9.800065268416226e-07, "loss": 0.0283, "step": 6763 }, { "epoch": 1.5390216154721275, "grad_norm": 1.0536222290569848, "learning_rate": 9.799329851646643e-07, "loss": 0.0197, "step": 6764 }, { "epoch": 1.5392491467576792, "grad_norm": 1.4686741972372306, "learning_rate": 9.798594362335265e-07, "loss": 0.0245, "step": 6765 }, { "epoch": 1.539476678043231, "grad_norm": 1.1690678201506683, "learning_rate": 9.797858800497112e-07, "loss": 0.0157, "step": 6766 }, { "epoch": 1.5397042093287827, "grad_norm": 0.8238513238523729, "learning_rate": 9.797123166147224e-07, "loss": 0.0147, "step": 6767 }, { "epoch": 1.5399317406143345, "grad_norm": 1.0558486625656787, "learning_rate": 9.796387459300635e-07, "loss": 0.024, "step": 6768 }, { "epoch": 1.5401592718998862, "grad_norm": 1.1353917596760223, "learning_rate": 9.795651679972382e-07, "loss": 0.0309, "step": 6769 }, { "epoch": 1.540386803185438, "grad_norm": 0.8517135065926781, "learning_rate": 9.7949158281775e-07, "loss": 0.0184, "step": 6770 }, { "epoch": 1.5406143344709897, "grad_norm": 1.4294422070332768, "learning_rate": 9.794179903931035e-07, "loss": 0.0277, "step": 6771 }, { "epoch": 1.5408418657565415, "grad_norm": 1.2869738269014956, "learning_rate": 9.79344390724802e-07, "loss": 0.0257, "step": 6772 }, { "epoch": 1.5410693970420932, "grad_norm": 0.8960870538958028, "learning_rate": 9.7927078381435e-07, "loss": 0.0256, "step": 6773 }, { "epoch": 1.541296928327645, "grad_norm": 0.8475858169518586, "learning_rate": 9.791971696632523e-07, "loss": 0.0181, "step": 6774 }, { "epoch": 1.5415244596131967, "grad_norm": 1.44665286178453, "learning_rate": 9.79123548273013e-07, "loss": 0.0419, "step": 6775 }, { "epoch": 1.5417519908987485, "grad_norm": 0.7037598146385168, "learning_rate": 9.79049919645137e-07, "loss": 0.0119, "step": 6776 }, { "epoch": 1.5419795221843002, "grad_norm": 0.9126315131039067, "learning_rate": 9.78976283781129e-07, "loss": 0.0189, "step": 6777 }, { "epoch": 1.542207053469852, "grad_norm": 1.204330528834964, "learning_rate": 9.78902640682494e-07, "loss": 0.0226, "step": 6778 }, { "epoch": 1.5424345847554037, "grad_norm": 1.4964522539113427, "learning_rate": 9.788289903507373e-07, "loss": 0.0396, "step": 6779 }, { "epoch": 1.5426621160409555, "grad_norm": 1.5964692402160778, "learning_rate": 9.787553327873637e-07, "loss": 0.0481, "step": 6780 }, { "epoch": 1.5428896473265072, "grad_norm": 1.2787948266252873, "learning_rate": 9.786816679938794e-07, "loss": 0.0313, "step": 6781 }, { "epoch": 1.543117178612059, "grad_norm": 0.9053360200297276, "learning_rate": 9.786079959717892e-07, "loss": 0.0181, "step": 6782 }, { "epoch": 1.543344709897611, "grad_norm": 1.2025054194441325, "learning_rate": 9.785343167225994e-07, "loss": 0.025, "step": 6783 }, { "epoch": 1.5435722411831627, "grad_norm": 1.768093481908682, "learning_rate": 9.784606302478155e-07, "loss": 0.086, "step": 6784 }, { "epoch": 1.5437997724687145, "grad_norm": 0.9078465345048391, "learning_rate": 9.783869365489437e-07, "loss": 0.0115, "step": 6785 }, { "epoch": 1.5440273037542662, "grad_norm": 1.1497977464117497, "learning_rate": 9.783132356274901e-07, "loss": 0.0216, "step": 6786 }, { "epoch": 1.544254835039818, "grad_norm": 0.5313589583578873, "learning_rate": 9.78239527484961e-07, "loss": 0.0087, "step": 6787 }, { "epoch": 1.5444823663253697, "grad_norm": 0.9748789217096225, "learning_rate": 9.781658121228628e-07, "loss": 0.0318, "step": 6788 }, { "epoch": 1.5447098976109215, "grad_norm": 2.4694914461680844, "learning_rate": 9.780920895427025e-07, "loss": 0.05, "step": 6789 }, { "epoch": 1.5449374288964732, "grad_norm": 1.30619233097482, "learning_rate": 9.780183597459864e-07, "loss": 0.0352, "step": 6790 }, { "epoch": 1.545164960182025, "grad_norm": 1.8138249647505036, "learning_rate": 9.779446227342216e-07, "loss": 0.0507, "step": 6791 }, { "epoch": 1.545392491467577, "grad_norm": 1.2364189313694403, "learning_rate": 9.77870878508915e-07, "loss": 0.0194, "step": 6792 }, { "epoch": 1.5456200227531287, "grad_norm": 0.9954733274363685, "learning_rate": 9.77797127071574e-07, "loss": 0.0228, "step": 6793 }, { "epoch": 1.5458475540386805, "grad_norm": 1.1973963103082081, "learning_rate": 9.777233684237056e-07, "loss": 0.0259, "step": 6794 }, { "epoch": 1.5460750853242322, "grad_norm": 0.9207738182775058, "learning_rate": 9.776496025668174e-07, "loss": 0.0132, "step": 6795 }, { "epoch": 1.546302616609784, "grad_norm": 0.7174193444984276, "learning_rate": 9.775758295024177e-07, "loss": 0.0289, "step": 6796 }, { "epoch": 1.5465301478953357, "grad_norm": 1.2241595374734087, "learning_rate": 9.775020492320136e-07, "loss": 0.0378, "step": 6797 }, { "epoch": 1.5467576791808875, "grad_norm": 1.8511371140948074, "learning_rate": 9.774282617571129e-07, "loss": 0.032, "step": 6798 }, { "epoch": 1.5469852104664392, "grad_norm": 1.0367978069734165, "learning_rate": 9.773544670792243e-07, "loss": 0.0408, "step": 6799 }, { "epoch": 1.547212741751991, "grad_norm": 1.227189360276986, "learning_rate": 9.772806651998555e-07, "loss": 0.0256, "step": 6800 }, { "epoch": 1.5474402730375427, "grad_norm": 0.8034203266831912, "learning_rate": 9.772068561205152e-07, "loss": 0.0243, "step": 6801 }, { "epoch": 1.5476678043230945, "grad_norm": 1.053114722831617, "learning_rate": 9.771330398427118e-07, "loss": 0.0314, "step": 6802 }, { "epoch": 1.5478953356086462, "grad_norm": 0.657404541342885, "learning_rate": 9.770592163679539e-07, "loss": 0.0105, "step": 6803 }, { "epoch": 1.548122866894198, "grad_norm": 1.050779540517071, "learning_rate": 9.769853856977503e-07, "loss": 0.0245, "step": 6804 }, { "epoch": 1.5483503981797497, "grad_norm": 0.9962720723555235, "learning_rate": 9.769115478336102e-07, "loss": 0.0253, "step": 6805 }, { "epoch": 1.5485779294653015, "grad_norm": 1.8462344743111663, "learning_rate": 9.768377027770427e-07, "loss": 0.0327, "step": 6806 }, { "epoch": 1.5488054607508532, "grad_norm": 1.0035831512384688, "learning_rate": 9.767638505295566e-07, "loss": 0.0331, "step": 6807 }, { "epoch": 1.549032992036405, "grad_norm": 0.8002363836105062, "learning_rate": 9.766899910926617e-07, "loss": 0.0219, "step": 6808 }, { "epoch": 1.5492605233219567, "grad_norm": 1.069327907986938, "learning_rate": 9.766161244678675e-07, "loss": 0.0482, "step": 6809 }, { "epoch": 1.5494880546075085, "grad_norm": 0.7883539436623613, "learning_rate": 9.765422506566837e-07, "loss": 0.0127, "step": 6810 }, { "epoch": 1.5497155858930602, "grad_norm": 0.7269884120486411, "learning_rate": 9.7646836966062e-07, "loss": 0.0127, "step": 6811 }, { "epoch": 1.549943117178612, "grad_norm": 0.9990408828346841, "learning_rate": 9.763944814811866e-07, "loss": 0.0277, "step": 6812 }, { "epoch": 1.5501706484641637, "grad_norm": 0.8768522206549924, "learning_rate": 9.763205861198935e-07, "loss": 0.0218, "step": 6813 }, { "epoch": 1.5503981797497155, "grad_norm": 1.330953541460739, "learning_rate": 9.76246683578251e-07, "loss": 0.0279, "step": 6814 }, { "epoch": 1.5506257110352673, "grad_norm": 0.7770272018475644, "learning_rate": 9.761727738577698e-07, "loss": 0.0228, "step": 6815 }, { "epoch": 1.550853242320819, "grad_norm": 1.8387771356381233, "learning_rate": 9.760988569599602e-07, "loss": 0.0673, "step": 6816 }, { "epoch": 1.5510807736063708, "grad_norm": 1.2266606216673168, "learning_rate": 9.760249328863328e-07, "loss": 0.039, "step": 6817 }, { "epoch": 1.5513083048919225, "grad_norm": 1.0188689397210966, "learning_rate": 9.759510016383987e-07, "loss": 0.0207, "step": 6818 }, { "epoch": 1.5515358361774743, "grad_norm": 1.0579016282881235, "learning_rate": 9.758770632176688e-07, "loss": 0.0158, "step": 6819 }, { "epoch": 1.551763367463026, "grad_norm": 0.9612658877602658, "learning_rate": 9.758031176256543e-07, "loss": 0.0168, "step": 6820 }, { "epoch": 1.5519908987485778, "grad_norm": 2.3193682962515774, "learning_rate": 9.757291648638666e-07, "loss": 0.0746, "step": 6821 }, { "epoch": 1.5522184300341297, "grad_norm": 1.096008882913452, "learning_rate": 9.756552049338174e-07, "loss": 0.0177, "step": 6822 }, { "epoch": 1.5524459613196815, "grad_norm": 1.0739238458685665, "learning_rate": 9.755812378370177e-07, "loss": 0.0242, "step": 6823 }, { "epoch": 1.5526734926052332, "grad_norm": 0.9537760996671094, "learning_rate": 9.755072635749795e-07, "loss": 0.0256, "step": 6824 }, { "epoch": 1.552901023890785, "grad_norm": 0.8524455774216739, "learning_rate": 9.754332821492148e-07, "loss": 0.012, "step": 6825 }, { "epoch": 1.5531285551763367, "grad_norm": 1.459998149204111, "learning_rate": 9.753592935612358e-07, "loss": 0.0431, "step": 6826 }, { "epoch": 1.5533560864618885, "grad_norm": 0.9652354910786201, "learning_rate": 9.752852978125544e-07, "loss": 0.022, "step": 6827 }, { "epoch": 1.5535836177474402, "grad_norm": 0.6879353796285916, "learning_rate": 9.75211294904683e-07, "loss": 0.0135, "step": 6828 }, { "epoch": 1.553811149032992, "grad_norm": 1.4518274964176225, "learning_rate": 9.75137284839134e-07, "loss": 0.0377, "step": 6829 }, { "epoch": 1.5540386803185438, "grad_norm": 1.6705449868902968, "learning_rate": 9.750632676174201e-07, "loss": 0.0476, "step": 6830 }, { "epoch": 1.5542662116040957, "grad_norm": 1.2689213544556226, "learning_rate": 9.749892432410544e-07, "loss": 0.0282, "step": 6831 }, { "epoch": 1.5544937428896475, "grad_norm": 1.8099847360827157, "learning_rate": 9.749152117115494e-07, "loss": 0.0356, "step": 6832 }, { "epoch": 1.5547212741751992, "grad_norm": 1.5405555526689485, "learning_rate": 9.748411730304184e-07, "loss": 0.0444, "step": 6833 }, { "epoch": 1.554948805460751, "grad_norm": 0.8694412690578796, "learning_rate": 9.747671271991746e-07, "loss": 0.013, "step": 6834 }, { "epoch": 1.5551763367463027, "grad_norm": 1.02126526467446, "learning_rate": 9.746930742193307e-07, "loss": 0.0239, "step": 6835 }, { "epoch": 1.5554038680318545, "grad_norm": 0.7213813598412102, "learning_rate": 9.746190140924014e-07, "loss": 0.0222, "step": 6836 }, { "epoch": 1.5556313993174062, "grad_norm": 1.1742233662911234, "learning_rate": 9.745449468198997e-07, "loss": 0.0302, "step": 6837 }, { "epoch": 1.555858930602958, "grad_norm": 1.0483098687522778, "learning_rate": 9.744708724033393e-07, "loss": 0.0197, "step": 6838 }, { "epoch": 1.5560864618885097, "grad_norm": 1.2683705572721342, "learning_rate": 9.743967908442343e-07, "loss": 0.0414, "step": 6839 }, { "epoch": 1.5563139931740615, "grad_norm": 0.8649452535857219, "learning_rate": 9.743227021440988e-07, "loss": 0.0221, "step": 6840 }, { "epoch": 1.5565415244596132, "grad_norm": 1.1327479895451653, "learning_rate": 9.74248606304447e-07, "loss": 0.0233, "step": 6841 }, { "epoch": 1.556769055745165, "grad_norm": 0.853522919213304, "learning_rate": 9.741745033267932e-07, "loss": 0.0171, "step": 6842 }, { "epoch": 1.5569965870307167, "grad_norm": 1.4600706299872448, "learning_rate": 9.741003932126522e-07, "loss": 0.0273, "step": 6843 }, { "epoch": 1.5572241183162685, "grad_norm": 1.9026227073360096, "learning_rate": 9.740262759635386e-07, "loss": 0.053, "step": 6844 }, { "epoch": 1.5574516496018203, "grad_norm": 0.9978504859142656, "learning_rate": 9.739521515809669e-07, "loss": 0.0261, "step": 6845 }, { "epoch": 1.557679180887372, "grad_norm": 1.200148401809416, "learning_rate": 9.738780200664525e-07, "loss": 0.0254, "step": 6846 }, { "epoch": 1.5579067121729238, "grad_norm": 1.394694694968866, "learning_rate": 9.738038814215102e-07, "loss": 0.0246, "step": 6847 }, { "epoch": 1.5581342434584755, "grad_norm": 1.2354636254991684, "learning_rate": 9.737297356476554e-07, "loss": 0.0234, "step": 6848 }, { "epoch": 1.5583617747440273, "grad_norm": 1.27065517757966, "learning_rate": 9.736555827464034e-07, "loss": 0.0497, "step": 6849 }, { "epoch": 1.558589306029579, "grad_norm": 0.5183623239377243, "learning_rate": 9.7358142271927e-07, "loss": 0.0114, "step": 6850 }, { "epoch": 1.5588168373151308, "grad_norm": 0.8794614624216165, "learning_rate": 9.735072555677705e-07, "loss": 0.0202, "step": 6851 }, { "epoch": 1.5590443686006825, "grad_norm": 1.621272386650821, "learning_rate": 9.73433081293421e-07, "loss": 0.0386, "step": 6852 }, { "epoch": 1.5592718998862343, "grad_norm": 1.1922606611608835, "learning_rate": 9.733588998977376e-07, "loss": 0.0222, "step": 6853 }, { "epoch": 1.559499431171786, "grad_norm": 1.1849209727802208, "learning_rate": 9.73284711382236e-07, "loss": 0.0201, "step": 6854 }, { "epoch": 1.5597269624573378, "grad_norm": 1.6673549617864156, "learning_rate": 9.732105157484332e-07, "loss": 0.056, "step": 6855 }, { "epoch": 1.5599544937428895, "grad_norm": 1.3125965381213744, "learning_rate": 9.731363129978447e-07, "loss": 0.0228, "step": 6856 }, { "epoch": 1.5601820250284413, "grad_norm": 1.6685078337354073, "learning_rate": 9.730621031319878e-07, "loss": 0.0288, "step": 6857 }, { "epoch": 1.560409556313993, "grad_norm": 1.2959301226520625, "learning_rate": 9.729878861523788e-07, "loss": 0.0274, "step": 6858 }, { "epoch": 1.5606370875995448, "grad_norm": 0.8526003882872059, "learning_rate": 9.729136620605347e-07, "loss": 0.0162, "step": 6859 }, { "epoch": 1.5608646188850968, "grad_norm": 4.875759566271044, "learning_rate": 9.728394308579727e-07, "loss": 0.0988, "step": 6860 }, { "epoch": 1.5610921501706485, "grad_norm": 0.9454552747509546, "learning_rate": 9.727651925462098e-07, "loss": 0.0238, "step": 6861 }, { "epoch": 1.5613196814562003, "grad_norm": 0.8040446441336326, "learning_rate": 9.726909471267632e-07, "loss": 0.0169, "step": 6862 }, { "epoch": 1.561547212741752, "grad_norm": 0.975335759529535, "learning_rate": 9.726166946011503e-07, "loss": 0.0208, "step": 6863 }, { "epoch": 1.5617747440273038, "grad_norm": 0.995135796770694, "learning_rate": 9.72542434970889e-07, "loss": 0.0316, "step": 6864 }, { "epoch": 1.5620022753128555, "grad_norm": 1.531607130104655, "learning_rate": 9.724681682374965e-07, "loss": 0.0526, "step": 6865 }, { "epoch": 1.5622298065984073, "grad_norm": 0.980914346176978, "learning_rate": 9.723938944024913e-07, "loss": 0.016, "step": 6866 }, { "epoch": 1.562457337883959, "grad_norm": 2.20124488164597, "learning_rate": 9.72319613467391e-07, "loss": 0.0452, "step": 6867 }, { "epoch": 1.5626848691695108, "grad_norm": 1.4818225810862038, "learning_rate": 9.722453254337139e-07, "loss": 0.0406, "step": 6868 }, { "epoch": 1.5629124004550625, "grad_norm": 1.2655098223089418, "learning_rate": 9.721710303029783e-07, "loss": 0.0235, "step": 6869 }, { "epoch": 1.5631399317406145, "grad_norm": 1.1710684519730319, "learning_rate": 9.720967280767026e-07, "loss": 0.0208, "step": 6870 }, { "epoch": 1.5633674630261662, "grad_norm": 1.1745759284941633, "learning_rate": 9.720224187564057e-07, "loss": 0.0244, "step": 6871 }, { "epoch": 1.563594994311718, "grad_norm": 0.8708806170643246, "learning_rate": 9.719481023436059e-07, "loss": 0.0218, "step": 6872 }, { "epoch": 1.5638225255972698, "grad_norm": 1.2108273614761154, "learning_rate": 9.718737788398223e-07, "loss": 0.0329, "step": 6873 }, { "epoch": 1.5640500568828215, "grad_norm": 2.0062378591587646, "learning_rate": 9.71799448246574e-07, "loss": 0.0602, "step": 6874 }, { "epoch": 1.5642775881683733, "grad_norm": 0.6282582638053863, "learning_rate": 9.717251105653799e-07, "loss": 0.0078, "step": 6875 }, { "epoch": 1.564505119453925, "grad_norm": 1.8411837776806999, "learning_rate": 9.716507657977597e-07, "loss": 0.0311, "step": 6876 }, { "epoch": 1.5647326507394768, "grad_norm": 1.5565448824175396, "learning_rate": 9.715764139452327e-07, "loss": 0.0324, "step": 6877 }, { "epoch": 1.5649601820250285, "grad_norm": 1.1011446306121413, "learning_rate": 9.715020550093185e-07, "loss": 0.0206, "step": 6878 }, { "epoch": 1.5651877133105803, "grad_norm": 1.4189312012820676, "learning_rate": 9.71427688991537e-07, "loss": 0.0366, "step": 6879 }, { "epoch": 1.565415244596132, "grad_norm": 1.1440113593957346, "learning_rate": 9.713533158934079e-07, "loss": 0.0163, "step": 6880 }, { "epoch": 1.5656427758816838, "grad_norm": 0.8339533986631594, "learning_rate": 9.712789357164512e-07, "loss": 0.0177, "step": 6881 }, { "epoch": 1.5658703071672355, "grad_norm": 1.3924735109528363, "learning_rate": 9.712045484621874e-07, "loss": 0.0387, "step": 6882 }, { "epoch": 1.5660978384527873, "grad_norm": 0.9259666291165694, "learning_rate": 9.711301541321365e-07, "loss": 0.0177, "step": 6883 }, { "epoch": 1.566325369738339, "grad_norm": 1.3135763130095726, "learning_rate": 9.710557527278195e-07, "loss": 0.0305, "step": 6884 }, { "epoch": 1.5665529010238908, "grad_norm": 1.1297719627959584, "learning_rate": 9.709813442507565e-07, "loss": 0.0247, "step": 6885 }, { "epoch": 1.5667804323094425, "grad_norm": 1.4600258713674805, "learning_rate": 9.709069287024684e-07, "loss": 0.0411, "step": 6886 }, { "epoch": 1.5670079635949943, "grad_norm": 1.0383415502711744, "learning_rate": 9.708325060844761e-07, "loss": 0.0202, "step": 6887 }, { "epoch": 1.567235494880546, "grad_norm": 1.6819369088665528, "learning_rate": 9.707580763983008e-07, "loss": 0.0348, "step": 6888 }, { "epoch": 1.5674630261660978, "grad_norm": 1.1909010495383987, "learning_rate": 9.706836396454638e-07, "loss": 0.0226, "step": 6889 }, { "epoch": 1.5676905574516495, "grad_norm": 1.1133637552881577, "learning_rate": 9.70609195827486e-07, "loss": 0.0187, "step": 6890 }, { "epoch": 1.5679180887372013, "grad_norm": 1.8834546240381704, "learning_rate": 9.705347449458896e-07, "loss": 0.0615, "step": 6891 }, { "epoch": 1.568145620022753, "grad_norm": 1.1406718003995044, "learning_rate": 9.704602870021954e-07, "loss": 0.0306, "step": 6892 }, { "epoch": 1.5683731513083048, "grad_norm": 1.0476596873190438, "learning_rate": 9.70385821997926e-07, "loss": 0.0257, "step": 6893 }, { "epoch": 1.5686006825938565, "grad_norm": 1.505185232649498, "learning_rate": 9.703113499346026e-07, "loss": 0.0329, "step": 6894 }, { "epoch": 1.5688282138794083, "grad_norm": 1.3766493287200758, "learning_rate": 9.70236870813748e-07, "loss": 0.0303, "step": 6895 }, { "epoch": 1.56905574516496, "grad_norm": 0.8517840379820023, "learning_rate": 9.701623846368836e-07, "loss": 0.0135, "step": 6896 }, { "epoch": 1.5692832764505118, "grad_norm": 1.4103036090074965, "learning_rate": 9.700878914055325e-07, "loss": 0.0531, "step": 6897 }, { "epoch": 1.5695108077360636, "grad_norm": 3.6980656854925904, "learning_rate": 9.700133911212168e-07, "loss": 0.0921, "step": 6898 }, { "epoch": 1.5697383390216155, "grad_norm": 0.6914093108994455, "learning_rate": 9.699388837854593e-07, "loss": 0.0099, "step": 6899 }, { "epoch": 1.5699658703071673, "grad_norm": 0.9100695130578376, "learning_rate": 9.698643693997827e-07, "loss": 0.0245, "step": 6900 }, { "epoch": 1.570193401592719, "grad_norm": 0.8627006881738912, "learning_rate": 9.697898479657098e-07, "loss": 0.0146, "step": 6901 }, { "epoch": 1.5704209328782708, "grad_norm": 1.4110615783342308, "learning_rate": 9.697153194847641e-07, "loss": 0.0236, "step": 6902 }, { "epoch": 1.5706484641638225, "grad_norm": 1.1347870516723162, "learning_rate": 9.696407839584684e-07, "loss": 0.0336, "step": 6903 }, { "epoch": 1.5708759954493743, "grad_norm": 1.2538699432855616, "learning_rate": 9.695662413883466e-07, "loss": 0.0172, "step": 6904 }, { "epoch": 1.571103526734926, "grad_norm": 0.8716398094298144, "learning_rate": 9.694916917759218e-07, "loss": 0.0186, "step": 6905 }, { "epoch": 1.5713310580204778, "grad_norm": 0.845944219449997, "learning_rate": 9.694171351227175e-07, "loss": 0.0172, "step": 6906 }, { "epoch": 1.5715585893060295, "grad_norm": 1.5123986101385158, "learning_rate": 9.693425714302577e-07, "loss": 0.025, "step": 6907 }, { "epoch": 1.5717861205915813, "grad_norm": 1.0592873353102206, "learning_rate": 9.692680007000663e-07, "loss": 0.0182, "step": 6908 }, { "epoch": 1.5720136518771333, "grad_norm": 1.5343957838140057, "learning_rate": 9.691934229336677e-07, "loss": 0.0301, "step": 6909 }, { "epoch": 1.572241183162685, "grad_norm": 1.5525566912067996, "learning_rate": 9.69118838132586e-07, "loss": 0.0298, "step": 6910 }, { "epoch": 1.5724687144482368, "grad_norm": 0.9091899940543618, "learning_rate": 9.69044246298345e-07, "loss": 0.0364, "step": 6911 }, { "epoch": 1.5726962457337885, "grad_norm": 0.7851322451427193, "learning_rate": 9.689696474324703e-07, "loss": 0.0187, "step": 6912 }, { "epoch": 1.5729237770193403, "grad_norm": 0.9342893603380591, "learning_rate": 9.688950415364855e-07, "loss": 0.0376, "step": 6913 }, { "epoch": 1.573151308304892, "grad_norm": 2.3291078950763624, "learning_rate": 9.68820428611916e-07, "loss": 0.0834, "step": 6914 }, { "epoch": 1.5733788395904438, "grad_norm": 1.3019341126571247, "learning_rate": 9.687458086602866e-07, "loss": 0.0308, "step": 6915 }, { "epoch": 1.5736063708759955, "grad_norm": 0.772079275849239, "learning_rate": 9.686711816831226e-07, "loss": 0.015, "step": 6916 }, { "epoch": 1.5738339021615473, "grad_norm": 0.9630973601051392, "learning_rate": 9.68596547681949e-07, "loss": 0.0279, "step": 6917 }, { "epoch": 1.574061433447099, "grad_norm": 1.734908432077837, "learning_rate": 9.68521906658291e-07, "loss": 0.0595, "step": 6918 }, { "epoch": 1.5742889647326508, "grad_norm": 1.4035571745166036, "learning_rate": 9.684472586136745e-07, "loss": 0.0214, "step": 6919 }, { "epoch": 1.5745164960182025, "grad_norm": 1.3254699748716203, "learning_rate": 9.68372603549625e-07, "loss": 0.0362, "step": 6920 }, { "epoch": 1.5747440273037543, "grad_norm": 1.4628284794894173, "learning_rate": 9.682979414676682e-07, "loss": 0.0256, "step": 6921 }, { "epoch": 1.574971558589306, "grad_norm": 1.3653689349920086, "learning_rate": 9.682232723693305e-07, "loss": 0.0258, "step": 6922 }, { "epoch": 1.5751990898748578, "grad_norm": 0.7209251346481367, "learning_rate": 9.681485962561377e-07, "loss": 0.0174, "step": 6923 }, { "epoch": 1.5754266211604095, "grad_norm": 1.0955934147353268, "learning_rate": 9.680739131296158e-07, "loss": 0.0208, "step": 6924 }, { "epoch": 1.5756541524459613, "grad_norm": 1.2439741485652382, "learning_rate": 9.679992229912914e-07, "loss": 0.0305, "step": 6925 }, { "epoch": 1.575881683731513, "grad_norm": 1.4366682730583082, "learning_rate": 9.67924525842691e-07, "loss": 0.0306, "step": 6926 }, { "epoch": 1.5761092150170648, "grad_norm": 1.3379090050907754, "learning_rate": 9.678498216853416e-07, "loss": 0.0411, "step": 6927 }, { "epoch": 1.5763367463026166, "grad_norm": 1.1244782762370413, "learning_rate": 9.677751105207696e-07, "loss": 0.0267, "step": 6928 }, { "epoch": 1.5765642775881683, "grad_norm": 1.0334857986896164, "learning_rate": 9.677003923505025e-07, "loss": 0.0208, "step": 6929 }, { "epoch": 1.57679180887372, "grad_norm": 0.6704299998949605, "learning_rate": 9.676256671760665e-07, "loss": 0.0068, "step": 6930 }, { "epoch": 1.5770193401592718, "grad_norm": 0.9533956478964293, "learning_rate": 9.675509349989896e-07, "loss": 0.0213, "step": 6931 }, { "epoch": 1.5772468714448236, "grad_norm": 0.9244996451919156, "learning_rate": 9.674761958207986e-07, "loss": 0.018, "step": 6932 }, { "epoch": 1.5774744027303753, "grad_norm": 0.6496056903137244, "learning_rate": 9.674014496430218e-07, "loss": 0.0098, "step": 6933 }, { "epoch": 1.577701934015927, "grad_norm": 0.9270304301068275, "learning_rate": 9.673266964671862e-07, "loss": 0.017, "step": 6934 }, { "epoch": 1.5779294653014788, "grad_norm": 2.313850498927015, "learning_rate": 9.6725193629482e-07, "loss": 0.0177, "step": 6935 }, { "epoch": 1.5781569965870306, "grad_norm": 0.8972440977156996, "learning_rate": 9.671771691274508e-07, "loss": 0.0155, "step": 6936 }, { "epoch": 1.5783845278725823, "grad_norm": 0.9084538782830909, "learning_rate": 9.671023949666073e-07, "loss": 0.0144, "step": 6937 }, { "epoch": 1.5786120591581343, "grad_norm": 0.8179863989404683, "learning_rate": 9.67027613813817e-07, "loss": 0.0205, "step": 6938 }, { "epoch": 1.578839590443686, "grad_norm": 1.4846267938383064, "learning_rate": 9.66952825670609e-07, "loss": 0.044, "step": 6939 }, { "epoch": 1.5790671217292378, "grad_norm": 1.18034980748661, "learning_rate": 9.66878030538511e-07, "loss": 0.0326, "step": 6940 }, { "epoch": 1.5792946530147896, "grad_norm": 1.256043529456657, "learning_rate": 9.668032284190529e-07, "loss": 0.0165, "step": 6941 }, { "epoch": 1.5795221843003413, "grad_norm": 0.8899435581756949, "learning_rate": 9.667284193137622e-07, "loss": 0.0208, "step": 6942 }, { "epoch": 1.579749715585893, "grad_norm": 1.0826065036291013, "learning_rate": 9.666536032241687e-07, "loss": 0.0264, "step": 6943 }, { "epoch": 1.5799772468714448, "grad_norm": 1.0986489683562306, "learning_rate": 9.66578780151801e-07, "loss": 0.0357, "step": 6944 }, { "epoch": 1.5802047781569966, "grad_norm": 0.7910773749797155, "learning_rate": 9.66503950098189e-07, "loss": 0.0124, "step": 6945 }, { "epoch": 1.5804323094425483, "grad_norm": 1.2608845947842933, "learning_rate": 9.664291130648616e-07, "loss": 0.0219, "step": 6946 }, { "epoch": 1.5806598407281, "grad_norm": 0.896154253642754, "learning_rate": 9.663542690533485e-07, "loss": 0.0123, "step": 6947 }, { "epoch": 1.580887372013652, "grad_norm": 0.9731058492147535, "learning_rate": 9.66279418065179e-07, "loss": 0.0188, "step": 6948 }, { "epoch": 1.5811149032992038, "grad_norm": 2.180460150678355, "learning_rate": 9.662045601018834e-07, "loss": 0.0376, "step": 6949 }, { "epoch": 1.5813424345847555, "grad_norm": 1.4286588206093294, "learning_rate": 9.661296951649914e-07, "loss": 0.0409, "step": 6950 }, { "epoch": 1.5815699658703073, "grad_norm": 1.3765902238160939, "learning_rate": 9.660548232560331e-07, "loss": 0.0463, "step": 6951 }, { "epoch": 1.581797497155859, "grad_norm": 1.185065939414716, "learning_rate": 9.659799443765392e-07, "loss": 0.0448, "step": 6952 }, { "epoch": 1.5820250284414108, "grad_norm": 1.337116480171379, "learning_rate": 9.659050585280394e-07, "loss": 0.026, "step": 6953 }, { "epoch": 1.5822525597269625, "grad_norm": 1.020718028178023, "learning_rate": 9.658301657120646e-07, "loss": 0.0281, "step": 6954 }, { "epoch": 1.5824800910125143, "grad_norm": 0.9055039875506272, "learning_rate": 9.657552659301455e-07, "loss": 0.0219, "step": 6955 }, { "epoch": 1.582707622298066, "grad_norm": 1.6953021742731489, "learning_rate": 9.656803591838126e-07, "loss": 0.0678, "step": 6956 }, { "epoch": 1.5829351535836178, "grad_norm": 1.0772903707549362, "learning_rate": 9.656054454745973e-07, "loss": 0.0345, "step": 6957 }, { "epoch": 1.5831626848691696, "grad_norm": 1.1587537360572808, "learning_rate": 9.655305248040302e-07, "loss": 0.0359, "step": 6958 }, { "epoch": 1.5833902161547213, "grad_norm": 0.9105264852735304, "learning_rate": 9.654555971736431e-07, "loss": 0.0282, "step": 6959 }, { "epoch": 1.583617747440273, "grad_norm": 1.220396846866995, "learning_rate": 9.653806625849671e-07, "loss": 0.0155, "step": 6960 }, { "epoch": 1.5838452787258248, "grad_norm": 0.7086608428171763, "learning_rate": 9.653057210395338e-07, "loss": 0.0112, "step": 6961 }, { "epoch": 1.5840728100113766, "grad_norm": 1.075979091036995, "learning_rate": 9.652307725388746e-07, "loss": 0.0189, "step": 6962 }, { "epoch": 1.5843003412969283, "grad_norm": 1.5170568607738595, "learning_rate": 9.651558170845216e-07, "loss": 0.0338, "step": 6963 }, { "epoch": 1.58452787258248, "grad_norm": 0.7652438799921432, "learning_rate": 9.650808546780068e-07, "loss": 0.0144, "step": 6964 }, { "epoch": 1.5847554038680318, "grad_norm": 1.9783702731659012, "learning_rate": 9.65005885320862e-07, "loss": 0.0512, "step": 6965 }, { "epoch": 1.5849829351535836, "grad_norm": 1.2326785631564439, "learning_rate": 9.649309090146194e-07, "loss": 0.0308, "step": 6966 }, { "epoch": 1.5852104664391353, "grad_norm": 0.9273837401389944, "learning_rate": 9.64855925760812e-07, "loss": 0.0099, "step": 6967 }, { "epoch": 1.585437997724687, "grad_norm": 2.082982278260053, "learning_rate": 9.647809355609715e-07, "loss": 0.0815, "step": 6968 }, { "epoch": 1.5856655290102388, "grad_norm": 1.154389332279708, "learning_rate": 9.647059384166314e-07, "loss": 0.0199, "step": 6969 }, { "epoch": 1.5858930602957906, "grad_norm": 1.3155932055166657, "learning_rate": 9.646309343293237e-07, "loss": 0.042, "step": 6970 }, { "epoch": 1.5861205915813423, "grad_norm": 0.7089433372912292, "learning_rate": 9.645559233005819e-07, "loss": 0.016, "step": 6971 }, { "epoch": 1.586348122866894, "grad_norm": 0.9808906887148743, "learning_rate": 9.644809053319388e-07, "loss": 0.0152, "step": 6972 }, { "epoch": 1.5865756541524458, "grad_norm": 1.154371513813329, "learning_rate": 9.644058804249276e-07, "loss": 0.0157, "step": 6973 }, { "epoch": 1.5868031854379976, "grad_norm": 1.1758967067439108, "learning_rate": 9.64330848581082e-07, "loss": 0.051, "step": 6974 }, { "epoch": 1.5870307167235493, "grad_norm": 1.0737810620882602, "learning_rate": 9.642558098019353e-07, "loss": 0.0295, "step": 6975 }, { "epoch": 1.587258248009101, "grad_norm": 1.288007211828143, "learning_rate": 9.641807640890212e-07, "loss": 0.0406, "step": 6976 }, { "epoch": 1.587485779294653, "grad_norm": 0.8343056698359398, "learning_rate": 9.64105711443873e-07, "loss": 0.0229, "step": 6977 }, { "epoch": 1.5877133105802048, "grad_norm": 2.336951974280419, "learning_rate": 9.640306518680257e-07, "loss": 0.0516, "step": 6978 }, { "epoch": 1.5879408418657566, "grad_norm": 1.471956955036482, "learning_rate": 9.639555853630126e-07, "loss": 0.0328, "step": 6979 }, { "epoch": 1.5881683731513083, "grad_norm": 1.2981308020075435, "learning_rate": 9.63880511930368e-07, "loss": 0.0315, "step": 6980 }, { "epoch": 1.58839590443686, "grad_norm": 1.142394389444487, "learning_rate": 9.638054315716264e-07, "loss": 0.0304, "step": 6981 }, { "epoch": 1.5886234357224118, "grad_norm": 1.578253454753235, "learning_rate": 9.637303442883223e-07, "loss": 0.0333, "step": 6982 }, { "epoch": 1.5888509670079636, "grad_norm": 0.8057669195788537, "learning_rate": 9.636552500819903e-07, "loss": 0.0113, "step": 6983 }, { "epoch": 1.5890784982935153, "grad_norm": 1.0137918642215906, "learning_rate": 9.635801489541652e-07, "loss": 0.0174, "step": 6984 }, { "epoch": 1.589306029579067, "grad_norm": 0.764528773311874, "learning_rate": 9.635050409063818e-07, "loss": 0.0138, "step": 6985 }, { "epoch": 1.5895335608646188, "grad_norm": 1.0778573956251072, "learning_rate": 9.634299259401756e-07, "loss": 0.0266, "step": 6986 }, { "epoch": 1.5897610921501708, "grad_norm": 0.9923931257391829, "learning_rate": 9.633548040570815e-07, "loss": 0.0304, "step": 6987 }, { "epoch": 1.5899886234357226, "grad_norm": 1.6429925247966484, "learning_rate": 9.632796752586345e-07, "loss": 0.062, "step": 6988 }, { "epoch": 1.5902161547212743, "grad_norm": 0.9225605285785448, "learning_rate": 9.632045395463708e-07, "loss": 0.0226, "step": 6989 }, { "epoch": 1.590443686006826, "grad_norm": 1.3067475369066461, "learning_rate": 9.631293969218256e-07, "loss": 0.0369, "step": 6990 }, { "epoch": 1.5906712172923778, "grad_norm": 1.4050369350662906, "learning_rate": 9.63054247386535e-07, "loss": 0.0373, "step": 6991 }, { "epoch": 1.5908987485779296, "grad_norm": 1.1648943302861068, "learning_rate": 9.629790909420344e-07, "loss": 0.0246, "step": 6992 }, { "epoch": 1.5911262798634813, "grad_norm": 1.9942288503602235, "learning_rate": 9.629039275898603e-07, "loss": 0.0334, "step": 6993 }, { "epoch": 1.591353811149033, "grad_norm": 0.732963655692899, "learning_rate": 9.628287573315488e-07, "loss": 0.0166, "step": 6994 }, { "epoch": 1.5915813424345848, "grad_norm": 1.2794851527570241, "learning_rate": 9.627535801686359e-07, "loss": 0.0213, "step": 6995 }, { "epoch": 1.5918088737201366, "grad_norm": 0.8394812636488541, "learning_rate": 9.626783961026586e-07, "loss": 0.0207, "step": 6996 }, { "epoch": 1.5920364050056883, "grad_norm": 1.6392112440185473, "learning_rate": 9.626032051351534e-07, "loss": 0.0401, "step": 6997 }, { "epoch": 1.59226393629124, "grad_norm": 0.744033455630395, "learning_rate": 9.62528007267657e-07, "loss": 0.0228, "step": 6998 }, { "epoch": 1.5924914675767918, "grad_norm": 1.1583696590619958, "learning_rate": 9.62452802501706e-07, "loss": 0.0285, "step": 6999 }, { "epoch": 1.5927189988623436, "grad_norm": 1.0576690714175723, "learning_rate": 9.62377590838838e-07, "loss": 0.0264, "step": 7000 }, { "epoch": 1.5929465301478953, "grad_norm": 1.275729463204334, "learning_rate": 9.623023722805898e-07, "loss": 0.0288, "step": 7001 }, { "epoch": 1.593174061433447, "grad_norm": 1.1375669002340378, "learning_rate": 9.62227146828499e-07, "loss": 0.0186, "step": 7002 }, { "epoch": 1.5934015927189988, "grad_norm": 1.612053860660408, "learning_rate": 9.621519144841028e-07, "loss": 0.0388, "step": 7003 }, { "epoch": 1.5936291240045506, "grad_norm": 1.0591497663172853, "learning_rate": 9.62076675248939e-07, "loss": 0.0183, "step": 7004 }, { "epoch": 1.5938566552901023, "grad_norm": 1.325781849288007, "learning_rate": 9.620014291245452e-07, "loss": 0.0326, "step": 7005 }, { "epoch": 1.594084186575654, "grad_norm": 1.2729296118881142, "learning_rate": 9.619261761124592e-07, "loss": 0.0193, "step": 7006 }, { "epoch": 1.5943117178612058, "grad_norm": 1.681909742131678, "learning_rate": 9.618509162142196e-07, "loss": 0.0475, "step": 7007 }, { "epoch": 1.5945392491467576, "grad_norm": 0.7128073458713802, "learning_rate": 9.61775649431364e-07, "loss": 0.0121, "step": 7008 }, { "epoch": 1.5947667804323093, "grad_norm": 1.3690331746720998, "learning_rate": 9.617003757654309e-07, "loss": 0.0678, "step": 7009 }, { "epoch": 1.594994311717861, "grad_norm": 2.0090900821884397, "learning_rate": 9.616250952179586e-07, "loss": 0.0745, "step": 7010 }, { "epoch": 1.5952218430034129, "grad_norm": 0.8477768247832146, "learning_rate": 9.615498077904865e-07, "loss": 0.0171, "step": 7011 }, { "epoch": 1.5954493742889646, "grad_norm": 1.319320154301809, "learning_rate": 9.614745134845518e-07, "loss": 0.0162, "step": 7012 }, { "epoch": 1.5956769055745164, "grad_norm": 1.1420941610839186, "learning_rate": 9.613992123016948e-07, "loss": 0.019, "step": 7013 }, { "epoch": 1.595904436860068, "grad_norm": 0.9134071733158048, "learning_rate": 9.61323904243454e-07, "loss": 0.0152, "step": 7014 }, { "epoch": 1.5961319681456199, "grad_norm": 0.7303097608530996, "learning_rate": 9.612485893113682e-07, "loss": 0.0278, "step": 7015 }, { "epoch": 1.5963594994311718, "grad_norm": 1.0612669762930091, "learning_rate": 9.611732675069773e-07, "loss": 0.0261, "step": 7016 }, { "epoch": 1.5965870307167236, "grad_norm": 1.3408790033343598, "learning_rate": 9.610979388318206e-07, "loss": 0.0209, "step": 7017 }, { "epoch": 1.5968145620022753, "grad_norm": 0.7958312148744503, "learning_rate": 9.610226032874374e-07, "loss": 0.0143, "step": 7018 }, { "epoch": 1.597042093287827, "grad_norm": 0.937747305014374, "learning_rate": 9.609472608753676e-07, "loss": 0.028, "step": 7019 }, { "epoch": 1.5972696245733788, "grad_norm": 1.8013387061820019, "learning_rate": 9.60871911597151e-07, "loss": 0.0424, "step": 7020 }, { "epoch": 1.5974971558589306, "grad_norm": 0.7645277826475468, "learning_rate": 9.607965554543276e-07, "loss": 0.0296, "step": 7021 }, { "epoch": 1.5977246871444823, "grad_norm": 3.9610847778692526, "learning_rate": 9.607211924484378e-07, "loss": 0.0255, "step": 7022 }, { "epoch": 1.597952218430034, "grad_norm": 0.8342016000959991, "learning_rate": 9.606458225810214e-07, "loss": 0.0212, "step": 7023 }, { "epoch": 1.5981797497155859, "grad_norm": 1.2153427389880582, "learning_rate": 9.605704458536193e-07, "loss": 0.0266, "step": 7024 }, { "epoch": 1.5984072810011376, "grad_norm": 1.2102515172387232, "learning_rate": 9.604950622677717e-07, "loss": 0.025, "step": 7025 }, { "epoch": 1.5986348122866896, "grad_norm": 0.8437385307802597, "learning_rate": 9.604196718250197e-07, "loss": 0.0233, "step": 7026 }, { "epoch": 1.5988623435722413, "grad_norm": 1.0488726295589468, "learning_rate": 9.603442745269036e-07, "loss": 0.0252, "step": 7027 }, { "epoch": 1.599089874857793, "grad_norm": 1.6157731581386559, "learning_rate": 9.602688703749648e-07, "loss": 0.043, "step": 7028 }, { "epoch": 1.5993174061433448, "grad_norm": 1.303474155434243, "learning_rate": 9.601934593707444e-07, "loss": 0.0532, "step": 7029 }, { "epoch": 1.5995449374288966, "grad_norm": 1.2151510476102094, "learning_rate": 9.601180415157834e-07, "loss": 0.0337, "step": 7030 }, { "epoch": 1.5997724687144483, "grad_norm": 2.003854866210954, "learning_rate": 9.600426168116237e-07, "loss": 0.0616, "step": 7031 }, { "epoch": 1.6, "grad_norm": 1.1432974018636979, "learning_rate": 9.599671852598062e-07, "loss": 0.0266, "step": 7032 }, { "epoch": 1.6002275312855518, "grad_norm": 1.0901706676214284, "learning_rate": 9.59891746861873e-07, "loss": 0.0457, "step": 7033 }, { "epoch": 1.6004550625711036, "grad_norm": 1.1518684169841051, "learning_rate": 9.598163016193656e-07, "loss": 0.0343, "step": 7034 }, { "epoch": 1.6006825938566553, "grad_norm": 0.9541937097744977, "learning_rate": 9.597408495338266e-07, "loss": 0.0209, "step": 7035 }, { "epoch": 1.600910125142207, "grad_norm": 1.065605058803838, "learning_rate": 9.596653906067974e-07, "loss": 0.0174, "step": 7036 }, { "epoch": 1.6011376564277588, "grad_norm": 1.3338536559778928, "learning_rate": 9.595899248398209e-07, "loss": 0.0121, "step": 7037 }, { "epoch": 1.6013651877133106, "grad_norm": 1.0338937176005598, "learning_rate": 9.595144522344386e-07, "loss": 0.0312, "step": 7038 }, { "epoch": 1.6015927189988624, "grad_norm": 1.0835957785189174, "learning_rate": 9.594389727921937e-07, "loss": 0.0321, "step": 7039 }, { "epoch": 1.601820250284414, "grad_norm": 1.4114015950065997, "learning_rate": 9.593634865146286e-07, "loss": 0.0361, "step": 7040 }, { "epoch": 1.6020477815699659, "grad_norm": 0.8252106392926577, "learning_rate": 9.592879934032864e-07, "loss": 0.0237, "step": 7041 }, { "epoch": 1.6022753128555176, "grad_norm": 0.844534530767116, "learning_rate": 9.5921249345971e-07, "loss": 0.0202, "step": 7042 }, { "epoch": 1.6025028441410694, "grad_norm": 0.9928822413510855, "learning_rate": 9.59136986685442e-07, "loss": 0.0299, "step": 7043 }, { "epoch": 1.6027303754266211, "grad_norm": 0.9196169522806006, "learning_rate": 9.59061473082026e-07, "loss": 0.0214, "step": 7044 }, { "epoch": 1.6029579067121729, "grad_norm": 1.1755612158971747, "learning_rate": 9.589859526510053e-07, "loss": 0.0353, "step": 7045 }, { "epoch": 1.6031854379977246, "grad_norm": 1.148468492505787, "learning_rate": 9.589104253939233e-07, "loss": 0.028, "step": 7046 }, { "epoch": 1.6034129692832764, "grad_norm": 1.6602307360868302, "learning_rate": 9.588348913123238e-07, "loss": 0.0441, "step": 7047 }, { "epoch": 1.6036405005688281, "grad_norm": 0.6872308598203596, "learning_rate": 9.587593504077506e-07, "loss": 0.0098, "step": 7048 }, { "epoch": 1.6038680318543799, "grad_norm": 1.9397616045566994, "learning_rate": 9.586838026817475e-07, "loss": 0.044, "step": 7049 }, { "epoch": 1.6040955631399316, "grad_norm": 0.5914971934167476, "learning_rate": 9.586082481358587e-07, "loss": 0.0166, "step": 7050 }, { "epoch": 1.6043230944254834, "grad_norm": 1.1549598175673381, "learning_rate": 9.58532686771628e-07, "loss": 0.0387, "step": 7051 }, { "epoch": 1.6045506257110351, "grad_norm": 1.300805798462683, "learning_rate": 9.584571185906002e-07, "loss": 0.0419, "step": 7052 }, { "epoch": 1.6047781569965869, "grad_norm": 1.663258284565078, "learning_rate": 9.583815435943195e-07, "loss": 0.0545, "step": 7053 }, { "epoch": 1.6050056882821386, "grad_norm": 1.6346928610912306, "learning_rate": 9.583059617843306e-07, "loss": 0.0531, "step": 7054 }, { "epoch": 1.6052332195676906, "grad_norm": 1.3362888182930384, "learning_rate": 9.582303731621784e-07, "loss": 0.0339, "step": 7055 }, { "epoch": 1.6054607508532424, "grad_norm": 1.2524212164281763, "learning_rate": 9.581547777294076e-07, "loss": 0.037, "step": 7056 }, { "epoch": 1.605688282138794, "grad_norm": 1.1246757203173876, "learning_rate": 9.580791754875631e-07, "loss": 0.028, "step": 7057 }, { "epoch": 1.6059158134243459, "grad_norm": 1.333681435778776, "learning_rate": 9.580035664381905e-07, "loss": 0.0228, "step": 7058 }, { "epoch": 1.6061433447098976, "grad_norm": 0.9594094533322884, "learning_rate": 9.579279505828348e-07, "loss": 0.0143, "step": 7059 }, { "epoch": 1.6063708759954494, "grad_norm": 0.9563960168156239, "learning_rate": 9.578523279230414e-07, "loss": 0.0163, "step": 7060 }, { "epoch": 1.6065984072810011, "grad_norm": 5.9699340851931835, "learning_rate": 9.577766984603562e-07, "loss": 0.0241, "step": 7061 }, { "epoch": 1.6068259385665529, "grad_norm": 0.7339313234548795, "learning_rate": 9.577010621963249e-07, "loss": 0.0231, "step": 7062 }, { "epoch": 1.6070534698521046, "grad_norm": 0.6163762811246133, "learning_rate": 9.576254191324926e-07, "loss": 0.0111, "step": 7063 }, { "epoch": 1.6072810011376564, "grad_norm": 1.226228473815418, "learning_rate": 9.575497692704063e-07, "loss": 0.0258, "step": 7064 }, { "epoch": 1.6075085324232083, "grad_norm": 0.8877466161700153, "learning_rate": 9.574741126116118e-07, "loss": 0.0208, "step": 7065 }, { "epoch": 1.60773606370876, "grad_norm": 0.9889765681955124, "learning_rate": 9.573984491576554e-07, "loss": 0.0187, "step": 7066 }, { "epoch": 1.6079635949943119, "grad_norm": 1.0927095972515526, "learning_rate": 9.573227789100833e-07, "loss": 0.0284, "step": 7067 }, { "epoch": 1.6081911262798636, "grad_norm": 1.243753628953149, "learning_rate": 9.572471018704422e-07, "loss": 0.0313, "step": 7068 }, { "epoch": 1.6084186575654154, "grad_norm": 0.8906322800855065, "learning_rate": 9.57171418040279e-07, "loss": 0.0161, "step": 7069 }, { "epoch": 1.608646188850967, "grad_norm": 1.049183753999333, "learning_rate": 9.570957274211399e-07, "loss": 0.0159, "step": 7070 }, { "epoch": 1.6088737201365189, "grad_norm": 0.7870709759753043, "learning_rate": 9.570200300145727e-07, "loss": 0.0226, "step": 7071 }, { "epoch": 1.6091012514220706, "grad_norm": 0.9038235928408551, "learning_rate": 9.569443258221243e-07, "loss": 0.0271, "step": 7072 }, { "epoch": 1.6093287827076224, "grad_norm": 0.5708462387231178, "learning_rate": 9.568686148453412e-07, "loss": 0.01, "step": 7073 }, { "epoch": 1.6095563139931741, "grad_norm": 3.0375077188874386, "learning_rate": 9.56792897085772e-07, "loss": 0.0262, "step": 7074 }, { "epoch": 1.6097838452787259, "grad_norm": 1.0056015986896478, "learning_rate": 9.567171725449635e-07, "loss": 0.0212, "step": 7075 }, { "epoch": 1.6100113765642776, "grad_norm": 0.5035348315325101, "learning_rate": 9.566414412244635e-07, "loss": 0.0092, "step": 7076 }, { "epoch": 1.6102389078498294, "grad_norm": 0.7901614524633361, "learning_rate": 9.565657031258196e-07, "loss": 0.0181, "step": 7077 }, { "epoch": 1.6104664391353811, "grad_norm": 1.0025231926932794, "learning_rate": 9.564899582505802e-07, "loss": 0.0192, "step": 7078 }, { "epoch": 1.6106939704209329, "grad_norm": 1.5425227306548526, "learning_rate": 9.56414206600293e-07, "loss": 0.0264, "step": 7079 }, { "epoch": 1.6109215017064846, "grad_norm": 0.8753251618688311, "learning_rate": 9.563384481765064e-07, "loss": 0.0146, "step": 7080 }, { "epoch": 1.6111490329920364, "grad_norm": 1.6211547505794992, "learning_rate": 9.562626829807689e-07, "loss": 0.0337, "step": 7081 }, { "epoch": 1.6113765642775881, "grad_norm": 1.0293396736787777, "learning_rate": 9.561869110146288e-07, "loss": 0.0242, "step": 7082 }, { "epoch": 1.6116040955631399, "grad_norm": 1.170655178365064, "learning_rate": 9.561111322796346e-07, "loss": 0.0202, "step": 7083 }, { "epoch": 1.6118316268486916, "grad_norm": 0.9824361143419721, "learning_rate": 9.560353467773354e-07, "loss": 0.0191, "step": 7084 }, { "epoch": 1.6120591581342434, "grad_norm": 1.6512507172023276, "learning_rate": 9.5595955450928e-07, "loss": 0.0912, "step": 7085 }, { "epoch": 1.6122866894197951, "grad_norm": 1.0057053210926752, "learning_rate": 9.558837554770173e-07, "loss": 0.0117, "step": 7086 }, { "epoch": 1.612514220705347, "grad_norm": 1.152283256946704, "learning_rate": 9.558079496820965e-07, "loss": 0.028, "step": 7087 }, { "epoch": 1.6127417519908986, "grad_norm": 1.4855302834454076, "learning_rate": 9.557321371260675e-07, "loss": 0.0496, "step": 7088 }, { "epoch": 1.6129692832764504, "grad_norm": 0.614029539419703, "learning_rate": 9.55656317810479e-07, "loss": 0.0062, "step": 7089 }, { "epoch": 1.6131968145620021, "grad_norm": 1.2505747278089356, "learning_rate": 9.555804917368808e-07, "loss": 0.0318, "step": 7090 }, { "epoch": 1.613424345847554, "grad_norm": 0.48847611292584286, "learning_rate": 9.55504658906823e-07, "loss": 0.009, "step": 7091 }, { "epoch": 1.6136518771331056, "grad_norm": 1.2376858891100468, "learning_rate": 9.554288193218552e-07, "loss": 0.0173, "step": 7092 }, { "epoch": 1.6138794084186574, "grad_norm": 1.169463723123295, "learning_rate": 9.553529729835275e-07, "loss": 0.0211, "step": 7093 }, { "epoch": 1.6141069397042094, "grad_norm": 0.857640511419088, "learning_rate": 9.552771198933903e-07, "loss": 0.0288, "step": 7094 }, { "epoch": 1.6143344709897611, "grad_norm": 1.47380443051205, "learning_rate": 9.552012600529934e-07, "loss": 0.039, "step": 7095 }, { "epoch": 1.6145620022753129, "grad_norm": 0.6577467462472111, "learning_rate": 9.551253934638874e-07, "loss": 0.0161, "step": 7096 }, { "epoch": 1.6147895335608646, "grad_norm": 0.8489890237697779, "learning_rate": 9.550495201276231e-07, "loss": 0.0205, "step": 7097 }, { "epoch": 1.6150170648464164, "grad_norm": 0.9889782164153689, "learning_rate": 9.54973640045751e-07, "loss": 0.0237, "step": 7098 }, { "epoch": 1.6152445961319681, "grad_norm": 0.809890832374879, "learning_rate": 9.54897753219822e-07, "loss": 0.0291, "step": 7099 }, { "epoch": 1.6154721274175199, "grad_norm": 1.2855868112580302, "learning_rate": 9.548218596513871e-07, "loss": 0.0264, "step": 7100 }, { "epoch": 1.6156996587030716, "grad_norm": 1.212074283812276, "learning_rate": 9.547459593419975e-07, "loss": 0.025, "step": 7101 }, { "epoch": 1.6159271899886234, "grad_norm": 0.8920524690958506, "learning_rate": 9.546700522932042e-07, "loss": 0.0223, "step": 7102 }, { "epoch": 1.6161547212741754, "grad_norm": 1.4420267514584946, "learning_rate": 9.54594138506559e-07, "loss": 0.0249, "step": 7103 }, { "epoch": 1.6163822525597271, "grad_norm": 1.054995208097927, "learning_rate": 9.545182179836132e-07, "loss": 0.0195, "step": 7104 }, { "epoch": 1.6166097838452789, "grad_norm": 0.9679075593459778, "learning_rate": 9.544422907259185e-07, "loss": 0.0212, "step": 7105 }, { "epoch": 1.6168373151308306, "grad_norm": 1.397218645429241, "learning_rate": 9.543663567350267e-07, "loss": 0.0392, "step": 7106 }, { "epoch": 1.6170648464163824, "grad_norm": 1.2828433568502122, "learning_rate": 9.542904160124896e-07, "loss": 0.0249, "step": 7107 }, { "epoch": 1.6172923777019341, "grad_norm": 1.5322721263241048, "learning_rate": 9.542144685598598e-07, "loss": 0.0478, "step": 7108 }, { "epoch": 1.6175199089874859, "grad_norm": 1.2137972791553564, "learning_rate": 9.54138514378689e-07, "loss": 0.0375, "step": 7109 }, { "epoch": 1.6177474402730376, "grad_norm": 0.7890160927750831, "learning_rate": 9.540625534705297e-07, "loss": 0.0283, "step": 7110 }, { "epoch": 1.6179749715585894, "grad_norm": 0.985091132889504, "learning_rate": 9.539865858369347e-07, "loss": 0.0232, "step": 7111 }, { "epoch": 1.6182025028441411, "grad_norm": 1.9270292191386824, "learning_rate": 9.539106114794564e-07, "loss": 0.0349, "step": 7112 }, { "epoch": 1.6184300341296929, "grad_norm": 1.0921842788734426, "learning_rate": 9.538346303996472e-07, "loss": 0.0309, "step": 7113 }, { "epoch": 1.6186575654152446, "grad_norm": 1.5428410141330413, "learning_rate": 9.537586425990604e-07, "loss": 0.0376, "step": 7114 }, { "epoch": 1.6188850967007964, "grad_norm": 0.9541924274634735, "learning_rate": 9.536826480792493e-07, "loss": 0.0222, "step": 7115 }, { "epoch": 1.6191126279863481, "grad_norm": 0.6491782772626704, "learning_rate": 9.53606646841767e-07, "loss": 0.0117, "step": 7116 }, { "epoch": 1.6193401592719, "grad_norm": 0.7582067823854062, "learning_rate": 9.535306388881663e-07, "loss": 0.0115, "step": 7117 }, { "epoch": 1.6195676905574516, "grad_norm": 1.058048254765856, "learning_rate": 9.534546242200012e-07, "loss": 0.0288, "step": 7118 }, { "epoch": 1.6197952218430034, "grad_norm": 1.006371706098208, "learning_rate": 9.53378602838825e-07, "loss": 0.0251, "step": 7119 }, { "epoch": 1.6200227531285551, "grad_norm": 1.4533877255802528, "learning_rate": 9.533025747461916e-07, "loss": 0.0265, "step": 7120 }, { "epoch": 1.620250284414107, "grad_norm": 0.6692865346649786, "learning_rate": 9.532265399436549e-07, "loss": 0.0212, "step": 7121 }, { "epoch": 1.6204778156996587, "grad_norm": 0.7404732378891826, "learning_rate": 9.531504984327688e-07, "loss": 0.0244, "step": 7122 }, { "epoch": 1.6207053469852104, "grad_norm": 1.2920381960808738, "learning_rate": 9.530744502150874e-07, "loss": 0.038, "step": 7123 }, { "epoch": 1.6209328782707622, "grad_norm": 1.065581280470929, "learning_rate": 9.529983952921652e-07, "loss": 0.0133, "step": 7124 }, { "epoch": 1.621160409556314, "grad_norm": 1.0352421647641357, "learning_rate": 9.529223336655565e-07, "loss": 0.0148, "step": 7125 }, { "epoch": 1.6213879408418657, "grad_norm": 1.7347261921396229, "learning_rate": 9.528462653368158e-07, "loss": 0.0332, "step": 7126 }, { "epoch": 1.6216154721274174, "grad_norm": 3.9973664373381004, "learning_rate": 9.527701903074977e-07, "loss": 0.0268, "step": 7127 }, { "epoch": 1.6218430034129692, "grad_norm": 1.2707508978387, "learning_rate": 9.526941085791574e-07, "loss": 0.0144, "step": 7128 }, { "epoch": 1.622070534698521, "grad_norm": 1.3140699709212826, "learning_rate": 9.526180201533497e-07, "loss": 0.0481, "step": 7129 }, { "epoch": 1.6222980659840727, "grad_norm": 1.2337723938681724, "learning_rate": 9.525419250316295e-07, "loss": 0.0393, "step": 7130 }, { "epoch": 1.6225255972696244, "grad_norm": 0.9964400049733639, "learning_rate": 9.524658232155524e-07, "loss": 0.024, "step": 7131 }, { "epoch": 1.6227531285551762, "grad_norm": 1.707150409119038, "learning_rate": 9.523897147066735e-07, "loss": 0.0838, "step": 7132 }, { "epoch": 1.6229806598407281, "grad_norm": 1.8267390601418545, "learning_rate": 9.523135995065483e-07, "loss": 0.0496, "step": 7133 }, { "epoch": 1.62320819112628, "grad_norm": 1.1513425182735106, "learning_rate": 9.522374776167328e-07, "loss": 0.0184, "step": 7134 }, { "epoch": 1.6234357224118316, "grad_norm": 0.9185048401595257, "learning_rate": 9.521613490387824e-07, "loss": 0.0248, "step": 7135 }, { "epoch": 1.6236632536973834, "grad_norm": 0.8073827228532496, "learning_rate": 9.520852137742534e-07, "loss": 0.0101, "step": 7136 }, { "epoch": 1.6238907849829352, "grad_norm": 1.1657350043826, "learning_rate": 9.520090718247016e-07, "loss": 0.0196, "step": 7137 }, { "epoch": 1.624118316268487, "grad_norm": 1.216680110083079, "learning_rate": 9.519329231916831e-07, "loss": 0.0166, "step": 7138 }, { "epoch": 1.6243458475540387, "grad_norm": 0.8535660856425935, "learning_rate": 9.518567678767546e-07, "loss": 0.0175, "step": 7139 }, { "epoch": 1.6245733788395904, "grad_norm": 0.8711790356295614, "learning_rate": 9.51780605881472e-07, "loss": 0.0194, "step": 7140 }, { "epoch": 1.6248009101251422, "grad_norm": 0.9825901471843272, "learning_rate": 9.517044372073927e-07, "loss": 0.0329, "step": 7141 }, { "epoch": 1.6250284414106941, "grad_norm": 0.8661738676983474, "learning_rate": 9.516282618560727e-07, "loss": 0.0159, "step": 7142 }, { "epoch": 1.6252559726962459, "grad_norm": 1.4036723999735945, "learning_rate": 9.515520798290695e-07, "loss": 0.0349, "step": 7143 }, { "epoch": 1.6254835039817976, "grad_norm": 2.037341510060114, "learning_rate": 9.514758911279398e-07, "loss": 0.0468, "step": 7144 }, { "epoch": 1.6257110352673494, "grad_norm": 0.9408128263381598, "learning_rate": 9.513996957542407e-07, "loss": 0.0159, "step": 7145 }, { "epoch": 1.6259385665529011, "grad_norm": 0.8642664200516336, "learning_rate": 9.513234937095296e-07, "loss": 0.0171, "step": 7146 }, { "epoch": 1.626166097838453, "grad_norm": 0.6696172061661173, "learning_rate": 9.512472849953639e-07, "loss": 0.008, "step": 7147 }, { "epoch": 1.6263936291240046, "grad_norm": 1.0375083047536862, "learning_rate": 9.511710696133012e-07, "loss": 0.0301, "step": 7148 }, { "epoch": 1.6266211604095564, "grad_norm": 1.0584004449366777, "learning_rate": 9.510948475648993e-07, "loss": 0.0303, "step": 7149 }, { "epoch": 1.6268486916951082, "grad_norm": 1.3319455316451698, "learning_rate": 9.510186188517159e-07, "loss": 0.0252, "step": 7150 }, { "epoch": 1.62707622298066, "grad_norm": 0.8623683188578419, "learning_rate": 9.50942383475309e-07, "loss": 0.021, "step": 7151 }, { "epoch": 1.6273037542662117, "grad_norm": 0.8237163876539081, "learning_rate": 9.508661414372367e-07, "loss": 0.0187, "step": 7152 }, { "epoch": 1.6275312855517634, "grad_norm": 1.228002048633561, "learning_rate": 9.507898927390571e-07, "loss": 0.0269, "step": 7153 }, { "epoch": 1.6277588168373152, "grad_norm": 0.974390010062014, "learning_rate": 9.507136373823289e-07, "loss": 0.0239, "step": 7154 }, { "epoch": 1.627986348122867, "grad_norm": 0.9397725129892953, "learning_rate": 9.506373753686104e-07, "loss": 0.023, "step": 7155 }, { "epoch": 1.6282138794084187, "grad_norm": 0.7242842173267595, "learning_rate": 9.505611066994605e-07, "loss": 0.0181, "step": 7156 }, { "epoch": 1.6284414106939704, "grad_norm": 1.0535752948833204, "learning_rate": 9.504848313764375e-07, "loss": 0.0308, "step": 7157 }, { "epoch": 1.6286689419795222, "grad_norm": 0.6753903947752541, "learning_rate": 9.50408549401101e-07, "loss": 0.0213, "step": 7158 }, { "epoch": 1.628896473265074, "grad_norm": 0.6869344941943479, "learning_rate": 9.503322607750096e-07, "loss": 0.0177, "step": 7159 }, { "epoch": 1.6291240045506257, "grad_norm": 0.7178640822389143, "learning_rate": 9.502559654997226e-07, "loss": 0.0132, "step": 7160 }, { "epoch": 1.6293515358361774, "grad_norm": 0.7864947505397063, "learning_rate": 9.501796635767992e-07, "loss": 0.0173, "step": 7161 }, { "epoch": 1.6295790671217292, "grad_norm": 3.864576003209912, "learning_rate": 9.501033550077993e-07, "loss": 0.0368, "step": 7162 }, { "epoch": 1.629806598407281, "grad_norm": 1.2986294430212195, "learning_rate": 9.500270397942819e-07, "loss": 0.0478, "step": 7163 }, { "epoch": 1.6300341296928327, "grad_norm": 1.3721206434366422, "learning_rate": 9.499507179378072e-07, "loss": 0.0359, "step": 7164 }, { "epoch": 1.6302616609783844, "grad_norm": 1.316378474967445, "learning_rate": 9.498743894399348e-07, "loss": 0.0303, "step": 7165 }, { "epoch": 1.6304891922639362, "grad_norm": 0.8738319121581792, "learning_rate": 9.497980543022251e-07, "loss": 0.0132, "step": 7166 }, { "epoch": 1.630716723549488, "grad_norm": 0.608916362735345, "learning_rate": 9.497217125262378e-07, "loss": 0.0151, "step": 7167 }, { "epoch": 1.6309442548350397, "grad_norm": 2.021086411000962, "learning_rate": 9.496453641135337e-07, "loss": 0.0522, "step": 7168 }, { "epoch": 1.6311717861205914, "grad_norm": 1.1154208604186049, "learning_rate": 9.495690090656728e-07, "loss": 0.0159, "step": 7169 }, { "epoch": 1.6313993174061432, "grad_norm": 0.9846084938710009, "learning_rate": 9.494926473842156e-07, "loss": 0.0165, "step": 7170 }, { "epoch": 1.631626848691695, "grad_norm": 0.5328708135848763, "learning_rate": 9.494162790707232e-07, "loss": 0.0068, "step": 7171 }, { "epoch": 1.631854379977247, "grad_norm": 0.6702708665810064, "learning_rate": 9.493399041267559e-07, "loss": 0.0098, "step": 7172 }, { "epoch": 1.6320819112627987, "grad_norm": 1.448780085449854, "learning_rate": 9.492635225538751e-07, "loss": 0.0299, "step": 7173 }, { "epoch": 1.6323094425483504, "grad_norm": 0.7311154868873958, "learning_rate": 9.491871343536418e-07, "loss": 0.0232, "step": 7174 }, { "epoch": 1.6325369738339022, "grad_norm": 0.8439545969021113, "learning_rate": 9.491107395276172e-07, "loss": 0.0165, "step": 7175 }, { "epoch": 1.632764505119454, "grad_norm": 1.1423895358734097, "learning_rate": 9.490343380773629e-07, "loss": 0.0248, "step": 7176 }, { "epoch": 1.6329920364050057, "grad_norm": 0.9038841951011269, "learning_rate": 9.489579300044396e-07, "loss": 0.015, "step": 7177 }, { "epoch": 1.6332195676905574, "grad_norm": 0.8016153369265806, "learning_rate": 9.4888151531041e-07, "loss": 0.0187, "step": 7178 }, { "epoch": 1.6334470989761092, "grad_norm": 0.8901417826041008, "learning_rate": 9.488050939968351e-07, "loss": 0.0258, "step": 7179 }, { "epoch": 1.633674630261661, "grad_norm": 0.8252096401919968, "learning_rate": 9.487286660652773e-07, "loss": 0.0103, "step": 7180 }, { "epoch": 1.633902161547213, "grad_norm": 1.5818607668923288, "learning_rate": 9.486522315172983e-07, "loss": 0.0392, "step": 7181 }, { "epoch": 1.6341296928327647, "grad_norm": 0.843929267173253, "learning_rate": 9.485757903544606e-07, "loss": 0.0193, "step": 7182 }, { "epoch": 1.6343572241183164, "grad_norm": 0.9775290008365384, "learning_rate": 9.484993425783262e-07, "loss": 0.0249, "step": 7183 }, { "epoch": 1.6345847554038682, "grad_norm": 1.3741978732780942, "learning_rate": 9.484228881904577e-07, "loss": 0.0445, "step": 7184 }, { "epoch": 1.63481228668942, "grad_norm": 1.494963474433962, "learning_rate": 9.483464271924177e-07, "loss": 0.0309, "step": 7185 }, { "epoch": 1.6350398179749717, "grad_norm": 0.9195283327097106, "learning_rate": 9.482699595857689e-07, "loss": 0.0197, "step": 7186 }, { "epoch": 1.6352673492605234, "grad_norm": 0.819552193594169, "learning_rate": 9.481934853720742e-07, "loss": 0.0136, "step": 7187 }, { "epoch": 1.6354948805460752, "grad_norm": 1.3681250341696483, "learning_rate": 9.481170045528968e-07, "loss": 0.0309, "step": 7188 }, { "epoch": 1.635722411831627, "grad_norm": 1.0838732709720864, "learning_rate": 9.480405171297992e-07, "loss": 0.0183, "step": 7189 }, { "epoch": 1.6359499431171787, "grad_norm": 0.8589492659489036, "learning_rate": 9.479640231043453e-07, "loss": 0.0283, "step": 7190 }, { "epoch": 1.6361774744027304, "grad_norm": 0.4389346454173033, "learning_rate": 9.47887522478098e-07, "loss": 0.0107, "step": 7191 }, { "epoch": 1.6364050056882822, "grad_norm": 1.5076520306070575, "learning_rate": 9.478110152526212e-07, "loss": 0.0318, "step": 7192 }, { "epoch": 1.636632536973834, "grad_norm": 0.8666999378509156, "learning_rate": 9.477345014294787e-07, "loss": 0.0343, "step": 7193 }, { "epoch": 1.6368600682593857, "grad_norm": 1.4057039119124, "learning_rate": 9.476579810102337e-07, "loss": 0.0308, "step": 7194 }, { "epoch": 1.6370875995449374, "grad_norm": 0.9295417198301341, "learning_rate": 9.475814539964506e-07, "loss": 0.0146, "step": 7195 }, { "epoch": 1.6373151308304892, "grad_norm": 1.7038654584980972, "learning_rate": 9.475049203896934e-07, "loss": 0.0385, "step": 7196 }, { "epoch": 1.637542662116041, "grad_norm": 0.8551699103163861, "learning_rate": 9.474283801915262e-07, "loss": 0.0181, "step": 7197 }, { "epoch": 1.6377701934015927, "grad_norm": 0.733499182783048, "learning_rate": 9.473518334035134e-07, "loss": 0.0141, "step": 7198 }, { "epoch": 1.6379977246871444, "grad_norm": 1.8245741661840873, "learning_rate": 9.472752800272194e-07, "loss": 0.0502, "step": 7199 }, { "epoch": 1.6382252559726962, "grad_norm": 1.7101516018547451, "learning_rate": 9.471987200642093e-07, "loss": 0.0461, "step": 7200 }, { "epoch": 1.638452787258248, "grad_norm": 0.7019785797323762, "learning_rate": 9.471221535160471e-07, "loss": 0.013, "step": 7201 }, { "epoch": 1.6386803185437997, "grad_norm": 1.2558910970344273, "learning_rate": 9.470455803842982e-07, "loss": 0.029, "step": 7202 }, { "epoch": 1.6389078498293514, "grad_norm": 1.521689503580041, "learning_rate": 9.469690006705274e-07, "loss": 0.0158, "step": 7203 }, { "epoch": 1.6391353811149032, "grad_norm": 1.334570302964857, "learning_rate": 9.468924143762996e-07, "loss": 0.0271, "step": 7204 }, { "epoch": 1.639362912400455, "grad_norm": 0.718253019493209, "learning_rate": 9.468158215031805e-07, "loss": 0.0142, "step": 7205 }, { "epoch": 1.6395904436860067, "grad_norm": 1.066686510907017, "learning_rate": 9.467392220527358e-07, "loss": 0.0289, "step": 7206 }, { "epoch": 1.6398179749715585, "grad_norm": 1.0683098954831638, "learning_rate": 9.4666261602653e-07, "loss": 0.0362, "step": 7207 }, { "epoch": 1.6400455062571102, "grad_norm": 1.8077047968568642, "learning_rate": 9.465860034261298e-07, "loss": 0.026, "step": 7208 }, { "epoch": 1.640273037542662, "grad_norm": 1.1472184148194302, "learning_rate": 9.465093842531007e-07, "loss": 0.0234, "step": 7209 }, { "epoch": 1.640500568828214, "grad_norm": 0.8276394924288174, "learning_rate": 9.464327585090084e-07, "loss": 0.0125, "step": 7210 }, { "epoch": 1.6407281001137657, "grad_norm": 1.301523612966525, "learning_rate": 9.463561261954192e-07, "loss": 0.0398, "step": 7211 }, { "epoch": 1.6409556313993174, "grad_norm": 0.8836337063456542, "learning_rate": 9.462794873138995e-07, "loss": 0.0209, "step": 7212 }, { "epoch": 1.6411831626848692, "grad_norm": 1.0208628010333278, "learning_rate": 9.462028418660155e-07, "loss": 0.0331, "step": 7213 }, { "epoch": 1.641410693970421, "grad_norm": 0.8560244593909649, "learning_rate": 9.461261898533333e-07, "loss": 0.0181, "step": 7214 }, { "epoch": 1.6416382252559727, "grad_norm": 0.8831773567982883, "learning_rate": 9.460495312774203e-07, "loss": 0.0168, "step": 7215 }, { "epoch": 1.6418657565415244, "grad_norm": 1.4094359738334923, "learning_rate": 9.459728661398427e-07, "loss": 0.0321, "step": 7216 }, { "epoch": 1.6420932878270762, "grad_norm": 1.2053767582653305, "learning_rate": 9.458961944421674e-07, "loss": 0.026, "step": 7217 }, { "epoch": 1.642320819112628, "grad_norm": 0.9548072481260151, "learning_rate": 9.458195161859618e-07, "loss": 0.0164, "step": 7218 }, { "epoch": 1.6425483503981797, "grad_norm": 1.0144085210237268, "learning_rate": 9.457428313727927e-07, "loss": 0.0167, "step": 7219 }, { "epoch": 1.6427758816837317, "grad_norm": 0.9111105741597719, "learning_rate": 9.456661400042278e-07, "loss": 0.0232, "step": 7220 }, { "epoch": 1.6430034129692834, "grad_norm": 1.2223424085257393, "learning_rate": 9.455894420818339e-07, "loss": 0.0372, "step": 7221 }, { "epoch": 1.6432309442548352, "grad_norm": 0.9476964903869362, "learning_rate": 9.455127376071791e-07, "loss": 0.0312, "step": 7222 }, { "epoch": 1.643458475540387, "grad_norm": 1.9505254497792965, "learning_rate": 9.454360265818309e-07, "loss": 0.0434, "step": 7223 }, { "epoch": 1.6436860068259387, "grad_norm": 0.5165376819358042, "learning_rate": 9.453593090073571e-07, "loss": 0.0096, "step": 7224 }, { "epoch": 1.6439135381114904, "grad_norm": 0.9526396496896132, "learning_rate": 9.45282584885326e-07, "loss": 0.0214, "step": 7225 }, { "epoch": 1.6441410693970422, "grad_norm": 0.9178268969108745, "learning_rate": 9.452058542173054e-07, "loss": 0.0165, "step": 7226 }, { "epoch": 1.644368600682594, "grad_norm": 1.1974894992771798, "learning_rate": 9.451291170048632e-07, "loss": 0.0297, "step": 7227 }, { "epoch": 1.6445961319681457, "grad_norm": 1.5678130163290367, "learning_rate": 9.450523732495684e-07, "loss": 0.0464, "step": 7228 }, { "epoch": 1.6448236632536974, "grad_norm": 1.4468718707240071, "learning_rate": 9.449756229529893e-07, "loss": 0.0431, "step": 7229 }, { "epoch": 1.6450511945392492, "grad_norm": 1.6752861254777147, "learning_rate": 9.448988661166944e-07, "loss": 0.0437, "step": 7230 }, { "epoch": 1.645278725824801, "grad_norm": 1.17485711424526, "learning_rate": 9.448221027422525e-07, "loss": 0.0181, "step": 7231 }, { "epoch": 1.6455062571103527, "grad_norm": 1.1092110440467429, "learning_rate": 9.447453328312325e-07, "loss": 0.0199, "step": 7232 }, { "epoch": 1.6457337883959045, "grad_norm": 1.4139573034619977, "learning_rate": 9.446685563852037e-07, "loss": 0.0596, "step": 7233 }, { "epoch": 1.6459613196814562, "grad_norm": 1.2088953201451433, "learning_rate": 9.445917734057349e-07, "loss": 0.0287, "step": 7234 }, { "epoch": 1.646188850967008, "grad_norm": 0.7540427001183505, "learning_rate": 9.445149838943955e-07, "loss": 0.0102, "step": 7235 }, { "epoch": 1.6464163822525597, "grad_norm": 1.241840511545734, "learning_rate": 9.44438187852755e-07, "loss": 0.0251, "step": 7236 }, { "epoch": 1.6466439135381115, "grad_norm": 1.290037885762213, "learning_rate": 9.443613852823832e-07, "loss": 0.0261, "step": 7237 }, { "epoch": 1.6468714448236632, "grad_norm": 0.7588594864777832, "learning_rate": 9.442845761848493e-07, "loss": 0.0163, "step": 7238 }, { "epoch": 1.647098976109215, "grad_norm": 1.0104778694363679, "learning_rate": 9.442077605617236e-07, "loss": 0.026, "step": 7239 }, { "epoch": 1.6473265073947667, "grad_norm": 0.8587600369889241, "learning_rate": 9.441309384145758e-07, "loss": 0.0284, "step": 7240 }, { "epoch": 1.6475540386803185, "grad_norm": 1.9541419364169168, "learning_rate": 9.440541097449759e-07, "loss": 0.0459, "step": 7241 }, { "epoch": 1.6477815699658702, "grad_norm": 0.6698653743269256, "learning_rate": 9.439772745544945e-07, "loss": 0.0146, "step": 7242 }, { "epoch": 1.648009101251422, "grad_norm": 1.3702624178440999, "learning_rate": 9.439004328447019e-07, "loss": 0.0271, "step": 7243 }, { "epoch": 1.6482366325369737, "grad_norm": 0.9113341463128616, "learning_rate": 9.438235846171684e-07, "loss": 0.0201, "step": 7244 }, { "epoch": 1.6484641638225255, "grad_norm": 2.2293229554521536, "learning_rate": 9.437467298734646e-07, "loss": 0.0516, "step": 7245 }, { "epoch": 1.6486916951080772, "grad_norm": 1.2345944716664825, "learning_rate": 9.436698686151616e-07, "loss": 0.0433, "step": 7246 }, { "epoch": 1.648919226393629, "grad_norm": 1.026762857151891, "learning_rate": 9.435930008438299e-07, "loss": 0.0249, "step": 7247 }, { "epoch": 1.6491467576791807, "grad_norm": 2.006823130023674, "learning_rate": 9.435161265610407e-07, "loss": 0.0533, "step": 7248 }, { "epoch": 1.6493742889647327, "grad_norm": 0.9398804691129719, "learning_rate": 9.434392457683653e-07, "loss": 0.0194, "step": 7249 }, { "epoch": 1.6496018202502845, "grad_norm": 0.9516028765806651, "learning_rate": 9.433623584673751e-07, "loss": 0.0172, "step": 7250 }, { "epoch": 1.6498293515358362, "grad_norm": 1.2348276563947835, "learning_rate": 9.432854646596412e-07, "loss": 0.028, "step": 7251 }, { "epoch": 1.650056882821388, "grad_norm": 1.1645809136436136, "learning_rate": 9.432085643467352e-07, "loss": 0.0327, "step": 7252 }, { "epoch": 1.6502844141069397, "grad_norm": 0.9654120827213446, "learning_rate": 9.43131657530229e-07, "loss": 0.0279, "step": 7253 }, { "epoch": 1.6505119453924915, "grad_norm": 1.6599070006924297, "learning_rate": 9.430547442116944e-07, "loss": 0.0255, "step": 7254 }, { "epoch": 1.6507394766780432, "grad_norm": 1.3574100186809808, "learning_rate": 9.429778243927031e-07, "loss": 0.0319, "step": 7255 }, { "epoch": 1.650967007963595, "grad_norm": 0.8311506286233733, "learning_rate": 9.429008980748279e-07, "loss": 0.0174, "step": 7256 }, { "epoch": 1.6511945392491467, "grad_norm": 1.1772735396269376, "learning_rate": 9.428239652596402e-07, "loss": 0.0208, "step": 7257 }, { "epoch": 1.6514220705346985, "grad_norm": 1.2001797242913066, "learning_rate": 9.427470259487127e-07, "loss": 0.0211, "step": 7258 }, { "epoch": 1.6516496018202504, "grad_norm": 0.9282059355299311, "learning_rate": 9.426700801436181e-07, "loss": 0.0173, "step": 7259 }, { "epoch": 1.6518771331058022, "grad_norm": 1.5594692564466364, "learning_rate": 9.425931278459287e-07, "loss": 0.0325, "step": 7260 }, { "epoch": 1.652104664391354, "grad_norm": 0.7983223497891204, "learning_rate": 9.425161690572174e-07, "loss": 0.0221, "step": 7261 }, { "epoch": 1.6523321956769057, "grad_norm": 0.7429757690165122, "learning_rate": 9.42439203779057e-07, "loss": 0.0181, "step": 7262 }, { "epoch": 1.6525597269624575, "grad_norm": 0.6184275747393381, "learning_rate": 9.42362232013021e-07, "loss": 0.0101, "step": 7263 }, { "epoch": 1.6527872582480092, "grad_norm": 0.8345316604506051, "learning_rate": 9.422852537606819e-07, "loss": 0.0171, "step": 7264 }, { "epoch": 1.653014789533561, "grad_norm": 1.211708377820841, "learning_rate": 9.422082690236134e-07, "loss": 0.0391, "step": 7265 }, { "epoch": 1.6532423208191127, "grad_norm": 0.9833647618604962, "learning_rate": 9.421312778033889e-07, "loss": 0.0255, "step": 7266 }, { "epoch": 1.6534698521046645, "grad_norm": 0.6817384084220871, "learning_rate": 9.420542801015817e-07, "loss": 0.0113, "step": 7267 }, { "epoch": 1.6536973833902162, "grad_norm": 0.7215860417309474, "learning_rate": 9.419772759197656e-07, "loss": 0.0136, "step": 7268 }, { "epoch": 1.653924914675768, "grad_norm": 1.638819919411159, "learning_rate": 9.41900265259515e-07, "loss": 0.0288, "step": 7269 }, { "epoch": 1.6541524459613197, "grad_norm": 1.4490962654168988, "learning_rate": 9.418232481224029e-07, "loss": 0.0174, "step": 7270 }, { "epoch": 1.6543799772468715, "grad_norm": 0.8905064462620383, "learning_rate": 9.417462245100038e-07, "loss": 0.0317, "step": 7271 }, { "epoch": 1.6546075085324232, "grad_norm": 1.6835820308875613, "learning_rate": 9.416691944238922e-07, "loss": 0.0232, "step": 7272 }, { "epoch": 1.654835039817975, "grad_norm": 1.1362565076308901, "learning_rate": 9.415921578656422e-07, "loss": 0.0204, "step": 7273 }, { "epoch": 1.6550625711035267, "grad_norm": 0.8853854118859108, "learning_rate": 9.415151148368282e-07, "loss": 0.0178, "step": 7274 }, { "epoch": 1.6552901023890785, "grad_norm": 1.4527616622652773, "learning_rate": 9.41438065339025e-07, "loss": 0.0324, "step": 7275 }, { "epoch": 1.6555176336746302, "grad_norm": 1.7095898173356143, "learning_rate": 9.413610093738072e-07, "loss": 0.0593, "step": 7276 }, { "epoch": 1.655745164960182, "grad_norm": 1.983306213333864, "learning_rate": 9.412839469427498e-07, "loss": 0.0194, "step": 7277 }, { "epoch": 1.6559726962457337, "grad_norm": 0.8860075841323366, "learning_rate": 9.412068780474277e-07, "loss": 0.0176, "step": 7278 }, { "epoch": 1.6562002275312855, "grad_norm": 1.241390628166181, "learning_rate": 9.411298026894161e-07, "loss": 0.0347, "step": 7279 }, { "epoch": 1.6564277588168372, "grad_norm": 1.6842602052705251, "learning_rate": 9.410527208702905e-07, "loss": 0.0421, "step": 7280 }, { "epoch": 1.656655290102389, "grad_norm": 0.750562018500017, "learning_rate": 9.409756325916259e-07, "loss": 0.0059, "step": 7281 }, { "epoch": 1.6568828213879407, "grad_norm": 1.815823404100124, "learning_rate": 9.408985378549981e-07, "loss": 0.0168, "step": 7282 }, { "epoch": 1.6571103526734925, "grad_norm": 0.7503514595870169, "learning_rate": 9.408214366619829e-07, "loss": 0.0124, "step": 7283 }, { "epoch": 1.6573378839590442, "grad_norm": 2.24763699227768, "learning_rate": 9.407443290141557e-07, "loss": 0.0377, "step": 7284 }, { "epoch": 1.657565415244596, "grad_norm": 1.0037578814814987, "learning_rate": 9.406672149130928e-07, "loss": 0.0243, "step": 7285 }, { "epoch": 1.6577929465301477, "grad_norm": 1.4239295877989984, "learning_rate": 9.405900943603702e-07, "loss": 0.0293, "step": 7286 }, { "epoch": 1.6580204778156995, "grad_norm": 1.4300072807272197, "learning_rate": 9.405129673575639e-07, "loss": 0.0236, "step": 7287 }, { "epoch": 1.6582480091012515, "grad_norm": 1.1491851444657144, "learning_rate": 9.404358339062505e-07, "loss": 0.0288, "step": 7288 }, { "epoch": 1.6584755403868032, "grad_norm": 0.7516080758836439, "learning_rate": 9.403586940080063e-07, "loss": 0.0144, "step": 7289 }, { "epoch": 1.658703071672355, "grad_norm": 1.530757840694647, "learning_rate": 9.40281547664408e-07, "loss": 0.0397, "step": 7290 }, { "epoch": 1.6589306029579067, "grad_norm": 1.1643602793634618, "learning_rate": 9.402043948770321e-07, "loss": 0.028, "step": 7291 }, { "epoch": 1.6591581342434585, "grad_norm": 1.0627327234680397, "learning_rate": 9.401272356474557e-07, "loss": 0.02, "step": 7292 }, { "epoch": 1.6593856655290102, "grad_norm": 1.4900034307204348, "learning_rate": 9.400500699772558e-07, "loss": 0.0311, "step": 7293 }, { "epoch": 1.659613196814562, "grad_norm": 1.0255614989109871, "learning_rate": 9.399728978680094e-07, "loss": 0.0169, "step": 7294 }, { "epoch": 1.6598407281001137, "grad_norm": 1.9785969932383063, "learning_rate": 9.39895719321294e-07, "loss": 0.0473, "step": 7295 }, { "epoch": 1.6600682593856655, "grad_norm": 1.7615173107010964, "learning_rate": 9.398185343386866e-07, "loss": 0.0247, "step": 7296 }, { "epoch": 1.6602957906712172, "grad_norm": 1.1723530953451027, "learning_rate": 9.39741342921765e-07, "loss": 0.0554, "step": 7297 }, { "epoch": 1.6605233219567692, "grad_norm": 0.7436297955855727, "learning_rate": 9.396641450721067e-07, "loss": 0.0194, "step": 7298 }, { "epoch": 1.660750853242321, "grad_norm": 1.4081310717365318, "learning_rate": 9.395869407912897e-07, "loss": 0.0569, "step": 7299 }, { "epoch": 1.6609783845278727, "grad_norm": 0.894628689932518, "learning_rate": 9.395097300808916e-07, "loss": 0.0159, "step": 7300 }, { "epoch": 1.6612059158134245, "grad_norm": 1.372321476496526, "learning_rate": 9.394325129424906e-07, "loss": 0.0302, "step": 7301 }, { "epoch": 1.6614334470989762, "grad_norm": 1.3718759092878112, "learning_rate": 9.39355289377665e-07, "loss": 0.0438, "step": 7302 }, { "epoch": 1.661660978384528, "grad_norm": 0.7459997929216897, "learning_rate": 9.392780593879932e-07, "loss": 0.014, "step": 7303 }, { "epoch": 1.6618885096700797, "grad_norm": 1.247027911519019, "learning_rate": 9.392008229750533e-07, "loss": 0.0262, "step": 7304 }, { "epoch": 1.6621160409556315, "grad_norm": 0.7780135059733467, "learning_rate": 9.391235801404236e-07, "loss": 0.025, "step": 7305 }, { "epoch": 1.6623435722411832, "grad_norm": 1.2318547883057585, "learning_rate": 9.390463308856837e-07, "loss": 0.0457, "step": 7306 }, { "epoch": 1.662571103526735, "grad_norm": 1.0879174466911097, "learning_rate": 9.389690752124118e-07, "loss": 0.0241, "step": 7307 }, { "epoch": 1.6627986348122867, "grad_norm": 0.7759080705210638, "learning_rate": 9.388918131221869e-07, "loss": 0.0128, "step": 7308 }, { "epoch": 1.6630261660978385, "grad_norm": 0.8826852994061488, "learning_rate": 9.388145446165884e-07, "loss": 0.0182, "step": 7309 }, { "epoch": 1.6632536973833902, "grad_norm": 1.1297233571426981, "learning_rate": 9.387372696971952e-07, "loss": 0.0199, "step": 7310 }, { "epoch": 1.663481228668942, "grad_norm": 0.8544651904661695, "learning_rate": 9.386599883655869e-07, "loss": 0.0221, "step": 7311 }, { "epoch": 1.6637087599544937, "grad_norm": 0.9358707418174771, "learning_rate": 9.385827006233426e-07, "loss": 0.0198, "step": 7312 }, { "epoch": 1.6639362912400455, "grad_norm": 1.1977498117294199, "learning_rate": 9.385054064720425e-07, "loss": 0.0239, "step": 7313 }, { "epoch": 1.6641638225255972, "grad_norm": 0.7773318647986129, "learning_rate": 9.38428105913266e-07, "loss": 0.0172, "step": 7314 }, { "epoch": 1.664391353811149, "grad_norm": 0.9321658881754474, "learning_rate": 9.38350798948593e-07, "loss": 0.0264, "step": 7315 }, { "epoch": 1.6646188850967008, "grad_norm": 0.8640475281006017, "learning_rate": 9.382734855796036e-07, "loss": 0.0143, "step": 7316 }, { "epoch": 1.6648464163822525, "grad_norm": 1.0548381808425786, "learning_rate": 9.381961658078778e-07, "loss": 0.0281, "step": 7317 }, { "epoch": 1.6650739476678043, "grad_norm": 8.927252731821625, "learning_rate": 9.381188396349958e-07, "loss": 0.1749, "step": 7318 }, { "epoch": 1.665301478953356, "grad_norm": 1.344358077736511, "learning_rate": 9.380415070625384e-07, "loss": 0.0198, "step": 7319 }, { "epoch": 1.6655290102389078, "grad_norm": 0.9360614628382258, "learning_rate": 9.379641680920859e-07, "loss": 0.0305, "step": 7320 }, { "epoch": 1.6657565415244595, "grad_norm": 1.0008530296862048, "learning_rate": 9.378868227252188e-07, "loss": 0.0316, "step": 7321 }, { "epoch": 1.6659840728100113, "grad_norm": 1.0104338772741304, "learning_rate": 9.378094709635183e-07, "loss": 0.0222, "step": 7322 }, { "epoch": 1.666211604095563, "grad_norm": 1.1295029565277173, "learning_rate": 9.377321128085651e-07, "loss": 0.0216, "step": 7323 }, { "epoch": 1.6664391353811148, "grad_norm": 1.4325128459640029, "learning_rate": 9.3765474826194e-07, "loss": 0.0407, "step": 7324 }, { "epoch": 1.6666666666666665, "grad_norm": 0.8717439451003935, "learning_rate": 9.375773773252248e-07, "loss": 0.0229, "step": 7325 }, { "epoch": 1.6668941979522183, "grad_norm": 1.5212675549475514, "learning_rate": 9.375000000000001e-07, "loss": 0.0272, "step": 7326 }, { "epoch": 1.6671217292377702, "grad_norm": 0.8629482674997104, "learning_rate": 9.374226162878478e-07, "loss": 0.0384, "step": 7327 }, { "epoch": 1.667349260523322, "grad_norm": 1.1348949806800248, "learning_rate": 9.373452261903495e-07, "loss": 0.024, "step": 7328 }, { "epoch": 1.6675767918088737, "grad_norm": 0.5711114628031161, "learning_rate": 9.372678297090867e-07, "loss": 0.0079, "step": 7329 }, { "epoch": 1.6678043230944255, "grad_norm": 1.4567308110129513, "learning_rate": 9.371904268456415e-07, "loss": 0.0331, "step": 7330 }, { "epoch": 1.6680318543799773, "grad_norm": 1.9138608329728288, "learning_rate": 9.371130176015958e-07, "loss": 0.0366, "step": 7331 }, { "epoch": 1.668259385665529, "grad_norm": 2.0571902981483845, "learning_rate": 9.370356019785315e-07, "loss": 0.0592, "step": 7332 }, { "epoch": 1.6684869169510808, "grad_norm": 0.7881569099172085, "learning_rate": 9.369581799780308e-07, "loss": 0.0176, "step": 7333 }, { "epoch": 1.6687144482366325, "grad_norm": 1.1900721132902, "learning_rate": 9.368807516016764e-07, "loss": 0.0328, "step": 7334 }, { "epoch": 1.6689419795221843, "grad_norm": 0.5535988858385974, "learning_rate": 9.368033168510506e-07, "loss": 0.0104, "step": 7335 }, { "epoch": 1.669169510807736, "grad_norm": 1.6854773824574556, "learning_rate": 9.36725875727736e-07, "loss": 0.0526, "step": 7336 }, { "epoch": 1.669397042093288, "grad_norm": 3.146539161288818, "learning_rate": 9.366484282333155e-07, "loss": 0.0663, "step": 7337 }, { "epoch": 1.6696245733788397, "grad_norm": 0.7589184408986651, "learning_rate": 9.365709743693718e-07, "loss": 0.0129, "step": 7338 }, { "epoch": 1.6698521046643915, "grad_norm": 1.0190198600786413, "learning_rate": 9.364935141374881e-07, "loss": 0.0205, "step": 7339 }, { "epoch": 1.6700796359499432, "grad_norm": 1.0383142874986837, "learning_rate": 9.364160475392472e-07, "loss": 0.0219, "step": 7340 }, { "epoch": 1.670307167235495, "grad_norm": 0.8229443778154666, "learning_rate": 9.363385745762329e-07, "loss": 0.0181, "step": 7341 }, { "epoch": 1.6705346985210467, "grad_norm": 0.7971442304086289, "learning_rate": 9.362610952500281e-07, "loss": 0.0131, "step": 7342 }, { "epoch": 1.6707622298065985, "grad_norm": 1.2641570859853692, "learning_rate": 9.361836095622167e-07, "loss": 0.0268, "step": 7343 }, { "epoch": 1.6709897610921502, "grad_norm": 1.5217910567435424, "learning_rate": 9.361061175143823e-07, "loss": 0.0465, "step": 7344 }, { "epoch": 1.671217292377702, "grad_norm": 1.3028329103521379, "learning_rate": 9.360286191081085e-07, "loss": 0.0354, "step": 7345 }, { "epoch": 1.6714448236632538, "grad_norm": 1.3445524633570782, "learning_rate": 9.359511143449794e-07, "loss": 0.0476, "step": 7346 }, { "epoch": 1.6716723549488055, "grad_norm": 0.6942290235140224, "learning_rate": 9.358736032265788e-07, "loss": 0.0179, "step": 7347 }, { "epoch": 1.6718998862343573, "grad_norm": 1.4997610759007964, "learning_rate": 9.357960857544912e-07, "loss": 0.0142, "step": 7348 }, { "epoch": 1.672127417519909, "grad_norm": 1.3145764575012553, "learning_rate": 9.357185619303009e-07, "loss": 0.023, "step": 7349 }, { "epoch": 1.6723549488054608, "grad_norm": 1.079215509391157, "learning_rate": 9.356410317555922e-07, "loss": 0.0315, "step": 7350 }, { "epoch": 1.6725824800910125, "grad_norm": 1.029924502412833, "learning_rate": 9.355634952319498e-07, "loss": 0.0183, "step": 7351 }, { "epoch": 1.6728100113765643, "grad_norm": 1.2293364013970451, "learning_rate": 9.354859523609583e-07, "loss": 0.0252, "step": 7352 }, { "epoch": 1.673037542662116, "grad_norm": 0.9643322571862788, "learning_rate": 9.354084031442027e-07, "loss": 0.0226, "step": 7353 }, { "epoch": 1.6732650739476678, "grad_norm": 1.57517031227939, "learning_rate": 9.353308475832676e-07, "loss": 0.0383, "step": 7354 }, { "epoch": 1.6734926052332195, "grad_norm": 0.9929231596528064, "learning_rate": 9.352532856797382e-07, "loss": 0.0227, "step": 7355 }, { "epoch": 1.6737201365187713, "grad_norm": 0.6182995644994113, "learning_rate": 9.351757174352e-07, "loss": 0.0106, "step": 7356 }, { "epoch": 1.673947667804323, "grad_norm": 1.3596248896377752, "learning_rate": 9.350981428512383e-07, "loss": 0.0203, "step": 7357 }, { "epoch": 1.6741751990898748, "grad_norm": 1.124354179944877, "learning_rate": 9.350205619294383e-07, "loss": 0.0285, "step": 7358 }, { "epoch": 1.6744027303754265, "grad_norm": 1.3116324776794714, "learning_rate": 9.349429746713859e-07, "loss": 0.0271, "step": 7359 }, { "epoch": 1.6746302616609783, "grad_norm": 1.3279079181685614, "learning_rate": 9.348653810786667e-07, "loss": 0.0402, "step": 7360 }, { "epoch": 1.67485779294653, "grad_norm": 1.265035440034809, "learning_rate": 9.347877811528666e-07, "loss": 0.0562, "step": 7361 }, { "epoch": 1.6750853242320818, "grad_norm": 1.6657212776082273, "learning_rate": 9.347101748955715e-07, "loss": 0.0492, "step": 7362 }, { "epoch": 1.6753128555176335, "grad_norm": 1.4213794447450105, "learning_rate": 9.346325623083679e-07, "loss": 0.0293, "step": 7363 }, { "epoch": 1.6755403868031853, "grad_norm": 1.7636702749688, "learning_rate": 9.345549433928416e-07, "loss": 0.035, "step": 7364 }, { "epoch": 1.675767918088737, "grad_norm": 1.589993461555999, "learning_rate": 9.344773181505793e-07, "loss": 0.0368, "step": 7365 }, { "epoch": 1.675995449374289, "grad_norm": 0.9830121986788878, "learning_rate": 9.343996865831673e-07, "loss": 0.0286, "step": 7366 }, { "epoch": 1.6762229806598408, "grad_norm": 1.110294495379522, "learning_rate": 9.343220486921924e-07, "loss": 0.0201, "step": 7367 }, { "epoch": 1.6764505119453925, "grad_norm": 0.9378938801043143, "learning_rate": 9.342444044792412e-07, "loss": 0.0161, "step": 7368 }, { "epoch": 1.6766780432309443, "grad_norm": 1.0705872722742766, "learning_rate": 9.341667539459006e-07, "loss": 0.0288, "step": 7369 }, { "epoch": 1.676905574516496, "grad_norm": 0.9448376066075745, "learning_rate": 9.340890970937583e-07, "loss": 0.0204, "step": 7370 }, { "epoch": 1.6771331058020478, "grad_norm": 0.5664806207314872, "learning_rate": 9.340114339244006e-07, "loss": 0.0095, "step": 7371 }, { "epoch": 1.6773606370875995, "grad_norm": 0.8796169735391105, "learning_rate": 9.33933764439415e-07, "loss": 0.0151, "step": 7372 }, { "epoch": 1.6775881683731513, "grad_norm": 1.1600519212694234, "learning_rate": 9.338560886403891e-07, "loss": 0.0406, "step": 7373 }, { "epoch": 1.677815699658703, "grad_norm": 1.0289543332326065, "learning_rate": 9.337784065289104e-07, "loss": 0.0193, "step": 7374 }, { "epoch": 1.6780432309442548, "grad_norm": 1.826700733249313, "learning_rate": 9.337007181065667e-07, "loss": 0.0701, "step": 7375 }, { "epoch": 1.6782707622298068, "grad_norm": 1.2835558531886389, "learning_rate": 9.336230233749456e-07, "loss": 0.0076, "step": 7376 }, { "epoch": 1.6784982935153585, "grad_norm": 0.7659024994689081, "learning_rate": 9.335453223356351e-07, "loss": 0.0114, "step": 7377 }, { "epoch": 1.6787258248009103, "grad_norm": 0.9334888188139624, "learning_rate": 9.334676149902233e-07, "loss": 0.018, "step": 7378 }, { "epoch": 1.678953356086462, "grad_norm": 0.9494022161647141, "learning_rate": 9.333899013402984e-07, "loss": 0.0234, "step": 7379 }, { "epoch": 1.6791808873720138, "grad_norm": 1.139412640514536, "learning_rate": 9.333121813874486e-07, "loss": 0.0264, "step": 7380 }, { "epoch": 1.6794084186575655, "grad_norm": 1.3475699049472754, "learning_rate": 9.332344551332626e-07, "loss": 0.0236, "step": 7381 }, { "epoch": 1.6796359499431173, "grad_norm": 1.1919998065647228, "learning_rate": 9.331567225793288e-07, "loss": 0.034, "step": 7382 }, { "epoch": 1.679863481228669, "grad_norm": 0.8936854196890182, "learning_rate": 9.330789837272359e-07, "loss": 0.0238, "step": 7383 }, { "epoch": 1.6800910125142208, "grad_norm": 1.846647841510941, "learning_rate": 9.330012385785729e-07, "loss": 0.0722, "step": 7384 }, { "epoch": 1.6803185437997725, "grad_norm": 1.193450876221038, "learning_rate": 9.329234871349285e-07, "loss": 0.036, "step": 7385 }, { "epoch": 1.6805460750853243, "grad_norm": 0.8992689459727948, "learning_rate": 9.328457293978921e-07, "loss": 0.0089, "step": 7386 }, { "epoch": 1.680773606370876, "grad_norm": 0.9963987302000527, "learning_rate": 9.327679653690527e-07, "loss": 0.0235, "step": 7387 }, { "epoch": 1.6810011376564278, "grad_norm": 1.4603172265236832, "learning_rate": 9.326901950499997e-07, "loss": 0.0596, "step": 7388 }, { "epoch": 1.6812286689419795, "grad_norm": 1.043900668728106, "learning_rate": 9.326124184423228e-07, "loss": 0.0163, "step": 7389 }, { "epoch": 1.6814562002275313, "grad_norm": 1.0382331801469895, "learning_rate": 9.325346355476114e-07, "loss": 0.0208, "step": 7390 }, { "epoch": 1.681683731513083, "grad_norm": 0.893778247022103, "learning_rate": 9.324568463674552e-07, "loss": 0.0251, "step": 7391 }, { "epoch": 1.6819112627986348, "grad_norm": 1.3602050853170236, "learning_rate": 9.323790509034441e-07, "loss": 0.0275, "step": 7392 }, { "epoch": 1.6821387940841865, "grad_norm": 1.0857184214021618, "learning_rate": 9.323012491571682e-07, "loss": 0.0191, "step": 7393 }, { "epoch": 1.6823663253697383, "grad_norm": 0.8532593287473691, "learning_rate": 9.322234411302175e-07, "loss": 0.0121, "step": 7394 }, { "epoch": 1.68259385665529, "grad_norm": 0.8830902866673185, "learning_rate": 9.321456268241824e-07, "loss": 0.0241, "step": 7395 }, { "epoch": 1.6828213879408418, "grad_norm": 0.7559806353099447, "learning_rate": 9.320678062406531e-07, "loss": 0.0076, "step": 7396 }, { "epoch": 1.6830489192263935, "grad_norm": 0.9228198263065078, "learning_rate": 9.319899793812204e-07, "loss": 0.0145, "step": 7397 }, { "epoch": 1.6832764505119453, "grad_norm": 1.1206750032487018, "learning_rate": 9.319121462474745e-07, "loss": 0.0285, "step": 7398 }, { "epoch": 1.683503981797497, "grad_norm": 0.9495217613214355, "learning_rate": 9.318343068410065e-07, "loss": 0.0201, "step": 7399 }, { "epoch": 1.6837315130830488, "grad_norm": 1.2258328125547402, "learning_rate": 9.317564611634072e-07, "loss": 0.025, "step": 7400 }, { "epoch": 1.6839590443686006, "grad_norm": 0.8280424203512745, "learning_rate": 9.316786092162678e-07, "loss": 0.0189, "step": 7401 }, { "epoch": 1.6841865756541523, "grad_norm": 1.1336003257642784, "learning_rate": 9.31600751001179e-07, "loss": 0.0225, "step": 7402 }, { "epoch": 1.684414106939704, "grad_norm": 1.69117863085864, "learning_rate": 9.315228865197326e-07, "loss": 0.0549, "step": 7403 }, { "epoch": 1.6846416382252558, "grad_norm": 1.110342505920422, "learning_rate": 9.314450157735197e-07, "loss": 0.0196, "step": 7404 }, { "epoch": 1.6848691695108078, "grad_norm": 68.47354332095117, "learning_rate": 9.313671387641318e-07, "loss": 1.134, "step": 7405 }, { "epoch": 1.6850967007963595, "grad_norm": 1.1485468681711508, "learning_rate": 9.312892554931608e-07, "loss": 0.0197, "step": 7406 }, { "epoch": 1.6853242320819113, "grad_norm": 1.3445765903343956, "learning_rate": 9.312113659621984e-07, "loss": 0.0476, "step": 7407 }, { "epoch": 1.685551763367463, "grad_norm": 1.3255216302851847, "learning_rate": 9.311334701728364e-07, "loss": 0.027, "step": 7408 }, { "epoch": 1.6857792946530148, "grad_norm": 0.883532434592329, "learning_rate": 9.310555681266668e-07, "loss": 0.0155, "step": 7409 }, { "epoch": 1.6860068259385665, "grad_norm": 0.8561859578890467, "learning_rate": 9.30977659825282e-07, "loss": 0.0175, "step": 7410 }, { "epoch": 1.6862343572241183, "grad_norm": 0.8447963217284506, "learning_rate": 9.308997452702743e-07, "loss": 0.0196, "step": 7411 }, { "epoch": 1.68646188850967, "grad_norm": 1.0538999067728367, "learning_rate": 9.308218244632358e-07, "loss": 0.0325, "step": 7412 }, { "epoch": 1.6866894197952218, "grad_norm": 1.1841066018440165, "learning_rate": 9.307438974057595e-07, "loss": 0.032, "step": 7413 }, { "epoch": 1.6869169510807736, "grad_norm": 1.1766023443596012, "learning_rate": 9.306659640994381e-07, "loss": 0.0242, "step": 7414 }, { "epoch": 1.6871444823663255, "grad_norm": 1.1353504481993184, "learning_rate": 9.305880245458637e-07, "loss": 0.0427, "step": 7415 }, { "epoch": 1.6873720136518773, "grad_norm": 0.9494360095379312, "learning_rate": 9.305100787466301e-07, "loss": 0.0199, "step": 7416 }, { "epoch": 1.687599544937429, "grad_norm": 1.1011922337725744, "learning_rate": 9.304321267033298e-07, "loss": 0.0101, "step": 7417 }, { "epoch": 1.6878270762229808, "grad_norm": 0.7887872994259693, "learning_rate": 9.303541684175563e-07, "loss": 0.0228, "step": 7418 }, { "epoch": 1.6880546075085325, "grad_norm": 1.071159027848962, "learning_rate": 9.302762038909028e-07, "loss": 0.0248, "step": 7419 }, { "epoch": 1.6882821387940843, "grad_norm": 0.9825876397165821, "learning_rate": 9.301982331249629e-07, "loss": 0.0169, "step": 7420 }, { "epoch": 1.688509670079636, "grad_norm": 0.8027691821074338, "learning_rate": 9.301202561213298e-07, "loss": 0.0212, "step": 7421 }, { "epoch": 1.6887372013651878, "grad_norm": 1.7674718194893417, "learning_rate": 9.300422728815976e-07, "loss": 0.0408, "step": 7422 }, { "epoch": 1.6889647326507395, "grad_norm": 1.0714477140147776, "learning_rate": 9.2996428340736e-07, "loss": 0.016, "step": 7423 }, { "epoch": 1.6891922639362913, "grad_norm": 0.9724404293673335, "learning_rate": 9.29886287700211e-07, "loss": 0.0421, "step": 7424 }, { "epoch": 1.689419795221843, "grad_norm": 0.7905940535995999, "learning_rate": 9.298082857617446e-07, "loss": 0.0192, "step": 7425 }, { "epoch": 1.6896473265073948, "grad_norm": 1.026772059663101, "learning_rate": 9.297302775935552e-07, "loss": 0.0206, "step": 7426 }, { "epoch": 1.6898748577929465, "grad_norm": 2.099313726248406, "learning_rate": 9.296522631972368e-07, "loss": 0.0546, "step": 7427 }, { "epoch": 1.6901023890784983, "grad_norm": 1.1080974714503837, "learning_rate": 9.295742425743842e-07, "loss": 0.0292, "step": 7428 }, { "epoch": 1.69032992036405, "grad_norm": 0.9493726656722671, "learning_rate": 9.294962157265918e-07, "loss": 0.0203, "step": 7429 }, { "epoch": 1.6905574516496018, "grad_norm": 0.6548379026825043, "learning_rate": 9.294181826554547e-07, "loss": 0.0122, "step": 7430 }, { "epoch": 1.6907849829351536, "grad_norm": 0.7572188450490555, "learning_rate": 9.29340143362567e-07, "loss": 0.0247, "step": 7431 }, { "epoch": 1.6910125142207053, "grad_norm": 1.2752020698204618, "learning_rate": 9.292620978495246e-07, "loss": 0.0319, "step": 7432 }, { "epoch": 1.691240045506257, "grad_norm": 1.4114210445705098, "learning_rate": 9.291840461179219e-07, "loss": 0.0457, "step": 7433 }, { "epoch": 1.6914675767918088, "grad_norm": 0.816540485123266, "learning_rate": 9.291059881693544e-07, "loss": 0.0135, "step": 7434 }, { "epoch": 1.6916951080773606, "grad_norm": 1.280629632454358, "learning_rate": 9.290279240054176e-07, "loss": 0.0228, "step": 7435 }, { "epoch": 1.6919226393629123, "grad_norm": 1.4406386686448849, "learning_rate": 9.289498536277066e-07, "loss": 0.0536, "step": 7436 }, { "epoch": 1.692150170648464, "grad_norm": 1.5606918794122948, "learning_rate": 9.288717770378172e-07, "loss": 0.0312, "step": 7437 }, { "epoch": 1.6923777019340158, "grad_norm": 1.2240851387654794, "learning_rate": 9.287936942373454e-07, "loss": 0.0286, "step": 7438 }, { "epoch": 1.6926052332195676, "grad_norm": 1.2141319640587955, "learning_rate": 9.287156052278869e-07, "loss": 0.0205, "step": 7439 }, { "epoch": 1.6928327645051193, "grad_norm": 1.0529105375576406, "learning_rate": 9.286375100110376e-07, "loss": 0.0239, "step": 7440 }, { "epoch": 1.693060295790671, "grad_norm": 1.1278735283615677, "learning_rate": 9.285594085883937e-07, "loss": 0.0151, "step": 7441 }, { "epoch": 1.6932878270762228, "grad_norm": 0.9651776105310846, "learning_rate": 9.284813009615512e-07, "loss": 0.0294, "step": 7442 }, { "epoch": 1.6935153583617746, "grad_norm": 1.3435219511382133, "learning_rate": 9.28403187132107e-07, "loss": 0.0446, "step": 7443 }, { "epoch": 1.6937428896473266, "grad_norm": 1.3393538999942238, "learning_rate": 9.28325067101657e-07, "loss": 0.0313, "step": 7444 }, { "epoch": 1.6939704209328783, "grad_norm": 1.2636248804704842, "learning_rate": 9.282469408717984e-07, "loss": 0.0351, "step": 7445 }, { "epoch": 1.69419795221843, "grad_norm": 0.7870749416554453, "learning_rate": 9.281688084441277e-07, "loss": 0.0214, "step": 7446 }, { "epoch": 1.6944254835039818, "grad_norm": 1.8762587310238628, "learning_rate": 9.280906698202417e-07, "loss": 0.0515, "step": 7447 }, { "epoch": 1.6946530147895336, "grad_norm": 1.6998422130770203, "learning_rate": 9.280125250017375e-07, "loss": 0.0239, "step": 7448 }, { "epoch": 1.6948805460750853, "grad_norm": 0.9027847291427934, "learning_rate": 9.279343739902122e-07, "loss": 0.017, "step": 7449 }, { "epoch": 1.695108077360637, "grad_norm": 1.1062164148509734, "learning_rate": 9.278562167872632e-07, "loss": 0.0235, "step": 7450 }, { "epoch": 1.6953356086461888, "grad_norm": 1.812784584203428, "learning_rate": 9.277780533944879e-07, "loss": 0.0443, "step": 7451 }, { "epoch": 1.6955631399317406, "grad_norm": 0.9038757591065284, "learning_rate": 9.276998838134834e-07, "loss": 0.0233, "step": 7452 }, { "epoch": 1.6957906712172923, "grad_norm": 0.9025973802036622, "learning_rate": 9.276217080458478e-07, "loss": 0.0189, "step": 7453 }, { "epoch": 1.6960182025028443, "grad_norm": 1.081066833196723, "learning_rate": 9.275435260931786e-07, "loss": 0.0291, "step": 7454 }, { "epoch": 1.696245733788396, "grad_norm": 1.4646992378969927, "learning_rate": 9.274653379570739e-07, "loss": 0.0387, "step": 7455 }, { "epoch": 1.6964732650739478, "grad_norm": 0.9392167588281297, "learning_rate": 9.273871436391315e-07, "loss": 0.0394, "step": 7456 }, { "epoch": 1.6967007963594996, "grad_norm": 1.3086290254658115, "learning_rate": 9.273089431409499e-07, "loss": 0.0305, "step": 7457 }, { "epoch": 1.6969283276450513, "grad_norm": 0.9762633653670589, "learning_rate": 9.272307364641271e-07, "loss": 0.0202, "step": 7458 }, { "epoch": 1.697155858930603, "grad_norm": 1.5684419166838992, "learning_rate": 9.271525236102614e-07, "loss": 0.0353, "step": 7459 }, { "epoch": 1.6973833902161548, "grad_norm": 1.3066854597715047, "learning_rate": 9.270743045809516e-07, "loss": 0.0173, "step": 7460 }, { "epoch": 1.6976109215017066, "grad_norm": 1.2901196273408886, "learning_rate": 9.269960793777963e-07, "loss": 0.0259, "step": 7461 }, { "epoch": 1.6978384527872583, "grad_norm": 1.4716393793331854, "learning_rate": 9.26917848002394e-07, "loss": 0.029, "step": 7462 }, { "epoch": 1.69806598407281, "grad_norm": 2.9483009434833374, "learning_rate": 9.26839610456344e-07, "loss": 0.02, "step": 7463 }, { "epoch": 1.6982935153583618, "grad_norm": 0.780802510505116, "learning_rate": 9.267613667412453e-07, "loss": 0.0226, "step": 7464 }, { "epoch": 1.6985210466439136, "grad_norm": 1.1866004688183827, "learning_rate": 9.266831168586968e-07, "loss": 0.0265, "step": 7465 }, { "epoch": 1.6987485779294653, "grad_norm": 0.7694346635219753, "learning_rate": 9.266048608102978e-07, "loss": 0.0202, "step": 7466 }, { "epoch": 1.698976109215017, "grad_norm": 1.106780796505893, "learning_rate": 9.265265985976478e-07, "loss": 0.0175, "step": 7467 }, { "epoch": 1.6992036405005688, "grad_norm": 0.8669229898039597, "learning_rate": 9.264483302223464e-07, "loss": 0.0207, "step": 7468 }, { "epoch": 1.6994311717861206, "grad_norm": 1.5196225362339568, "learning_rate": 9.263700556859931e-07, "loss": 0.0391, "step": 7469 }, { "epoch": 1.6996587030716723, "grad_norm": 0.813734490997272, "learning_rate": 9.26291774990188e-07, "loss": 0.0277, "step": 7470 }, { "epoch": 1.699886234357224, "grad_norm": 0.7735285752321128, "learning_rate": 9.262134881365307e-07, "loss": 0.0197, "step": 7471 }, { "epoch": 1.7001137656427758, "grad_norm": 1.4230702281863177, "learning_rate": 9.261351951266211e-07, "loss": 0.0292, "step": 7472 }, { "epoch": 1.7003412969283276, "grad_norm": 2.4934340707549993, "learning_rate": 9.2605689596206e-07, "loss": 0.065, "step": 7473 }, { "epoch": 1.7005688282138793, "grad_norm": 1.265713576588293, "learning_rate": 9.259785906444473e-07, "loss": 0.0451, "step": 7474 }, { "epoch": 1.700796359499431, "grad_norm": 1.107331460710875, "learning_rate": 9.259002791753832e-07, "loss": 0.031, "step": 7475 }, { "epoch": 1.7010238907849828, "grad_norm": 0.9656516950035509, "learning_rate": 9.258219615564684e-07, "loss": 0.0157, "step": 7476 }, { "epoch": 1.7012514220705346, "grad_norm": 1.6648471363780213, "learning_rate": 9.25743637789304e-07, "loss": 0.0159, "step": 7477 }, { "epoch": 1.7014789533560863, "grad_norm": 0.8136434066676951, "learning_rate": 9.256653078754903e-07, "loss": 0.025, "step": 7478 }, { "epoch": 1.701706484641638, "grad_norm": 1.729468058214187, "learning_rate": 9.255869718166281e-07, "loss": 0.0448, "step": 7479 }, { "epoch": 1.7019340159271898, "grad_norm": 1.747237104201101, "learning_rate": 9.255086296143189e-07, "loss": 0.0421, "step": 7480 }, { "epoch": 1.7021615472127416, "grad_norm": 1.1319667627658456, "learning_rate": 9.254302812701636e-07, "loss": 0.0203, "step": 7481 }, { "epoch": 1.7023890784982934, "grad_norm": 6.34869179504398, "learning_rate": 9.253519267857637e-07, "loss": 0.1211, "step": 7482 }, { "epoch": 1.7026166097838453, "grad_norm": 1.342839250597215, "learning_rate": 9.252735661627204e-07, "loss": 0.0301, "step": 7483 }, { "epoch": 1.702844141069397, "grad_norm": 1.1568654598416463, "learning_rate": 9.251951994026353e-07, "loss": 0.0202, "step": 7484 }, { "epoch": 1.7030716723549488, "grad_norm": 0.9603437591283284, "learning_rate": 9.251168265071101e-07, "loss": 0.0276, "step": 7485 }, { "epoch": 1.7032992036405006, "grad_norm": 1.1938581829579682, "learning_rate": 9.250384474777465e-07, "loss": 0.0221, "step": 7486 }, { "epoch": 1.7035267349260523, "grad_norm": 0.7456001958336463, "learning_rate": 9.249600623161467e-07, "loss": 0.0262, "step": 7487 }, { "epoch": 1.703754266211604, "grad_norm": 1.4700522344198281, "learning_rate": 9.248816710239125e-07, "loss": 0.0395, "step": 7488 }, { "epoch": 1.7039817974971558, "grad_norm": 1.1506393584869568, "learning_rate": 9.248032736026463e-07, "loss": 0.0233, "step": 7489 }, { "epoch": 1.7042093287827076, "grad_norm": 1.1195835539109666, "learning_rate": 9.247248700539502e-07, "loss": 0.0217, "step": 7490 }, { "epoch": 1.7044368600682593, "grad_norm": 2.465921051795379, "learning_rate": 9.246464603794266e-07, "loss": 0.0532, "step": 7491 }, { "epoch": 1.7046643913538113, "grad_norm": 1.3355635126606438, "learning_rate": 9.245680445806782e-07, "loss": 0.0516, "step": 7492 }, { "epoch": 1.704891922639363, "grad_norm": 0.9992091005260795, "learning_rate": 9.244896226593074e-07, "loss": 0.0284, "step": 7493 }, { "epoch": 1.7051194539249148, "grad_norm": 0.8019991772837796, "learning_rate": 9.244111946169173e-07, "loss": 0.0132, "step": 7494 }, { "epoch": 1.7053469852104666, "grad_norm": 1.0414066656735075, "learning_rate": 9.243327604551109e-07, "loss": 0.0203, "step": 7495 }, { "epoch": 1.7055745164960183, "grad_norm": 1.0467030740861185, "learning_rate": 9.242543201754908e-07, "loss": 0.0217, "step": 7496 }, { "epoch": 1.70580204778157, "grad_norm": 1.7898345976143868, "learning_rate": 9.241758737796608e-07, "loss": 0.0366, "step": 7497 }, { "epoch": 1.7060295790671218, "grad_norm": 1.4837502642765534, "learning_rate": 9.240974212692235e-07, "loss": 0.0316, "step": 7498 }, { "epoch": 1.7062571103526736, "grad_norm": 1.09132144334733, "learning_rate": 9.240189626457828e-07, "loss": 0.0389, "step": 7499 }, { "epoch": 1.7064846416382253, "grad_norm": 1.192777370323259, "learning_rate": 9.239404979109422e-07, "loss": 0.0171, "step": 7500 }, { "epoch": 1.706712172923777, "grad_norm": 1.4297979166462436, "learning_rate": 9.238620270663053e-07, "loss": 0.0213, "step": 7501 }, { "epoch": 1.7069397042093288, "grad_norm": 0.948422819607699, "learning_rate": 9.237835501134759e-07, "loss": 0.0134, "step": 7502 }, { "epoch": 1.7071672354948806, "grad_norm": 1.2121992571283962, "learning_rate": 9.237050670540579e-07, "loss": 0.0184, "step": 7503 }, { "epoch": 1.7073947667804323, "grad_norm": 0.8297872919210153, "learning_rate": 9.236265778896554e-07, "loss": 0.0194, "step": 7504 }, { "epoch": 1.707622298065984, "grad_norm": 0.9096517797805543, "learning_rate": 9.235480826218726e-07, "loss": 0.0222, "step": 7505 }, { "epoch": 1.7078498293515358, "grad_norm": 0.9438726780665672, "learning_rate": 9.234695812523137e-07, "loss": 0.017, "step": 7506 }, { "epoch": 1.7080773606370876, "grad_norm": 1.6086698482204602, "learning_rate": 9.233910737825831e-07, "loss": 0.0424, "step": 7507 }, { "epoch": 1.7083048919226393, "grad_norm": 1.142830190893745, "learning_rate": 9.233125602142856e-07, "loss": 0.0186, "step": 7508 }, { "epoch": 1.708532423208191, "grad_norm": 1.6320142128089656, "learning_rate": 9.232340405490256e-07, "loss": 0.0347, "step": 7509 }, { "epoch": 1.7087599544937428, "grad_norm": 0.9514508167221402, "learning_rate": 9.23155514788408e-07, "loss": 0.0263, "step": 7510 }, { "epoch": 1.7089874857792946, "grad_norm": 0.6544786260782971, "learning_rate": 9.230769829340378e-07, "loss": 0.0102, "step": 7511 }, { "epoch": 1.7092150170648464, "grad_norm": 0.692110961995533, "learning_rate": 9.229984449875199e-07, "loss": 0.0136, "step": 7512 }, { "epoch": 1.709442548350398, "grad_norm": 1.9764741743177798, "learning_rate": 9.229199009504594e-07, "loss": 0.0235, "step": 7513 }, { "epoch": 1.7096700796359499, "grad_norm": 0.734190462175171, "learning_rate": 9.228413508244621e-07, "loss": 0.0082, "step": 7514 }, { "epoch": 1.7098976109215016, "grad_norm": 0.8945274189963984, "learning_rate": 9.227627946111328e-07, "loss": 0.0227, "step": 7515 }, { "epoch": 1.7101251422070534, "grad_norm": 3.5912888738002664, "learning_rate": 9.226842323120773e-07, "loss": 0.1081, "step": 7516 }, { "epoch": 1.7103526734926051, "grad_norm": 1.560919222763877, "learning_rate": 9.226056639289013e-07, "loss": 0.0337, "step": 7517 }, { "epoch": 1.7105802047781569, "grad_norm": 1.095784516414262, "learning_rate": 9.225270894632107e-07, "loss": 0.0115, "step": 7518 }, { "epoch": 1.7108077360637086, "grad_norm": 0.9859467458505337, "learning_rate": 9.224485089166111e-07, "loss": 0.0195, "step": 7519 }, { "epoch": 1.7110352673492604, "grad_norm": 0.7148271370823988, "learning_rate": 9.223699222907088e-07, "loss": 0.0149, "step": 7520 }, { "epoch": 1.7112627986348121, "grad_norm": 0.8875065071349563, "learning_rate": 9.222913295871101e-07, "loss": 0.0184, "step": 7521 }, { "epoch": 1.711490329920364, "grad_norm": 1.2178376423136983, "learning_rate": 9.22212730807421e-07, "loss": 0.0461, "step": 7522 }, { "epoch": 1.7117178612059158, "grad_norm": 1.0997910738708634, "learning_rate": 9.221341259532476e-07, "loss": 0.0255, "step": 7523 }, { "epoch": 1.7119453924914676, "grad_norm": 1.609907335399055, "learning_rate": 9.220555150261973e-07, "loss": 0.0582, "step": 7524 }, { "epoch": 1.7121729237770194, "grad_norm": 1.489916582646409, "learning_rate": 9.219768980278762e-07, "loss": 0.0361, "step": 7525 }, { "epoch": 1.712400455062571, "grad_norm": 1.3699399244111667, "learning_rate": 9.218982749598911e-07, "loss": 0.0314, "step": 7526 }, { "epoch": 1.7126279863481229, "grad_norm": 1.1259448383440134, "learning_rate": 9.21819645823849e-07, "loss": 0.0198, "step": 7527 }, { "epoch": 1.7128555176336746, "grad_norm": 1.2000322678352968, "learning_rate": 9.21741010621357e-07, "loss": 0.0237, "step": 7528 }, { "epoch": 1.7130830489192264, "grad_norm": 0.9291990030726819, "learning_rate": 9.216623693540222e-07, "loss": 0.0154, "step": 7529 }, { "epoch": 1.713310580204778, "grad_norm": 0.7752060975128637, "learning_rate": 9.215837220234518e-07, "loss": 0.0247, "step": 7530 }, { "epoch": 1.71353811149033, "grad_norm": 1.0433568166304124, "learning_rate": 9.215050686312534e-07, "loss": 0.0151, "step": 7531 }, { "epoch": 1.7137656427758818, "grad_norm": 0.9686106141353347, "learning_rate": 9.214264091790343e-07, "loss": 0.0144, "step": 7532 }, { "epoch": 1.7139931740614336, "grad_norm": 1.54478146480293, "learning_rate": 9.213477436684025e-07, "loss": 0.0379, "step": 7533 }, { "epoch": 1.7142207053469853, "grad_norm": 1.2519994785023527, "learning_rate": 9.212690721009654e-07, "loss": 0.0302, "step": 7534 }, { "epoch": 1.714448236632537, "grad_norm": 1.7383491670887976, "learning_rate": 9.21190394478331e-07, "loss": 0.068, "step": 7535 }, { "epoch": 1.7146757679180888, "grad_norm": 0.8552599017233935, "learning_rate": 9.211117108021074e-07, "loss": 0.0168, "step": 7536 }, { "epoch": 1.7149032992036406, "grad_norm": 1.788803024478606, "learning_rate": 9.210330210739029e-07, "loss": 0.0287, "step": 7537 }, { "epoch": 1.7151308304891923, "grad_norm": 0.9740522034800336, "learning_rate": 9.209543252953254e-07, "loss": 0.0234, "step": 7538 }, { "epoch": 1.715358361774744, "grad_norm": 1.0280768113489238, "learning_rate": 9.208756234679836e-07, "loss": 0.0159, "step": 7539 }, { "epoch": 1.7155858930602959, "grad_norm": 1.3489600407656455, "learning_rate": 9.20796915593486e-07, "loss": 0.0449, "step": 7540 }, { "epoch": 1.7158134243458476, "grad_norm": 1.4755745930733852, "learning_rate": 9.20718201673441e-07, "loss": 0.0434, "step": 7541 }, { "epoch": 1.7160409556313994, "grad_norm": 0.7616219435437076, "learning_rate": 9.206394817094577e-07, "loss": 0.0186, "step": 7542 }, { "epoch": 1.716268486916951, "grad_norm": 1.1468109557516537, "learning_rate": 9.205607557031446e-07, "loss": 0.0237, "step": 7543 }, { "epoch": 1.7164960182025029, "grad_norm": 0.9443214015433008, "learning_rate": 9.204820236561111e-07, "loss": 0.0258, "step": 7544 }, { "epoch": 1.7167235494880546, "grad_norm": 0.5774152629808543, "learning_rate": 9.204032855699663e-07, "loss": 0.01, "step": 7545 }, { "epoch": 1.7169510807736064, "grad_norm": 1.0276101636351294, "learning_rate": 9.203245414463192e-07, "loss": 0.0151, "step": 7546 }, { "epoch": 1.7171786120591581, "grad_norm": 0.6598829360890991, "learning_rate": 9.202457912867795e-07, "loss": 0.0089, "step": 7547 }, { "epoch": 1.7174061433447099, "grad_norm": 1.3229858553591636, "learning_rate": 9.201670350929564e-07, "loss": 0.0274, "step": 7548 }, { "epoch": 1.7176336746302616, "grad_norm": 0.7485471387835653, "learning_rate": 9.200882728664598e-07, "loss": 0.0138, "step": 7549 }, { "epoch": 1.7178612059158134, "grad_norm": 1.5299363260996441, "learning_rate": 9.20009504608899e-07, "loss": 0.0518, "step": 7550 }, { "epoch": 1.7180887372013651, "grad_norm": 0.7200038697909544, "learning_rate": 9.199307303218844e-07, "loss": 0.0145, "step": 7551 }, { "epoch": 1.7183162684869169, "grad_norm": 1.2509303912139782, "learning_rate": 9.198519500070261e-07, "loss": 0.0289, "step": 7552 }, { "epoch": 1.7185437997724686, "grad_norm": 1.1044637932726378, "learning_rate": 9.197731636659335e-07, "loss": 0.0326, "step": 7553 }, { "epoch": 1.7187713310580204, "grad_norm": 1.5182960629667408, "learning_rate": 9.196943713002177e-07, "loss": 0.0261, "step": 7554 }, { "epoch": 1.7189988623435721, "grad_norm": 0.8343994799574279, "learning_rate": 9.196155729114883e-07, "loss": 0.0171, "step": 7555 }, { "epoch": 1.7192263936291239, "grad_norm": 0.7119142890489658, "learning_rate": 9.195367685013564e-07, "loss": 0.0075, "step": 7556 }, { "epoch": 1.7194539249146756, "grad_norm": 1.2873836111171169, "learning_rate": 9.19457958071432e-07, "loss": 0.0367, "step": 7557 }, { "epoch": 1.7196814562002274, "grad_norm": 1.0237951516204418, "learning_rate": 9.193791416233266e-07, "loss": 0.0193, "step": 7558 }, { "epoch": 1.7199089874857791, "grad_norm": 1.871531878414218, "learning_rate": 9.193003191586507e-07, "loss": 0.0321, "step": 7559 }, { "epoch": 1.7201365187713311, "grad_norm": 0.9889278233426179, "learning_rate": 9.192214906790149e-07, "loss": 0.0291, "step": 7560 }, { "epoch": 1.7203640500568829, "grad_norm": 0.9873975389279321, "learning_rate": 9.191426561860308e-07, "loss": 0.0221, "step": 7561 }, { "epoch": 1.7205915813424346, "grad_norm": 0.9219114577412436, "learning_rate": 9.190638156813097e-07, "loss": 0.0136, "step": 7562 }, { "epoch": 1.7208191126279864, "grad_norm": 0.8251545811829912, "learning_rate": 9.189849691664626e-07, "loss": 0.0149, "step": 7563 }, { "epoch": 1.7210466439135381, "grad_norm": 1.0869299679841706, "learning_rate": 9.189061166431012e-07, "loss": 0.0376, "step": 7564 }, { "epoch": 1.7212741751990899, "grad_norm": 0.6003001398423039, "learning_rate": 9.188272581128372e-07, "loss": 0.0111, "step": 7565 }, { "epoch": 1.7215017064846416, "grad_norm": 1.5253865672646039, "learning_rate": 9.187483935772818e-07, "loss": 0.0242, "step": 7566 }, { "epoch": 1.7217292377701934, "grad_norm": 1.5907519445499267, "learning_rate": 9.186695230380474e-07, "loss": 0.0296, "step": 7567 }, { "epoch": 1.7219567690557451, "grad_norm": 0.9523869090994135, "learning_rate": 9.185906464967459e-07, "loss": 0.0263, "step": 7568 }, { "epoch": 1.7221843003412969, "grad_norm": 1.2465558325527257, "learning_rate": 9.185117639549891e-07, "loss": 0.0171, "step": 7569 }, { "epoch": 1.7224118316268489, "grad_norm": 0.6801386122458417, "learning_rate": 9.184328754143893e-07, "loss": 0.0104, "step": 7570 }, { "epoch": 1.7226393629124006, "grad_norm": 0.7904268653100202, "learning_rate": 9.183539808765591e-07, "loss": 0.0254, "step": 7571 }, { "epoch": 1.7228668941979524, "grad_norm": 0.7570825293218078, "learning_rate": 9.182750803431109e-07, "loss": 0.0232, "step": 7572 }, { "epoch": 1.723094425483504, "grad_norm": 1.2836056735908439, "learning_rate": 9.181961738156568e-07, "loss": 0.0475, "step": 7573 }, { "epoch": 1.7233219567690559, "grad_norm": 1.168137213634141, "learning_rate": 9.181172612958101e-07, "loss": 0.0236, "step": 7574 }, { "epoch": 1.7235494880546076, "grad_norm": 0.47684817828903375, "learning_rate": 9.180383427851834e-07, "loss": 0.0127, "step": 7575 }, { "epoch": 1.7237770193401594, "grad_norm": 1.046578127030525, "learning_rate": 9.179594182853898e-07, "loss": 0.0175, "step": 7576 }, { "epoch": 1.7240045506257111, "grad_norm": 1.1140965944976429, "learning_rate": 9.17880487798042e-07, "loss": 0.024, "step": 7577 }, { "epoch": 1.7242320819112629, "grad_norm": 1.1920191951868853, "learning_rate": 9.178015513247534e-07, "loss": 0.0184, "step": 7578 }, { "epoch": 1.7244596131968146, "grad_norm": 0.7990927056094128, "learning_rate": 9.177226088671375e-07, "loss": 0.0193, "step": 7579 }, { "epoch": 1.7246871444823664, "grad_norm": 0.7907100321498542, "learning_rate": 9.176436604268073e-07, "loss": 0.0163, "step": 7580 }, { "epoch": 1.7249146757679181, "grad_norm": 0.7239726229060026, "learning_rate": 9.175647060053767e-07, "loss": 0.0297, "step": 7581 }, { "epoch": 1.7251422070534699, "grad_norm": 1.2556927924521644, "learning_rate": 9.174857456044595e-07, "loss": 0.0177, "step": 7582 }, { "epoch": 1.7253697383390216, "grad_norm": 0.7280661687160516, "learning_rate": 9.174067792256693e-07, "loss": 0.0158, "step": 7583 }, { "epoch": 1.7255972696245734, "grad_norm": 0.8140510413495747, "learning_rate": 9.1732780687062e-07, "loss": 0.0187, "step": 7584 }, { "epoch": 1.7258248009101251, "grad_norm": 1.2618606105844503, "learning_rate": 9.172488285409256e-07, "loss": 0.0345, "step": 7585 }, { "epoch": 1.7260523321956769, "grad_norm": 1.2138797171822122, "learning_rate": 9.171698442382005e-07, "loss": 0.0233, "step": 7586 }, { "epoch": 1.7262798634812286, "grad_norm": 0.7342179818630192, "learning_rate": 9.170908539640587e-07, "loss": 0.0095, "step": 7587 }, { "epoch": 1.7265073947667804, "grad_norm": 0.8163594897829524, "learning_rate": 9.170118577201149e-07, "loss": 0.0133, "step": 7588 }, { "epoch": 1.7267349260523321, "grad_norm": 0.764870090102491, "learning_rate": 9.169328555079836e-07, "loss": 0.0148, "step": 7589 }, { "epoch": 1.726962457337884, "grad_norm": 1.0034494545193837, "learning_rate": 9.168538473292793e-07, "loss": 0.0227, "step": 7590 }, { "epoch": 1.7271899886234356, "grad_norm": 0.8061146348026804, "learning_rate": 9.167748331856169e-07, "loss": 0.0153, "step": 7591 }, { "epoch": 1.7274175199089874, "grad_norm": 0.7549426350040234, "learning_rate": 9.166958130786113e-07, "loss": 0.0158, "step": 7592 }, { "epoch": 1.7276450511945391, "grad_norm": 0.7542254964161569, "learning_rate": 9.166167870098773e-07, "loss": 0.0272, "step": 7593 }, { "epoch": 1.727872582480091, "grad_norm": 0.7889596892238906, "learning_rate": 9.165377549810305e-07, "loss": 0.0095, "step": 7594 }, { "epoch": 1.7281001137656427, "grad_norm": 1.2357332696364653, "learning_rate": 9.164587169936858e-07, "loss": 0.0223, "step": 7595 }, { "epoch": 1.7283276450511944, "grad_norm": 1.1093238855927807, "learning_rate": 9.163796730494587e-07, "loss": 0.0165, "step": 7596 }, { "epoch": 1.7285551763367462, "grad_norm": 0.8007305366299827, "learning_rate": 9.163006231499647e-07, "loss": 0.0144, "step": 7597 }, { "epoch": 1.728782707622298, "grad_norm": 0.7873052222770126, "learning_rate": 9.162215672968194e-07, "loss": 0.0131, "step": 7598 }, { "epoch": 1.7290102389078499, "grad_norm": 1.4418125843313119, "learning_rate": 9.161425054916388e-07, "loss": 0.0295, "step": 7599 }, { "epoch": 1.7292377701934016, "grad_norm": 1.376158903306575, "learning_rate": 9.160634377360383e-07, "loss": 0.0285, "step": 7600 }, { "epoch": 1.7294653014789534, "grad_norm": 1.2162050247028884, "learning_rate": 9.159843640316345e-07, "loss": 0.0342, "step": 7601 }, { "epoch": 1.7296928327645051, "grad_norm": 1.0911247413585297, "learning_rate": 9.159052843800431e-07, "loss": 0.0355, "step": 7602 }, { "epoch": 1.729920364050057, "grad_norm": 0.8596735837001528, "learning_rate": 9.158261987828804e-07, "loss": 0.0195, "step": 7603 }, { "epoch": 1.7301478953356086, "grad_norm": 1.0547684986800836, "learning_rate": 9.157471072417629e-07, "loss": 0.0328, "step": 7604 }, { "epoch": 1.7303754266211604, "grad_norm": 0.9808997952350144, "learning_rate": 9.156680097583071e-07, "loss": 0.0156, "step": 7605 }, { "epoch": 1.7306029579067121, "grad_norm": 0.9559678639554583, "learning_rate": 9.155889063341293e-07, "loss": 0.0275, "step": 7606 }, { "epoch": 1.730830489192264, "grad_norm": 0.738789350988573, "learning_rate": 9.155097969708464e-07, "loss": 0.0237, "step": 7607 }, { "epoch": 1.7310580204778157, "grad_norm": 1.6370811776774898, "learning_rate": 9.154306816700755e-07, "loss": 0.0682, "step": 7608 }, { "epoch": 1.7312855517633676, "grad_norm": 0.9051106679300437, "learning_rate": 9.153515604334334e-07, "loss": 0.0116, "step": 7609 }, { "epoch": 1.7315130830489194, "grad_norm": 0.8336307762546704, "learning_rate": 9.152724332625369e-07, "loss": 0.0275, "step": 7610 }, { "epoch": 1.7317406143344711, "grad_norm": 1.7561758401491077, "learning_rate": 9.151933001590035e-07, "loss": 0.032, "step": 7611 }, { "epoch": 1.7319681456200229, "grad_norm": 1.0973531743829115, "learning_rate": 9.151141611244507e-07, "loss": 0.029, "step": 7612 }, { "epoch": 1.7321956769055746, "grad_norm": 1.2627804391545918, "learning_rate": 9.150350161604957e-07, "loss": 0.0528, "step": 7613 }, { "epoch": 1.7324232081911264, "grad_norm": 1.6371028258031264, "learning_rate": 9.149558652687561e-07, "loss": 0.0572, "step": 7614 }, { "epoch": 1.7326507394766781, "grad_norm": 0.8989759452735959, "learning_rate": 9.148767084508497e-07, "loss": 0.0116, "step": 7615 }, { "epoch": 1.7328782707622299, "grad_norm": 2.1842929628774055, "learning_rate": 9.147975457083943e-07, "loss": 0.0329, "step": 7616 }, { "epoch": 1.7331058020477816, "grad_norm": 0.5320116937274283, "learning_rate": 9.147183770430076e-07, "loss": 0.0147, "step": 7617 }, { "epoch": 1.7333333333333334, "grad_norm": 1.8104756589418485, "learning_rate": 9.146392024563081e-07, "loss": 0.0597, "step": 7618 }, { "epoch": 1.7335608646188851, "grad_norm": 0.7544718465559345, "learning_rate": 9.145600219499137e-07, "loss": 0.0192, "step": 7619 }, { "epoch": 1.733788395904437, "grad_norm": 1.008324222694622, "learning_rate": 9.144808355254426e-07, "loss": 0.0122, "step": 7620 }, { "epoch": 1.7340159271899886, "grad_norm": 0.8300301314244892, "learning_rate": 9.144016431845136e-07, "loss": 0.0144, "step": 7621 }, { "epoch": 1.7342434584755404, "grad_norm": 0.6402850376707664, "learning_rate": 9.143224449287449e-07, "loss": 0.0097, "step": 7622 }, { "epoch": 1.7344709897610922, "grad_norm": 1.3962807580152192, "learning_rate": 9.142432407597552e-07, "loss": 0.0289, "step": 7623 }, { "epoch": 1.734698521046644, "grad_norm": 0.7192631361443328, "learning_rate": 9.141640306791635e-07, "loss": 0.0218, "step": 7624 }, { "epoch": 1.7349260523321957, "grad_norm": 1.2618423454164023, "learning_rate": 9.140848146885888e-07, "loss": 0.0185, "step": 7625 }, { "epoch": 1.7351535836177474, "grad_norm": 1.1271057373978712, "learning_rate": 9.140055927896497e-07, "loss": 0.0242, "step": 7626 }, { "epoch": 1.7353811149032992, "grad_norm": 0.8881741003317172, "learning_rate": 9.139263649839654e-07, "loss": 0.0196, "step": 7627 }, { "epoch": 1.735608646188851, "grad_norm": 0.8282851305946936, "learning_rate": 9.138471312731558e-07, "loss": 0.0118, "step": 7628 }, { "epoch": 1.7358361774744027, "grad_norm": 1.030456822140469, "learning_rate": 9.137678916588395e-07, "loss": 0.0201, "step": 7629 }, { "epoch": 1.7360637087599544, "grad_norm": 1.2098948742481863, "learning_rate": 9.136886461426363e-07, "loss": 0.0324, "step": 7630 }, { "epoch": 1.7362912400455062, "grad_norm": 0.944387144024131, "learning_rate": 9.136093947261659e-07, "loss": 0.0349, "step": 7631 }, { "epoch": 1.736518771331058, "grad_norm": 1.0435591722724238, "learning_rate": 9.135301374110482e-07, "loss": 0.0245, "step": 7632 }, { "epoch": 1.7367463026166097, "grad_norm": 1.1073705234081634, "learning_rate": 9.134508741989028e-07, "loss": 0.0149, "step": 7633 }, { "epoch": 1.7369738339021614, "grad_norm": 0.6957285837630877, "learning_rate": 9.133716050913499e-07, "loss": 0.011, "step": 7634 }, { "epoch": 1.7372013651877132, "grad_norm": 0.8438881915815134, "learning_rate": 9.132923300900096e-07, "loss": 0.0194, "step": 7635 }, { "epoch": 1.737428896473265, "grad_norm": 1.1755251192640188, "learning_rate": 9.132130491965019e-07, "loss": 0.0329, "step": 7636 }, { "epoch": 1.7376564277588167, "grad_norm": 1.9216126330504404, "learning_rate": 9.131337624124473e-07, "loss": 0.0547, "step": 7637 }, { "epoch": 1.7378839590443687, "grad_norm": 0.861009420801015, "learning_rate": 9.130544697394662e-07, "loss": 0.0215, "step": 7638 }, { "epoch": 1.7381114903299204, "grad_norm": 1.1882707354996638, "learning_rate": 9.129751711791796e-07, "loss": 0.023, "step": 7639 }, { "epoch": 1.7383390216154722, "grad_norm": 1.2239475383326814, "learning_rate": 9.128958667332076e-07, "loss": 0.0288, "step": 7640 }, { "epoch": 1.738566552901024, "grad_norm": 53.355564991987, "learning_rate": 9.128165564031715e-07, "loss": 0.2876, "step": 7641 }, { "epoch": 1.7387940841865757, "grad_norm": 0.8841269788948465, "learning_rate": 9.127372401906919e-07, "loss": 0.01, "step": 7642 }, { "epoch": 1.7390216154721274, "grad_norm": 1.2206625753180147, "learning_rate": 9.126579180973904e-07, "loss": 0.0415, "step": 7643 }, { "epoch": 1.7392491467576792, "grad_norm": 1.086603987389488, "learning_rate": 9.125785901248875e-07, "loss": 0.0173, "step": 7644 }, { "epoch": 1.739476678043231, "grad_norm": 1.1225650990057543, "learning_rate": 9.124992562748051e-07, "loss": 0.0294, "step": 7645 }, { "epoch": 1.7397042093287827, "grad_norm": 1.2175034170150192, "learning_rate": 9.124199165487646e-07, "loss": 0.0159, "step": 7646 }, { "epoch": 1.7399317406143344, "grad_norm": 2.092254610281203, "learning_rate": 9.12340570948387e-07, "loss": 0.0881, "step": 7647 }, { "epoch": 1.7401592718998864, "grad_norm": 0.8586350139414584, "learning_rate": 9.122612194752947e-07, "loss": 0.0261, "step": 7648 }, { "epoch": 1.7403868031854381, "grad_norm": 1.215639356905783, "learning_rate": 9.12181862131109e-07, "loss": 0.0163, "step": 7649 }, { "epoch": 1.74061433447099, "grad_norm": 1.725700806539854, "learning_rate": 9.121024989174521e-07, "loss": 0.0263, "step": 7650 }, { "epoch": 1.7408418657565417, "grad_norm": 1.2188163467235449, "learning_rate": 9.120231298359458e-07, "loss": 0.022, "step": 7651 }, { "epoch": 1.7410693970420934, "grad_norm": 1.1976272309936158, "learning_rate": 9.119437548882125e-07, "loss": 0.0387, "step": 7652 }, { "epoch": 1.7412969283276452, "grad_norm": 1.2953796333595993, "learning_rate": 9.118643740758744e-07, "loss": 0.0134, "step": 7653 }, { "epoch": 1.741524459613197, "grad_norm": 1.2311997698528327, "learning_rate": 9.117849874005537e-07, "loss": 0.036, "step": 7654 }, { "epoch": 1.7417519908987487, "grad_norm": 1.1628900893695522, "learning_rate": 9.117055948638731e-07, "loss": 0.0429, "step": 7655 }, { "epoch": 1.7419795221843004, "grad_norm": 1.8508559292221363, "learning_rate": 9.116261964674553e-07, "loss": 0.0517, "step": 7656 }, { "epoch": 1.7422070534698522, "grad_norm": 0.8746941733793605, "learning_rate": 9.115467922129229e-07, "loss": 0.0241, "step": 7657 }, { "epoch": 1.742434584755404, "grad_norm": 0.9077736076367343, "learning_rate": 9.114673821018987e-07, "loss": 0.0244, "step": 7658 }, { "epoch": 1.7426621160409557, "grad_norm": 1.5287989175270456, "learning_rate": 9.113879661360063e-07, "loss": 0.0234, "step": 7659 }, { "epoch": 1.7428896473265074, "grad_norm": 1.6432947458522034, "learning_rate": 9.11308544316868e-07, "loss": 0.0352, "step": 7660 }, { "epoch": 1.7431171786120592, "grad_norm": 0.8065355560534044, "learning_rate": 9.112291166461076e-07, "loss": 0.0219, "step": 7661 }, { "epoch": 1.743344709897611, "grad_norm": 2.3664864556682845, "learning_rate": 9.111496831253481e-07, "loss": 0.075, "step": 7662 }, { "epoch": 1.7435722411831627, "grad_norm": 1.5414737823776685, "learning_rate": 9.110702437562132e-07, "loss": 0.0295, "step": 7663 }, { "epoch": 1.7437997724687144, "grad_norm": 1.5196225851842906, "learning_rate": 9.109907985403265e-07, "loss": 0.0254, "step": 7664 }, { "epoch": 1.7440273037542662, "grad_norm": 1.8653557622531247, "learning_rate": 9.109113474793116e-07, "loss": 0.0577, "step": 7665 }, { "epoch": 1.744254835039818, "grad_norm": 1.5718530339793813, "learning_rate": 9.108318905747924e-07, "loss": 0.0433, "step": 7666 }, { "epoch": 1.7444823663253697, "grad_norm": 0.8753040187644087, "learning_rate": 9.107524278283928e-07, "loss": 0.0133, "step": 7667 }, { "epoch": 1.7447098976109214, "grad_norm": 0.9921757911909639, "learning_rate": 9.106729592417368e-07, "loss": 0.0148, "step": 7668 }, { "epoch": 1.7449374288964732, "grad_norm": 1.023329566714207, "learning_rate": 9.105934848164488e-07, "loss": 0.0282, "step": 7669 }, { "epoch": 1.745164960182025, "grad_norm": 1.2481670331085064, "learning_rate": 9.105140045541532e-07, "loss": 0.0427, "step": 7670 }, { "epoch": 1.7453924914675767, "grad_norm": 1.3173050639321486, "learning_rate": 9.10434518456474e-07, "loss": 0.0519, "step": 7671 }, { "epoch": 1.7456200227531284, "grad_norm": 0.9060431301408944, "learning_rate": 9.10355026525036e-07, "loss": 0.0247, "step": 7672 }, { "epoch": 1.7458475540386802, "grad_norm": 0.9275970798228872, "learning_rate": 9.102755287614639e-07, "loss": 0.0178, "step": 7673 }, { "epoch": 1.746075085324232, "grad_norm": 1.2478408678541246, "learning_rate": 9.101960251673825e-07, "loss": 0.0253, "step": 7674 }, { "epoch": 1.7463026166097837, "grad_norm": 1.0197676494224464, "learning_rate": 9.101165157444166e-07, "loss": 0.0225, "step": 7675 }, { "epoch": 1.7465301478953354, "grad_norm": 0.909094720632412, "learning_rate": 9.100370004941912e-07, "loss": 0.0188, "step": 7676 }, { "epoch": 1.7467576791808874, "grad_norm": 1.2769228356341842, "learning_rate": 9.099574794183317e-07, "loss": 0.0356, "step": 7677 }, { "epoch": 1.7469852104664392, "grad_norm": 1.0980874895229569, "learning_rate": 9.098779525184631e-07, "loss": 0.029, "step": 7678 }, { "epoch": 1.747212741751991, "grad_norm": 1.1165038442429356, "learning_rate": 9.097984197962109e-07, "loss": 0.0289, "step": 7679 }, { "epoch": 1.7474402730375427, "grad_norm": 0.96382139387914, "learning_rate": 9.097188812532006e-07, "loss": 0.0171, "step": 7680 }, { "epoch": 1.7476678043230944, "grad_norm": 1.3350100903286808, "learning_rate": 9.096393368910578e-07, "loss": 0.0345, "step": 7681 }, { "epoch": 1.7478953356086462, "grad_norm": 1.0713437039923497, "learning_rate": 9.095597867114082e-07, "loss": 0.0131, "step": 7682 }, { "epoch": 1.748122866894198, "grad_norm": 1.0460207092655962, "learning_rate": 9.094802307158777e-07, "loss": 0.0282, "step": 7683 }, { "epoch": 1.7483503981797497, "grad_norm": 1.2263445161655493, "learning_rate": 9.094006689060924e-07, "loss": 0.0336, "step": 7684 }, { "epoch": 1.7485779294653014, "grad_norm": 1.1487925099211806, "learning_rate": 9.093211012836782e-07, "loss": 0.0194, "step": 7685 }, { "epoch": 1.7488054607508532, "grad_norm": 0.9865132985502546, "learning_rate": 9.092415278502614e-07, "loss": 0.0426, "step": 7686 }, { "epoch": 1.7490329920364052, "grad_norm": 0.9010433966430745, "learning_rate": 9.091619486074684e-07, "loss": 0.0153, "step": 7687 }, { "epoch": 1.749260523321957, "grad_norm": 0.3964431214764849, "learning_rate": 9.090823635569254e-07, "loss": 0.0089, "step": 7688 }, { "epoch": 1.7494880546075087, "grad_norm": 2.1364993775182044, "learning_rate": 9.090027727002594e-07, "loss": 0.1037, "step": 7689 }, { "epoch": 1.7497155858930604, "grad_norm": 1.0142838136704868, "learning_rate": 9.089231760390968e-07, "loss": 0.0383, "step": 7690 }, { "epoch": 1.7499431171786122, "grad_norm": 0.9385889195565317, "learning_rate": 9.088435735750643e-07, "loss": 0.0216, "step": 7691 }, { "epoch": 1.750170648464164, "grad_norm": 0.8684423870809375, "learning_rate": 9.087639653097892e-07, "loss": 0.0317, "step": 7692 }, { "epoch": 1.7503981797497157, "grad_norm": 1.296162026638584, "learning_rate": 9.086843512448983e-07, "loss": 0.0442, "step": 7693 }, { "epoch": 1.7506257110352674, "grad_norm": 0.6407459373187171, "learning_rate": 9.086047313820186e-07, "loss": 0.0131, "step": 7694 }, { "epoch": 1.7508532423208192, "grad_norm": 1.5991269608707057, "learning_rate": 9.085251057227777e-07, "loss": 0.0494, "step": 7695 }, { "epoch": 1.751080773606371, "grad_norm": 1.246921304342, "learning_rate": 9.08445474268803e-07, "loss": 0.0254, "step": 7696 }, { "epoch": 1.7513083048919227, "grad_norm": 1.0011932326498272, "learning_rate": 9.083658370217219e-07, "loss": 0.0273, "step": 7697 }, { "epoch": 1.7515358361774744, "grad_norm": 1.7295845937220786, "learning_rate": 9.082861939831619e-07, "loss": 0.0849, "step": 7698 }, { "epoch": 1.7517633674630262, "grad_norm": 0.7603956433839477, "learning_rate": 9.08206545154751e-07, "loss": 0.017, "step": 7699 }, { "epoch": 1.751990898748578, "grad_norm": 1.1788808766142622, "learning_rate": 9.08126890538117e-07, "loss": 0.0504, "step": 7700 }, { "epoch": 1.7522184300341297, "grad_norm": 0.747307893601606, "learning_rate": 9.080472301348878e-07, "loss": 0.0192, "step": 7701 }, { "epoch": 1.7524459613196814, "grad_norm": 1.1518157305353764, "learning_rate": 9.079675639466918e-07, "loss": 0.045, "step": 7702 }, { "epoch": 1.7526734926052332, "grad_norm": 1.0953592367909082, "learning_rate": 9.078878919751569e-07, "loss": 0.0428, "step": 7703 }, { "epoch": 1.752901023890785, "grad_norm": 1.363606620510174, "learning_rate": 9.078082142219114e-07, "loss": 0.0349, "step": 7704 }, { "epoch": 1.7531285551763367, "grad_norm": 1.0499526817597125, "learning_rate": 9.077285306885842e-07, "loss": 0.0202, "step": 7705 }, { "epoch": 1.7533560864618885, "grad_norm": 0.7675164362932144, "learning_rate": 9.076488413768035e-07, "loss": 0.0218, "step": 7706 }, { "epoch": 1.7535836177474402, "grad_norm": 1.2618843485916398, "learning_rate": 9.075691462881981e-07, "loss": 0.0502, "step": 7707 }, { "epoch": 1.753811149032992, "grad_norm": 1.5141641473520007, "learning_rate": 9.074894454243968e-07, "loss": 0.0283, "step": 7708 }, { "epoch": 1.7540386803185437, "grad_norm": 1.1092758523541948, "learning_rate": 9.074097387870289e-07, "loss": 0.0208, "step": 7709 }, { "epoch": 1.7542662116040955, "grad_norm": 1.6035144538724415, "learning_rate": 9.07330026377723e-07, "loss": 0.0198, "step": 7710 }, { "epoch": 1.7544937428896472, "grad_norm": 1.167017853281529, "learning_rate": 9.072503081981081e-07, "loss": 0.0293, "step": 7711 }, { "epoch": 1.754721274175199, "grad_norm": 0.9184705076375725, "learning_rate": 9.07170584249814e-07, "loss": 0.0255, "step": 7712 }, { "epoch": 1.7549488054607507, "grad_norm": 1.2906245689143385, "learning_rate": 9.070908545344702e-07, "loss": 0.0323, "step": 7713 }, { "epoch": 1.7551763367463025, "grad_norm": 1.1306246357358782, "learning_rate": 9.070111190537057e-07, "loss": 0.027, "step": 7714 }, { "epoch": 1.7554038680318542, "grad_norm": 1.2717939026208809, "learning_rate": 9.069313778091504e-07, "loss": 0.0701, "step": 7715 }, { "epoch": 1.7556313993174062, "grad_norm": 0.9364844022319152, "learning_rate": 9.068516308024343e-07, "loss": 0.0179, "step": 7716 }, { "epoch": 1.755858930602958, "grad_norm": 2.120884695289572, "learning_rate": 9.067718780351867e-07, "loss": 0.0357, "step": 7717 }, { "epoch": 1.7560864618885097, "grad_norm": 1.7372318690678035, "learning_rate": 9.066921195090383e-07, "loss": 0.0533, "step": 7718 }, { "epoch": 1.7563139931740614, "grad_norm": 0.9006127396212078, "learning_rate": 9.066123552256187e-07, "loss": 0.0262, "step": 7719 }, { "epoch": 1.7565415244596132, "grad_norm": 1.1618139823950684, "learning_rate": 9.065325851865583e-07, "loss": 0.0255, "step": 7720 }, { "epoch": 1.756769055745165, "grad_norm": 1.4553349352673766, "learning_rate": 9.064528093934874e-07, "loss": 0.0231, "step": 7721 }, { "epoch": 1.7569965870307167, "grad_norm": 0.7189318772127737, "learning_rate": 9.063730278480368e-07, "loss": 0.0168, "step": 7722 }, { "epoch": 1.7572241183162685, "grad_norm": 0.7521106143967847, "learning_rate": 9.062932405518365e-07, "loss": 0.0131, "step": 7723 }, { "epoch": 1.7574516496018202, "grad_norm": 1.3566892763492646, "learning_rate": 9.062134475065176e-07, "loss": 0.0452, "step": 7724 }, { "epoch": 1.757679180887372, "grad_norm": 2.059052973839986, "learning_rate": 9.06133648713711e-07, "loss": 0.0423, "step": 7725 }, { "epoch": 1.757906712172924, "grad_norm": 1.673179110604813, "learning_rate": 9.060538441750475e-07, "loss": 0.0488, "step": 7726 }, { "epoch": 1.7581342434584757, "grad_norm": 1.190424328968161, "learning_rate": 9.05974033892158e-07, "loss": 0.021, "step": 7727 }, { "epoch": 1.7583617747440274, "grad_norm": 1.0485429519532234, "learning_rate": 9.058942178666738e-07, "loss": 0.028, "step": 7728 }, { "epoch": 1.7585893060295792, "grad_norm": 1.0357293376161343, "learning_rate": 9.058143961002263e-07, "loss": 0.0319, "step": 7729 }, { "epoch": 1.758816837315131, "grad_norm": 1.5732344809554888, "learning_rate": 9.05734568594447e-07, "loss": 0.028, "step": 7730 }, { "epoch": 1.7590443686006827, "grad_norm": 0.9379471313058495, "learning_rate": 9.05654735350967e-07, "loss": 0.012, "step": 7731 }, { "epoch": 1.7592718998862344, "grad_norm": 1.1693048482911874, "learning_rate": 9.055748963714183e-07, "loss": 0.0354, "step": 7732 }, { "epoch": 1.7594994311717862, "grad_norm": 1.1467606906199936, "learning_rate": 9.054950516574327e-07, "loss": 0.0274, "step": 7733 }, { "epoch": 1.759726962457338, "grad_norm": 1.0319823876324419, "learning_rate": 9.054152012106417e-07, "loss": 0.0325, "step": 7734 }, { "epoch": 1.7599544937428897, "grad_norm": 0.9017057064223635, "learning_rate": 9.053353450326777e-07, "loss": 0.0163, "step": 7735 }, { "epoch": 1.7601820250284415, "grad_norm": 1.3437259664925094, "learning_rate": 9.052554831251725e-07, "loss": 0.0492, "step": 7736 }, { "epoch": 1.7604095563139932, "grad_norm": 1.235304716459916, "learning_rate": 9.051756154897587e-07, "loss": 0.0264, "step": 7737 }, { "epoch": 1.760637087599545, "grad_norm": 1.3021796415012392, "learning_rate": 9.050957421280683e-07, "loss": 0.0355, "step": 7738 }, { "epoch": 1.7608646188850967, "grad_norm": 0.6730637046090184, "learning_rate": 9.05015863041734e-07, "loss": 0.01, "step": 7739 }, { "epoch": 1.7610921501706485, "grad_norm": 1.2763599910909942, "learning_rate": 9.049359782323881e-07, "loss": 0.0226, "step": 7740 }, { "epoch": 1.7613196814562002, "grad_norm": 1.3964209453463265, "learning_rate": 9.048560877016637e-07, "loss": 0.0284, "step": 7741 }, { "epoch": 1.761547212741752, "grad_norm": 1.620164064470503, "learning_rate": 9.047761914511933e-07, "loss": 0.0291, "step": 7742 }, { "epoch": 1.7617747440273037, "grad_norm": 0.3708609160081374, "learning_rate": 9.0469628948261e-07, "loss": 0.0055, "step": 7743 }, { "epoch": 1.7620022753128555, "grad_norm": 0.8799311045280214, "learning_rate": 9.046163817975466e-07, "loss": 0.0149, "step": 7744 }, { "epoch": 1.7622298065984072, "grad_norm": 1.062318942187663, "learning_rate": 9.045364683976366e-07, "loss": 0.0377, "step": 7745 }, { "epoch": 1.762457337883959, "grad_norm": 1.085127144596073, "learning_rate": 9.044565492845131e-07, "loss": 0.0471, "step": 7746 }, { "epoch": 1.7626848691695107, "grad_norm": 1.7016862939695618, "learning_rate": 9.043766244598096e-07, "loss": 0.0365, "step": 7747 }, { "epoch": 1.7629124004550625, "grad_norm": 0.7069815925067215, "learning_rate": 9.042966939251595e-07, "loss": 0.0211, "step": 7748 }, { "epoch": 1.7631399317406142, "grad_norm": 2.3165835449442826, "learning_rate": 9.042167576821964e-07, "loss": 0.0642, "step": 7749 }, { "epoch": 1.763367463026166, "grad_norm": 1.1909554351095002, "learning_rate": 9.041368157325543e-07, "loss": 0.0301, "step": 7750 }, { "epoch": 1.7635949943117177, "grad_norm": 1.460760689556158, "learning_rate": 9.040568680778668e-07, "loss": 0.0481, "step": 7751 }, { "epoch": 1.7638225255972695, "grad_norm": 1.4806066713210055, "learning_rate": 9.03976914719768e-07, "loss": 0.0397, "step": 7752 }, { "epoch": 1.7640500568828212, "grad_norm": 0.990535408952515, "learning_rate": 9.03896955659892e-07, "loss": 0.0266, "step": 7753 }, { "epoch": 1.764277588168373, "grad_norm": 1.0187827421869802, "learning_rate": 9.03816990899873e-07, "loss": 0.025, "step": 7754 }, { "epoch": 1.764505119453925, "grad_norm": 1.468842093699268, "learning_rate": 9.037370204413452e-07, "loss": 0.0308, "step": 7755 }, { "epoch": 1.7647326507394767, "grad_norm": 1.104325889984664, "learning_rate": 9.036570442859433e-07, "loss": 0.0274, "step": 7756 }, { "epoch": 1.7649601820250285, "grad_norm": 0.9609041262599042, "learning_rate": 9.035770624353018e-07, "loss": 0.0233, "step": 7757 }, { "epoch": 1.7651877133105802, "grad_norm": 1.300462656770205, "learning_rate": 9.034970748910552e-07, "loss": 0.0564, "step": 7758 }, { "epoch": 1.765415244596132, "grad_norm": 0.7966242008387683, "learning_rate": 9.034170816548387e-07, "loss": 0.0158, "step": 7759 }, { "epoch": 1.7656427758816837, "grad_norm": 1.0068645889333558, "learning_rate": 9.033370827282868e-07, "loss": 0.015, "step": 7760 }, { "epoch": 1.7658703071672355, "grad_norm": 1.84372314061819, "learning_rate": 9.032570781130346e-07, "loss": 0.0359, "step": 7761 }, { "epoch": 1.7660978384527872, "grad_norm": 0.9517555469231603, "learning_rate": 9.031770678107174e-07, "loss": 0.0221, "step": 7762 }, { "epoch": 1.766325369738339, "grad_norm": 1.008357002228374, "learning_rate": 9.030970518229704e-07, "loss": 0.0224, "step": 7763 }, { "epoch": 1.7665529010238907, "grad_norm": 0.4490169893485138, "learning_rate": 9.030170301514289e-07, "loss": 0.0051, "step": 7764 }, { "epoch": 1.7667804323094427, "grad_norm": 1.433194534836921, "learning_rate": 9.029370027977284e-07, "loss": 0.0244, "step": 7765 }, { "epoch": 1.7670079635949945, "grad_norm": 0.9253299076158662, "learning_rate": 9.028569697635047e-07, "loss": 0.016, "step": 7766 }, { "epoch": 1.7672354948805462, "grad_norm": 1.0293544978932465, "learning_rate": 9.027769310503935e-07, "loss": 0.0393, "step": 7767 }, { "epoch": 1.767463026166098, "grad_norm": 1.2746844789912744, "learning_rate": 9.026968866600304e-07, "loss": 0.0254, "step": 7768 }, { "epoch": 1.7676905574516497, "grad_norm": 0.9738684622085294, "learning_rate": 9.026168365940516e-07, "loss": 0.0353, "step": 7769 }, { "epoch": 1.7679180887372015, "grad_norm": 1.4215988518341673, "learning_rate": 9.02536780854093e-07, "loss": 0.0289, "step": 7770 }, { "epoch": 1.7681456200227532, "grad_norm": 1.9463891862128035, "learning_rate": 9.024567194417911e-07, "loss": 0.0234, "step": 7771 }, { "epoch": 1.768373151308305, "grad_norm": 0.9261059100245073, "learning_rate": 9.023766523587817e-07, "loss": 0.0121, "step": 7772 }, { "epoch": 1.7686006825938567, "grad_norm": 2.673950698193844, "learning_rate": 9.022965796067016e-07, "loss": 0.0323, "step": 7773 }, { "epoch": 1.7688282138794085, "grad_norm": 0.8834482418976045, "learning_rate": 9.022165011871873e-07, "loss": 0.0302, "step": 7774 }, { "epoch": 1.7690557451649602, "grad_norm": 1.1860901670932948, "learning_rate": 9.021364171018754e-07, "loss": 0.0311, "step": 7775 }, { "epoch": 1.769283276450512, "grad_norm": 0.9597974646507369, "learning_rate": 9.020563273524027e-07, "loss": 0.0229, "step": 7776 }, { "epoch": 1.7695108077360637, "grad_norm": 0.847588734062367, "learning_rate": 9.019762319404061e-07, "loss": 0.0195, "step": 7777 }, { "epoch": 1.7697383390216155, "grad_norm": 1.1738522512510032, "learning_rate": 9.018961308675225e-07, "loss": 0.0342, "step": 7778 }, { "epoch": 1.7699658703071672, "grad_norm": 0.7503668751477243, "learning_rate": 9.018160241353893e-07, "loss": 0.0145, "step": 7779 }, { "epoch": 1.770193401592719, "grad_norm": 1.084695064985875, "learning_rate": 9.017359117456434e-07, "loss": 0.0248, "step": 7780 }, { "epoch": 1.7704209328782707, "grad_norm": 0.8899828699614253, "learning_rate": 9.016557936999221e-07, "loss": 0.0212, "step": 7781 }, { "epoch": 1.7706484641638225, "grad_norm": 0.7597079457803091, "learning_rate": 9.015756699998632e-07, "loss": 0.032, "step": 7782 }, { "epoch": 1.7708759954493742, "grad_norm": 0.8711745869354168, "learning_rate": 9.014955406471041e-07, "loss": 0.0102, "step": 7783 }, { "epoch": 1.771103526734926, "grad_norm": 0.8186824087409144, "learning_rate": 9.014154056432828e-07, "loss": 0.0241, "step": 7784 }, { "epoch": 1.7713310580204777, "grad_norm": 0.7283696556407617, "learning_rate": 9.013352649900365e-07, "loss": 0.0164, "step": 7785 }, { "epoch": 1.7715585893060295, "grad_norm": 0.9632421077654101, "learning_rate": 9.012551186890037e-07, "loss": 0.0201, "step": 7786 }, { "epoch": 1.7717861205915812, "grad_norm": 1.2968869584635645, "learning_rate": 9.011749667418221e-07, "loss": 0.033, "step": 7787 }, { "epoch": 1.772013651877133, "grad_norm": 1.1781087189325576, "learning_rate": 9.010948091501298e-07, "loss": 0.0323, "step": 7788 }, { "epoch": 1.7722411831626848, "grad_norm": 0.6227189258752035, "learning_rate": 9.010146459155654e-07, "loss": 0.0177, "step": 7789 }, { "epoch": 1.7724687144482365, "grad_norm": 1.1525215566714462, "learning_rate": 9.009344770397671e-07, "loss": 0.0209, "step": 7790 }, { "epoch": 1.7726962457337883, "grad_norm": 0.9174352181093524, "learning_rate": 9.008543025243735e-07, "loss": 0.0191, "step": 7791 }, { "epoch": 1.77292377701934, "grad_norm": 0.7635480894250786, "learning_rate": 9.007741223710232e-07, "loss": 0.0146, "step": 7792 }, { "epoch": 1.7731513083048918, "grad_norm": 1.521151554028818, "learning_rate": 9.006939365813549e-07, "loss": 0.0471, "step": 7793 }, { "epoch": 1.7733788395904437, "grad_norm": 2.081083148922227, "learning_rate": 9.006137451570074e-07, "loss": 0.029, "step": 7794 }, { "epoch": 1.7736063708759955, "grad_norm": 1.1298450626530687, "learning_rate": 9.005335480996196e-07, "loss": 0.022, "step": 7795 }, { "epoch": 1.7738339021615472, "grad_norm": 1.2497151471052976, "learning_rate": 9.004533454108308e-07, "loss": 0.0333, "step": 7796 }, { "epoch": 1.774061433447099, "grad_norm": 1.0582139306273477, "learning_rate": 9.0037313709228e-07, "loss": 0.0199, "step": 7797 }, { "epoch": 1.7742889647326507, "grad_norm": 1.5328551731921303, "learning_rate": 9.002929231456067e-07, "loss": 0.0529, "step": 7798 }, { "epoch": 1.7745164960182025, "grad_norm": 1.2155287205090175, "learning_rate": 9.002127035724502e-07, "loss": 0.037, "step": 7799 }, { "epoch": 1.7747440273037542, "grad_norm": 1.0218347865522226, "learning_rate": 9.001324783744501e-07, "loss": 0.0417, "step": 7800 }, { "epoch": 1.774971558589306, "grad_norm": 3.390477976630992, "learning_rate": 9.000522475532461e-07, "loss": 0.0554, "step": 7801 }, { "epoch": 1.7751990898748577, "grad_norm": 0.592801892287802, "learning_rate": 8.999720111104776e-07, "loss": 0.0088, "step": 7802 }, { "epoch": 1.7754266211604095, "grad_norm": 1.4856820411102438, "learning_rate": 8.99891769047785e-07, "loss": 0.0302, "step": 7803 }, { "epoch": 1.7756541524459615, "grad_norm": 1.3825413426440172, "learning_rate": 8.998115213668082e-07, "loss": 0.0393, "step": 7804 }, { "epoch": 1.7758816837315132, "grad_norm": 1.0715853298379499, "learning_rate": 8.997312680691869e-07, "loss": 0.0251, "step": 7805 }, { "epoch": 1.776109215017065, "grad_norm": 1.086273322174223, "learning_rate": 8.996510091565618e-07, "loss": 0.0222, "step": 7806 }, { "epoch": 1.7763367463026167, "grad_norm": 1.574504492749921, "learning_rate": 8.99570744630573e-07, "loss": 0.0218, "step": 7807 }, { "epoch": 1.7765642775881685, "grad_norm": 0.7604610383602236, "learning_rate": 8.99490474492861e-07, "loss": 0.0165, "step": 7808 }, { "epoch": 1.7767918088737202, "grad_norm": 0.9545288476626476, "learning_rate": 8.994101987450665e-07, "loss": 0.0223, "step": 7809 }, { "epoch": 1.777019340159272, "grad_norm": 0.6882788064421679, "learning_rate": 8.993299173888302e-07, "loss": 0.0225, "step": 7810 }, { "epoch": 1.7772468714448237, "grad_norm": 1.7333778039577077, "learning_rate": 8.992496304257926e-07, "loss": 0.0323, "step": 7811 }, { "epoch": 1.7774744027303755, "grad_norm": 1.1051484045866493, "learning_rate": 8.99169337857595e-07, "loss": 0.0293, "step": 7812 }, { "epoch": 1.7777019340159272, "grad_norm": 0.9884789847111075, "learning_rate": 8.990890396858781e-07, "loss": 0.0259, "step": 7813 }, { "epoch": 1.777929465301479, "grad_norm": 1.2914085957558916, "learning_rate": 8.990087359122832e-07, "loss": 0.0311, "step": 7814 }, { "epoch": 1.7781569965870307, "grad_norm": 1.049883175471557, "learning_rate": 8.989284265384515e-07, "loss": 0.023, "step": 7815 }, { "epoch": 1.7783845278725825, "grad_norm": 0.6694835390188534, "learning_rate": 8.988481115660247e-07, "loss": 0.0099, "step": 7816 }, { "epoch": 1.7786120591581343, "grad_norm": 1.0588133640503645, "learning_rate": 8.987677909966439e-07, "loss": 0.0206, "step": 7817 }, { "epoch": 1.778839590443686, "grad_norm": 0.7819167306883373, "learning_rate": 8.986874648319507e-07, "loss": 0.0148, "step": 7818 }, { "epoch": 1.7790671217292378, "grad_norm": 1.4821086058928985, "learning_rate": 8.986071330735872e-07, "loss": 0.0237, "step": 7819 }, { "epoch": 1.7792946530147895, "grad_norm": 1.0958911672140499, "learning_rate": 8.985267957231947e-07, "loss": 0.0308, "step": 7820 }, { "epoch": 1.7795221843003413, "grad_norm": 0.8791299425783055, "learning_rate": 8.984464527824157e-07, "loss": 0.0218, "step": 7821 }, { "epoch": 1.779749715585893, "grad_norm": 1.1468618488025002, "learning_rate": 8.983661042528917e-07, "loss": 0.015, "step": 7822 }, { "epoch": 1.7799772468714448, "grad_norm": 0.9483905040621247, "learning_rate": 8.982857501362655e-07, "loss": 0.0161, "step": 7823 }, { "epoch": 1.7802047781569965, "grad_norm": 0.8960232478818805, "learning_rate": 8.982053904341789e-07, "loss": 0.0198, "step": 7824 }, { "epoch": 1.7804323094425483, "grad_norm": 1.4328448860754595, "learning_rate": 8.981250251482746e-07, "loss": 0.017, "step": 7825 }, { "epoch": 1.7806598407281, "grad_norm": 0.9743777428010655, "learning_rate": 8.980446542801947e-07, "loss": 0.0113, "step": 7826 }, { "epoch": 1.7808873720136518, "grad_norm": 1.2703087358314946, "learning_rate": 8.979642778315824e-07, "loss": 0.032, "step": 7827 }, { "epoch": 1.7811149032992035, "grad_norm": 1.4011343554143572, "learning_rate": 8.9788389580408e-07, "loss": 0.0324, "step": 7828 }, { "epoch": 1.7813424345847553, "grad_norm": 1.6343876573253318, "learning_rate": 8.978035081993307e-07, "loss": 0.0268, "step": 7829 }, { "epoch": 1.781569965870307, "grad_norm": 1.7645817562932788, "learning_rate": 8.977231150189772e-07, "loss": 0.0317, "step": 7830 }, { "epoch": 1.7817974971558588, "grad_norm": 1.6087529896149062, "learning_rate": 8.976427162646628e-07, "loss": 0.0449, "step": 7831 }, { "epoch": 1.7820250284414105, "grad_norm": 0.8531146257805737, "learning_rate": 8.975623119380304e-07, "loss": 0.0196, "step": 7832 }, { "epoch": 1.7822525597269625, "grad_norm": 1.206582803943983, "learning_rate": 8.974819020407237e-07, "loss": 0.02, "step": 7833 }, { "epoch": 1.7824800910125143, "grad_norm": 1.0145655803861944, "learning_rate": 8.974014865743859e-07, "loss": 0.022, "step": 7834 }, { "epoch": 1.782707622298066, "grad_norm": 1.4964526683444779, "learning_rate": 8.973210655406605e-07, "loss": 0.0404, "step": 7835 }, { "epoch": 1.7829351535836178, "grad_norm": 0.7848232581214782, "learning_rate": 8.972406389411915e-07, "loss": 0.0183, "step": 7836 }, { "epoch": 1.7831626848691695, "grad_norm": 0.9976090202029402, "learning_rate": 8.971602067776222e-07, "loss": 0.0197, "step": 7837 }, { "epoch": 1.7833902161547213, "grad_norm": 1.4889625882400916, "learning_rate": 8.970797690515967e-07, "loss": 0.0304, "step": 7838 }, { "epoch": 1.783617747440273, "grad_norm": 0.8452260647654178, "learning_rate": 8.969993257647591e-07, "loss": 0.0146, "step": 7839 }, { "epoch": 1.7838452787258248, "grad_norm": 1.4782817609899408, "learning_rate": 8.969188769187534e-07, "loss": 0.026, "step": 7840 }, { "epoch": 1.7840728100113765, "grad_norm": 1.225959473840193, "learning_rate": 8.96838422515224e-07, "loss": 0.0429, "step": 7841 }, { "epoch": 1.7843003412969285, "grad_norm": 0.8206177553996756, "learning_rate": 8.967579625558148e-07, "loss": 0.0111, "step": 7842 }, { "epoch": 1.7845278725824802, "grad_norm": 1.2525656353069259, "learning_rate": 8.966774970421708e-07, "loss": 0.0289, "step": 7843 }, { "epoch": 1.784755403868032, "grad_norm": 0.9338096531101292, "learning_rate": 8.965970259759363e-07, "loss": 0.0228, "step": 7844 }, { "epoch": 1.7849829351535837, "grad_norm": 0.7275812301839438, "learning_rate": 8.965165493587557e-07, "loss": 0.0117, "step": 7845 }, { "epoch": 1.7852104664391355, "grad_norm": 0.7969106720107036, "learning_rate": 8.964360671922743e-07, "loss": 0.0147, "step": 7846 }, { "epoch": 1.7854379977246873, "grad_norm": 1.458795393176787, "learning_rate": 8.963555794781369e-07, "loss": 0.0273, "step": 7847 }, { "epoch": 1.785665529010239, "grad_norm": 1.7699436515502187, "learning_rate": 8.962750862179883e-07, "loss": 0.06, "step": 7848 }, { "epoch": 1.7858930602957908, "grad_norm": 0.5431062763845044, "learning_rate": 8.961945874134738e-07, "loss": 0.0081, "step": 7849 }, { "epoch": 1.7861205915813425, "grad_norm": 1.2451912708323942, "learning_rate": 8.961140830662386e-07, "loss": 0.0481, "step": 7850 }, { "epoch": 1.7863481228668943, "grad_norm": 0.9236082482255349, "learning_rate": 8.960335731779281e-07, "loss": 0.0209, "step": 7851 }, { "epoch": 1.786575654152446, "grad_norm": 0.6970816456118641, "learning_rate": 8.959530577501875e-07, "loss": 0.0171, "step": 7852 }, { "epoch": 1.7868031854379978, "grad_norm": 0.9506476518379603, "learning_rate": 8.958725367846628e-07, "loss": 0.0221, "step": 7853 }, { "epoch": 1.7870307167235495, "grad_norm": 1.000808992626888, "learning_rate": 8.957920102829997e-07, "loss": 0.0155, "step": 7854 }, { "epoch": 1.7872582480091013, "grad_norm": 1.4581764598911346, "learning_rate": 8.957114782468436e-07, "loss": 0.0308, "step": 7855 }, { "epoch": 1.787485779294653, "grad_norm": 1.2562396437883345, "learning_rate": 8.956309406778407e-07, "loss": 0.0206, "step": 7856 }, { "epoch": 1.7877133105802048, "grad_norm": 1.4772297021594707, "learning_rate": 8.955503975776371e-07, "loss": 0.0396, "step": 7857 }, { "epoch": 1.7879408418657565, "grad_norm": 1.005126633250784, "learning_rate": 8.954698489478788e-07, "loss": 0.0151, "step": 7858 }, { "epoch": 1.7881683731513083, "grad_norm": 1.8209464517520468, "learning_rate": 8.953892947902121e-07, "loss": 0.047, "step": 7859 }, { "epoch": 1.78839590443686, "grad_norm": 1.4465065149603678, "learning_rate": 8.953087351062835e-07, "loss": 0.0274, "step": 7860 }, { "epoch": 1.7886234357224118, "grad_norm": 1.972798509649138, "learning_rate": 8.952281698977394e-07, "loss": 0.0326, "step": 7861 }, { "epoch": 1.7888509670079635, "grad_norm": 1.34164452934922, "learning_rate": 8.951475991662263e-07, "loss": 0.0318, "step": 7862 }, { "epoch": 1.7890784982935153, "grad_norm": 1.212238624627934, "learning_rate": 8.950670229133912e-07, "loss": 0.0398, "step": 7863 }, { "epoch": 1.789306029579067, "grad_norm": 1.7900647347687184, "learning_rate": 8.949864411408807e-07, "loss": 0.056, "step": 7864 }, { "epoch": 1.7895335608646188, "grad_norm": 1.1479883805932574, "learning_rate": 8.949058538503416e-07, "loss": 0.0272, "step": 7865 }, { "epoch": 1.7897610921501705, "grad_norm": 0.8523729761501173, "learning_rate": 8.948252610434213e-07, "loss": 0.0216, "step": 7866 }, { "epoch": 1.7899886234357223, "grad_norm": 1.53956140553133, "learning_rate": 8.947446627217669e-07, "loss": 0.0369, "step": 7867 }, { "epoch": 1.790216154721274, "grad_norm": 0.9818481136003215, "learning_rate": 8.946640588870254e-07, "loss": 0.0284, "step": 7868 }, { "epoch": 1.7904436860068258, "grad_norm": 0.7124464532670008, "learning_rate": 8.945834495408447e-07, "loss": 0.014, "step": 7869 }, { "epoch": 1.7906712172923775, "grad_norm": 0.6767071176129998, "learning_rate": 8.945028346848718e-07, "loss": 0.0204, "step": 7870 }, { "epoch": 1.7908987485779293, "grad_norm": 1.195333260881732, "learning_rate": 8.944222143207545e-07, "loss": 0.0306, "step": 7871 }, { "epoch": 1.7911262798634813, "grad_norm": 1.7612928725245607, "learning_rate": 8.943415884501407e-07, "loss": 0.0242, "step": 7872 }, { "epoch": 1.791353811149033, "grad_norm": 0.8906098462117128, "learning_rate": 8.942609570746781e-07, "loss": 0.0155, "step": 7873 }, { "epoch": 1.7915813424345848, "grad_norm": 1.6234756574496614, "learning_rate": 8.941803201960146e-07, "loss": 0.0568, "step": 7874 }, { "epoch": 1.7918088737201365, "grad_norm": 1.3753803192692626, "learning_rate": 8.940996778157983e-07, "loss": 0.0504, "step": 7875 }, { "epoch": 1.7920364050056883, "grad_norm": 1.3303981402354177, "learning_rate": 8.940190299356774e-07, "loss": 0.0348, "step": 7876 }, { "epoch": 1.79226393629124, "grad_norm": 1.1381428529489983, "learning_rate": 8.939383765573004e-07, "loss": 0.0313, "step": 7877 }, { "epoch": 1.7924914675767918, "grad_norm": 0.6313270073845239, "learning_rate": 8.938577176823154e-07, "loss": 0.0133, "step": 7878 }, { "epoch": 1.7927189988623435, "grad_norm": 1.1526738136130819, "learning_rate": 8.93777053312371e-07, "loss": 0.0462, "step": 7879 }, { "epoch": 1.7929465301478953, "grad_norm": 1.4424825876427267, "learning_rate": 8.936963834491161e-07, "loss": 0.0192, "step": 7880 }, { "epoch": 1.7931740614334473, "grad_norm": 1.0566092128588358, "learning_rate": 8.93615708094199e-07, "loss": 0.0225, "step": 7881 }, { "epoch": 1.793401592718999, "grad_norm": 1.9176983642544965, "learning_rate": 8.935350272492687e-07, "loss": 0.0644, "step": 7882 }, { "epoch": 1.7936291240045508, "grad_norm": 0.9927270092271882, "learning_rate": 8.934543409159743e-07, "loss": 0.0196, "step": 7883 }, { "epoch": 1.7938566552901025, "grad_norm": 0.8956004254398622, "learning_rate": 8.933736490959649e-07, "loss": 0.0283, "step": 7884 }, { "epoch": 1.7940841865756543, "grad_norm": 0.8771623418465615, "learning_rate": 8.932929517908896e-07, "loss": 0.0276, "step": 7885 }, { "epoch": 1.794311717861206, "grad_norm": 0.8757995564864757, "learning_rate": 8.932122490023977e-07, "loss": 0.0178, "step": 7886 }, { "epoch": 1.7945392491467578, "grad_norm": 0.9619281755992323, "learning_rate": 8.931315407321387e-07, "loss": 0.0254, "step": 7887 }, { "epoch": 1.7947667804323095, "grad_norm": 0.830607334642633, "learning_rate": 8.93050826981762e-07, "loss": 0.0145, "step": 7888 }, { "epoch": 1.7949943117178613, "grad_norm": 0.9754645372250353, "learning_rate": 8.929701077529173e-07, "loss": 0.0248, "step": 7889 }, { "epoch": 1.795221843003413, "grad_norm": 1.3446910778885603, "learning_rate": 8.928893830472544e-07, "loss": 0.0257, "step": 7890 }, { "epoch": 1.7954493742889648, "grad_norm": 1.0600421240544748, "learning_rate": 8.92808652866423e-07, "loss": 0.0229, "step": 7891 }, { "epoch": 1.7956769055745165, "grad_norm": 1.3045169917709618, "learning_rate": 8.927279172120734e-07, "loss": 0.0345, "step": 7892 }, { "epoch": 1.7959044368600683, "grad_norm": 1.3535442753365499, "learning_rate": 8.926471760858554e-07, "loss": 0.0331, "step": 7893 }, { "epoch": 1.79613196814562, "grad_norm": 1.1986153843262288, "learning_rate": 8.925664294894193e-07, "loss": 0.0228, "step": 7894 }, { "epoch": 1.7963594994311718, "grad_norm": 1.6326418452338822, "learning_rate": 8.924856774244154e-07, "loss": 0.0439, "step": 7895 }, { "epoch": 1.7965870307167235, "grad_norm": 1.274533599454996, "learning_rate": 8.92404919892494e-07, "loss": 0.0284, "step": 7896 }, { "epoch": 1.7968145620022753, "grad_norm": 0.9245558198366477, "learning_rate": 8.923241568953061e-07, "loss": 0.025, "step": 7897 }, { "epoch": 1.797042093287827, "grad_norm": 1.4659383065797449, "learning_rate": 8.922433884345018e-07, "loss": 0.0359, "step": 7898 }, { "epoch": 1.7972696245733788, "grad_norm": 1.2678360189132305, "learning_rate": 8.921626145117321e-07, "loss": 0.045, "step": 7899 }, { "epoch": 1.7974971558589306, "grad_norm": 0.807399970096153, "learning_rate": 8.920818351286479e-07, "loss": 0.0165, "step": 7900 }, { "epoch": 1.7977246871444823, "grad_norm": 1.1167806568004228, "learning_rate": 8.920010502868999e-07, "loss": 0.0248, "step": 7901 }, { "epoch": 1.797952218430034, "grad_norm": 1.1293230465683268, "learning_rate": 8.919202599881395e-07, "loss": 0.0209, "step": 7902 }, { "epoch": 1.7981797497155858, "grad_norm": 1.0832700318486221, "learning_rate": 8.918394642340179e-07, "loss": 0.0221, "step": 7903 }, { "epoch": 1.7984072810011376, "grad_norm": 0.7064646043342412, "learning_rate": 8.917586630261864e-07, "loss": 0.0206, "step": 7904 }, { "epoch": 1.7986348122866893, "grad_norm": 1.4065933340491492, "learning_rate": 8.916778563662963e-07, "loss": 0.0303, "step": 7905 }, { "epoch": 1.798862343572241, "grad_norm": 0.6552459781539552, "learning_rate": 8.915970442559993e-07, "loss": 0.0132, "step": 7906 }, { "epoch": 1.7990898748577928, "grad_norm": 0.7452927540406299, "learning_rate": 8.915162266969469e-07, "loss": 0.0207, "step": 7907 }, { "epoch": 1.7993174061433446, "grad_norm": 1.4399513026164452, "learning_rate": 8.91435403690791e-07, "loss": 0.0292, "step": 7908 }, { "epoch": 1.7995449374288963, "grad_norm": 0.758350041185595, "learning_rate": 8.913545752391832e-07, "loss": 0.0149, "step": 7909 }, { "epoch": 1.799772468714448, "grad_norm": 0.9981336405534768, "learning_rate": 8.912737413437758e-07, "loss": 0.0181, "step": 7910 }, { "epoch": 1.8, "grad_norm": 0.8948519853122613, "learning_rate": 8.91192902006221e-07, "loss": 0.0137, "step": 7911 }, { "epoch": 1.8002275312855518, "grad_norm": 1.138919667694445, "learning_rate": 8.911120572281705e-07, "loss": 0.0377, "step": 7912 }, { "epoch": 1.8004550625711035, "grad_norm": 1.2169297926213631, "learning_rate": 8.91031207011277e-07, "loss": 0.0265, "step": 7913 }, { "epoch": 1.8006825938566553, "grad_norm": 1.0046099153521237, "learning_rate": 8.90950351357193e-07, "loss": 0.0165, "step": 7914 }, { "epoch": 1.800910125142207, "grad_norm": 1.2305460947745517, "learning_rate": 8.908694902675706e-07, "loss": 0.034, "step": 7915 }, { "epoch": 1.8011376564277588, "grad_norm": 0.908175336046685, "learning_rate": 8.907886237440627e-07, "loss": 0.0311, "step": 7916 }, { "epoch": 1.8013651877133106, "grad_norm": 0.9640989084574261, "learning_rate": 8.907077517883225e-07, "loss": 0.011, "step": 7917 }, { "epoch": 1.8015927189988623, "grad_norm": 1.0611776598333045, "learning_rate": 8.906268744020022e-07, "loss": 0.0391, "step": 7918 }, { "epoch": 1.801820250284414, "grad_norm": 1.2696769190669295, "learning_rate": 8.905459915867551e-07, "loss": 0.0394, "step": 7919 }, { "epoch": 1.802047781569966, "grad_norm": 0.9864570238447806, "learning_rate": 8.904651033442342e-07, "loss": 0.0209, "step": 7920 }, { "epoch": 1.8022753128555178, "grad_norm": 0.7898526012800101, "learning_rate": 8.903842096760929e-07, "loss": 0.0295, "step": 7921 }, { "epoch": 1.8025028441410695, "grad_norm": 1.1684045824501679, "learning_rate": 8.903033105839842e-07, "loss": 0.0316, "step": 7922 }, { "epoch": 1.8027303754266213, "grad_norm": 1.718325976581944, "learning_rate": 8.902224060695619e-07, "loss": 0.0553, "step": 7923 }, { "epoch": 1.802957906712173, "grad_norm": 1.1620730732976743, "learning_rate": 8.901414961344792e-07, "loss": 0.036, "step": 7924 }, { "epoch": 1.8031854379977248, "grad_norm": 1.6500968741389468, "learning_rate": 8.900605807803901e-07, "loss": 0.0398, "step": 7925 }, { "epoch": 1.8034129692832765, "grad_norm": 1.103134518928244, "learning_rate": 8.89979660008948e-07, "loss": 0.0242, "step": 7926 }, { "epoch": 1.8036405005688283, "grad_norm": 0.977204940987202, "learning_rate": 8.898987338218069e-07, "loss": 0.0202, "step": 7927 }, { "epoch": 1.80386803185438, "grad_norm": 1.0754200353452477, "learning_rate": 8.89817802220621e-07, "loss": 0.0227, "step": 7928 }, { "epoch": 1.8040955631399318, "grad_norm": 1.2777556299967832, "learning_rate": 8.89736865207044e-07, "loss": 0.026, "step": 7929 }, { "epoch": 1.8043230944254836, "grad_norm": 1.2235154401866535, "learning_rate": 8.896559227827305e-07, "loss": 0.031, "step": 7930 }, { "epoch": 1.8045506257110353, "grad_norm": 0.6503055008256462, "learning_rate": 8.895749749493346e-07, "loss": 0.0096, "step": 7931 }, { "epoch": 1.804778156996587, "grad_norm": 0.834214057724775, "learning_rate": 8.894940217085106e-07, "loss": 0.0155, "step": 7932 }, { "epoch": 1.8050056882821388, "grad_norm": 1.0672819385337902, "learning_rate": 8.894130630619133e-07, "loss": 0.0243, "step": 7933 }, { "epoch": 1.8052332195676906, "grad_norm": 0.9146202345603645, "learning_rate": 8.893320990111972e-07, "loss": 0.0185, "step": 7934 }, { "epoch": 1.8054607508532423, "grad_norm": 1.2276574359105301, "learning_rate": 8.892511295580172e-07, "loss": 0.0265, "step": 7935 }, { "epoch": 1.805688282138794, "grad_norm": 1.3625327539993874, "learning_rate": 8.891701547040281e-07, "loss": 0.0428, "step": 7936 }, { "epoch": 1.8059158134243458, "grad_norm": 0.8436216552179677, "learning_rate": 8.890891744508847e-07, "loss": 0.0157, "step": 7937 }, { "epoch": 1.8061433447098976, "grad_norm": 2.15371282783646, "learning_rate": 8.890081888002424e-07, "loss": 0.0624, "step": 7938 }, { "epoch": 1.8063708759954493, "grad_norm": 0.8801776627933559, "learning_rate": 8.88927197753756e-07, "loss": 0.024, "step": 7939 }, { "epoch": 1.806598407281001, "grad_norm": 0.9460686718278507, "learning_rate": 8.888462013130811e-07, "loss": 0.0246, "step": 7940 }, { "epoch": 1.8068259385665528, "grad_norm": 1.1341179520118103, "learning_rate": 8.887651994798732e-07, "loss": 0.0531, "step": 7941 }, { "epoch": 1.8070534698521046, "grad_norm": 0.83323675871723, "learning_rate": 8.886841922557876e-07, "loss": 0.0107, "step": 7942 }, { "epoch": 1.8072810011376563, "grad_norm": 1.769703851324121, "learning_rate": 8.8860317964248e-07, "loss": 0.0255, "step": 7943 }, { "epoch": 1.807508532423208, "grad_norm": 1.63313720621551, "learning_rate": 8.885221616416063e-07, "loss": 0.0419, "step": 7944 }, { "epoch": 1.8077360637087598, "grad_norm": 1.135504726716167, "learning_rate": 8.884411382548221e-07, "loss": 0.03, "step": 7945 }, { "epoch": 1.8079635949943116, "grad_norm": 1.2830667286887556, "learning_rate": 8.883601094837835e-07, "loss": 0.0321, "step": 7946 }, { "epoch": 1.8081911262798633, "grad_norm": 1.3598193317394558, "learning_rate": 8.882790753301465e-07, "loss": 0.0267, "step": 7947 }, { "epoch": 1.808418657565415, "grad_norm": 1.33412820168281, "learning_rate": 8.881980357955676e-07, "loss": 0.0241, "step": 7948 }, { "epoch": 1.808646188850967, "grad_norm": 1.658987742425555, "learning_rate": 8.881169908817028e-07, "loss": 0.0349, "step": 7949 }, { "epoch": 1.8088737201365188, "grad_norm": 0.4945242506035396, "learning_rate": 8.880359405902085e-07, "loss": 0.0159, "step": 7950 }, { "epoch": 1.8091012514220706, "grad_norm": 1.5533789527331354, "learning_rate": 8.879548849227413e-07, "loss": 0.0493, "step": 7951 }, { "epoch": 1.8093287827076223, "grad_norm": 1.78859864942384, "learning_rate": 8.87873823880958e-07, "loss": 0.0371, "step": 7952 }, { "epoch": 1.809556313993174, "grad_norm": 0.9977759236749801, "learning_rate": 8.877927574665149e-07, "loss": 0.0143, "step": 7953 }, { "epoch": 1.8097838452787258, "grad_norm": 1.7399862593633308, "learning_rate": 8.877116856810693e-07, "loss": 0.0389, "step": 7954 }, { "epoch": 1.8100113765642776, "grad_norm": 1.335838427331153, "learning_rate": 8.876306085262781e-07, "loss": 0.0215, "step": 7955 }, { "epoch": 1.8102389078498293, "grad_norm": 1.1320804254900072, "learning_rate": 8.875495260037979e-07, "loss": 0.0407, "step": 7956 }, { "epoch": 1.810466439135381, "grad_norm": 1.1642449596779554, "learning_rate": 8.874684381152865e-07, "loss": 0.021, "step": 7957 }, { "epoch": 1.8106939704209328, "grad_norm": 0.8676467298246693, "learning_rate": 8.873873448624008e-07, "loss": 0.0197, "step": 7958 }, { "epoch": 1.8109215017064848, "grad_norm": 0.8532877197343411, "learning_rate": 8.873062462467983e-07, "loss": 0.0148, "step": 7959 }, { "epoch": 1.8111490329920366, "grad_norm": 1.1008892392506717, "learning_rate": 8.872251422701366e-07, "loss": 0.0229, "step": 7960 }, { "epoch": 1.8113765642775883, "grad_norm": 0.7547039153153899, "learning_rate": 8.871440329340733e-07, "loss": 0.0247, "step": 7961 }, { "epoch": 1.81160409556314, "grad_norm": 1.815452580029783, "learning_rate": 8.870629182402659e-07, "loss": 0.0669, "step": 7962 }, { "epoch": 1.8118316268486918, "grad_norm": 1.3820207820518486, "learning_rate": 8.869817981903725e-07, "loss": 0.031, "step": 7963 }, { "epoch": 1.8120591581342436, "grad_norm": 0.8467210866515458, "learning_rate": 8.869006727860508e-07, "loss": 0.0204, "step": 7964 }, { "epoch": 1.8122866894197953, "grad_norm": 0.5195820151495079, "learning_rate": 8.868195420289591e-07, "loss": 0.006, "step": 7965 }, { "epoch": 1.812514220705347, "grad_norm": 1.158209961743232, "learning_rate": 8.867384059207554e-07, "loss": 0.032, "step": 7966 }, { "epoch": 1.8127417519908988, "grad_norm": 0.731730761445067, "learning_rate": 8.86657264463098e-07, "loss": 0.0122, "step": 7967 }, { "epoch": 1.8129692832764506, "grad_norm": 1.5433000241193469, "learning_rate": 8.865761176576457e-07, "loss": 0.0358, "step": 7968 }, { "epoch": 1.8131968145620023, "grad_norm": 0.9802801658756328, "learning_rate": 8.864949655060562e-07, "loss": 0.0254, "step": 7969 }, { "epoch": 1.813424345847554, "grad_norm": 1.2893269013156747, "learning_rate": 8.864138080099886e-07, "loss": 0.0344, "step": 7970 }, { "epoch": 1.8136518771331058, "grad_norm": 1.6378973935445966, "learning_rate": 8.863326451711015e-07, "loss": 0.0346, "step": 7971 }, { "epoch": 1.8138794084186576, "grad_norm": 1.5901382545769034, "learning_rate": 8.862514769910538e-07, "loss": 0.0257, "step": 7972 }, { "epoch": 1.8141069397042093, "grad_norm": 1.1125358321048289, "learning_rate": 8.861703034715042e-07, "loss": 0.0214, "step": 7973 }, { "epoch": 1.814334470989761, "grad_norm": 0.8342772107162001, "learning_rate": 8.860891246141123e-07, "loss": 0.0117, "step": 7974 }, { "epoch": 1.8145620022753128, "grad_norm": 1.3763064707775834, "learning_rate": 8.860079404205367e-07, "loss": 0.0244, "step": 7975 }, { "epoch": 1.8147895335608646, "grad_norm": 1.2902917037907535, "learning_rate": 8.859267508924366e-07, "loss": 0.0292, "step": 7976 }, { "epoch": 1.8150170648464163, "grad_norm": 0.9417112029914969, "learning_rate": 8.858455560314718e-07, "loss": 0.0152, "step": 7977 }, { "epoch": 1.815244596131968, "grad_norm": 1.228076700663053, "learning_rate": 8.857643558393015e-07, "loss": 0.0392, "step": 7978 }, { "epoch": 1.8154721274175198, "grad_norm": 1.3554843285182212, "learning_rate": 8.856831503175852e-07, "loss": 0.0425, "step": 7979 }, { "epoch": 1.8156996587030716, "grad_norm": 1.283048051846109, "learning_rate": 8.856019394679828e-07, "loss": 0.0145, "step": 7980 }, { "epoch": 1.8159271899886233, "grad_norm": 1.2300924493354228, "learning_rate": 8.855207232921541e-07, "loss": 0.0159, "step": 7981 }, { "epoch": 1.816154721274175, "grad_norm": 1.053461009988531, "learning_rate": 8.854395017917588e-07, "loss": 0.0212, "step": 7982 }, { "epoch": 1.8163822525597269, "grad_norm": 0.8458982455151165, "learning_rate": 8.853582749684571e-07, "loss": 0.0276, "step": 7983 }, { "epoch": 1.8166097838452786, "grad_norm": 0.7110026030106191, "learning_rate": 8.852770428239091e-07, "loss": 0.012, "step": 7984 }, { "epoch": 1.8168373151308304, "grad_norm": 1.3057855515690473, "learning_rate": 8.851958053597751e-07, "loss": 0.0393, "step": 7985 }, { "epoch": 1.817064846416382, "grad_norm": 1.3452318740996148, "learning_rate": 8.851145625777153e-07, "loss": 0.0508, "step": 7986 }, { "epoch": 1.8172923777019339, "grad_norm": 0.8218495774051612, "learning_rate": 8.850333144793903e-07, "loss": 0.0179, "step": 7987 }, { "epoch": 1.8175199089874858, "grad_norm": 0.5802176343383713, "learning_rate": 8.849520610664605e-07, "loss": 0.0038, "step": 7988 }, { "epoch": 1.8177474402730376, "grad_norm": 1.9961297192685032, "learning_rate": 8.848708023405866e-07, "loss": 0.0418, "step": 7989 }, { "epoch": 1.8179749715585893, "grad_norm": 0.8797672758098891, "learning_rate": 8.847895383034294e-07, "loss": 0.0101, "step": 7990 }, { "epoch": 1.818202502844141, "grad_norm": 1.3216697039016492, "learning_rate": 8.847082689566499e-07, "loss": 0.0273, "step": 7991 }, { "epoch": 1.8184300341296928, "grad_norm": 1.3162763009494356, "learning_rate": 8.846269943019091e-07, "loss": 0.0317, "step": 7992 }, { "epoch": 1.8186575654152446, "grad_norm": 1.3398769067970908, "learning_rate": 8.84545714340868e-07, "loss": 0.043, "step": 7993 }, { "epoch": 1.8188850967007963, "grad_norm": 1.3502712513593529, "learning_rate": 8.844644290751877e-07, "loss": 0.0276, "step": 7994 }, { "epoch": 1.819112627986348, "grad_norm": 1.3898881342636042, "learning_rate": 8.843831385065298e-07, "loss": 0.0475, "step": 7995 }, { "epoch": 1.8193401592718998, "grad_norm": 1.0577003818497825, "learning_rate": 8.843018426365555e-07, "loss": 0.0289, "step": 7996 }, { "epoch": 1.8195676905574516, "grad_norm": 1.3552733005663498, "learning_rate": 8.842205414669264e-07, "loss": 0.024, "step": 7997 }, { "epoch": 1.8197952218430036, "grad_norm": 0.6563826511647947, "learning_rate": 8.841392349993041e-07, "loss": 0.01, "step": 7998 }, { "epoch": 1.8200227531285553, "grad_norm": 0.9195258352180651, "learning_rate": 8.840579232353506e-07, "loss": 0.0203, "step": 7999 }, { "epoch": 1.820250284414107, "grad_norm": 0.9859767262646522, "learning_rate": 8.839766061767277e-07, "loss": 0.0268, "step": 8000 }, { "epoch": 1.8204778156996588, "grad_norm": 1.1374828137033943, "learning_rate": 8.838952838250971e-07, "loss": 0.0412, "step": 8001 }, { "epoch": 1.8207053469852106, "grad_norm": 1.6080148893216402, "learning_rate": 8.83813956182121e-07, "loss": 0.0557, "step": 8002 }, { "epoch": 1.8209328782707623, "grad_norm": 0.9689172649902232, "learning_rate": 8.837326232494616e-07, "loss": 0.018, "step": 8003 }, { "epoch": 1.821160409556314, "grad_norm": 1.869046995653937, "learning_rate": 8.836512850287812e-07, "loss": 0.0148, "step": 8004 }, { "epoch": 1.8213879408418658, "grad_norm": 1.0872933086687537, "learning_rate": 8.835699415217425e-07, "loss": 0.0149, "step": 8005 }, { "epoch": 1.8216154721274176, "grad_norm": 1.0450905191093498, "learning_rate": 8.834885927300075e-07, "loss": 0.0373, "step": 8006 }, { "epoch": 1.8218430034129693, "grad_norm": 1.5362767889787077, "learning_rate": 8.834072386552392e-07, "loss": 0.0384, "step": 8007 }, { "epoch": 1.822070534698521, "grad_norm": 1.2374289696704717, "learning_rate": 8.833258792991001e-07, "loss": 0.0324, "step": 8008 }, { "epoch": 1.8222980659840728, "grad_norm": 0.6400280975849308, "learning_rate": 8.832445146632531e-07, "loss": 0.0109, "step": 8009 }, { "epoch": 1.8225255972696246, "grad_norm": 1.3815605316771693, "learning_rate": 8.831631447493612e-07, "loss": 0.0322, "step": 8010 }, { "epoch": 1.8227531285551763, "grad_norm": 1.5167509474073282, "learning_rate": 8.830817695590875e-07, "loss": 0.0224, "step": 8011 }, { "epoch": 1.822980659840728, "grad_norm": 0.6230151252863323, "learning_rate": 8.830003890940953e-07, "loss": 0.0129, "step": 8012 }, { "epoch": 1.8232081911262799, "grad_norm": 1.096690926973307, "learning_rate": 8.829190033560473e-07, "loss": 0.0176, "step": 8013 }, { "epoch": 1.8234357224118316, "grad_norm": 0.9261907876289526, "learning_rate": 8.828376123466072e-07, "loss": 0.02, "step": 8014 }, { "epoch": 1.8236632536973834, "grad_norm": 0.9210807078893485, "learning_rate": 8.827562160674387e-07, "loss": 0.0268, "step": 8015 }, { "epoch": 1.823890784982935, "grad_norm": 1.0197961436664829, "learning_rate": 8.826748145202052e-07, "loss": 0.0302, "step": 8016 }, { "epoch": 1.8241183162684869, "grad_norm": 0.8048295376769727, "learning_rate": 8.8259340770657e-07, "loss": 0.0254, "step": 8017 }, { "epoch": 1.8243458475540386, "grad_norm": 0.656143016454103, "learning_rate": 8.82511995628198e-07, "loss": 0.0121, "step": 8018 }, { "epoch": 1.8245733788395904, "grad_norm": 0.8966243554035213, "learning_rate": 8.824305782867521e-07, "loss": 0.0247, "step": 8019 }, { "epoch": 1.8248009101251421, "grad_norm": 1.1794697891283992, "learning_rate": 8.823491556838964e-07, "loss": 0.019, "step": 8020 }, { "epoch": 1.8250284414106939, "grad_norm": 1.1709408047651713, "learning_rate": 8.822677278212955e-07, "loss": 0.0255, "step": 8021 }, { "epoch": 1.8252559726962456, "grad_norm": 1.5148678871584642, "learning_rate": 8.821862947006134e-07, "loss": 0.0318, "step": 8022 }, { "epoch": 1.8254835039817974, "grad_norm": 1.0770996966783146, "learning_rate": 8.821048563235143e-07, "loss": 0.0223, "step": 8023 }, { "epoch": 1.8257110352673491, "grad_norm": 0.8636016370138058, "learning_rate": 8.820234126916631e-07, "loss": 0.026, "step": 8024 }, { "epoch": 1.8259385665529009, "grad_norm": 1.3682922135783069, "learning_rate": 8.81941963806724e-07, "loss": 0.0381, "step": 8025 }, { "epoch": 1.8261660978384526, "grad_norm": 0.9348631362839371, "learning_rate": 8.818605096703614e-07, "loss": 0.0209, "step": 8026 }, { "epoch": 1.8263936291240046, "grad_norm": 0.4823581924596872, "learning_rate": 8.817790502842406e-07, "loss": 0.0068, "step": 8027 }, { "epoch": 1.8266211604095564, "grad_norm": 1.0951844413599716, "learning_rate": 8.816975856500264e-07, "loss": 0.0458, "step": 8028 }, { "epoch": 1.826848691695108, "grad_norm": 0.6118985572179404, "learning_rate": 8.816161157693837e-07, "loss": 0.0093, "step": 8029 }, { "epoch": 1.8270762229806599, "grad_norm": 1.3843657752880825, "learning_rate": 8.815346406439773e-07, "loss": 0.0308, "step": 8030 }, { "epoch": 1.8273037542662116, "grad_norm": 1.4803326193372828, "learning_rate": 8.814531602754728e-07, "loss": 0.0299, "step": 8031 }, { "epoch": 1.8275312855517634, "grad_norm": 1.1534269433987345, "learning_rate": 8.813716746655354e-07, "loss": 0.0524, "step": 8032 }, { "epoch": 1.8277588168373151, "grad_norm": 1.5295917320213728, "learning_rate": 8.812901838158304e-07, "loss": 0.0294, "step": 8033 }, { "epoch": 1.8279863481228669, "grad_norm": 1.0053889970420802, "learning_rate": 8.812086877280234e-07, "loss": 0.0246, "step": 8034 }, { "epoch": 1.8282138794084186, "grad_norm": 1.3826048330122502, "learning_rate": 8.811271864037802e-07, "loss": 0.0208, "step": 8035 }, { "epoch": 1.8284414106939704, "grad_norm": 1.166250279335265, "learning_rate": 8.810456798447664e-07, "loss": 0.0259, "step": 8036 }, { "epoch": 1.8286689419795223, "grad_norm": 1.2992210361531946, "learning_rate": 8.809641680526477e-07, "loss": 0.0357, "step": 8037 }, { "epoch": 1.828896473265074, "grad_norm": 1.2611943101937177, "learning_rate": 8.808826510290904e-07, "loss": 0.025, "step": 8038 }, { "epoch": 1.8291240045506258, "grad_norm": 0.8944973994876296, "learning_rate": 8.808011287757601e-07, "loss": 0.0207, "step": 8039 }, { "epoch": 1.8293515358361776, "grad_norm": 0.6737233324041566, "learning_rate": 8.807196012943231e-07, "loss": 0.0153, "step": 8040 }, { "epoch": 1.8295790671217294, "grad_norm": 0.9209907577540047, "learning_rate": 8.80638068586446e-07, "loss": 0.0206, "step": 8041 }, { "epoch": 1.829806598407281, "grad_norm": 1.8380608111866295, "learning_rate": 8.805565306537949e-07, "loss": 0.068, "step": 8042 }, { "epoch": 1.8300341296928329, "grad_norm": 1.4638737163478475, "learning_rate": 8.804749874980364e-07, "loss": 0.051, "step": 8043 }, { "epoch": 1.8302616609783846, "grad_norm": 0.8030032240145646, "learning_rate": 8.80393439120837e-07, "loss": 0.0128, "step": 8044 }, { "epoch": 1.8304891922639364, "grad_norm": 1.479151025541696, "learning_rate": 8.803118855238635e-07, "loss": 0.0245, "step": 8045 }, { "epoch": 1.8307167235494881, "grad_norm": 1.551454930794604, "learning_rate": 8.802303267087825e-07, "loss": 0.0454, "step": 8046 }, { "epoch": 1.8309442548350399, "grad_norm": 1.0588321775780563, "learning_rate": 8.801487626772611e-07, "loss": 0.0229, "step": 8047 }, { "epoch": 1.8311717861205916, "grad_norm": 0.9880309790661209, "learning_rate": 8.800671934309663e-07, "loss": 0.0283, "step": 8048 }, { "epoch": 1.8313993174061434, "grad_norm": 1.0102582573010799, "learning_rate": 8.799856189715653e-07, "loss": 0.0159, "step": 8049 }, { "epoch": 1.8316268486916951, "grad_norm": 1.5479135317155381, "learning_rate": 8.79904039300725e-07, "loss": 0.0267, "step": 8050 }, { "epoch": 1.8318543799772469, "grad_norm": 1.2096856484161966, "learning_rate": 8.798224544201132e-07, "loss": 0.019, "step": 8051 }, { "epoch": 1.8320819112627986, "grad_norm": 0.8689124435880402, "learning_rate": 8.79740864331397e-07, "loss": 0.0149, "step": 8052 }, { "epoch": 1.8323094425483504, "grad_norm": 0.7456276512123506, "learning_rate": 8.796592690362439e-07, "loss": 0.0138, "step": 8053 }, { "epoch": 1.8325369738339021, "grad_norm": 0.7019017016852895, "learning_rate": 8.795776685363219e-07, "loss": 0.0178, "step": 8054 }, { "epoch": 1.8327645051194539, "grad_norm": 1.6867823859871856, "learning_rate": 8.794960628332986e-07, "loss": 0.0354, "step": 8055 }, { "epoch": 1.8329920364050056, "grad_norm": 0.7129105859531477, "learning_rate": 8.794144519288419e-07, "loss": 0.0178, "step": 8056 }, { "epoch": 1.8332195676905574, "grad_norm": 1.397218489443679, "learning_rate": 8.793328358246198e-07, "loss": 0.0428, "step": 8057 }, { "epoch": 1.8334470989761091, "grad_norm": 0.9872685652054035, "learning_rate": 8.792512145223002e-07, "loss": 0.0188, "step": 8058 }, { "epoch": 1.8336746302616609, "grad_norm": 0.8253338993696552, "learning_rate": 8.791695880235515e-07, "loss": 0.0254, "step": 8059 }, { "epoch": 1.8339021615472126, "grad_norm": 1.4428772827676484, "learning_rate": 8.790879563300416e-07, "loss": 0.0415, "step": 8060 }, { "epoch": 1.8341296928327644, "grad_norm": 0.4796256176224381, "learning_rate": 8.790063194434397e-07, "loss": 0.0097, "step": 8061 }, { "epoch": 1.8343572241183161, "grad_norm": 1.3614523308348847, "learning_rate": 8.789246773654136e-07, "loss": 0.0346, "step": 8062 }, { "epoch": 1.834584755403868, "grad_norm": 0.7218128847996856, "learning_rate": 8.788430300976321e-07, "loss": 0.0172, "step": 8063 }, { "epoch": 1.8348122866894196, "grad_norm": 1.126640751968136, "learning_rate": 8.787613776417639e-07, "loss": 0.0214, "step": 8064 }, { "epoch": 1.8350398179749714, "grad_norm": 0.7030643016032253, "learning_rate": 8.78679719999478e-07, "loss": 0.0161, "step": 8065 }, { "epoch": 1.8352673492605234, "grad_norm": 1.6419687984973874, "learning_rate": 8.785980571724433e-07, "loss": 0.0374, "step": 8066 }, { "epoch": 1.8354948805460751, "grad_norm": 0.8975514934459169, "learning_rate": 8.785163891623284e-07, "loss": 0.022, "step": 8067 }, { "epoch": 1.8357224118316269, "grad_norm": 0.9300496560802327, "learning_rate": 8.784347159708033e-07, "loss": 0.0199, "step": 8068 }, { "epoch": 1.8359499431171786, "grad_norm": 1.4971241023373771, "learning_rate": 8.783530375995364e-07, "loss": 0.0266, "step": 8069 }, { "epoch": 1.8361774744027304, "grad_norm": 0.9699889820724317, "learning_rate": 8.782713540501975e-07, "loss": 0.0142, "step": 8070 }, { "epoch": 1.8364050056882821, "grad_norm": 1.2128464569079203, "learning_rate": 8.78189665324456e-07, "loss": 0.0385, "step": 8071 }, { "epoch": 1.8366325369738339, "grad_norm": 1.1994951742697528, "learning_rate": 8.781079714239812e-07, "loss": 0.0376, "step": 8072 }, { "epoch": 1.8368600682593856, "grad_norm": 1.1920945512599561, "learning_rate": 8.780262723504434e-07, "loss": 0.0276, "step": 8073 }, { "epoch": 1.8370875995449374, "grad_norm": 1.3444176419784304, "learning_rate": 8.779445681055115e-07, "loss": 0.0282, "step": 8074 }, { "epoch": 1.8373151308304891, "grad_norm": 1.170851765126466, "learning_rate": 8.778628586908563e-07, "loss": 0.0314, "step": 8075 }, { "epoch": 1.8375426621160411, "grad_norm": 1.2510038912360684, "learning_rate": 8.777811441081475e-07, "loss": 0.0331, "step": 8076 }, { "epoch": 1.8377701934015929, "grad_norm": 0.9620868665247972, "learning_rate": 8.776994243590545e-07, "loss": 0.0183, "step": 8077 }, { "epoch": 1.8379977246871446, "grad_norm": 1.6724076249826632, "learning_rate": 8.776176994452485e-07, "loss": 0.0296, "step": 8078 }, { "epoch": 1.8382252559726964, "grad_norm": 1.2051672402979108, "learning_rate": 8.775359693683991e-07, "loss": 0.0271, "step": 8079 }, { "epoch": 1.8384527872582481, "grad_norm": 0.9151061745096992, "learning_rate": 8.77454234130177e-07, "loss": 0.0378, "step": 8080 }, { "epoch": 1.8386803185437999, "grad_norm": 0.7885192147031999, "learning_rate": 8.773724937322531e-07, "loss": 0.0276, "step": 8081 }, { "epoch": 1.8389078498293516, "grad_norm": 1.2112579740172957, "learning_rate": 8.772907481762973e-07, "loss": 0.0255, "step": 8082 }, { "epoch": 1.8391353811149034, "grad_norm": 1.0373152134098131, "learning_rate": 8.772089974639806e-07, "loss": 0.0411, "step": 8083 }, { "epoch": 1.8393629124004551, "grad_norm": 1.4909226278812742, "learning_rate": 8.77127241596974e-07, "loss": 0.0312, "step": 8084 }, { "epoch": 1.8395904436860069, "grad_norm": 0.745134622479357, "learning_rate": 8.770454805769482e-07, "loss": 0.0132, "step": 8085 }, { "epoch": 1.8398179749715586, "grad_norm": 0.8888480022796346, "learning_rate": 8.769637144055745e-07, "loss": 0.0184, "step": 8086 }, { "epoch": 1.8400455062571104, "grad_norm": 0.6860545499445614, "learning_rate": 8.768819430845241e-07, "loss": 0.0125, "step": 8087 }, { "epoch": 1.8402730375426621, "grad_norm": 1.072302307300318, "learning_rate": 8.768001666154678e-07, "loss": 0.0298, "step": 8088 }, { "epoch": 1.840500568828214, "grad_norm": 1.4163122617295698, "learning_rate": 8.767183850000774e-07, "loss": 0.0281, "step": 8089 }, { "epoch": 1.8407281001137656, "grad_norm": 1.4432293918074388, "learning_rate": 8.76636598240024e-07, "loss": 0.0387, "step": 8090 }, { "epoch": 1.8409556313993174, "grad_norm": 1.2795674829954617, "learning_rate": 8.765548063369795e-07, "loss": 0.0333, "step": 8091 }, { "epoch": 1.8411831626848691, "grad_norm": 1.462900154174469, "learning_rate": 8.764730092926156e-07, "loss": 0.0318, "step": 8092 }, { "epoch": 1.841410693970421, "grad_norm": 1.4294887556185045, "learning_rate": 8.763912071086039e-07, "loss": 0.0329, "step": 8093 }, { "epoch": 1.8416382252559726, "grad_norm": 0.8439752476671738, "learning_rate": 8.763093997866162e-07, "loss": 0.0135, "step": 8094 }, { "epoch": 1.8418657565415244, "grad_norm": 1.1771260310982554, "learning_rate": 8.762275873283248e-07, "loss": 0.0147, "step": 8095 }, { "epoch": 1.8420932878270762, "grad_norm": 0.9091367917683557, "learning_rate": 8.761457697354015e-07, "loss": 0.016, "step": 8096 }, { "epoch": 1.842320819112628, "grad_norm": 1.1114412202510104, "learning_rate": 8.760639470095185e-07, "loss": 0.018, "step": 8097 }, { "epoch": 1.8425483503981797, "grad_norm": 1.1552516434606968, "learning_rate": 8.759821191523483e-07, "loss": 0.0338, "step": 8098 }, { "epoch": 1.8427758816837314, "grad_norm": 1.2468801216425964, "learning_rate": 8.759002861655633e-07, "loss": 0.0448, "step": 8099 }, { "epoch": 1.8430034129692832, "grad_norm": 39.45012838572698, "learning_rate": 8.75818448050836e-07, "loss": 0.2857, "step": 8100 }, { "epoch": 1.843230944254835, "grad_norm": 1.8524240850943434, "learning_rate": 8.757366048098389e-07, "loss": 0.0466, "step": 8101 }, { "epoch": 1.8434584755403867, "grad_norm": 2.1307221074840736, "learning_rate": 8.756547564442449e-07, "loss": 0.0401, "step": 8102 }, { "epoch": 1.8436860068259384, "grad_norm": 1.6032119560353542, "learning_rate": 8.755729029557266e-07, "loss": 0.0459, "step": 8103 }, { "epoch": 1.8439135381114902, "grad_norm": 1.5350656489130663, "learning_rate": 8.75491044345957e-07, "loss": 0.0358, "step": 8104 }, { "epoch": 1.8441410693970421, "grad_norm": 1.8138288140149013, "learning_rate": 8.754091806166092e-07, "loss": 0.02, "step": 8105 }, { "epoch": 1.844368600682594, "grad_norm": 0.8666408926248154, "learning_rate": 8.753273117693567e-07, "loss": 0.0385, "step": 8106 }, { "epoch": 1.8445961319681456, "grad_norm": 0.9479917894975077, "learning_rate": 8.752454378058721e-07, "loss": 0.0207, "step": 8107 }, { "epoch": 1.8448236632536974, "grad_norm": 1.154157519846883, "learning_rate": 8.751635587278291e-07, "loss": 0.0281, "step": 8108 }, { "epoch": 1.8450511945392492, "grad_norm": 1.6293722575021, "learning_rate": 8.750816745369012e-07, "loss": 0.0635, "step": 8109 }, { "epoch": 1.845278725824801, "grad_norm": 0.8664652175714467, "learning_rate": 8.749997852347619e-07, "loss": 0.0246, "step": 8110 }, { "epoch": 1.8455062571103527, "grad_norm": 1.1023719369301574, "learning_rate": 8.749178908230845e-07, "loss": 0.0267, "step": 8111 }, { "epoch": 1.8457337883959044, "grad_norm": 1.9613190675321726, "learning_rate": 8.748359913035436e-07, "loss": 0.0594, "step": 8112 }, { "epoch": 1.8459613196814562, "grad_norm": 0.8660552025551523, "learning_rate": 8.747540866778124e-07, "loss": 0.0199, "step": 8113 }, { "epoch": 1.846188850967008, "grad_norm": 0.9647974529318043, "learning_rate": 8.74672176947565e-07, "loss": 0.0177, "step": 8114 }, { "epoch": 1.8464163822525599, "grad_norm": 0.9109302295934834, "learning_rate": 8.745902621144755e-07, "loss": 0.0103, "step": 8115 }, { "epoch": 1.8466439135381116, "grad_norm": 1.0544271239090326, "learning_rate": 8.745083421802183e-07, "loss": 0.0252, "step": 8116 }, { "epoch": 1.8468714448236634, "grad_norm": 0.9489229943769115, "learning_rate": 8.744264171464673e-07, "loss": 0.0226, "step": 8117 }, { "epoch": 1.8470989761092151, "grad_norm": 1.213440014870784, "learning_rate": 8.743444870148974e-07, "loss": 0.0437, "step": 8118 }, { "epoch": 1.847326507394767, "grad_norm": 0.7128445249897901, "learning_rate": 8.742625517871828e-07, "loss": 0.0242, "step": 8119 }, { "epoch": 1.8475540386803186, "grad_norm": 0.7222931636864752, "learning_rate": 8.74180611464998e-07, "loss": 0.0169, "step": 8120 }, { "epoch": 1.8477815699658704, "grad_norm": 0.7458819467430629, "learning_rate": 8.74098666050018e-07, "loss": 0.0142, "step": 8121 }, { "epoch": 1.8480091012514221, "grad_norm": 1.0424051331077842, "learning_rate": 8.740167155439173e-07, "loss": 0.0281, "step": 8122 }, { "epoch": 1.848236632536974, "grad_norm": 0.670541698875183, "learning_rate": 8.739347599483712e-07, "loss": 0.0174, "step": 8123 }, { "epoch": 1.8484641638225257, "grad_norm": 1.5332876404909188, "learning_rate": 8.738527992650542e-07, "loss": 0.0402, "step": 8124 }, { "epoch": 1.8486916951080774, "grad_norm": 1.3030714005810538, "learning_rate": 8.737708334956421e-07, "loss": 0.0197, "step": 8125 }, { "epoch": 1.8489192263936292, "grad_norm": 0.5780610713137618, "learning_rate": 8.736888626418097e-07, "loss": 0.0081, "step": 8126 }, { "epoch": 1.849146757679181, "grad_norm": 1.7468565482433127, "learning_rate": 8.736068867052322e-07, "loss": 0.0519, "step": 8127 }, { "epoch": 1.8493742889647327, "grad_norm": 1.328356788559963, "learning_rate": 8.735249056875852e-07, "loss": 0.0341, "step": 8128 }, { "epoch": 1.8496018202502844, "grad_norm": 1.3535479602581053, "learning_rate": 8.734429195905446e-07, "loss": 0.0312, "step": 8129 }, { "epoch": 1.8498293515358362, "grad_norm": 1.0241814023510956, "learning_rate": 8.733609284157855e-07, "loss": 0.0172, "step": 8130 }, { "epoch": 1.850056882821388, "grad_norm": 0.5782127255787763, "learning_rate": 8.73278932164984e-07, "loss": 0.0073, "step": 8131 }, { "epoch": 1.8502844141069397, "grad_norm": 1.5278820863630018, "learning_rate": 8.731969308398158e-07, "loss": 0.0238, "step": 8132 }, { "epoch": 1.8505119453924914, "grad_norm": 0.9456727006478703, "learning_rate": 8.731149244419568e-07, "loss": 0.0142, "step": 8133 }, { "epoch": 1.8507394766780432, "grad_norm": 1.1745932934806986, "learning_rate": 8.73032912973083e-07, "loss": 0.0342, "step": 8134 }, { "epoch": 1.850967007963595, "grad_norm": 1.190670680670907, "learning_rate": 8.72950896434871e-07, "loss": 0.0228, "step": 8135 }, { "epoch": 1.8511945392491467, "grad_norm": 1.1365337146814314, "learning_rate": 8.728688748289966e-07, "loss": 0.0282, "step": 8136 }, { "epoch": 1.8514220705346984, "grad_norm": 1.323317823211618, "learning_rate": 8.727868481571365e-07, "loss": 0.0402, "step": 8137 }, { "epoch": 1.8516496018202502, "grad_norm": 1.4579556474234914, "learning_rate": 8.72704816420967e-07, "loss": 0.0276, "step": 8138 }, { "epoch": 1.851877133105802, "grad_norm": 1.1704165060413627, "learning_rate": 8.726227796221646e-07, "loss": 0.0155, "step": 8139 }, { "epoch": 1.8521046643913537, "grad_norm": 1.4181298343589945, "learning_rate": 8.725407377624062e-07, "loss": 0.0167, "step": 8140 }, { "epoch": 1.8523321956769054, "grad_norm": 1.759030380654703, "learning_rate": 8.724586908433682e-07, "loss": 0.0335, "step": 8141 }, { "epoch": 1.8525597269624572, "grad_norm": 0.9073385097866469, "learning_rate": 8.72376638866728e-07, "loss": 0.0214, "step": 8142 }, { "epoch": 1.852787258248009, "grad_norm": 1.669887811223168, "learning_rate": 8.722945818341624e-07, "loss": 0.0436, "step": 8143 }, { "epoch": 1.853014789533561, "grad_norm": 0.9318869567954254, "learning_rate": 8.722125197473483e-07, "loss": 0.0272, "step": 8144 }, { "epoch": 1.8532423208191127, "grad_norm": 0.6154076945150099, "learning_rate": 8.721304526079631e-07, "loss": 0.0104, "step": 8145 }, { "epoch": 1.8534698521046644, "grad_norm": 1.016481897300851, "learning_rate": 8.72048380417684e-07, "loss": 0.0178, "step": 8146 }, { "epoch": 1.8536973833902162, "grad_norm": 1.5921420686048406, "learning_rate": 8.719663031781884e-07, "loss": 0.0401, "step": 8147 }, { "epoch": 1.853924914675768, "grad_norm": 1.1899281281195604, "learning_rate": 8.71884220891154e-07, "loss": 0.0401, "step": 8148 }, { "epoch": 1.8541524459613197, "grad_norm": 0.7938509448775539, "learning_rate": 8.718021335582583e-07, "loss": 0.0239, "step": 8149 }, { "epoch": 1.8543799772468714, "grad_norm": 1.5250257259638016, "learning_rate": 8.71720041181179e-07, "loss": 0.0596, "step": 8150 }, { "epoch": 1.8546075085324232, "grad_norm": 0.9788138895139992, "learning_rate": 8.716379437615937e-07, "loss": 0.0278, "step": 8151 }, { "epoch": 1.854835039817975, "grad_norm": 1.1219617460465487, "learning_rate": 8.715558413011807e-07, "loss": 0.0145, "step": 8152 }, { "epoch": 1.8550625711035267, "grad_norm": 1.4696274577201667, "learning_rate": 8.714737338016178e-07, "loss": 0.0299, "step": 8153 }, { "epoch": 1.8552901023890787, "grad_norm": 0.5461938735927995, "learning_rate": 8.713916212645832e-07, "loss": 0.0087, "step": 8154 }, { "epoch": 1.8555176336746304, "grad_norm": 0.8728191580515667, "learning_rate": 8.713095036917551e-07, "loss": 0.0155, "step": 8155 }, { "epoch": 1.8557451649601822, "grad_norm": 0.8112633112836988, "learning_rate": 8.712273810848118e-07, "loss": 0.0279, "step": 8156 }, { "epoch": 1.855972696245734, "grad_norm": 1.6292544002783556, "learning_rate": 8.711452534454318e-07, "loss": 0.0473, "step": 8157 }, { "epoch": 1.8562002275312857, "grad_norm": 1.0397332926990757, "learning_rate": 8.710631207752936e-07, "loss": 0.013, "step": 8158 }, { "epoch": 1.8564277588168374, "grad_norm": 1.2786813393181724, "learning_rate": 8.709809830760759e-07, "loss": 0.0391, "step": 8159 }, { "epoch": 1.8566552901023892, "grad_norm": 0.6749339564730344, "learning_rate": 8.708988403494572e-07, "loss": 0.0101, "step": 8160 }, { "epoch": 1.856882821387941, "grad_norm": 1.6928146286387011, "learning_rate": 8.708166925971168e-07, "loss": 0.0387, "step": 8161 }, { "epoch": 1.8571103526734927, "grad_norm": 1.0430859648850737, "learning_rate": 8.707345398207332e-07, "loss": 0.0219, "step": 8162 }, { "epoch": 1.8573378839590444, "grad_norm": 1.5665708380160561, "learning_rate": 8.706523820219858e-07, "loss": 0.0368, "step": 8163 }, { "epoch": 1.8575654152445962, "grad_norm": 1.2966054046900626, "learning_rate": 8.705702192025537e-07, "loss": 0.0238, "step": 8164 }, { "epoch": 1.857792946530148, "grad_norm": 0.7008382933501183, "learning_rate": 8.704880513641158e-07, "loss": 0.017, "step": 8165 }, { "epoch": 1.8580204778156997, "grad_norm": 1.7406243621129194, "learning_rate": 8.704058785083519e-07, "loss": 0.0288, "step": 8166 }, { "epoch": 1.8582480091012514, "grad_norm": 1.288425765941536, "learning_rate": 8.703237006369411e-07, "loss": 0.0242, "step": 8167 }, { "epoch": 1.8584755403868032, "grad_norm": 1.1833551322412017, "learning_rate": 8.702415177515633e-07, "loss": 0.0305, "step": 8168 }, { "epoch": 1.858703071672355, "grad_norm": 1.1465262412236228, "learning_rate": 8.70159329853898e-07, "loss": 0.0301, "step": 8169 }, { "epoch": 1.8589306029579067, "grad_norm": 0.7748327018464876, "learning_rate": 8.700771369456249e-07, "loss": 0.0209, "step": 8170 }, { "epoch": 1.8591581342434584, "grad_norm": 1.6454633052536287, "learning_rate": 8.69994939028424e-07, "loss": 0.0299, "step": 8171 }, { "epoch": 1.8593856655290102, "grad_norm": 1.5662753124634492, "learning_rate": 8.699127361039753e-07, "loss": 0.0176, "step": 8172 }, { "epoch": 1.859613196814562, "grad_norm": 1.131248911574669, "learning_rate": 8.698305281739589e-07, "loss": 0.0201, "step": 8173 }, { "epoch": 1.8598407281001137, "grad_norm": 0.694155214467786, "learning_rate": 8.697483152400546e-07, "loss": 0.0133, "step": 8174 }, { "epoch": 1.8600682593856654, "grad_norm": 1.4592860863841783, "learning_rate": 8.696660973039432e-07, "loss": 0.0262, "step": 8175 }, { "epoch": 1.8602957906712172, "grad_norm": 1.118677631149914, "learning_rate": 8.695838743673048e-07, "loss": 0.022, "step": 8176 }, { "epoch": 1.860523321956769, "grad_norm": 1.518417282459403, "learning_rate": 8.695016464318199e-07, "loss": 0.0385, "step": 8177 }, { "epoch": 1.8607508532423207, "grad_norm": 1.5865200516406028, "learning_rate": 8.69419413499169e-07, "loss": 0.0354, "step": 8178 }, { "epoch": 1.8609783845278725, "grad_norm": 1.2344679505066096, "learning_rate": 8.693371755710332e-07, "loss": 0.0222, "step": 8179 }, { "epoch": 1.8612059158134242, "grad_norm": 0.6543044122992299, "learning_rate": 8.692549326490929e-07, "loss": 0.0103, "step": 8180 }, { "epoch": 1.861433447098976, "grad_norm": 1.512390509071281, "learning_rate": 8.69172684735029e-07, "loss": 0.0322, "step": 8181 }, { "epoch": 1.8616609783845277, "grad_norm": 1.22591570641012, "learning_rate": 8.690904318305228e-07, "loss": 0.021, "step": 8182 }, { "epoch": 1.8618885096700797, "grad_norm": 1.9278847876150058, "learning_rate": 8.690081739372553e-07, "loss": 0.0516, "step": 8183 }, { "epoch": 1.8621160409556314, "grad_norm": 1.8449505870002252, "learning_rate": 8.689259110569072e-07, "loss": 0.0316, "step": 8184 }, { "epoch": 1.8623435722411832, "grad_norm": 0.8653952943404032, "learning_rate": 8.688436431911604e-07, "loss": 0.0129, "step": 8185 }, { "epoch": 1.862571103526735, "grad_norm": 1.1405663906685892, "learning_rate": 8.687613703416962e-07, "loss": 0.0256, "step": 8186 }, { "epoch": 1.8627986348122867, "grad_norm": 1.4081877293849094, "learning_rate": 8.686790925101959e-07, "loss": 0.0237, "step": 8187 }, { "epoch": 1.8630261660978384, "grad_norm": 2.5371190563306456, "learning_rate": 8.685968096983413e-07, "loss": 0.0637, "step": 8188 }, { "epoch": 1.8632536973833902, "grad_norm": 1.2527275753497469, "learning_rate": 8.685145219078141e-07, "loss": 0.028, "step": 8189 }, { "epoch": 1.863481228668942, "grad_norm": 1.119060898694386, "learning_rate": 8.684322291402959e-07, "loss": 0.0265, "step": 8190 }, { "epoch": 1.8637087599544937, "grad_norm": 1.000037810567958, "learning_rate": 8.683499313974687e-07, "loss": 0.0188, "step": 8191 }, { "epoch": 1.8639362912400455, "grad_norm": 1.076779356138403, "learning_rate": 8.682676286810147e-07, "loss": 0.0228, "step": 8192 }, { "epoch": 1.8641638225255974, "grad_norm": 0.8435905221735835, "learning_rate": 8.68185320992616e-07, "loss": 0.0176, "step": 8193 }, { "epoch": 1.8643913538111492, "grad_norm": 1.478401595078038, "learning_rate": 8.681030083339545e-07, "loss": 0.037, "step": 8194 }, { "epoch": 1.864618885096701, "grad_norm": 1.9537854826335743, "learning_rate": 8.680206907067129e-07, "loss": 0.0501, "step": 8195 }, { "epoch": 1.8648464163822527, "grad_norm": 0.9494709693436619, "learning_rate": 8.679383681125735e-07, "loss": 0.0197, "step": 8196 }, { "epoch": 1.8650739476678044, "grad_norm": 1.1114327548613774, "learning_rate": 8.678560405532186e-07, "loss": 0.0213, "step": 8197 }, { "epoch": 1.8653014789533562, "grad_norm": 1.1110146727410957, "learning_rate": 8.67773708030331e-07, "loss": 0.0184, "step": 8198 }, { "epoch": 1.865529010238908, "grad_norm": 1.029742737133803, "learning_rate": 8.676913705455935e-07, "loss": 0.0201, "step": 8199 }, { "epoch": 1.8657565415244597, "grad_norm": 1.6329280956348171, "learning_rate": 8.676090281006889e-07, "loss": 0.0317, "step": 8200 }, { "epoch": 1.8659840728100114, "grad_norm": 0.8949711512027054, "learning_rate": 8.675266806972999e-07, "loss": 0.0304, "step": 8201 }, { "epoch": 1.8662116040955632, "grad_norm": 1.256881973616814, "learning_rate": 8.674443283371099e-07, "loss": 0.0492, "step": 8202 }, { "epoch": 1.866439135381115, "grad_norm": 1.3417083032412724, "learning_rate": 8.673619710218018e-07, "loss": 0.0315, "step": 8203 }, { "epoch": 1.8666666666666667, "grad_norm": 1.0536476664103258, "learning_rate": 8.672796087530589e-07, "loss": 0.015, "step": 8204 }, { "epoch": 1.8668941979522184, "grad_norm": 1.2340572224640087, "learning_rate": 8.671972415325644e-07, "loss": 0.0425, "step": 8205 }, { "epoch": 1.8671217292377702, "grad_norm": 1.8631204208091858, "learning_rate": 8.671148693620019e-07, "loss": 0.0464, "step": 8206 }, { "epoch": 1.867349260523322, "grad_norm": 1.0989421249652223, "learning_rate": 8.67032492243055e-07, "loss": 0.0219, "step": 8207 }, { "epoch": 1.8675767918088737, "grad_norm": 1.4023003837125518, "learning_rate": 8.66950110177407e-07, "loss": 0.0276, "step": 8208 }, { "epoch": 1.8678043230944255, "grad_norm": 0.8082699835707026, "learning_rate": 8.668677231667422e-07, "loss": 0.0093, "step": 8209 }, { "epoch": 1.8680318543799772, "grad_norm": 0.7929493429012964, "learning_rate": 8.667853312127439e-07, "loss": 0.0134, "step": 8210 }, { "epoch": 1.868259385665529, "grad_norm": 0.6808371029554392, "learning_rate": 8.66702934317096e-07, "loss": 0.0117, "step": 8211 }, { "epoch": 1.8684869169510807, "grad_norm": 0.9747831617637507, "learning_rate": 8.66620532481483e-07, "loss": 0.0306, "step": 8212 }, { "epoch": 1.8687144482366325, "grad_norm": 1.0522411306849213, "learning_rate": 8.66538125707589e-07, "loss": 0.0279, "step": 8213 }, { "epoch": 1.8689419795221842, "grad_norm": 0.7338998590475417, "learning_rate": 8.664557139970978e-07, "loss": 0.0243, "step": 8214 }, { "epoch": 1.869169510807736, "grad_norm": 0.8452592387133178, "learning_rate": 8.663732973516942e-07, "loss": 0.0141, "step": 8215 }, { "epoch": 1.8693970420932877, "grad_norm": 1.367535148889206, "learning_rate": 8.662908757730623e-07, "loss": 0.0398, "step": 8216 }, { "epoch": 1.8696245733788395, "grad_norm": 1.7047697026339605, "learning_rate": 8.66208449262887e-07, "loss": 0.038, "step": 8217 }, { "epoch": 1.8698521046643912, "grad_norm": 0.785015744452762, "learning_rate": 8.661260178228524e-07, "loss": 0.0148, "step": 8218 }, { "epoch": 1.870079635949943, "grad_norm": 1.7605600844674687, "learning_rate": 8.660435814546439e-07, "loss": 0.0781, "step": 8219 }, { "epoch": 1.8703071672354947, "grad_norm": 0.9382348167426583, "learning_rate": 8.65961140159946e-07, "loss": 0.0207, "step": 8220 }, { "epoch": 1.8705346985210465, "grad_norm": 0.75663826121798, "learning_rate": 8.658786939404435e-07, "loss": 0.0169, "step": 8221 }, { "epoch": 1.8707622298065985, "grad_norm": 0.6536072701513589, "learning_rate": 8.657962427978219e-07, "loss": 0.0116, "step": 8222 }, { "epoch": 1.8709897610921502, "grad_norm": 0.9362614356572078, "learning_rate": 8.657137867337659e-07, "loss": 0.0138, "step": 8223 }, { "epoch": 1.871217292377702, "grad_norm": 1.2693273865002388, "learning_rate": 8.65631325749961e-07, "loss": 0.0262, "step": 8224 }, { "epoch": 1.8714448236632537, "grad_norm": 1.5432025950539427, "learning_rate": 8.655488598480925e-07, "loss": 0.0336, "step": 8225 }, { "epoch": 1.8716723549488055, "grad_norm": 1.0516533581879228, "learning_rate": 8.65466389029846e-07, "loss": 0.0211, "step": 8226 }, { "epoch": 1.8718998862343572, "grad_norm": 0.8833787381932218, "learning_rate": 8.65383913296907e-07, "loss": 0.0204, "step": 8227 }, { "epoch": 1.872127417519909, "grad_norm": 0.7146310943505433, "learning_rate": 8.653014326509605e-07, "loss": 0.0141, "step": 8228 }, { "epoch": 1.8723549488054607, "grad_norm": 0.9123200522206458, "learning_rate": 8.652189470936932e-07, "loss": 0.0249, "step": 8229 }, { "epoch": 1.8725824800910125, "grad_norm": 0.9608224087050663, "learning_rate": 8.651364566267906e-07, "loss": 0.0164, "step": 8230 }, { "epoch": 1.8728100113765644, "grad_norm": 0.855934814485092, "learning_rate": 8.650539612519385e-07, "loss": 0.012, "step": 8231 }, { "epoch": 1.8730375426621162, "grad_norm": 0.7144672916403165, "learning_rate": 8.64971460970823e-07, "loss": 0.0104, "step": 8232 }, { "epoch": 1.873265073947668, "grad_norm": 1.0071325611643098, "learning_rate": 8.648889557851306e-07, "loss": 0.0271, "step": 8233 }, { "epoch": 1.8734926052332197, "grad_norm": 1.2665565567523644, "learning_rate": 8.64806445696547e-07, "loss": 0.0245, "step": 8234 }, { "epoch": 1.8737201365187715, "grad_norm": 2.605935561638153, "learning_rate": 8.647239307067588e-07, "loss": 0.0448, "step": 8235 }, { "epoch": 1.8739476678043232, "grad_norm": 1.2983317147503366, "learning_rate": 8.646414108174527e-07, "loss": 0.0392, "step": 8236 }, { "epoch": 1.874175199089875, "grad_norm": 1.3224033440777954, "learning_rate": 8.64558886030315e-07, "loss": 0.0361, "step": 8237 }, { "epoch": 1.8744027303754267, "grad_norm": 2.2075340386024758, "learning_rate": 8.644763563470324e-07, "loss": 0.0875, "step": 8238 }, { "epoch": 1.8746302616609785, "grad_norm": 0.6781023089584342, "learning_rate": 8.643938217692916e-07, "loss": 0.0074, "step": 8239 }, { "epoch": 1.8748577929465302, "grad_norm": 1.4762759957767195, "learning_rate": 8.643112822987795e-07, "loss": 0.0346, "step": 8240 }, { "epoch": 1.875085324232082, "grad_norm": 1.1333048212875287, "learning_rate": 8.642287379371831e-07, "loss": 0.0188, "step": 8241 }, { "epoch": 1.8753128555176337, "grad_norm": 0.8747310853807329, "learning_rate": 8.641461886861893e-07, "loss": 0.0182, "step": 8242 }, { "epoch": 1.8755403868031855, "grad_norm": 1.5952524050355323, "learning_rate": 8.640636345474857e-07, "loss": 0.0329, "step": 8243 }, { "epoch": 1.8757679180887372, "grad_norm": 1.1585605892511983, "learning_rate": 8.639810755227591e-07, "loss": 0.0204, "step": 8244 }, { "epoch": 1.875995449374289, "grad_norm": 1.1746025644765552, "learning_rate": 8.638985116136968e-07, "loss": 0.0192, "step": 8245 }, { "epoch": 1.8762229806598407, "grad_norm": 0.8194477397027945, "learning_rate": 8.638159428219866e-07, "loss": 0.0126, "step": 8246 }, { "epoch": 1.8764505119453925, "grad_norm": 1.1857441250834007, "learning_rate": 8.637333691493159e-07, "loss": 0.0202, "step": 8247 }, { "epoch": 1.8766780432309442, "grad_norm": 0.9836177902311528, "learning_rate": 8.636507905973722e-07, "loss": 0.0154, "step": 8248 }, { "epoch": 1.876905574516496, "grad_norm": 1.47601792495945, "learning_rate": 8.635682071678437e-07, "loss": 0.0531, "step": 8249 }, { "epoch": 1.8771331058020477, "grad_norm": 2.0178451609426338, "learning_rate": 8.634856188624177e-07, "loss": 0.0489, "step": 8250 }, { "epoch": 1.8773606370875995, "grad_norm": 1.30969876612142, "learning_rate": 8.634030256827825e-07, "loss": 0.041, "step": 8251 }, { "epoch": 1.8775881683731512, "grad_norm": 2.304683562391102, "learning_rate": 8.633204276306261e-07, "loss": 0.0542, "step": 8252 }, { "epoch": 1.877815699658703, "grad_norm": 1.1487287390747398, "learning_rate": 8.632378247076366e-07, "loss": 0.0194, "step": 8253 }, { "epoch": 1.8780432309442547, "grad_norm": 1.0025919212586938, "learning_rate": 8.631552169155023e-07, "loss": 0.0164, "step": 8254 }, { "epoch": 1.8782707622298065, "grad_norm": 0.9283184333065247, "learning_rate": 8.630726042559115e-07, "loss": 0.022, "step": 8255 }, { "epoch": 1.8784982935153582, "grad_norm": 1.4582280154857454, "learning_rate": 8.629899867305526e-07, "loss": 0.0313, "step": 8256 }, { "epoch": 1.87872582480091, "grad_norm": 1.259728727435889, "learning_rate": 8.629073643411145e-07, "loss": 0.0321, "step": 8257 }, { "epoch": 1.8789533560864617, "grad_norm": 0.8530994350432433, "learning_rate": 8.628247370892853e-07, "loss": 0.0273, "step": 8258 }, { "epoch": 1.8791808873720135, "grad_norm": 0.7361257055990824, "learning_rate": 8.627421049767541e-07, "loss": 0.0112, "step": 8259 }, { "epoch": 1.8794084186575652, "grad_norm": 0.9977469415704284, "learning_rate": 8.626594680052097e-07, "loss": 0.0314, "step": 8260 }, { "epoch": 1.8796359499431172, "grad_norm": 0.8056748779789032, "learning_rate": 8.62576826176341e-07, "loss": 0.0088, "step": 8261 }, { "epoch": 1.879863481228669, "grad_norm": 1.0730418628235108, "learning_rate": 8.62494179491837e-07, "loss": 0.0293, "step": 8262 }, { "epoch": 1.8800910125142207, "grad_norm": 1.8855161438814367, "learning_rate": 8.624115279533872e-07, "loss": 0.0296, "step": 8263 }, { "epoch": 1.8803185437997725, "grad_norm": 1.5978910261364054, "learning_rate": 8.623288715626804e-07, "loss": 0.0228, "step": 8264 }, { "epoch": 1.8805460750853242, "grad_norm": 1.0656659599434526, "learning_rate": 8.62246210321406e-07, "loss": 0.0213, "step": 8265 }, { "epoch": 1.880773606370876, "grad_norm": 1.7230213852178717, "learning_rate": 8.621635442312537e-07, "loss": 0.0355, "step": 8266 }, { "epoch": 1.8810011376564277, "grad_norm": 0.9435954471011736, "learning_rate": 8.620808732939129e-07, "loss": 0.0156, "step": 8267 }, { "epoch": 1.8812286689419795, "grad_norm": 0.6950460880605197, "learning_rate": 8.619981975110731e-07, "loss": 0.0092, "step": 8268 }, { "epoch": 1.8814562002275312, "grad_norm": 1.6645334845711723, "learning_rate": 8.619155168844243e-07, "loss": 0.0618, "step": 8269 }, { "epoch": 1.8816837315130832, "grad_norm": 0.8924934641594449, "learning_rate": 8.618328314156564e-07, "loss": 0.0193, "step": 8270 }, { "epoch": 1.881911262798635, "grad_norm": 1.3551138271734262, "learning_rate": 8.617501411064588e-07, "loss": 0.0454, "step": 8271 }, { "epoch": 1.8821387940841867, "grad_norm": 1.4834150753791622, "learning_rate": 8.616674459585221e-07, "loss": 0.0335, "step": 8272 }, { "epoch": 1.8823663253697385, "grad_norm": 1.4119258619306272, "learning_rate": 8.615847459735363e-07, "loss": 0.0344, "step": 8273 }, { "epoch": 1.8825938566552902, "grad_norm": 1.4662232210629031, "learning_rate": 8.615020411531915e-07, "loss": 0.0446, "step": 8274 }, { "epoch": 1.882821387940842, "grad_norm": 1.5611191254491945, "learning_rate": 8.61419331499178e-07, "loss": 0.0465, "step": 8275 }, { "epoch": 1.8830489192263937, "grad_norm": 0.9048921640349314, "learning_rate": 8.613366170131867e-07, "loss": 0.0198, "step": 8276 }, { "epoch": 1.8832764505119455, "grad_norm": 1.035461174281126, "learning_rate": 8.612538976969074e-07, "loss": 0.0267, "step": 8277 }, { "epoch": 1.8835039817974972, "grad_norm": 0.9855906993732617, "learning_rate": 8.611711735520312e-07, "loss": 0.0218, "step": 8278 }, { "epoch": 1.883731513083049, "grad_norm": 1.6015259785217235, "learning_rate": 8.610884445802488e-07, "loss": 0.0489, "step": 8279 }, { "epoch": 1.8839590443686007, "grad_norm": 1.2880775218416267, "learning_rate": 8.610057107832509e-07, "loss": 0.0415, "step": 8280 }, { "epoch": 1.8841865756541525, "grad_norm": 1.2850990560050233, "learning_rate": 8.609229721627287e-07, "loss": 0.0473, "step": 8281 }, { "epoch": 1.8844141069397042, "grad_norm": 1.149003164747692, "learning_rate": 8.608402287203728e-07, "loss": 0.0281, "step": 8282 }, { "epoch": 1.884641638225256, "grad_norm": 1.7925247412693852, "learning_rate": 8.607574804578747e-07, "loss": 0.0551, "step": 8283 }, { "epoch": 1.8848691695108077, "grad_norm": 1.0529263745268131, "learning_rate": 8.606747273769253e-07, "loss": 0.0273, "step": 8284 }, { "epoch": 1.8850967007963595, "grad_norm": 1.2214684896707904, "learning_rate": 8.605919694792161e-07, "loss": 0.0227, "step": 8285 }, { "epoch": 1.8853242320819112, "grad_norm": 0.8930497461591164, "learning_rate": 8.605092067664386e-07, "loss": 0.0189, "step": 8286 }, { "epoch": 1.885551763367463, "grad_norm": 1.3666208480259556, "learning_rate": 8.604264392402842e-07, "loss": 0.0455, "step": 8287 }, { "epoch": 1.8857792946530147, "grad_norm": 1.042161632021271, "learning_rate": 8.603436669024446e-07, "loss": 0.0205, "step": 8288 }, { "epoch": 1.8860068259385665, "grad_norm": 1.162512156537206, "learning_rate": 8.602608897546115e-07, "loss": 0.0324, "step": 8289 }, { "epoch": 1.8862343572241183, "grad_norm": 1.0589861861099525, "learning_rate": 8.601781077984767e-07, "loss": 0.0256, "step": 8290 }, { "epoch": 1.88646188850967, "grad_norm": 1.1272093093873294, "learning_rate": 8.600953210357319e-07, "loss": 0.0232, "step": 8291 }, { "epoch": 1.8866894197952218, "grad_norm": 1.7153443676242524, "learning_rate": 8.600125294680692e-07, "loss": 0.0518, "step": 8292 }, { "epoch": 1.8869169510807735, "grad_norm": 0.9046095063866724, "learning_rate": 8.59929733097181e-07, "loss": 0.0267, "step": 8293 }, { "epoch": 1.8871444823663253, "grad_norm": 0.9024113986945366, "learning_rate": 8.598469319247593e-07, "loss": 0.0157, "step": 8294 }, { "epoch": 1.887372013651877, "grad_norm": 1.4704486910706056, "learning_rate": 8.597641259524965e-07, "loss": 0.0285, "step": 8295 }, { "epoch": 1.8875995449374288, "grad_norm": 0.9238625713309345, "learning_rate": 8.596813151820849e-07, "loss": 0.0306, "step": 8296 }, { "epoch": 1.8878270762229805, "grad_norm": 0.6184180904992845, "learning_rate": 8.595984996152168e-07, "loss": 0.011, "step": 8297 }, { "epoch": 1.8880546075085323, "grad_norm": 1.4313296623449745, "learning_rate": 8.595156792535852e-07, "loss": 0.0453, "step": 8298 }, { "epoch": 1.8882821387940842, "grad_norm": 1.9252715310610398, "learning_rate": 8.594328540988825e-07, "loss": 0.0331, "step": 8299 }, { "epoch": 1.888509670079636, "grad_norm": 0.5403949165206943, "learning_rate": 8.593500241528016e-07, "loss": 0.0075, "step": 8300 }, { "epoch": 1.8887372013651877, "grad_norm": 1.0057890593594268, "learning_rate": 8.592671894170356e-07, "loss": 0.0282, "step": 8301 }, { "epoch": 1.8889647326507395, "grad_norm": 0.9380707896915073, "learning_rate": 8.59184349893277e-07, "loss": 0.0249, "step": 8302 }, { "epoch": 1.8891922639362912, "grad_norm": 1.1957453930209534, "learning_rate": 8.591015055832195e-07, "loss": 0.0336, "step": 8303 }, { "epoch": 1.889419795221843, "grad_norm": 1.6201566102692035, "learning_rate": 8.590186564885557e-07, "loss": 0.0342, "step": 8304 }, { "epoch": 1.8896473265073948, "grad_norm": 1.242275772305289, "learning_rate": 8.589358026109792e-07, "loss": 0.04, "step": 8305 }, { "epoch": 1.8898748577929465, "grad_norm": 0.9816574572206791, "learning_rate": 8.588529439521834e-07, "loss": 0.0182, "step": 8306 }, { "epoch": 1.8901023890784983, "grad_norm": 0.5762361178743535, "learning_rate": 8.587700805138617e-07, "loss": 0.0132, "step": 8307 }, { "epoch": 1.89032992036405, "grad_norm": 2.230891945909432, "learning_rate": 8.586872122977076e-07, "loss": 0.0649, "step": 8308 }, { "epoch": 1.890557451649602, "grad_norm": 0.8923642900973059, "learning_rate": 8.58604339305415e-07, "loss": 0.0221, "step": 8309 }, { "epoch": 1.8907849829351537, "grad_norm": 1.413899468730935, "learning_rate": 8.585214615386773e-07, "loss": 0.0216, "step": 8310 }, { "epoch": 1.8910125142207055, "grad_norm": 0.8754816462406323, "learning_rate": 8.584385789991887e-07, "loss": 0.0258, "step": 8311 }, { "epoch": 1.8912400455062572, "grad_norm": 0.924318820572518, "learning_rate": 8.583556916886432e-07, "loss": 0.0162, "step": 8312 }, { "epoch": 1.891467576791809, "grad_norm": 0.7762207786564913, "learning_rate": 8.582727996087345e-07, "loss": 0.0156, "step": 8313 }, { "epoch": 1.8916951080773607, "grad_norm": 1.7197432751965054, "learning_rate": 8.581899027611574e-07, "loss": 0.0432, "step": 8314 }, { "epoch": 1.8919226393629125, "grad_norm": 0.6713325945774121, "learning_rate": 8.581070011476053e-07, "loss": 0.0079, "step": 8315 }, { "epoch": 1.8921501706484642, "grad_norm": 0.9139036984168137, "learning_rate": 8.580240947697732e-07, "loss": 0.0216, "step": 8316 }, { "epoch": 1.892377701934016, "grad_norm": 1.0287826908579287, "learning_rate": 8.579411836293555e-07, "loss": 0.0184, "step": 8317 }, { "epoch": 1.8926052332195678, "grad_norm": 1.343241857750621, "learning_rate": 8.578582677280464e-07, "loss": 0.0596, "step": 8318 }, { "epoch": 1.8928327645051195, "grad_norm": 1.0633519122682107, "learning_rate": 8.577753470675408e-07, "loss": 0.0245, "step": 8319 }, { "epoch": 1.8930602957906713, "grad_norm": 0.7693491499865697, "learning_rate": 8.576924216495336e-07, "loss": 0.0157, "step": 8320 }, { "epoch": 1.893287827076223, "grad_norm": 2.098129898190892, "learning_rate": 8.576094914757194e-07, "loss": 0.0531, "step": 8321 }, { "epoch": 1.8935153583617748, "grad_norm": 1.029612822990029, "learning_rate": 8.575265565477931e-07, "loss": 0.0225, "step": 8322 }, { "epoch": 1.8937428896473265, "grad_norm": 1.5486723274495435, "learning_rate": 8.574436168674498e-07, "loss": 0.0426, "step": 8323 }, { "epoch": 1.8939704209328783, "grad_norm": 2.937968838728311, "learning_rate": 8.573606724363848e-07, "loss": 0.0223, "step": 8324 }, { "epoch": 1.89419795221843, "grad_norm": 0.9186281289893576, "learning_rate": 8.572777232562929e-07, "loss": 0.0183, "step": 8325 }, { "epoch": 1.8944254835039818, "grad_norm": 0.7647862792866064, "learning_rate": 8.571947693288702e-07, "loss": 0.0159, "step": 8326 }, { "epoch": 1.8946530147895335, "grad_norm": 1.191823185384319, "learning_rate": 8.571118106558114e-07, "loss": 0.023, "step": 8327 }, { "epoch": 1.8948805460750853, "grad_norm": 1.3699185120387163, "learning_rate": 8.570288472388122e-07, "loss": 0.0332, "step": 8328 }, { "epoch": 1.895108077360637, "grad_norm": 1.5461111445030973, "learning_rate": 8.569458790795685e-07, "loss": 0.0352, "step": 8329 }, { "epoch": 1.8953356086461888, "grad_norm": 0.8535154134838575, "learning_rate": 8.568629061797757e-07, "loss": 0.011, "step": 8330 }, { "epoch": 1.8955631399317405, "grad_norm": 1.570117171787192, "learning_rate": 8.567799285411298e-07, "loss": 0.0326, "step": 8331 }, { "epoch": 1.8957906712172923, "grad_norm": 1.791000174693399, "learning_rate": 8.566969461653266e-07, "loss": 0.0449, "step": 8332 }, { "epoch": 1.896018202502844, "grad_norm": 1.295995913807357, "learning_rate": 8.566139590540622e-07, "loss": 0.0275, "step": 8333 }, { "epoch": 1.8962457337883958, "grad_norm": 1.394137990033564, "learning_rate": 8.565309672090328e-07, "loss": 0.0418, "step": 8334 }, { "epoch": 1.8964732650739475, "grad_norm": 1.3620290597281826, "learning_rate": 8.564479706319339e-07, "loss": 0.0296, "step": 8335 }, { "epoch": 1.8967007963594993, "grad_norm": 1.1143468857481267, "learning_rate": 8.563649693244629e-07, "loss": 0.0283, "step": 8336 }, { "epoch": 1.896928327645051, "grad_norm": 0.8195233179892975, "learning_rate": 8.562819632883155e-07, "loss": 0.0127, "step": 8337 }, { "epoch": 1.897155858930603, "grad_norm": 1.101615888325663, "learning_rate": 8.561989525251883e-07, "loss": 0.0426, "step": 8338 }, { "epoch": 1.8973833902161548, "grad_norm": 1.0049505213568453, "learning_rate": 8.56115937036778e-07, "loss": 0.0121, "step": 8339 }, { "epoch": 1.8976109215017065, "grad_norm": 20.746676374419295, "learning_rate": 8.560329168247812e-07, "loss": 0.239, "step": 8340 }, { "epoch": 1.8978384527872583, "grad_norm": 0.931717318764593, "learning_rate": 8.559498918908948e-07, "loss": 0.0375, "step": 8341 }, { "epoch": 1.89806598407281, "grad_norm": 1.3285902856176695, "learning_rate": 8.558668622368154e-07, "loss": 0.0279, "step": 8342 }, { "epoch": 1.8982935153583618, "grad_norm": 1.3731987814764735, "learning_rate": 8.557838278642401e-07, "loss": 0.0451, "step": 8343 }, { "epoch": 1.8985210466439135, "grad_norm": 0.9111979938572307, "learning_rate": 8.557007887748661e-07, "loss": 0.0135, "step": 8344 }, { "epoch": 1.8987485779294653, "grad_norm": 1.546482352872472, "learning_rate": 8.556177449703906e-07, "loss": 0.0458, "step": 8345 }, { "epoch": 1.898976109215017, "grad_norm": 1.1451460927699535, "learning_rate": 8.555346964525107e-07, "loss": 0.0227, "step": 8346 }, { "epoch": 1.8992036405005688, "grad_norm": 1.2348650674661643, "learning_rate": 8.554516432229238e-07, "loss": 0.0198, "step": 8347 }, { "epoch": 1.8994311717861208, "grad_norm": 0.8684328346312213, "learning_rate": 8.553685852833274e-07, "loss": 0.0136, "step": 8348 }, { "epoch": 1.8996587030716725, "grad_norm": 0.8459249699601199, "learning_rate": 8.552855226354187e-07, "loss": 0.0183, "step": 8349 }, { "epoch": 1.8998862343572243, "grad_norm": 0.8398092931808809, "learning_rate": 8.55202455280896e-07, "loss": 0.0169, "step": 8350 }, { "epoch": 1.900113765642776, "grad_norm": 0.6669246846137414, "learning_rate": 8.551193832214567e-07, "loss": 0.013, "step": 8351 }, { "epoch": 1.9003412969283278, "grad_norm": 1.427927029573224, "learning_rate": 8.550363064587985e-07, "loss": 0.0241, "step": 8352 }, { "epoch": 1.9005688282138795, "grad_norm": 0.8047507484768054, "learning_rate": 8.549532249946197e-07, "loss": 0.0162, "step": 8353 }, { "epoch": 1.9007963594994313, "grad_norm": 0.9422455546065365, "learning_rate": 8.548701388306179e-07, "loss": 0.0141, "step": 8354 }, { "epoch": 1.901023890784983, "grad_norm": 1.3733449199975643, "learning_rate": 8.547870479684916e-07, "loss": 0.0399, "step": 8355 }, { "epoch": 1.9012514220705348, "grad_norm": 0.7185318034874649, "learning_rate": 8.547039524099387e-07, "loss": 0.009, "step": 8356 }, { "epoch": 1.9014789533560865, "grad_norm": 0.7062440700182584, "learning_rate": 8.546208521566578e-07, "loss": 0.0199, "step": 8357 }, { "epoch": 1.9017064846416383, "grad_norm": 1.2742509728964952, "learning_rate": 8.545377472103474e-07, "loss": 0.0388, "step": 8358 }, { "epoch": 1.90193401592719, "grad_norm": 1.411456477780153, "learning_rate": 8.544546375727055e-07, "loss": 0.0144, "step": 8359 }, { "epoch": 1.9021615472127418, "grad_norm": 0.9938212599624071, "learning_rate": 8.543715232454311e-07, "loss": 0.0225, "step": 8360 }, { "epoch": 1.9023890784982935, "grad_norm": 1.5657277168399435, "learning_rate": 8.54288404230223e-07, "loss": 0.0528, "step": 8361 }, { "epoch": 1.9026166097838453, "grad_norm": 0.9284050397615987, "learning_rate": 8.542052805287797e-07, "loss": 0.0246, "step": 8362 }, { "epoch": 1.902844141069397, "grad_norm": 1.3974079836594142, "learning_rate": 8.541221521428003e-07, "loss": 0.0451, "step": 8363 }, { "epoch": 1.9030716723549488, "grad_norm": 1.04036961337048, "learning_rate": 8.540390190739839e-07, "loss": 0.0232, "step": 8364 }, { "epoch": 1.9032992036405005, "grad_norm": 1.7484807221488305, "learning_rate": 8.539558813240291e-07, "loss": 0.0293, "step": 8365 }, { "epoch": 1.9035267349260523, "grad_norm": 1.1185754396379677, "learning_rate": 8.538727388946356e-07, "loss": 0.0267, "step": 8366 }, { "epoch": 1.903754266211604, "grad_norm": 0.9078252207266309, "learning_rate": 8.537895917875023e-07, "loss": 0.0324, "step": 8367 }, { "epoch": 1.9039817974971558, "grad_norm": 0.9124967769994551, "learning_rate": 8.537064400043289e-07, "loss": 0.0186, "step": 8368 }, { "epoch": 1.9042093287827075, "grad_norm": 1.8217424208968858, "learning_rate": 8.536232835468145e-07, "loss": 0.0327, "step": 8369 }, { "epoch": 1.9044368600682593, "grad_norm": 1.061454118710923, "learning_rate": 8.535401224166593e-07, "loss": 0.0257, "step": 8370 }, { "epoch": 1.904664391353811, "grad_norm": 1.2108474848055848, "learning_rate": 8.534569566155623e-07, "loss": 0.0346, "step": 8371 }, { "epoch": 1.9048919226393628, "grad_norm": 0.9981046532618882, "learning_rate": 8.533737861452235e-07, "loss": 0.0367, "step": 8372 }, { "epoch": 1.9051194539249146, "grad_norm": 0.7137971971995981, "learning_rate": 8.532906110073427e-07, "loss": 0.022, "step": 8373 }, { "epoch": 1.9053469852104663, "grad_norm": 1.1506401527737304, "learning_rate": 8.5320743120362e-07, "loss": 0.034, "step": 8374 }, { "epoch": 1.905574516496018, "grad_norm": 1.0060448506402915, "learning_rate": 8.531242467357555e-07, "loss": 0.0176, "step": 8375 }, { "epoch": 1.9058020477815698, "grad_norm": 1.2596476896044595, "learning_rate": 8.530410576054489e-07, "loss": 0.0296, "step": 8376 }, { "epoch": 1.9060295790671218, "grad_norm": 1.270048014695048, "learning_rate": 8.52957863814401e-07, "loss": 0.0225, "step": 8377 }, { "epoch": 1.9062571103526735, "grad_norm": 0.9739005233427236, "learning_rate": 8.528746653643116e-07, "loss": 0.0219, "step": 8378 }, { "epoch": 1.9064846416382253, "grad_norm": 1.4836478796362045, "learning_rate": 8.527914622568814e-07, "loss": 0.0326, "step": 8379 }, { "epoch": 1.906712172923777, "grad_norm": 1.0636112939942484, "learning_rate": 8.527082544938111e-07, "loss": 0.0186, "step": 8380 }, { "epoch": 1.9069397042093288, "grad_norm": 0.6927545848426347, "learning_rate": 8.526250420768009e-07, "loss": 0.0138, "step": 8381 }, { "epoch": 1.9071672354948805, "grad_norm": 1.3875179087787834, "learning_rate": 8.525418250075518e-07, "loss": 0.0568, "step": 8382 }, { "epoch": 1.9073947667804323, "grad_norm": 0.9015365651542543, "learning_rate": 8.524586032877645e-07, "loss": 0.022, "step": 8383 }, { "epoch": 1.907622298065984, "grad_norm": 1.343545255745248, "learning_rate": 8.523753769191399e-07, "loss": 0.0266, "step": 8384 }, { "epoch": 1.9078498293515358, "grad_norm": 1.0970681264556976, "learning_rate": 8.522921459033791e-07, "loss": 0.0219, "step": 8385 }, { "epoch": 1.9080773606370875, "grad_norm": 0.8250759323381144, "learning_rate": 8.522089102421829e-07, "loss": 0.0177, "step": 8386 }, { "epoch": 1.9083048919226395, "grad_norm": 0.9830515935915406, "learning_rate": 8.52125669937253e-07, "loss": 0.0218, "step": 8387 }, { "epoch": 1.9085324232081913, "grad_norm": 1.4564551244698456, "learning_rate": 8.5204242499029e-07, "loss": 0.044, "step": 8388 }, { "epoch": 1.908759954493743, "grad_norm": 1.1380219813778063, "learning_rate": 8.51959175402996e-07, "loss": 0.0259, "step": 8389 }, { "epoch": 1.9089874857792948, "grad_norm": 0.8340581743448356, "learning_rate": 8.518759211770719e-07, "loss": 0.015, "step": 8390 }, { "epoch": 1.9092150170648465, "grad_norm": 1.143444324624706, "learning_rate": 8.517926623142196e-07, "loss": 0.0266, "step": 8391 }, { "epoch": 1.9094425483503983, "grad_norm": 1.4872909115352921, "learning_rate": 8.517093988161404e-07, "loss": 0.055, "step": 8392 }, { "epoch": 1.90967007963595, "grad_norm": 1.1451443339474134, "learning_rate": 8.516261306845365e-07, "loss": 0.0349, "step": 8393 }, { "epoch": 1.9098976109215018, "grad_norm": 0.964597490955224, "learning_rate": 8.515428579211095e-07, "loss": 0.0146, "step": 8394 }, { "epoch": 1.9101251422070535, "grad_norm": 1.0969553831767032, "learning_rate": 8.514595805275614e-07, "loss": 0.0241, "step": 8395 }, { "epoch": 1.9103526734926053, "grad_norm": 1.4080510598222953, "learning_rate": 8.513762985055942e-07, "loss": 0.0567, "step": 8396 }, { "epoch": 1.910580204778157, "grad_norm": 1.2786633879760345, "learning_rate": 8.5129301185691e-07, "loss": 0.0382, "step": 8397 }, { "epoch": 1.9108077360637088, "grad_norm": 0.6123509234890121, "learning_rate": 8.512097205832111e-07, "loss": 0.008, "step": 8398 }, { "epoch": 1.9110352673492605, "grad_norm": 0.6834596034514431, "learning_rate": 8.511264246861997e-07, "loss": 0.0158, "step": 8399 }, { "epoch": 1.9112627986348123, "grad_norm": 1.4469107691813436, "learning_rate": 8.510431241675784e-07, "loss": 0.0207, "step": 8400 }, { "epoch": 1.911490329920364, "grad_norm": 1.1241815055827342, "learning_rate": 8.509598190290497e-07, "loss": 0.0192, "step": 8401 }, { "epoch": 1.9117178612059158, "grad_norm": 0.8640122699671915, "learning_rate": 8.508765092723159e-07, "loss": 0.0252, "step": 8402 }, { "epoch": 1.9119453924914676, "grad_norm": 1.1837107369030004, "learning_rate": 8.507931948990801e-07, "loss": 0.0538, "step": 8403 }, { "epoch": 1.9121729237770193, "grad_norm": 1.1995895924958146, "learning_rate": 8.507098759110449e-07, "loss": 0.0453, "step": 8404 }, { "epoch": 1.912400455062571, "grad_norm": 1.619876859968541, "learning_rate": 8.506265523099133e-07, "loss": 0.0339, "step": 8405 }, { "epoch": 1.9126279863481228, "grad_norm": 0.9981047989443556, "learning_rate": 8.50543224097388e-07, "loss": 0.0231, "step": 8406 }, { "epoch": 1.9128555176336746, "grad_norm": 0.6500442811423481, "learning_rate": 8.504598912751722e-07, "loss": 0.0135, "step": 8407 }, { "epoch": 1.9130830489192263, "grad_norm": 1.7930157168186414, "learning_rate": 8.503765538449695e-07, "loss": 0.0746, "step": 8408 }, { "epoch": 1.913310580204778, "grad_norm": 1.1905903879309094, "learning_rate": 8.502932118084825e-07, "loss": 0.0274, "step": 8409 }, { "epoch": 1.9135381114903298, "grad_norm": 1.3730986996255226, "learning_rate": 8.502098651674148e-07, "loss": 0.0512, "step": 8410 }, { "epoch": 1.9137656427758816, "grad_norm": 0.7930623765153966, "learning_rate": 8.501265139234702e-07, "loss": 0.0216, "step": 8411 }, { "epoch": 1.9139931740614333, "grad_norm": 1.484429796680887, "learning_rate": 8.500431580783518e-07, "loss": 0.0213, "step": 8412 }, { "epoch": 1.914220705346985, "grad_norm": 3.26329303562541, "learning_rate": 8.499597976337632e-07, "loss": 0.0298, "step": 8413 }, { "epoch": 1.9144482366325368, "grad_norm": 0.6275582463172372, "learning_rate": 8.498764325914087e-07, "loss": 0.0092, "step": 8414 }, { "epoch": 1.9146757679180886, "grad_norm": 0.8514325784501834, "learning_rate": 8.497930629529916e-07, "loss": 0.0135, "step": 8415 }, { "epoch": 1.9149032992036406, "grad_norm": 0.7539415354286236, "learning_rate": 8.497096887202158e-07, "loss": 0.0186, "step": 8416 }, { "epoch": 1.9151308304891923, "grad_norm": 0.8168825733483965, "learning_rate": 8.496263098947857e-07, "loss": 0.0171, "step": 8417 }, { "epoch": 1.915358361774744, "grad_norm": 1.2697149138942274, "learning_rate": 8.495429264784051e-07, "loss": 0.0336, "step": 8418 }, { "epoch": 1.9155858930602958, "grad_norm": 1.3946643910966317, "learning_rate": 8.494595384727783e-07, "loss": 0.0224, "step": 8419 }, { "epoch": 1.9158134243458476, "grad_norm": 1.4038850936602918, "learning_rate": 8.493761458796098e-07, "loss": 0.057, "step": 8420 }, { "epoch": 1.9160409556313993, "grad_norm": 0.6736182648948457, "learning_rate": 8.492927487006037e-07, "loss": 0.0095, "step": 8421 }, { "epoch": 1.916268486916951, "grad_norm": 1.564923396961411, "learning_rate": 8.492093469374646e-07, "loss": 0.035, "step": 8422 }, { "epoch": 1.9164960182025028, "grad_norm": 1.1021686395110446, "learning_rate": 8.491259405918969e-07, "loss": 0.0267, "step": 8423 }, { "epoch": 1.9167235494880546, "grad_norm": 1.4012443000406984, "learning_rate": 8.490425296656057e-07, "loss": 0.0345, "step": 8424 }, { "epoch": 1.9169510807736063, "grad_norm": 0.7791471615047615, "learning_rate": 8.489591141602954e-07, "loss": 0.0167, "step": 8425 }, { "epoch": 1.9171786120591583, "grad_norm": 1.2116548450079154, "learning_rate": 8.488756940776709e-07, "loss": 0.0323, "step": 8426 }, { "epoch": 1.91740614334471, "grad_norm": 2.33324083375237, "learning_rate": 8.487922694194374e-07, "loss": 0.0579, "step": 8427 }, { "epoch": 1.9176336746302618, "grad_norm": 1.6160700922599316, "learning_rate": 8.487088401872997e-07, "loss": 0.0319, "step": 8428 }, { "epoch": 1.9178612059158135, "grad_norm": 0.797732964369814, "learning_rate": 8.486254063829628e-07, "loss": 0.013, "step": 8429 }, { "epoch": 1.9180887372013653, "grad_norm": 0.731455585098273, "learning_rate": 8.485419680081324e-07, "loss": 0.013, "step": 8430 }, { "epoch": 1.918316268486917, "grad_norm": 1.3985142165078785, "learning_rate": 8.484585250645134e-07, "loss": 0.0312, "step": 8431 }, { "epoch": 1.9185437997724688, "grad_norm": 1.0826264757291735, "learning_rate": 8.483750775538116e-07, "loss": 0.0258, "step": 8432 }, { "epoch": 1.9187713310580206, "grad_norm": 0.9737168446676101, "learning_rate": 8.482916254777321e-07, "loss": 0.0165, "step": 8433 }, { "epoch": 1.9189988623435723, "grad_norm": 1.1198156854996961, "learning_rate": 8.482081688379809e-07, "loss": 0.0185, "step": 8434 }, { "epoch": 1.919226393629124, "grad_norm": 1.2595137307081843, "learning_rate": 8.481247076362633e-07, "loss": 0.0369, "step": 8435 }, { "epoch": 1.9194539249146758, "grad_norm": 0.6339129462269499, "learning_rate": 8.480412418742855e-07, "loss": 0.0078, "step": 8436 }, { "epoch": 1.9196814562002276, "grad_norm": 0.8520737497185187, "learning_rate": 8.479577715537531e-07, "loss": 0.0236, "step": 8437 }, { "epoch": 1.9199089874857793, "grad_norm": 1.445989975288279, "learning_rate": 8.478742966763721e-07, "loss": 0.0327, "step": 8438 }, { "epoch": 1.920136518771331, "grad_norm": 1.2746227485283579, "learning_rate": 8.477908172438488e-07, "loss": 0.0469, "step": 8439 }, { "epoch": 1.9203640500568828, "grad_norm": 0.89691294519206, "learning_rate": 8.477073332578892e-07, "loss": 0.0097, "step": 8440 }, { "epoch": 1.9205915813424346, "grad_norm": 0.962306699621327, "learning_rate": 8.476238447201995e-07, "loss": 0.0291, "step": 8441 }, { "epoch": 1.9208191126279863, "grad_norm": 1.1132372477063388, "learning_rate": 8.475403516324863e-07, "loss": 0.0202, "step": 8442 }, { "epoch": 1.921046643913538, "grad_norm": 1.6547856713968876, "learning_rate": 8.474568539964556e-07, "loss": 0.056, "step": 8443 }, { "epoch": 1.9212741751990898, "grad_norm": 0.4787214241555102, "learning_rate": 8.473733518138145e-07, "loss": 0.01, "step": 8444 }, { "epoch": 1.9215017064846416, "grad_norm": 1.1801121879348935, "learning_rate": 8.472898450862691e-07, "loss": 0.028, "step": 8445 }, { "epoch": 1.9217292377701933, "grad_norm": 1.0120828980304566, "learning_rate": 8.472063338155265e-07, "loss": 0.0325, "step": 8446 }, { "epoch": 1.921956769055745, "grad_norm": 2.2691336336786536, "learning_rate": 8.471228180032934e-07, "loss": 0.0511, "step": 8447 }, { "epoch": 1.9221843003412968, "grad_norm": 0.679913941000245, "learning_rate": 8.470392976512768e-07, "loss": 0.0098, "step": 8448 }, { "epoch": 1.9224118316268486, "grad_norm": 1.2697734767948874, "learning_rate": 8.469557727611833e-07, "loss": 0.0435, "step": 8449 }, { "epoch": 1.9226393629124003, "grad_norm": 1.2670054762383771, "learning_rate": 8.468722433347204e-07, "loss": 0.0361, "step": 8450 }, { "epoch": 1.922866894197952, "grad_norm": 0.9079601191824546, "learning_rate": 8.467887093735953e-07, "loss": 0.0138, "step": 8451 }, { "epoch": 1.9230944254835038, "grad_norm": 0.9892645162047258, "learning_rate": 8.467051708795152e-07, "loss": 0.0194, "step": 8452 }, { "epoch": 1.9233219567690556, "grad_norm": 4.904769472158991, "learning_rate": 8.466216278541874e-07, "loss": 0.118, "step": 8453 }, { "epoch": 1.9235494880546073, "grad_norm": 1.0889415277846448, "learning_rate": 8.465380802993193e-07, "loss": 0.0205, "step": 8454 }, { "epoch": 1.9237770193401593, "grad_norm": 0.9092866101119246, "learning_rate": 8.464545282166187e-07, "loss": 0.0177, "step": 8455 }, { "epoch": 1.924004550625711, "grad_norm": 1.0710600040644043, "learning_rate": 8.463709716077929e-07, "loss": 0.0473, "step": 8456 }, { "epoch": 1.9242320819112628, "grad_norm": 0.7766173559355853, "learning_rate": 8.4628741047455e-07, "loss": 0.0141, "step": 8457 }, { "epoch": 1.9244596131968146, "grad_norm": 1.5075758759501967, "learning_rate": 8.462038448185977e-07, "loss": 0.0425, "step": 8458 }, { "epoch": 1.9246871444823663, "grad_norm": 1.0521484097583373, "learning_rate": 8.461202746416442e-07, "loss": 0.034, "step": 8459 }, { "epoch": 1.924914675767918, "grad_norm": 0.9737045662390044, "learning_rate": 8.460366999453968e-07, "loss": 0.0349, "step": 8460 }, { "epoch": 1.9251422070534698, "grad_norm": 1.3360371476012138, "learning_rate": 8.459531207315644e-07, "loss": 0.0309, "step": 8461 }, { "epoch": 1.9253697383390216, "grad_norm": 1.1388283401231705, "learning_rate": 8.458695370018546e-07, "loss": 0.0249, "step": 8462 }, { "epoch": 1.9255972696245733, "grad_norm": 1.543023823562457, "learning_rate": 8.457859487579762e-07, "loss": 0.0419, "step": 8463 }, { "epoch": 1.925824800910125, "grad_norm": 0.7045927127470623, "learning_rate": 8.457023560016371e-07, "loss": 0.019, "step": 8464 }, { "epoch": 1.926052332195677, "grad_norm": 1.5680014180333777, "learning_rate": 8.456187587345463e-07, "loss": 0.0377, "step": 8465 }, { "epoch": 1.9262798634812288, "grad_norm": 0.6049760071323378, "learning_rate": 8.455351569584119e-07, "loss": 0.0123, "step": 8466 }, { "epoch": 1.9265073947667806, "grad_norm": 0.6949704083990405, "learning_rate": 8.454515506749431e-07, "loss": 0.0127, "step": 8467 }, { "epoch": 1.9267349260523323, "grad_norm": 1.2166952289517832, "learning_rate": 8.453679398858481e-07, "loss": 0.0337, "step": 8468 }, { "epoch": 1.926962457337884, "grad_norm": 0.5643035292729891, "learning_rate": 8.452843245928359e-07, "loss": 0.0083, "step": 8469 }, { "epoch": 1.9271899886234358, "grad_norm": 1.268210689591597, "learning_rate": 8.452007047976155e-07, "loss": 0.0393, "step": 8470 }, { "epoch": 1.9274175199089876, "grad_norm": 1.2083646438460875, "learning_rate": 8.451170805018964e-07, "loss": 0.0436, "step": 8471 }, { "epoch": 1.9276450511945393, "grad_norm": 0.5160432674685907, "learning_rate": 8.45033451707387e-07, "loss": 0.0126, "step": 8472 }, { "epoch": 1.927872582480091, "grad_norm": 2.133760042950509, "learning_rate": 8.449498184157968e-07, "loss": 0.0464, "step": 8473 }, { "epoch": 1.9281001137656428, "grad_norm": 1.1469697318986505, "learning_rate": 8.448661806288352e-07, "loss": 0.0248, "step": 8474 }, { "epoch": 1.9283276450511946, "grad_norm": 1.1436508537006016, "learning_rate": 8.447825383482116e-07, "loss": 0.0202, "step": 8475 }, { "epoch": 1.9285551763367463, "grad_norm": 1.1291433248112117, "learning_rate": 8.446988915756353e-07, "loss": 0.0214, "step": 8476 }, { "epoch": 1.928782707622298, "grad_norm": 1.4797955722306269, "learning_rate": 8.446152403128161e-07, "loss": 0.0467, "step": 8477 }, { "epoch": 1.9290102389078498, "grad_norm": 1.0466122715039303, "learning_rate": 8.445315845614636e-07, "loss": 0.0421, "step": 8478 }, { "epoch": 1.9292377701934016, "grad_norm": 1.5338203633569327, "learning_rate": 8.444479243232875e-07, "loss": 0.0208, "step": 8479 }, { "epoch": 1.9294653014789533, "grad_norm": 1.123497381048244, "learning_rate": 8.443642595999977e-07, "loss": 0.0185, "step": 8480 }, { "epoch": 1.929692832764505, "grad_norm": 0.9233591838211316, "learning_rate": 8.442805903933041e-07, "loss": 0.013, "step": 8481 }, { "epoch": 1.9299203640500568, "grad_norm": 1.3201346964016165, "learning_rate": 8.441969167049171e-07, "loss": 0.0316, "step": 8482 }, { "epoch": 1.9301478953356086, "grad_norm": 1.332722144963646, "learning_rate": 8.441132385365462e-07, "loss": 0.0132, "step": 8483 }, { "epoch": 1.9303754266211604, "grad_norm": 0.5893018408424756, "learning_rate": 8.440295558899024e-07, "loss": 0.0095, "step": 8484 }, { "epoch": 1.930602957906712, "grad_norm": 1.3366402306240976, "learning_rate": 8.439458687666954e-07, "loss": 0.0354, "step": 8485 }, { "epoch": 1.9308304891922639, "grad_norm": 1.825240306798872, "learning_rate": 8.438621771686358e-07, "loss": 0.0419, "step": 8486 }, { "epoch": 1.9310580204778156, "grad_norm": 1.1674735397270124, "learning_rate": 8.437784810974343e-07, "loss": 0.014, "step": 8487 }, { "epoch": 1.9312855517633674, "grad_norm": 0.8491504333753751, "learning_rate": 8.43694780554801e-07, "loss": 0.019, "step": 8488 }, { "epoch": 1.931513083048919, "grad_norm": 0.9480439733246314, "learning_rate": 8.436110755424472e-07, "loss": 0.0174, "step": 8489 }, { "epoch": 1.9317406143344709, "grad_norm": 0.6821732577268869, "learning_rate": 8.435273660620833e-07, "loss": 0.0111, "step": 8490 }, { "epoch": 1.9319681456200226, "grad_norm": 1.3852655380236707, "learning_rate": 8.434436521154202e-07, "loss": 0.0317, "step": 8491 }, { "epoch": 1.9321956769055744, "grad_norm": 1.0833902156494688, "learning_rate": 8.43359933704169e-07, "loss": 0.0244, "step": 8492 }, { "epoch": 1.9324232081911261, "grad_norm": 1.0603517811346055, "learning_rate": 8.432762108300404e-07, "loss": 0.027, "step": 8493 }, { "epoch": 1.932650739476678, "grad_norm": 1.366822240460289, "learning_rate": 8.431924834947461e-07, "loss": 0.0633, "step": 8494 }, { "epoch": 1.9328782707622298, "grad_norm": 1.0761472143543775, "learning_rate": 8.431087516999969e-07, "loss": 0.0256, "step": 8495 }, { "epoch": 1.9331058020477816, "grad_norm": 1.4433162417817076, "learning_rate": 8.430250154475044e-07, "loss": 0.0476, "step": 8496 }, { "epoch": 1.9333333333333333, "grad_norm": 0.9530040965882383, "learning_rate": 8.429412747389798e-07, "loss": 0.024, "step": 8497 }, { "epoch": 1.933560864618885, "grad_norm": 0.4978173677065556, "learning_rate": 8.428575295761346e-07, "loss": 0.0067, "step": 8498 }, { "epoch": 1.9337883959044369, "grad_norm": 1.1905435081364368, "learning_rate": 8.427737799606806e-07, "loss": 0.018, "step": 8499 }, { "epoch": 1.9340159271899886, "grad_norm": 1.012957267594849, "learning_rate": 8.426900258943292e-07, "loss": 0.0195, "step": 8500 }, { "epoch": 1.9342434584755404, "grad_norm": 0.9089230595622955, "learning_rate": 8.426062673787926e-07, "loss": 0.0242, "step": 8501 }, { "epoch": 1.934470989761092, "grad_norm": 1.3717302642577704, "learning_rate": 8.425225044157823e-07, "loss": 0.0312, "step": 8502 }, { "epoch": 1.9346985210466439, "grad_norm": 0.9103155187419014, "learning_rate": 8.424387370070102e-07, "loss": 0.0226, "step": 8503 }, { "epoch": 1.9349260523321958, "grad_norm": 1.9980223373755015, "learning_rate": 8.423549651541889e-07, "loss": 0.0469, "step": 8504 }, { "epoch": 1.9351535836177476, "grad_norm": 1.2594006281057515, "learning_rate": 8.4227118885903e-07, "loss": 0.0252, "step": 8505 }, { "epoch": 1.9353811149032993, "grad_norm": 1.3049118421199715, "learning_rate": 8.421874081232459e-07, "loss": 0.0379, "step": 8506 }, { "epoch": 1.935608646188851, "grad_norm": 0.6477534084196926, "learning_rate": 8.421036229485489e-07, "loss": 0.01, "step": 8507 }, { "epoch": 1.9358361774744028, "grad_norm": 1.1696824410055644, "learning_rate": 8.420198333366513e-07, "loss": 0.0259, "step": 8508 }, { "epoch": 1.9360637087599546, "grad_norm": 0.8079709445966895, "learning_rate": 8.419360392892663e-07, "loss": 0.0248, "step": 8509 }, { "epoch": 1.9362912400455063, "grad_norm": 1.4511253525917833, "learning_rate": 8.418522408081054e-07, "loss": 0.0214, "step": 8510 }, { "epoch": 1.936518771331058, "grad_norm": 1.129206025501574, "learning_rate": 8.417684378948822e-07, "loss": 0.0222, "step": 8511 }, { "epoch": 1.9367463026166098, "grad_norm": 0.842844775664999, "learning_rate": 8.416846305513089e-07, "loss": 0.0106, "step": 8512 }, { "epoch": 1.9369738339021616, "grad_norm": 0.9685261785917861, "learning_rate": 8.416008187790986e-07, "loss": 0.0125, "step": 8513 }, { "epoch": 1.9372013651877134, "grad_norm": 1.0807816755151243, "learning_rate": 8.415170025799644e-07, "loss": 0.0302, "step": 8514 }, { "epoch": 1.937428896473265, "grad_norm": 0.745171251075455, "learning_rate": 8.414331819556193e-07, "loss": 0.0064, "step": 8515 }, { "epoch": 1.9376564277588169, "grad_norm": 1.1211889513548992, "learning_rate": 8.41349356907776e-07, "loss": 0.0439, "step": 8516 }, { "epoch": 1.9378839590443686, "grad_norm": 0.8981099859269421, "learning_rate": 8.412655274381481e-07, "loss": 0.0195, "step": 8517 }, { "epoch": 1.9381114903299204, "grad_norm": 1.671316797260959, "learning_rate": 8.411816935484491e-07, "loss": 0.0426, "step": 8518 }, { "epoch": 1.9383390216154721, "grad_norm": 1.183556123114075, "learning_rate": 8.410978552403923e-07, "loss": 0.0414, "step": 8519 }, { "epoch": 1.9385665529010239, "grad_norm": 0.716076072097655, "learning_rate": 8.410140125156907e-07, "loss": 0.0126, "step": 8520 }, { "epoch": 1.9387940841865756, "grad_norm": 1.33588142832007, "learning_rate": 8.409301653760587e-07, "loss": 0.0271, "step": 8521 }, { "epoch": 1.9390216154721274, "grad_norm": 1.0341941339252312, "learning_rate": 8.408463138232094e-07, "loss": 0.0188, "step": 8522 }, { "epoch": 1.9392491467576791, "grad_norm": 1.5332586177406462, "learning_rate": 8.407624578588566e-07, "loss": 0.0439, "step": 8523 }, { "epoch": 1.9394766780432309, "grad_norm": 2.010127945672871, "learning_rate": 8.406785974847145e-07, "loss": 0.0328, "step": 8524 }, { "epoch": 1.9397042093287826, "grad_norm": 0.6510194841437853, "learning_rate": 8.405947327024968e-07, "loss": 0.0151, "step": 8525 }, { "epoch": 1.9399317406143344, "grad_norm": 1.2048863218421133, "learning_rate": 8.405108635139178e-07, "loss": 0.0184, "step": 8526 }, { "epoch": 1.9401592718998861, "grad_norm": 1.2753069331272529, "learning_rate": 8.404269899206911e-07, "loss": 0.0286, "step": 8527 }, { "epoch": 1.9403868031854379, "grad_norm": 1.3484674371507035, "learning_rate": 8.403431119245316e-07, "loss": 0.054, "step": 8528 }, { "epoch": 1.9406143344709896, "grad_norm": 0.9252465578467342, "learning_rate": 8.40259229527153e-07, "loss": 0.0242, "step": 8529 }, { "epoch": 1.9408418657565414, "grad_norm": 0.83021150201602, "learning_rate": 8.4017534273027e-07, "loss": 0.0304, "step": 8530 }, { "epoch": 1.9410693970420931, "grad_norm": 1.925549082550425, "learning_rate": 8.400914515355972e-07, "loss": 0.0526, "step": 8531 }, { "epoch": 1.9412969283276449, "grad_norm": 1.3291950350754373, "learning_rate": 8.40007555944849e-07, "loss": 0.0215, "step": 8532 }, { "epoch": 1.9415244596131969, "grad_norm": 0.917419574940405, "learning_rate": 8.399236559597403e-07, "loss": 0.0162, "step": 8533 }, { "epoch": 1.9417519908987486, "grad_norm": 1.0211676002921655, "learning_rate": 8.398397515819855e-07, "loss": 0.0244, "step": 8534 }, { "epoch": 1.9419795221843004, "grad_norm": 0.7994766197110429, "learning_rate": 8.397558428132995e-07, "loss": 0.0185, "step": 8535 }, { "epoch": 1.9422070534698521, "grad_norm": 3.5208757922434404, "learning_rate": 8.396719296553976e-07, "loss": 0.089, "step": 8536 }, { "epoch": 1.9424345847554039, "grad_norm": 1.9415052584017878, "learning_rate": 8.395880121099944e-07, "loss": 0.0625, "step": 8537 }, { "epoch": 1.9426621160409556, "grad_norm": 1.470869728302076, "learning_rate": 8.395040901788056e-07, "loss": 0.0397, "step": 8538 }, { "epoch": 1.9428896473265074, "grad_norm": 1.21141425306834, "learning_rate": 8.394201638635458e-07, "loss": 0.0299, "step": 8539 }, { "epoch": 1.9431171786120591, "grad_norm": 1.537393692306185, "learning_rate": 8.393362331659305e-07, "loss": 0.0484, "step": 8540 }, { "epoch": 1.9433447098976109, "grad_norm": 0.6350523232851911, "learning_rate": 8.392522980876752e-07, "loss": 0.0226, "step": 8541 }, { "epoch": 1.9435722411831626, "grad_norm": 0.9508792371503724, "learning_rate": 8.391683586304954e-07, "loss": 0.0232, "step": 8542 }, { "epoch": 1.9437997724687146, "grad_norm": 0.6704249174467451, "learning_rate": 8.390844147961064e-07, "loss": 0.014, "step": 8543 }, { "epoch": 1.9440273037542664, "grad_norm": 0.8355479717869047, "learning_rate": 8.39000466586224e-07, "loss": 0.0132, "step": 8544 }, { "epoch": 1.944254835039818, "grad_norm": 1.4903130768638286, "learning_rate": 8.389165140025642e-07, "loss": 0.0557, "step": 8545 }, { "epoch": 1.9444823663253699, "grad_norm": 1.4269569411160623, "learning_rate": 8.388325570468425e-07, "loss": 0.042, "step": 8546 }, { "epoch": 1.9447098976109216, "grad_norm": 2.033630266136058, "learning_rate": 8.38748595720775e-07, "loss": 0.0447, "step": 8547 }, { "epoch": 1.9449374288964734, "grad_norm": 1.1410481102298577, "learning_rate": 8.386646300260777e-07, "loss": 0.0355, "step": 8548 }, { "epoch": 1.9451649601820251, "grad_norm": 0.6948480718809295, "learning_rate": 8.385806599644666e-07, "loss": 0.0137, "step": 8549 }, { "epoch": 1.9453924914675769, "grad_norm": 0.8140264883212044, "learning_rate": 8.384966855376579e-07, "loss": 0.0113, "step": 8550 }, { "epoch": 1.9456200227531286, "grad_norm": 33.67982669588671, "learning_rate": 8.384127067473681e-07, "loss": 0.2731, "step": 8551 }, { "epoch": 1.9458475540386804, "grad_norm": 0.5758831577423202, "learning_rate": 8.383287235953133e-07, "loss": 0.0068, "step": 8552 }, { "epoch": 1.9460750853242321, "grad_norm": 2.176471810433205, "learning_rate": 8.382447360832102e-07, "loss": 0.0658, "step": 8553 }, { "epoch": 1.9463026166097839, "grad_norm": 0.8880018882253463, "learning_rate": 8.381607442127753e-07, "loss": 0.0201, "step": 8554 }, { "epoch": 1.9465301478953356, "grad_norm": 1.199962036020287, "learning_rate": 8.38076747985725e-07, "loss": 0.0464, "step": 8555 }, { "epoch": 1.9467576791808874, "grad_norm": 0.9170057900529092, "learning_rate": 8.379927474037762e-07, "loss": 0.0362, "step": 8556 }, { "epoch": 1.9469852104664391, "grad_norm": 1.3486833781970093, "learning_rate": 8.379087424686458e-07, "loss": 0.0575, "step": 8557 }, { "epoch": 1.9472127417519909, "grad_norm": 1.0923652362515575, "learning_rate": 8.378247331820504e-07, "loss": 0.0163, "step": 8558 }, { "epoch": 1.9474402730375426, "grad_norm": 1.0406302266708418, "learning_rate": 8.377407195457077e-07, "loss": 0.0237, "step": 8559 }, { "epoch": 1.9476678043230944, "grad_norm": 1.1825169811274758, "learning_rate": 8.376567015613339e-07, "loss": 0.0275, "step": 8560 }, { "epoch": 1.9478953356086461, "grad_norm": 1.1379295942906191, "learning_rate": 8.375726792306467e-07, "loss": 0.027, "step": 8561 }, { "epoch": 1.948122866894198, "grad_norm": 0.8123191602181989, "learning_rate": 8.374886525553632e-07, "loss": 0.0194, "step": 8562 }, { "epoch": 1.9483503981797496, "grad_norm": 1.3859418636229017, "learning_rate": 8.374046215372011e-07, "loss": 0.0303, "step": 8563 }, { "epoch": 1.9485779294653014, "grad_norm": 0.8420953328721413, "learning_rate": 8.37320586177877e-07, "loss": 0.0126, "step": 8564 }, { "epoch": 1.9488054607508531, "grad_norm": 1.1485451801525324, "learning_rate": 8.372365464791094e-07, "loss": 0.0177, "step": 8565 }, { "epoch": 1.949032992036405, "grad_norm": 1.3651782750195407, "learning_rate": 8.371525024426153e-07, "loss": 0.0254, "step": 8566 }, { "epoch": 1.9492605233219567, "grad_norm": 0.9995494790984009, "learning_rate": 8.370684540701126e-07, "loss": 0.0204, "step": 8567 }, { "epoch": 1.9494880546075084, "grad_norm": 1.5531029356279562, "learning_rate": 8.369844013633191e-07, "loss": 0.0479, "step": 8568 }, { "epoch": 1.9497155858930602, "grad_norm": 1.024177847940211, "learning_rate": 8.369003443239528e-07, "loss": 0.0309, "step": 8569 }, { "epoch": 1.949943117178612, "grad_norm": 0.9818950626534545, "learning_rate": 8.368162829537313e-07, "loss": 0.0165, "step": 8570 }, { "epoch": 1.9501706484641637, "grad_norm": 0.5604927182640149, "learning_rate": 8.367322172543729e-07, "loss": 0.0117, "step": 8571 }, { "epoch": 1.9503981797497156, "grad_norm": 0.9133949942753709, "learning_rate": 8.366481472275961e-07, "loss": 0.0188, "step": 8572 }, { "epoch": 1.9506257110352674, "grad_norm": 1.2929357532911143, "learning_rate": 8.365640728751184e-07, "loss": 0.0302, "step": 8573 }, { "epoch": 1.9508532423208191, "grad_norm": 0.7383923021797731, "learning_rate": 8.364799941986587e-07, "loss": 0.0187, "step": 8574 }, { "epoch": 1.9510807736063709, "grad_norm": 1.1839304673917488, "learning_rate": 8.363959111999352e-07, "loss": 0.0377, "step": 8575 }, { "epoch": 1.9513083048919226, "grad_norm": 1.1521870732115753, "learning_rate": 8.363118238806663e-07, "loss": 0.0208, "step": 8576 }, { "epoch": 1.9515358361774744, "grad_norm": 0.8447968990656242, "learning_rate": 8.362277322425709e-07, "loss": 0.0207, "step": 8577 }, { "epoch": 1.9517633674630261, "grad_norm": 3.511151868539225, "learning_rate": 8.361436362873676e-07, "loss": 0.1474, "step": 8578 }, { "epoch": 1.951990898748578, "grad_norm": 1.0506334321174164, "learning_rate": 8.360595360167748e-07, "loss": 0.03, "step": 8579 }, { "epoch": 1.9522184300341296, "grad_norm": 1.4388344988078532, "learning_rate": 8.359754314325117e-07, "loss": 0.0428, "step": 8580 }, { "epoch": 1.9524459613196816, "grad_norm": 0.9175157387598539, "learning_rate": 8.358913225362972e-07, "loss": 0.0212, "step": 8581 }, { "epoch": 1.9526734926052334, "grad_norm": 1.1474465849879947, "learning_rate": 8.358072093298503e-07, "loss": 0.0319, "step": 8582 }, { "epoch": 1.9529010238907851, "grad_norm": 0.9606355848307595, "learning_rate": 8.357230918148901e-07, "loss": 0.0261, "step": 8583 }, { "epoch": 1.9531285551763369, "grad_norm": 0.8419564239260479, "learning_rate": 8.356389699931359e-07, "loss": 0.0295, "step": 8584 }, { "epoch": 1.9533560864618886, "grad_norm": 0.739116737145134, "learning_rate": 8.35554843866307e-07, "loss": 0.0142, "step": 8585 }, { "epoch": 1.9535836177474404, "grad_norm": 1.4018896373401493, "learning_rate": 8.354707134361224e-07, "loss": 0.0415, "step": 8586 }, { "epoch": 1.9538111490329921, "grad_norm": 1.4592585138779515, "learning_rate": 8.35386578704302e-07, "loss": 0.0287, "step": 8587 }, { "epoch": 1.9540386803185439, "grad_norm": 1.7715878188929528, "learning_rate": 8.353024396725653e-07, "loss": 0.0538, "step": 8588 }, { "epoch": 1.9542662116040956, "grad_norm": 1.3693419664466187, "learning_rate": 8.352182963426317e-07, "loss": 0.0198, "step": 8589 }, { "epoch": 1.9544937428896474, "grad_norm": 1.1544739346084487, "learning_rate": 8.351341487162214e-07, "loss": 0.0254, "step": 8590 }, { "epoch": 1.9547212741751991, "grad_norm": 2.1104083976647012, "learning_rate": 8.350499967950538e-07, "loss": 0.0492, "step": 8591 }, { "epoch": 1.954948805460751, "grad_norm": 1.1692825473703383, "learning_rate": 8.349658405808489e-07, "loss": 0.0258, "step": 8592 }, { "epoch": 1.9551763367463026, "grad_norm": 1.2419564910884628, "learning_rate": 8.348816800753269e-07, "loss": 0.0293, "step": 8593 }, { "epoch": 1.9554038680318544, "grad_norm": 1.1309042179682378, "learning_rate": 8.347975152802075e-07, "loss": 0.0264, "step": 8594 }, { "epoch": 1.9556313993174061, "grad_norm": 1.4290099146173239, "learning_rate": 8.347133461972112e-07, "loss": 0.0395, "step": 8595 }, { "epoch": 1.955858930602958, "grad_norm": 1.4936400221654453, "learning_rate": 8.346291728280584e-07, "loss": 0.0467, "step": 8596 }, { "epoch": 1.9560864618885097, "grad_norm": 0.7787161379756027, "learning_rate": 8.345449951744692e-07, "loss": 0.0117, "step": 8597 }, { "epoch": 1.9563139931740614, "grad_norm": 0.9751506057590029, "learning_rate": 8.344608132381637e-07, "loss": 0.014, "step": 8598 }, { "epoch": 1.9565415244596132, "grad_norm": 1.3679824917988233, "learning_rate": 8.343766270208631e-07, "loss": 0.0309, "step": 8599 }, { "epoch": 1.956769055745165, "grad_norm": 0.7086403075954423, "learning_rate": 8.342924365242878e-07, "loss": 0.0187, "step": 8600 }, { "epoch": 1.9569965870307167, "grad_norm": 1.5007550176295361, "learning_rate": 8.342082417501579e-07, "loss": 0.0214, "step": 8601 }, { "epoch": 1.9572241183162684, "grad_norm": 0.9485634208130852, "learning_rate": 8.341240427001951e-07, "loss": 0.024, "step": 8602 }, { "epoch": 1.9574516496018202, "grad_norm": 1.1100998316363275, "learning_rate": 8.340398393761199e-07, "loss": 0.0275, "step": 8603 }, { "epoch": 1.957679180887372, "grad_norm": 0.8641664732957542, "learning_rate": 8.339556317796529e-07, "loss": 0.0133, "step": 8604 }, { "epoch": 1.9579067121729237, "grad_norm": 0.8683423704438483, "learning_rate": 8.338714199125157e-07, "loss": 0.0192, "step": 8605 }, { "epoch": 1.9581342434584754, "grad_norm": 1.128209302155601, "learning_rate": 8.337872037764292e-07, "loss": 0.0278, "step": 8606 }, { "epoch": 1.9583617747440272, "grad_norm": 0.766978896602123, "learning_rate": 8.337029833731145e-07, "loss": 0.0173, "step": 8607 }, { "epoch": 1.958589306029579, "grad_norm": 1.2084456984055698, "learning_rate": 8.336187587042932e-07, "loss": 0.0299, "step": 8608 }, { "epoch": 1.9588168373151307, "grad_norm": 0.9962730444702378, "learning_rate": 8.335345297716863e-07, "loss": 0.0386, "step": 8609 }, { "epoch": 1.9590443686006824, "grad_norm": 1.1075213008133922, "learning_rate": 8.334502965770158e-07, "loss": 0.0402, "step": 8610 }, { "epoch": 1.9592718998862344, "grad_norm": 0.6988985648561841, "learning_rate": 8.33366059122003e-07, "loss": 0.011, "step": 8611 }, { "epoch": 1.9594994311717862, "grad_norm": 1.144705818495189, "learning_rate": 8.332818174083694e-07, "loss": 0.0297, "step": 8612 }, { "epoch": 1.959726962457338, "grad_norm": 1.058490736306249, "learning_rate": 8.331975714378369e-07, "loss": 0.0159, "step": 8613 }, { "epoch": 1.9599544937428897, "grad_norm": 1.091387700160984, "learning_rate": 8.331133212121273e-07, "loss": 0.0262, "step": 8614 }, { "epoch": 1.9601820250284414, "grad_norm": 0.9330086964337541, "learning_rate": 8.330290667329627e-07, "loss": 0.0225, "step": 8615 }, { "epoch": 1.9604095563139932, "grad_norm": 1.967442475907266, "learning_rate": 8.329448080020651e-07, "loss": 0.074, "step": 8616 }, { "epoch": 1.960637087599545, "grad_norm": 1.1163242624066385, "learning_rate": 8.32860545021156e-07, "loss": 0.0386, "step": 8617 }, { "epoch": 1.9608646188850967, "grad_norm": 0.970224699604641, "learning_rate": 8.327762777919585e-07, "loss": 0.0164, "step": 8618 }, { "epoch": 1.9610921501706484, "grad_norm": 0.5276741067783547, "learning_rate": 8.326920063161942e-07, "loss": 0.0178, "step": 8619 }, { "epoch": 1.9613196814562004, "grad_norm": 1.0009730372919852, "learning_rate": 8.326077305955857e-07, "loss": 0.0231, "step": 8620 }, { "epoch": 1.9615472127417521, "grad_norm": 1.2868902445641404, "learning_rate": 8.325234506318553e-07, "loss": 0.033, "step": 8621 }, { "epoch": 1.961774744027304, "grad_norm": 1.1246482872639023, "learning_rate": 8.324391664267257e-07, "loss": 0.0394, "step": 8622 }, { "epoch": 1.9620022753128556, "grad_norm": 1.3455231367897886, "learning_rate": 8.323548779819194e-07, "loss": 0.0298, "step": 8623 }, { "epoch": 1.9622298065984074, "grad_norm": 1.6393099584172452, "learning_rate": 8.322705852991592e-07, "loss": 0.0436, "step": 8624 }, { "epoch": 1.9624573378839592, "grad_norm": 0.7536024647751555, "learning_rate": 8.321862883801677e-07, "loss": 0.0111, "step": 8625 }, { "epoch": 1.962684869169511, "grad_norm": 0.773879477613702, "learning_rate": 8.321019872266682e-07, "loss": 0.022, "step": 8626 }, { "epoch": 1.9629124004550627, "grad_norm": 1.1594672149335956, "learning_rate": 8.320176818403831e-07, "loss": 0.0174, "step": 8627 }, { "epoch": 1.9631399317406144, "grad_norm": 1.0266232168394467, "learning_rate": 8.319333722230359e-07, "loss": 0.0328, "step": 8628 }, { "epoch": 1.9633674630261662, "grad_norm": 0.8306133501722734, "learning_rate": 8.318490583763494e-07, "loss": 0.0253, "step": 8629 }, { "epoch": 1.963594994311718, "grad_norm": 1.2649424862296168, "learning_rate": 8.317647403020472e-07, "loss": 0.0475, "step": 8630 }, { "epoch": 1.9638225255972697, "grad_norm": 2.014816072849253, "learning_rate": 8.316804180018522e-07, "loss": 0.0552, "step": 8631 }, { "epoch": 1.9640500568828214, "grad_norm": 0.49606898198662275, "learning_rate": 8.315960914774878e-07, "loss": 0.0075, "step": 8632 }, { "epoch": 1.9642775881683732, "grad_norm": 1.2199716369799536, "learning_rate": 8.31511760730678e-07, "loss": 0.0305, "step": 8633 }, { "epoch": 1.964505119453925, "grad_norm": 1.320463922808405, "learning_rate": 8.314274257631458e-07, "loss": 0.0253, "step": 8634 }, { "epoch": 1.9647326507394767, "grad_norm": 0.7885505673935275, "learning_rate": 8.313430865766154e-07, "loss": 0.014, "step": 8635 }, { "epoch": 1.9649601820250284, "grad_norm": 1.484537808236749, "learning_rate": 8.312587431728098e-07, "loss": 0.0246, "step": 8636 }, { "epoch": 1.9651877133105802, "grad_norm": 1.8047818594210032, "learning_rate": 8.311743955534533e-07, "loss": 0.0434, "step": 8637 }, { "epoch": 1.965415244596132, "grad_norm": 1.6774167551399732, "learning_rate": 8.310900437202699e-07, "loss": 0.0204, "step": 8638 }, { "epoch": 1.9656427758816837, "grad_norm": 0.7736895427296182, "learning_rate": 8.310056876749833e-07, "loss": 0.017, "step": 8639 }, { "epoch": 1.9658703071672354, "grad_norm": 1.3766383243992963, "learning_rate": 8.309213274193179e-07, "loss": 0.0243, "step": 8640 }, { "epoch": 1.9660978384527872, "grad_norm": 10.91155799511908, "learning_rate": 8.308369629549976e-07, "loss": 0.1186, "step": 8641 }, { "epoch": 1.966325369738339, "grad_norm": 0.9268482275101161, "learning_rate": 8.307525942837468e-07, "loss": 0.0327, "step": 8642 }, { "epoch": 1.9665529010238907, "grad_norm": 1.9798265565542381, "learning_rate": 8.306682214072897e-07, "loss": 0.0719, "step": 8643 }, { "epoch": 1.9667804323094424, "grad_norm": 1.1530434422527183, "learning_rate": 8.305838443273508e-07, "loss": 0.0312, "step": 8644 }, { "epoch": 1.9670079635949942, "grad_norm": 1.6632270883813656, "learning_rate": 8.304994630456546e-07, "loss": 0.0382, "step": 8645 }, { "epoch": 1.967235494880546, "grad_norm": 1.2715280679684924, "learning_rate": 8.304150775639258e-07, "loss": 0.0234, "step": 8646 }, { "epoch": 1.9674630261660977, "grad_norm": 1.3001980514362683, "learning_rate": 8.303306878838892e-07, "loss": 0.031, "step": 8647 }, { "epoch": 1.9676905574516494, "grad_norm": 0.8323495719274282, "learning_rate": 8.302462940072691e-07, "loss": 0.0167, "step": 8648 }, { "epoch": 1.9679180887372012, "grad_norm": 0.7713193628443443, "learning_rate": 8.301618959357908e-07, "loss": 0.0276, "step": 8649 }, { "epoch": 1.9681456200227532, "grad_norm": 1.1972821252281405, "learning_rate": 8.300774936711792e-07, "loss": 0.0371, "step": 8650 }, { "epoch": 1.968373151308305, "grad_norm": 1.1958620099652342, "learning_rate": 8.299930872151589e-07, "loss": 0.0206, "step": 8651 }, { "epoch": 1.9686006825938567, "grad_norm": 1.3119998832101367, "learning_rate": 8.299086765694554e-07, "loss": 0.0227, "step": 8652 }, { "epoch": 1.9688282138794084, "grad_norm": 1.1941007190599409, "learning_rate": 8.298242617357939e-07, "loss": 0.0185, "step": 8653 }, { "epoch": 1.9690557451649602, "grad_norm": 1.017322753157879, "learning_rate": 8.297398427158996e-07, "loss": 0.0255, "step": 8654 }, { "epoch": 1.969283276450512, "grad_norm": 1.073134449146722, "learning_rate": 8.296554195114979e-07, "loss": 0.0282, "step": 8655 }, { "epoch": 1.9695108077360637, "grad_norm": 0.9658448239276997, "learning_rate": 8.295709921243143e-07, "loss": 0.0227, "step": 8656 }, { "epoch": 1.9697383390216154, "grad_norm": 0.7505703251353405, "learning_rate": 8.294865605560743e-07, "loss": 0.0183, "step": 8657 }, { "epoch": 1.9699658703071672, "grad_norm": 0.8668155064414824, "learning_rate": 8.294021248085032e-07, "loss": 0.026, "step": 8658 }, { "epoch": 1.9701934015927192, "grad_norm": 0.9982979030777734, "learning_rate": 8.293176848833273e-07, "loss": 0.0234, "step": 8659 }, { "epoch": 1.970420932878271, "grad_norm": 1.051098783463964, "learning_rate": 8.292332407822721e-07, "loss": 0.0243, "step": 8660 }, { "epoch": 1.9706484641638227, "grad_norm": 0.6169455454202162, "learning_rate": 8.291487925070634e-07, "loss": 0.0102, "step": 8661 }, { "epoch": 1.9708759954493744, "grad_norm": 1.0371576668223386, "learning_rate": 8.290643400594273e-07, "loss": 0.0196, "step": 8662 }, { "epoch": 1.9711035267349262, "grad_norm": 0.8863462428028039, "learning_rate": 8.289798834410898e-07, "loss": 0.0195, "step": 8663 }, { "epoch": 1.971331058020478, "grad_norm": 0.9635487205693061, "learning_rate": 8.288954226537768e-07, "loss": 0.0267, "step": 8664 }, { "epoch": 1.9715585893060297, "grad_norm": 1.3899273814546962, "learning_rate": 8.288109576992151e-07, "loss": 0.0502, "step": 8665 }, { "epoch": 1.9717861205915814, "grad_norm": 1.2549189430443881, "learning_rate": 8.287264885791308e-07, "loss": 0.034, "step": 8666 }, { "epoch": 1.9720136518771332, "grad_norm": 1.1139192118311512, "learning_rate": 8.286420152952499e-07, "loss": 0.0306, "step": 8667 }, { "epoch": 1.972241183162685, "grad_norm": 1.794539989066656, "learning_rate": 8.285575378492992e-07, "loss": 0.0303, "step": 8668 }, { "epoch": 1.9724687144482367, "grad_norm": 1.1435782603954396, "learning_rate": 8.284730562430053e-07, "loss": 0.0307, "step": 8669 }, { "epoch": 1.9726962457337884, "grad_norm": 1.5817748755403243, "learning_rate": 8.283885704780947e-07, "loss": 0.0645, "step": 8670 }, { "epoch": 1.9729237770193402, "grad_norm": 1.1078746367367593, "learning_rate": 8.283040805562942e-07, "loss": 0.0215, "step": 8671 }, { "epoch": 1.973151308304892, "grad_norm": 1.1982443797342497, "learning_rate": 8.282195864793307e-07, "loss": 0.0429, "step": 8672 }, { "epoch": 1.9733788395904437, "grad_norm": 0.9251868863784273, "learning_rate": 8.281350882489311e-07, "loss": 0.0167, "step": 8673 }, { "epoch": 1.9736063708759954, "grad_norm": 1.6819184224447514, "learning_rate": 8.280505858668222e-07, "loss": 0.0594, "step": 8674 }, { "epoch": 1.9738339021615472, "grad_norm": 1.5761511420584504, "learning_rate": 8.279660793347314e-07, "loss": 0.0338, "step": 8675 }, { "epoch": 1.974061433447099, "grad_norm": 1.1263389265862735, "learning_rate": 8.278815686543854e-07, "loss": 0.0416, "step": 8676 }, { "epoch": 1.9742889647326507, "grad_norm": 1.3226547626551843, "learning_rate": 8.277970538275118e-07, "loss": 0.0232, "step": 8677 }, { "epoch": 1.9745164960182024, "grad_norm": 0.7605156284409775, "learning_rate": 8.277125348558376e-07, "loss": 0.0204, "step": 8678 }, { "epoch": 1.9747440273037542, "grad_norm": 1.0778152208501437, "learning_rate": 8.276280117410909e-07, "loss": 0.0315, "step": 8679 }, { "epoch": 1.974971558589306, "grad_norm": 0.843263740923669, "learning_rate": 8.275434844849985e-07, "loss": 0.0163, "step": 8680 }, { "epoch": 1.9751990898748577, "grad_norm": 0.822307963561357, "learning_rate": 8.274589530892881e-07, "loss": 0.0124, "step": 8681 }, { "epoch": 1.9754266211604095, "grad_norm": 1.6723312975706626, "learning_rate": 8.273744175556877e-07, "loss": 0.0534, "step": 8682 }, { "epoch": 1.9756541524459612, "grad_norm": 0.9524245517137393, "learning_rate": 8.272898778859247e-07, "loss": 0.0213, "step": 8683 }, { "epoch": 1.975881683731513, "grad_norm": 0.9428614388231097, "learning_rate": 8.272053340817271e-07, "loss": 0.0174, "step": 8684 }, { "epoch": 1.9761092150170647, "grad_norm": 0.982980553142515, "learning_rate": 8.271207861448227e-07, "loss": 0.0434, "step": 8685 }, { "epoch": 1.9763367463026165, "grad_norm": 1.7503072386779026, "learning_rate": 8.270362340769397e-07, "loss": 0.0701, "step": 8686 }, { "epoch": 1.9765642775881682, "grad_norm": 1.4686852590607977, "learning_rate": 8.269516778798062e-07, "loss": 0.0609, "step": 8687 }, { "epoch": 1.9767918088737202, "grad_norm": 1.1044537127744922, "learning_rate": 8.268671175551499e-07, "loss": 0.0279, "step": 8688 }, { "epoch": 1.977019340159272, "grad_norm": 0.8675072379447266, "learning_rate": 8.267825531046997e-07, "loss": 0.0162, "step": 8689 }, { "epoch": 1.9772468714448237, "grad_norm": 1.3340777286424959, "learning_rate": 8.266979845301837e-07, "loss": 0.0347, "step": 8690 }, { "epoch": 1.9774744027303754, "grad_norm": 1.862082949838379, "learning_rate": 8.266134118333302e-07, "loss": 0.0605, "step": 8691 }, { "epoch": 1.9777019340159272, "grad_norm": 1.724709349145104, "learning_rate": 8.265288350158677e-07, "loss": 0.0421, "step": 8692 }, { "epoch": 1.977929465301479, "grad_norm": 0.6617687281908288, "learning_rate": 8.26444254079525e-07, "loss": 0.0155, "step": 8693 }, { "epoch": 1.9781569965870307, "grad_norm": 0.9542020767481539, "learning_rate": 8.263596690260306e-07, "loss": 0.0283, "step": 8694 }, { "epoch": 1.9783845278725825, "grad_norm": 1.1565542136712943, "learning_rate": 8.262750798571134e-07, "loss": 0.0267, "step": 8695 }, { "epoch": 1.9786120591581342, "grad_norm": 1.0125080139878857, "learning_rate": 8.261904865745022e-07, "loss": 0.0297, "step": 8696 }, { "epoch": 1.978839590443686, "grad_norm": 1.061120595054336, "learning_rate": 8.26105889179926e-07, "loss": 0.0233, "step": 8697 }, { "epoch": 1.979067121729238, "grad_norm": 1.3367563555151707, "learning_rate": 8.260212876751137e-07, "loss": 0.0356, "step": 8698 }, { "epoch": 1.9792946530147897, "grad_norm": 1.064582252802175, "learning_rate": 8.259366820617943e-07, "loss": 0.0325, "step": 8699 }, { "epoch": 1.9795221843003414, "grad_norm": 1.0237417556631883, "learning_rate": 8.258520723416972e-07, "loss": 0.0172, "step": 8700 }, { "epoch": 1.9797497155858932, "grad_norm": 0.9079968240893457, "learning_rate": 8.257674585165515e-07, "loss": 0.0234, "step": 8701 }, { "epoch": 1.979977246871445, "grad_norm": 0.8075493458055363, "learning_rate": 8.256828405880868e-07, "loss": 0.02, "step": 8702 }, { "epoch": 1.9802047781569967, "grad_norm": 0.8383755855854361, "learning_rate": 8.255982185580323e-07, "loss": 0.0176, "step": 8703 }, { "epoch": 1.9804323094425484, "grad_norm": 0.8588452408891758, "learning_rate": 8.255135924281175e-07, "loss": 0.0156, "step": 8704 }, { "epoch": 1.9806598407281002, "grad_norm": 0.8869722078081338, "learning_rate": 8.254289622000724e-07, "loss": 0.0156, "step": 8705 }, { "epoch": 1.980887372013652, "grad_norm": 1.1750604227287018, "learning_rate": 8.253443278756261e-07, "loss": 0.0265, "step": 8706 }, { "epoch": 1.9811149032992037, "grad_norm": 1.0528660523642601, "learning_rate": 8.252596894565088e-07, "loss": 0.0287, "step": 8707 }, { "epoch": 1.9813424345847555, "grad_norm": 0.8956761378108004, "learning_rate": 8.251750469444498e-07, "loss": 0.017, "step": 8708 }, { "epoch": 1.9815699658703072, "grad_norm": 1.1554876465333057, "learning_rate": 8.250904003411798e-07, "loss": 0.0269, "step": 8709 }, { "epoch": 1.981797497155859, "grad_norm": 1.0472399011095535, "learning_rate": 8.250057496484285e-07, "loss": 0.0164, "step": 8710 }, { "epoch": 1.9820250284414107, "grad_norm": 1.0382917933149183, "learning_rate": 8.24921094867926e-07, "loss": 0.0208, "step": 8711 }, { "epoch": 1.9822525597269625, "grad_norm": 1.7066003981198556, "learning_rate": 8.248364360014023e-07, "loss": 0.0321, "step": 8712 }, { "epoch": 1.9824800910125142, "grad_norm": 1.2766339870608123, "learning_rate": 8.247517730505879e-07, "loss": 0.0211, "step": 8713 }, { "epoch": 1.982707622298066, "grad_norm": 0.9057421594594387, "learning_rate": 8.24667106017213e-07, "loss": 0.0147, "step": 8714 }, { "epoch": 1.9829351535836177, "grad_norm": 0.7963917651951765, "learning_rate": 8.245824349030082e-07, "loss": 0.0266, "step": 8715 }, { "epoch": 1.9831626848691695, "grad_norm": 1.1790616888344387, "learning_rate": 8.244977597097039e-07, "loss": 0.0467, "step": 8716 }, { "epoch": 1.9833902161547212, "grad_norm": 1.0535014935954148, "learning_rate": 8.244130804390311e-07, "loss": 0.0205, "step": 8717 }, { "epoch": 1.983617747440273, "grad_norm": 1.5597342762619004, "learning_rate": 8.243283970927196e-07, "loss": 0.0392, "step": 8718 }, { "epoch": 1.9838452787258247, "grad_norm": 2.409043909428933, "learning_rate": 8.242437096725009e-07, "loss": 0.0171, "step": 8719 }, { "epoch": 1.9840728100113765, "grad_norm": 0.9772091858306938, "learning_rate": 8.241590181801059e-07, "loss": 0.023, "step": 8720 }, { "epoch": 1.9843003412969282, "grad_norm": 1.1293407226155698, "learning_rate": 8.240743226172651e-07, "loss": 0.0207, "step": 8721 }, { "epoch": 1.98452787258248, "grad_norm": 0.8710932227462915, "learning_rate": 8.239896229857096e-07, "loss": 0.0237, "step": 8722 }, { "epoch": 1.9847554038680317, "grad_norm": 2.231175161679385, "learning_rate": 8.23904919287171e-07, "loss": 0.0975, "step": 8723 }, { "epoch": 1.9849829351535835, "grad_norm": 0.7504706692267575, "learning_rate": 8.2382021152338e-07, "loss": 0.0207, "step": 8724 }, { "epoch": 1.9852104664391352, "grad_norm": 1.2591709035268228, "learning_rate": 8.237354996960678e-07, "loss": 0.029, "step": 8725 }, { "epoch": 1.985437997724687, "grad_norm": 1.4421635680441485, "learning_rate": 8.23650783806966e-07, "loss": 0.0511, "step": 8726 }, { "epoch": 1.985665529010239, "grad_norm": 1.1065774105002377, "learning_rate": 8.235660638578061e-07, "loss": 0.0245, "step": 8727 }, { "epoch": 1.9858930602957907, "grad_norm": 0.9428658154154148, "learning_rate": 8.234813398503194e-07, "loss": 0.0186, "step": 8728 }, { "epoch": 1.9861205915813425, "grad_norm": 1.2117398012423959, "learning_rate": 8.233966117862378e-07, "loss": 0.0313, "step": 8729 }, { "epoch": 1.9863481228668942, "grad_norm": 0.8148093077827677, "learning_rate": 8.233118796672929e-07, "loss": 0.0332, "step": 8730 }, { "epoch": 1.986575654152446, "grad_norm": 1.3969644663564684, "learning_rate": 8.23227143495216e-07, "loss": 0.0206, "step": 8731 }, { "epoch": 1.9868031854379977, "grad_norm": 0.9233518202413549, "learning_rate": 8.231424032717395e-07, "loss": 0.015, "step": 8732 }, { "epoch": 1.9870307167235495, "grad_norm": 0.8688629986743365, "learning_rate": 8.230576589985951e-07, "loss": 0.0179, "step": 8733 }, { "epoch": 1.9872582480091012, "grad_norm": 1.7448892272436423, "learning_rate": 8.22972910677515e-07, "loss": 0.0364, "step": 8734 }, { "epoch": 1.987485779294653, "grad_norm": 0.9001399229517513, "learning_rate": 8.22888158310231e-07, "loss": 0.0189, "step": 8735 }, { "epoch": 1.9877133105802047, "grad_norm": 1.0684583399613707, "learning_rate": 8.228034018984757e-07, "loss": 0.0225, "step": 8736 }, { "epoch": 1.9879408418657567, "grad_norm": 1.137792725613461, "learning_rate": 8.227186414439812e-07, "loss": 0.0321, "step": 8737 }, { "epoch": 1.9881683731513085, "grad_norm": 0.4761613521913952, "learning_rate": 8.226338769484793e-07, "loss": 0.013, "step": 8738 }, { "epoch": 1.9883959044368602, "grad_norm": 1.300142438025343, "learning_rate": 8.225491084137032e-07, "loss": 0.0338, "step": 8739 }, { "epoch": 1.988623435722412, "grad_norm": 1.2848804726113578, "learning_rate": 8.22464335841385e-07, "loss": 0.0243, "step": 8740 }, { "epoch": 1.9888509670079637, "grad_norm": 0.9225434461136578, "learning_rate": 8.223795592332575e-07, "loss": 0.0165, "step": 8741 }, { "epoch": 1.9890784982935155, "grad_norm": 0.8346238495866183, "learning_rate": 8.222947785910534e-07, "loss": 0.0179, "step": 8742 }, { "epoch": 1.9893060295790672, "grad_norm": 0.9537674858060043, "learning_rate": 8.222099939165053e-07, "loss": 0.0217, "step": 8743 }, { "epoch": 1.989533560864619, "grad_norm": 1.545983536426448, "learning_rate": 8.221252052113461e-07, "loss": 0.0314, "step": 8744 }, { "epoch": 1.9897610921501707, "grad_norm": 1.2343476530361066, "learning_rate": 8.220404124773084e-07, "loss": 0.033, "step": 8745 }, { "epoch": 1.9899886234357225, "grad_norm": 1.0359221806193963, "learning_rate": 8.21955615716126e-07, "loss": 0.0308, "step": 8746 }, { "epoch": 1.9902161547212742, "grad_norm": 0.6159295211114494, "learning_rate": 8.218708149295312e-07, "loss": 0.0146, "step": 8747 }, { "epoch": 1.990443686006826, "grad_norm": 1.0030155089241657, "learning_rate": 8.217860101192578e-07, "loss": 0.0296, "step": 8748 }, { "epoch": 1.9906712172923777, "grad_norm": 1.2171836674531433, "learning_rate": 8.217012012870384e-07, "loss": 0.0332, "step": 8749 }, { "epoch": 1.9908987485779295, "grad_norm": 1.0134205901264104, "learning_rate": 8.21616388434607e-07, "loss": 0.0407, "step": 8750 }, { "epoch": 1.9911262798634812, "grad_norm": 1.2613091211202425, "learning_rate": 8.215315715636965e-07, "loss": 0.0271, "step": 8751 }, { "epoch": 1.991353811149033, "grad_norm": 0.9701824049292664, "learning_rate": 8.214467506760407e-07, "loss": 0.0188, "step": 8752 }, { "epoch": 1.9915813424345847, "grad_norm": 1.1531015931207416, "learning_rate": 8.213619257733729e-07, "loss": 0.0172, "step": 8753 }, { "epoch": 1.9918088737201365, "grad_norm": 1.223676074095881, "learning_rate": 8.212770968574274e-07, "loss": 0.0295, "step": 8754 }, { "epoch": 1.9920364050056882, "grad_norm": 0.9060703172422784, "learning_rate": 8.211922639299372e-07, "loss": 0.0197, "step": 8755 }, { "epoch": 1.99226393629124, "grad_norm": 1.1589371318449682, "learning_rate": 8.211074269926364e-07, "loss": 0.0319, "step": 8756 }, { "epoch": 1.9924914675767917, "grad_norm": 1.162550322553717, "learning_rate": 8.21022586047259e-07, "loss": 0.027, "step": 8757 }, { "epoch": 1.9927189988623435, "grad_norm": 1.483389942397735, "learning_rate": 8.209377410955388e-07, "loss": 0.058, "step": 8758 }, { "epoch": 1.9929465301478952, "grad_norm": 0.5989315482760323, "learning_rate": 8.208528921392101e-07, "loss": 0.0179, "step": 8759 }, { "epoch": 1.993174061433447, "grad_norm": 0.8224236601819628, "learning_rate": 8.207680391800071e-07, "loss": 0.0143, "step": 8760 }, { "epoch": 1.9934015927189987, "grad_norm": 1.5769681179365267, "learning_rate": 8.206831822196639e-07, "loss": 0.0389, "step": 8761 }, { "epoch": 1.9936291240045505, "grad_norm": 1.2642461984598112, "learning_rate": 8.205983212599147e-07, "loss": 0.0355, "step": 8762 }, { "epoch": 1.9938566552901023, "grad_norm": 0.8245254804736318, "learning_rate": 8.205134563024942e-07, "loss": 0.0138, "step": 8763 }, { "epoch": 1.994084186575654, "grad_norm": 1.027387146344138, "learning_rate": 8.204285873491366e-07, "loss": 0.019, "step": 8764 }, { "epoch": 1.9943117178612058, "grad_norm": 1.2011382159350898, "learning_rate": 8.203437144015766e-07, "loss": 0.026, "step": 8765 }, { "epoch": 1.9945392491467577, "grad_norm": 1.5164278338793358, "learning_rate": 8.202588374615489e-07, "loss": 0.0325, "step": 8766 }, { "epoch": 1.9947667804323095, "grad_norm": 1.9144837369745, "learning_rate": 8.201739565307881e-07, "loss": 0.0286, "step": 8767 }, { "epoch": 1.9949943117178612, "grad_norm": 1.1510459916405422, "learning_rate": 8.200890716110291e-07, "loss": 0.0348, "step": 8768 }, { "epoch": 1.995221843003413, "grad_norm": 1.0579695094334676, "learning_rate": 8.200041827040067e-07, "loss": 0.0333, "step": 8769 }, { "epoch": 1.9954493742889647, "grad_norm": 0.8783006470237726, "learning_rate": 8.19919289811456e-07, "loss": 0.0188, "step": 8770 }, { "epoch": 1.9956769055745165, "grad_norm": 1.2337651735307613, "learning_rate": 8.19834392935112e-07, "loss": 0.0351, "step": 8771 }, { "epoch": 1.9959044368600682, "grad_norm": 1.614485473246855, "learning_rate": 8.197494920767098e-07, "loss": 0.0377, "step": 8772 }, { "epoch": 1.99613196814562, "grad_norm": 1.1038033120164423, "learning_rate": 8.196645872379847e-07, "loss": 0.0364, "step": 8773 }, { "epoch": 1.9963594994311717, "grad_norm": 0.8343418233948011, "learning_rate": 8.19579678420672e-07, "loss": 0.0187, "step": 8774 }, { "epoch": 1.9965870307167235, "grad_norm": 1.8439932234882088, "learning_rate": 8.194947656265068e-07, "loss": 0.051, "step": 8775 }, { "epoch": 1.9968145620022755, "grad_norm": 0.9058793146661416, "learning_rate": 8.19409848857225e-07, "loss": 0.0203, "step": 8776 }, { "epoch": 1.9970420932878272, "grad_norm": 0.5079408054170178, "learning_rate": 8.193249281145618e-07, "loss": 0.0059, "step": 8777 }, { "epoch": 1.997269624573379, "grad_norm": 1.1010114111421319, "learning_rate": 8.19240003400253e-07, "loss": 0.0251, "step": 8778 }, { "epoch": 1.9974971558589307, "grad_norm": 1.23466091864004, "learning_rate": 8.191550747160343e-07, "loss": 0.0429, "step": 8779 }, { "epoch": 1.9977246871444825, "grad_norm": 1.0463717415347287, "learning_rate": 8.190701420636415e-07, "loss": 0.0301, "step": 8780 }, { "epoch": 1.9979522184300342, "grad_norm": 0.9950332785403146, "learning_rate": 8.189852054448104e-07, "loss": 0.0145, "step": 8781 }, { "epoch": 1.998179749715586, "grad_norm": 0.712340126659162, "learning_rate": 8.189002648612768e-07, "loss": 0.0136, "step": 8782 }, { "epoch": 1.9984072810011377, "grad_norm": 1.1073274746874324, "learning_rate": 8.188153203147769e-07, "loss": 0.0227, "step": 8783 }, { "epoch": 1.9986348122866895, "grad_norm": 1.964238477769453, "learning_rate": 8.18730371807047e-07, "loss": 0.041, "step": 8784 }, { "epoch": 1.9988623435722412, "grad_norm": 1.1408478266591537, "learning_rate": 8.18645419339823e-07, "loss": 0.0302, "step": 8785 }, { "epoch": 1.999089874857793, "grad_norm": 0.8521168973864249, "learning_rate": 8.185604629148413e-07, "loss": 0.0218, "step": 8786 }, { "epoch": 1.9993174061433447, "grad_norm": 1.1445024111773645, "learning_rate": 8.184755025338384e-07, "loss": 0.0376, "step": 8787 }, { "epoch": 1.9995449374288965, "grad_norm": 1.3001610144118216, "learning_rate": 8.183905381985503e-07, "loss": 0.0377, "step": 8788 }, { "epoch": 1.9997724687144482, "grad_norm": 1.2615958821120417, "learning_rate": 8.183055699107139e-07, "loss": 0.0319, "step": 8789 }, { "epoch": 2.0, "grad_norm": 0.6899444448695068, "learning_rate": 8.182205976720656e-07, "loss": 0.0119, "step": 8790 }, { "epoch": 2.0002275312855518, "grad_norm": 1.1709402263017634, "learning_rate": 8.181356214843422e-07, "loss": 0.0157, "step": 8791 }, { "epoch": 2.0004550625711035, "grad_norm": 0.5198887943317367, "learning_rate": 8.180506413492804e-07, "loss": 0.0071, "step": 8792 }, { "epoch": 2.0006825938566553, "grad_norm": 0.6737223248160417, "learning_rate": 8.17965657268617e-07, "loss": 0.0125, "step": 8793 }, { "epoch": 2.000910125142207, "grad_norm": 0.5483868719187942, "learning_rate": 8.178806692440891e-07, "loss": 0.0102, "step": 8794 }, { "epoch": 2.0011376564277588, "grad_norm": 0.5878907649994676, "learning_rate": 8.177956772774334e-07, "loss": 0.0081, "step": 8795 }, { "epoch": 2.0013651877133105, "grad_norm": 0.37933293187237677, "learning_rate": 8.177106813703872e-07, "loss": 0.0048, "step": 8796 }, { "epoch": 2.0015927189988623, "grad_norm": 0.4588421161999595, "learning_rate": 8.176256815246878e-07, "loss": 0.0047, "step": 8797 }, { "epoch": 2.001820250284414, "grad_norm": 0.30559275383648293, "learning_rate": 8.175406777420721e-07, "loss": 0.005, "step": 8798 }, { "epoch": 2.0020477815699658, "grad_norm": 0.5216872504697616, "learning_rate": 8.174556700242775e-07, "loss": 0.0116, "step": 8799 }, { "epoch": 2.0022753128555175, "grad_norm": 1.1268898419644335, "learning_rate": 8.173706583730414e-07, "loss": 0.0251, "step": 8800 }, { "epoch": 2.0025028441410693, "grad_norm": 0.46037136526859374, "learning_rate": 8.172856427901015e-07, "loss": 0.0116, "step": 8801 }, { "epoch": 2.002730375426621, "grad_norm": 0.724042564589398, "learning_rate": 8.17200623277195e-07, "loss": 0.0145, "step": 8802 }, { "epoch": 2.0029579067121728, "grad_norm": 0.5425702242840104, "learning_rate": 8.171155998360601e-07, "loss": 0.0145, "step": 8803 }, { "epoch": 2.0031854379977245, "grad_norm": 0.8078346383668281, "learning_rate": 8.170305724684341e-07, "loss": 0.019, "step": 8804 }, { "epoch": 2.0034129692832763, "grad_norm": 0.32320182253326146, "learning_rate": 8.169455411760547e-07, "loss": 0.0042, "step": 8805 }, { "epoch": 2.003640500568828, "grad_norm": 0.9468999792481988, "learning_rate": 8.168605059606601e-07, "loss": 0.027, "step": 8806 }, { "epoch": 2.00386803185438, "grad_norm": 0.504492378138807, "learning_rate": 8.167754668239883e-07, "loss": 0.0104, "step": 8807 }, { "epoch": 2.0040955631399315, "grad_norm": 0.5334252678232069, "learning_rate": 8.16690423767777e-07, "loss": 0.0168, "step": 8808 }, { "epoch": 2.0043230944254833, "grad_norm": 0.789594051061424, "learning_rate": 8.166053767937643e-07, "loss": 0.0209, "step": 8809 }, { "epoch": 2.0045506257110355, "grad_norm": 0.7868203944304248, "learning_rate": 8.165203259036888e-07, "loss": 0.0123, "step": 8810 }, { "epoch": 2.0047781569965872, "grad_norm": 0.93302247140185, "learning_rate": 8.164352710992887e-07, "loss": 0.0218, "step": 8811 }, { "epoch": 2.005005688282139, "grad_norm": 0.37733358623075536, "learning_rate": 8.163502123823021e-07, "loss": 0.0042, "step": 8812 }, { "epoch": 2.0052332195676907, "grad_norm": 0.9133370079801494, "learning_rate": 8.162651497544677e-07, "loss": 0.0132, "step": 8813 }, { "epoch": 2.0054607508532425, "grad_norm": 0.5344697172467202, "learning_rate": 8.161800832175239e-07, "loss": 0.0109, "step": 8814 }, { "epoch": 2.0056882821387942, "grad_norm": 0.686643323340727, "learning_rate": 8.160950127732093e-07, "loss": 0.0118, "step": 8815 }, { "epoch": 2.005915813424346, "grad_norm": 0.45484660984425673, "learning_rate": 8.160099384232625e-07, "loss": 0.0127, "step": 8816 }, { "epoch": 2.0061433447098977, "grad_norm": 0.4897150816329801, "learning_rate": 8.159248601694226e-07, "loss": 0.0083, "step": 8817 }, { "epoch": 2.0063708759954495, "grad_norm": 0.7942946127916877, "learning_rate": 8.158397780134281e-07, "loss": 0.0102, "step": 8818 }, { "epoch": 2.0065984072810013, "grad_norm": 0.9250277086108294, "learning_rate": 8.157546919570181e-07, "loss": 0.0085, "step": 8819 }, { "epoch": 2.006825938566553, "grad_norm": 1.8778433152181757, "learning_rate": 8.156696020019314e-07, "loss": 0.0244, "step": 8820 }, { "epoch": 2.0070534698521048, "grad_norm": 1.1993933794239524, "learning_rate": 8.155845081499074e-07, "loss": 0.0247, "step": 8821 }, { "epoch": 2.0072810011376565, "grad_norm": 0.8085508317141936, "learning_rate": 8.154994104026849e-07, "loss": 0.0076, "step": 8822 }, { "epoch": 2.0075085324232083, "grad_norm": 0.8970209300212904, "learning_rate": 8.154143087620035e-07, "loss": 0.0121, "step": 8823 }, { "epoch": 2.00773606370876, "grad_norm": 0.3197563006884184, "learning_rate": 8.153292032296025e-07, "loss": 0.0036, "step": 8824 }, { "epoch": 2.0079635949943118, "grad_norm": 0.9785745980402671, "learning_rate": 8.152440938072208e-07, "loss": 0.0297, "step": 8825 }, { "epoch": 2.0081911262798635, "grad_norm": 0.5102544551579844, "learning_rate": 8.151589804965984e-07, "loss": 0.0028, "step": 8826 }, { "epoch": 2.0084186575654153, "grad_norm": 0.7165375825214081, "learning_rate": 8.150738632994748e-07, "loss": 0.0091, "step": 8827 }, { "epoch": 2.008646188850967, "grad_norm": 1.7463034408814433, "learning_rate": 8.149887422175895e-07, "loss": 0.019, "step": 8828 }, { "epoch": 2.0088737201365188, "grad_norm": 0.5830956601446018, "learning_rate": 8.149036172526821e-07, "loss": 0.0134, "step": 8829 }, { "epoch": 2.0091012514220705, "grad_norm": 0.4365691139998193, "learning_rate": 8.148184884064928e-07, "loss": 0.0058, "step": 8830 }, { "epoch": 2.0093287827076223, "grad_norm": 0.4129790478139653, "learning_rate": 8.14733355680761e-07, "loss": 0.0063, "step": 8831 }, { "epoch": 2.009556313993174, "grad_norm": 0.9807885758736534, "learning_rate": 8.146482190772271e-07, "loss": 0.0191, "step": 8832 }, { "epoch": 2.0097838452787258, "grad_norm": 0.6550137634160845, "learning_rate": 8.145630785976307e-07, "loss": 0.0109, "step": 8833 }, { "epoch": 2.0100113765642775, "grad_norm": 0.1815930144575576, "learning_rate": 8.144779342437123e-07, "loss": 0.0014, "step": 8834 }, { "epoch": 2.0102389078498293, "grad_norm": 0.30779399114598865, "learning_rate": 8.143927860172118e-07, "loss": 0.0032, "step": 8835 }, { "epoch": 2.010466439135381, "grad_norm": 1.3781483552111702, "learning_rate": 8.143076339198698e-07, "loss": 0.0154, "step": 8836 }, { "epoch": 2.010693970420933, "grad_norm": 0.5163766547449539, "learning_rate": 8.142224779534263e-07, "loss": 0.0046, "step": 8837 }, { "epoch": 2.0109215017064845, "grad_norm": 0.14466268063999152, "learning_rate": 8.14137318119622e-07, "loss": 0.0012, "step": 8838 }, { "epoch": 2.0111490329920363, "grad_norm": 0.5235772934791572, "learning_rate": 8.140521544201972e-07, "loss": 0.0076, "step": 8839 }, { "epoch": 2.011376564277588, "grad_norm": 0.5237206847198979, "learning_rate": 8.139669868568927e-07, "loss": 0.0079, "step": 8840 }, { "epoch": 2.01160409556314, "grad_norm": 0.6991686108225662, "learning_rate": 8.13881815431449e-07, "loss": 0.0066, "step": 8841 }, { "epoch": 2.0118316268486915, "grad_norm": 0.6117733013500027, "learning_rate": 8.13796640145607e-07, "loss": 0.017, "step": 8842 }, { "epoch": 2.0120591581342433, "grad_norm": 0.6344895088061999, "learning_rate": 8.137114610011074e-07, "loss": 0.0093, "step": 8843 }, { "epoch": 2.012286689419795, "grad_norm": 0.3577293634720585, "learning_rate": 8.136262779996912e-07, "loss": 0.0041, "step": 8844 }, { "epoch": 2.012514220705347, "grad_norm": 1.2364309637360462, "learning_rate": 8.135410911430992e-07, "loss": 0.0251, "step": 8845 }, { "epoch": 2.0127417519908986, "grad_norm": 1.2064652484875504, "learning_rate": 8.134559004330725e-07, "loss": 0.0126, "step": 8846 }, { "epoch": 2.0129692832764503, "grad_norm": 1.055012807579558, "learning_rate": 8.133707058713525e-07, "loss": 0.006, "step": 8847 }, { "epoch": 2.013196814562002, "grad_norm": 0.44452896500004035, "learning_rate": 8.132855074596803e-07, "loss": 0.0036, "step": 8848 }, { "epoch": 2.0134243458475543, "grad_norm": 0.5888926114458435, "learning_rate": 8.132003051997972e-07, "loss": 0.0096, "step": 8849 }, { "epoch": 2.013651877133106, "grad_norm": 0.8079902367360255, "learning_rate": 8.131150990934445e-07, "loss": 0.0249, "step": 8850 }, { "epoch": 2.0138794084186578, "grad_norm": 0.8703704206567516, "learning_rate": 8.130298891423636e-07, "loss": 0.0223, "step": 8851 }, { "epoch": 2.0141069397042095, "grad_norm": 0.5693115424305861, "learning_rate": 8.12944675348296e-07, "loss": 0.0081, "step": 8852 }, { "epoch": 2.0143344709897613, "grad_norm": 0.8080455402833692, "learning_rate": 8.128594577129836e-07, "loss": 0.0121, "step": 8853 }, { "epoch": 2.014562002275313, "grad_norm": 0.5479120256962474, "learning_rate": 8.12774236238168e-07, "loss": 0.0088, "step": 8854 }, { "epoch": 2.0147895335608648, "grad_norm": 0.5649292330396924, "learning_rate": 8.126890109255908e-07, "loss": 0.0099, "step": 8855 }, { "epoch": 2.0150170648464165, "grad_norm": 0.38594711490407335, "learning_rate": 8.126037817769939e-07, "loss": 0.0084, "step": 8856 }, { "epoch": 2.0152445961319683, "grad_norm": 0.7510993902450168, "learning_rate": 8.125185487941195e-07, "loss": 0.0152, "step": 8857 }, { "epoch": 2.01547212741752, "grad_norm": 0.5835031624844149, "learning_rate": 8.124333119787093e-07, "loss": 0.0064, "step": 8858 }, { "epoch": 2.0156996587030718, "grad_norm": 0.5597660181617066, "learning_rate": 8.123480713325053e-07, "loss": 0.0087, "step": 8859 }, { "epoch": 2.0159271899886235, "grad_norm": 0.5247004899887854, "learning_rate": 8.122628268572499e-07, "loss": 0.0128, "step": 8860 }, { "epoch": 2.0161547212741753, "grad_norm": 0.566052373412455, "learning_rate": 8.121775785546855e-07, "loss": 0.0058, "step": 8861 }, { "epoch": 2.016382252559727, "grad_norm": 1.0307604808475657, "learning_rate": 8.120923264265539e-07, "loss": 0.0131, "step": 8862 }, { "epoch": 2.016609783845279, "grad_norm": 1.1342668705919563, "learning_rate": 8.120070704745979e-07, "loss": 0.012, "step": 8863 }, { "epoch": 2.0168373151308305, "grad_norm": 0.7561026503729099, "learning_rate": 8.1192181070056e-07, "loss": 0.009, "step": 8864 }, { "epoch": 2.0170648464163823, "grad_norm": 0.6203228474509735, "learning_rate": 8.118365471061825e-07, "loss": 0.008, "step": 8865 }, { "epoch": 2.017292377701934, "grad_norm": 0.22624176637311427, "learning_rate": 8.117512796932079e-07, "loss": 0.0016, "step": 8866 }, { "epoch": 2.017519908987486, "grad_norm": 0.5131236339857828, "learning_rate": 8.116660084633796e-07, "loss": 0.0039, "step": 8867 }, { "epoch": 2.0177474402730375, "grad_norm": 1.3348847929301328, "learning_rate": 8.115807334184398e-07, "loss": 0.0149, "step": 8868 }, { "epoch": 2.0179749715585893, "grad_norm": 1.1999017764200335, "learning_rate": 8.114954545601314e-07, "loss": 0.0109, "step": 8869 }, { "epoch": 2.018202502844141, "grad_norm": 0.5178327281568412, "learning_rate": 8.114101718901976e-07, "loss": 0.0032, "step": 8870 }, { "epoch": 2.018430034129693, "grad_norm": 1.1828292639159577, "learning_rate": 8.113248854103811e-07, "loss": 0.0174, "step": 8871 }, { "epoch": 2.0186575654152445, "grad_norm": 0.5401939506471521, "learning_rate": 8.112395951224254e-07, "loss": 0.007, "step": 8872 }, { "epoch": 2.0188850967007963, "grad_norm": 0.9142563228439178, "learning_rate": 8.111543010280733e-07, "loss": 0.0097, "step": 8873 }, { "epoch": 2.019112627986348, "grad_norm": 0.8487407604632405, "learning_rate": 8.110690031290683e-07, "loss": 0.0083, "step": 8874 }, { "epoch": 2.0193401592719, "grad_norm": 0.4147069374933176, "learning_rate": 8.109837014271536e-07, "loss": 0.0048, "step": 8875 }, { "epoch": 2.0195676905574516, "grad_norm": 0.9484556445049667, "learning_rate": 8.108983959240726e-07, "loss": 0.0157, "step": 8876 }, { "epoch": 2.0197952218430033, "grad_norm": 0.9879486396757432, "learning_rate": 8.108130866215689e-07, "loss": 0.0153, "step": 8877 }, { "epoch": 2.020022753128555, "grad_norm": 1.266706637316005, "learning_rate": 8.107277735213861e-07, "loss": 0.0111, "step": 8878 }, { "epoch": 2.020250284414107, "grad_norm": 1.152593355345514, "learning_rate": 8.106424566252675e-07, "loss": 0.0121, "step": 8879 }, { "epoch": 2.0204778156996586, "grad_norm": 31.972085462973816, "learning_rate": 8.105571359349575e-07, "loss": 0.1801, "step": 8880 }, { "epoch": 2.0207053469852103, "grad_norm": 0.4667549780604784, "learning_rate": 8.104718114521993e-07, "loss": 0.0066, "step": 8881 }, { "epoch": 2.020932878270762, "grad_norm": 0.45424657076751523, "learning_rate": 8.103864831787367e-07, "loss": 0.0072, "step": 8882 }, { "epoch": 2.021160409556314, "grad_norm": 1.0788646098304586, "learning_rate": 8.103011511163141e-07, "loss": 0.015, "step": 8883 }, { "epoch": 2.0213879408418656, "grad_norm": 0.8152685446198583, "learning_rate": 8.102158152666753e-07, "loss": 0.0178, "step": 8884 }, { "epoch": 2.0216154721274173, "grad_norm": 1.0690660609265563, "learning_rate": 8.101304756315645e-07, "loss": 0.0103, "step": 8885 }, { "epoch": 2.021843003412969, "grad_norm": 1.21599202432248, "learning_rate": 8.100451322127257e-07, "loss": 0.0176, "step": 8886 }, { "epoch": 2.022070534698521, "grad_norm": 1.285998257222507, "learning_rate": 8.099597850119035e-07, "loss": 0.0174, "step": 8887 }, { "epoch": 2.022298065984073, "grad_norm": 0.2775345886721884, "learning_rate": 8.098744340308419e-07, "loss": 0.0095, "step": 8888 }, { "epoch": 2.0225255972696248, "grad_norm": 1.1182380948561206, "learning_rate": 8.097890792712853e-07, "loss": 0.0151, "step": 8889 }, { "epoch": 2.0227531285551765, "grad_norm": 0.9201126822233208, "learning_rate": 8.097037207349785e-07, "loss": 0.0082, "step": 8890 }, { "epoch": 2.0229806598407283, "grad_norm": 0.5416948586082189, "learning_rate": 8.096183584236659e-07, "loss": 0.0057, "step": 8891 }, { "epoch": 2.02320819112628, "grad_norm": 0.855302718014359, "learning_rate": 8.095329923390924e-07, "loss": 0.0094, "step": 8892 }, { "epoch": 2.023435722411832, "grad_norm": 0.38112583835583014, "learning_rate": 8.094476224830022e-07, "loss": 0.0042, "step": 8893 }, { "epoch": 2.0236632536973835, "grad_norm": 0.5106426753539933, "learning_rate": 8.093622488571405e-07, "loss": 0.008, "step": 8894 }, { "epoch": 2.0238907849829353, "grad_norm": 0.7454048076191544, "learning_rate": 8.09276871463252e-07, "loss": 0.0146, "step": 8895 }, { "epoch": 2.024118316268487, "grad_norm": 0.6556405334233815, "learning_rate": 8.091914903030818e-07, "loss": 0.0026, "step": 8896 }, { "epoch": 2.024345847554039, "grad_norm": 1.1409480911971517, "learning_rate": 8.091061053783748e-07, "loss": 0.0171, "step": 8897 }, { "epoch": 2.0245733788395905, "grad_norm": 1.2140550836825563, "learning_rate": 8.090207166908763e-07, "loss": 0.0197, "step": 8898 }, { "epoch": 2.0248009101251423, "grad_norm": 0.6006794941935975, "learning_rate": 8.089353242423313e-07, "loss": 0.0069, "step": 8899 }, { "epoch": 2.025028441410694, "grad_norm": 0.42400098109544904, "learning_rate": 8.088499280344851e-07, "loss": 0.0042, "step": 8900 }, { "epoch": 2.025255972696246, "grad_norm": 1.486151136146108, "learning_rate": 8.087645280690831e-07, "loss": 0.0134, "step": 8901 }, { "epoch": 2.0254835039817976, "grad_norm": 0.4257547909991054, "learning_rate": 8.086791243478709e-07, "loss": 0.0062, "step": 8902 }, { "epoch": 2.0257110352673493, "grad_norm": 1.6558564991678728, "learning_rate": 8.085937168725934e-07, "loss": 0.0347, "step": 8903 }, { "epoch": 2.025938566552901, "grad_norm": 0.47041782103413543, "learning_rate": 8.085083056449968e-07, "loss": 0.007, "step": 8904 }, { "epoch": 2.026166097838453, "grad_norm": 0.6524053506495247, "learning_rate": 8.084228906668267e-07, "loss": 0.0108, "step": 8905 }, { "epoch": 2.0263936291240046, "grad_norm": 0.9388216844035533, "learning_rate": 8.083374719398282e-07, "loss": 0.0154, "step": 8906 }, { "epoch": 2.0266211604095563, "grad_norm": 0.5932063789932374, "learning_rate": 8.082520494657478e-07, "loss": 0.0131, "step": 8907 }, { "epoch": 2.026848691695108, "grad_norm": 1.7723010612424621, "learning_rate": 8.08166623246331e-07, "loss": 0.0551, "step": 8908 }, { "epoch": 2.02707622298066, "grad_norm": 1.112698680144584, "learning_rate": 8.08081193283324e-07, "loss": 0.0219, "step": 8909 }, { "epoch": 2.0273037542662116, "grad_norm": 1.072970555370149, "learning_rate": 8.079957595784727e-07, "loss": 0.0103, "step": 8910 }, { "epoch": 2.0275312855517633, "grad_norm": 0.5308079651026945, "learning_rate": 8.07910322133523e-07, "loss": 0.0108, "step": 8911 }, { "epoch": 2.027758816837315, "grad_norm": 0.5035444843591139, "learning_rate": 8.078248809502215e-07, "loss": 0.0063, "step": 8912 }, { "epoch": 2.027986348122867, "grad_norm": 1.6508397127082461, "learning_rate": 8.077394360303143e-07, "loss": 0.0158, "step": 8913 }, { "epoch": 2.0282138794084186, "grad_norm": 0.47561982426348653, "learning_rate": 8.076539873755476e-07, "loss": 0.0104, "step": 8914 }, { "epoch": 2.0284414106939703, "grad_norm": 0.6109694524570578, "learning_rate": 8.07568534987668e-07, "loss": 0.0055, "step": 8915 }, { "epoch": 2.028668941979522, "grad_norm": 1.0892654029210274, "learning_rate": 8.074830788684218e-07, "loss": 0.0189, "step": 8916 }, { "epoch": 2.028896473265074, "grad_norm": 1.2435519587854957, "learning_rate": 8.073976190195557e-07, "loss": 0.0252, "step": 8917 }, { "epoch": 2.0291240045506256, "grad_norm": 1.181853187713523, "learning_rate": 8.073121554428165e-07, "loss": 0.017, "step": 8918 }, { "epoch": 2.0293515358361773, "grad_norm": 0.2538018454491145, "learning_rate": 8.072266881399504e-07, "loss": 0.0021, "step": 8919 }, { "epoch": 2.029579067121729, "grad_norm": 1.1916511430599437, "learning_rate": 8.071412171127047e-07, "loss": 0.0184, "step": 8920 }, { "epoch": 2.029806598407281, "grad_norm": 1.1753077172107904, "learning_rate": 8.070557423628262e-07, "loss": 0.0122, "step": 8921 }, { "epoch": 2.0300341296928326, "grad_norm": 0.33606691616182494, "learning_rate": 8.069702638920615e-07, "loss": 0.003, "step": 8922 }, { "epoch": 2.0302616609783843, "grad_norm": 1.248770958358262, "learning_rate": 8.06884781702158e-07, "loss": 0.0054, "step": 8923 }, { "epoch": 2.030489192263936, "grad_norm": 0.6781973886658158, "learning_rate": 8.067992957948628e-07, "loss": 0.0124, "step": 8924 }, { "epoch": 2.030716723549488, "grad_norm": 0.3502584562776884, "learning_rate": 8.067138061719227e-07, "loss": 0.0037, "step": 8925 }, { "epoch": 2.03094425483504, "grad_norm": 0.718987233926625, "learning_rate": 8.066283128350854e-07, "loss": 0.008, "step": 8926 }, { "epoch": 2.031171786120592, "grad_norm": 0.8505287305354738, "learning_rate": 8.065428157860978e-07, "loss": 0.0085, "step": 8927 }, { "epoch": 2.0313993174061435, "grad_norm": 0.4810527989255863, "learning_rate": 8.064573150267077e-07, "loss": 0.0048, "step": 8928 }, { "epoch": 2.0316268486916953, "grad_norm": 1.5732589603701084, "learning_rate": 8.063718105586624e-07, "loss": 0.02, "step": 8929 }, { "epoch": 2.031854379977247, "grad_norm": 1.3526673760378833, "learning_rate": 8.062863023837093e-07, "loss": 0.0262, "step": 8930 }, { "epoch": 2.032081911262799, "grad_norm": 0.9043005415960484, "learning_rate": 8.062007905035965e-07, "loss": 0.0043, "step": 8931 }, { "epoch": 2.0323094425483506, "grad_norm": 0.9672668132294734, "learning_rate": 8.061152749200713e-07, "loss": 0.0122, "step": 8932 }, { "epoch": 2.0325369738339023, "grad_norm": 1.4872897063402382, "learning_rate": 8.060297556348812e-07, "loss": 0.0224, "step": 8933 }, { "epoch": 2.032764505119454, "grad_norm": 1.2655166086852356, "learning_rate": 8.059442326497748e-07, "loss": 0.008, "step": 8934 }, { "epoch": 2.032992036405006, "grad_norm": 0.3450765270112137, "learning_rate": 8.058587059664996e-07, "loss": 0.0021, "step": 8935 }, { "epoch": 2.0332195676905576, "grad_norm": 0.8752950335447572, "learning_rate": 8.057731755868036e-07, "loss": 0.0236, "step": 8936 }, { "epoch": 2.0334470989761093, "grad_norm": 0.3176354327883559, "learning_rate": 8.056876415124352e-07, "loss": 0.0037, "step": 8937 }, { "epoch": 2.033674630261661, "grad_norm": 0.5461737826476964, "learning_rate": 8.056021037451422e-07, "loss": 0.0067, "step": 8938 }, { "epoch": 2.033902161547213, "grad_norm": 0.17133198077613082, "learning_rate": 8.055165622866726e-07, "loss": 0.0014, "step": 8939 }, { "epoch": 2.0341296928327646, "grad_norm": 0.6883875443062736, "learning_rate": 8.054310171387756e-07, "loss": 0.0081, "step": 8940 }, { "epoch": 2.0343572241183163, "grad_norm": 0.9648755007275786, "learning_rate": 8.053454683031987e-07, "loss": 0.0071, "step": 8941 }, { "epoch": 2.034584755403868, "grad_norm": 0.8409550490400549, "learning_rate": 8.052599157816908e-07, "loss": 0.0101, "step": 8942 }, { "epoch": 2.03481228668942, "grad_norm": 0.7630549826940595, "learning_rate": 8.051743595760005e-07, "loss": 0.0102, "step": 8943 }, { "epoch": 2.0350398179749716, "grad_norm": 2.6379153822624373, "learning_rate": 8.050887996878761e-07, "loss": 0.0134, "step": 8944 }, { "epoch": 2.0352673492605233, "grad_norm": 0.8411361959733792, "learning_rate": 8.050032361190666e-07, "loss": 0.0132, "step": 8945 }, { "epoch": 2.035494880546075, "grad_norm": 0.4110485826017932, "learning_rate": 8.049176688713203e-07, "loss": 0.004, "step": 8946 }, { "epoch": 2.035722411831627, "grad_norm": 2.8164652778212638, "learning_rate": 8.048320979463867e-07, "loss": 0.0268, "step": 8947 }, { "epoch": 2.0359499431171786, "grad_norm": 1.1851892458441966, "learning_rate": 8.047465233460141e-07, "loss": 0.0285, "step": 8948 }, { "epoch": 2.0361774744027303, "grad_norm": 0.7574982419765178, "learning_rate": 8.04660945071952e-07, "loss": 0.0069, "step": 8949 }, { "epoch": 2.036405005688282, "grad_norm": 0.44412736517823015, "learning_rate": 8.045753631259491e-07, "loss": 0.004, "step": 8950 }, { "epoch": 2.036632536973834, "grad_norm": 1.117920730656277, "learning_rate": 8.044897775097548e-07, "loss": 0.0075, "step": 8951 }, { "epoch": 2.0368600682593856, "grad_norm": 0.3887980095836281, "learning_rate": 8.04404188225118e-07, "loss": 0.0022, "step": 8952 }, { "epoch": 2.0370875995449373, "grad_norm": 0.591381612688169, "learning_rate": 8.043185952737881e-07, "loss": 0.0089, "step": 8953 }, { "epoch": 2.037315130830489, "grad_norm": 1.1988230813792133, "learning_rate": 8.042329986575145e-07, "loss": 0.0046, "step": 8954 }, { "epoch": 2.037542662116041, "grad_norm": 0.8524240083915456, "learning_rate": 8.041473983780467e-07, "loss": 0.011, "step": 8955 }, { "epoch": 2.0377701934015926, "grad_norm": 0.45742066554856414, "learning_rate": 8.040617944371343e-07, "loss": 0.0068, "step": 8956 }, { "epoch": 2.0379977246871444, "grad_norm": 0.7032718802225679, "learning_rate": 8.039761868365267e-07, "loss": 0.0067, "step": 8957 }, { "epoch": 2.038225255972696, "grad_norm": 0.46332231714977534, "learning_rate": 8.038905755779737e-07, "loss": 0.0028, "step": 8958 }, { "epoch": 2.038452787258248, "grad_norm": 0.6953451418720955, "learning_rate": 8.038049606632248e-07, "loss": 0.0063, "step": 8959 }, { "epoch": 2.0386803185437996, "grad_norm": 1.5095349839763461, "learning_rate": 8.0371934209403e-07, "loss": 0.0062, "step": 8960 }, { "epoch": 2.0389078498293514, "grad_norm": 0.38559837742698944, "learning_rate": 8.036337198721392e-07, "loss": 0.0047, "step": 8961 }, { "epoch": 2.039135381114903, "grad_norm": 0.7901667788607518, "learning_rate": 8.035480939993025e-07, "loss": 0.0089, "step": 8962 }, { "epoch": 2.039362912400455, "grad_norm": 0.35187461070550274, "learning_rate": 8.034624644772694e-07, "loss": 0.0039, "step": 8963 }, { "epoch": 2.0395904436860066, "grad_norm": 0.4561879491037348, "learning_rate": 8.033768313077905e-07, "loss": 0.0029, "step": 8964 }, { "epoch": 2.039817974971559, "grad_norm": 0.9242655687617418, "learning_rate": 8.03291194492616e-07, "loss": 0.0125, "step": 8965 }, { "epoch": 2.0400455062571106, "grad_norm": 0.5578814606863747, "learning_rate": 8.032055540334958e-07, "loss": 0.0122, "step": 8966 }, { "epoch": 2.0402730375426623, "grad_norm": 0.3959293258283401, "learning_rate": 8.031199099321804e-07, "loss": 0.0044, "step": 8967 }, { "epoch": 2.040500568828214, "grad_norm": 0.7511332134836817, "learning_rate": 8.030342621904205e-07, "loss": 0.0104, "step": 8968 }, { "epoch": 2.040728100113766, "grad_norm": 0.7723017158992114, "learning_rate": 8.029486108099663e-07, "loss": 0.0092, "step": 8969 }, { "epoch": 2.0409556313993176, "grad_norm": 0.41211418590091653, "learning_rate": 8.028629557925683e-07, "loss": 0.0034, "step": 8970 }, { "epoch": 2.0411831626848693, "grad_norm": 1.0666589983958474, "learning_rate": 8.027772971399773e-07, "loss": 0.0172, "step": 8971 }, { "epoch": 2.041410693970421, "grad_norm": 0.3495874852861486, "learning_rate": 8.026916348539438e-07, "loss": 0.0027, "step": 8972 }, { "epoch": 2.041638225255973, "grad_norm": 0.7646452546610977, "learning_rate": 8.026059689362186e-07, "loss": 0.0074, "step": 8973 }, { "epoch": 2.0418657565415246, "grad_norm": 0.5174281054023835, "learning_rate": 8.025202993885528e-07, "loss": 0.0094, "step": 8974 }, { "epoch": 2.0420932878270763, "grad_norm": 0.3397256143456613, "learning_rate": 8.024346262126976e-07, "loss": 0.0033, "step": 8975 }, { "epoch": 2.042320819112628, "grad_norm": 0.7890970235106726, "learning_rate": 8.02348949410403e-07, "loss": 0.0107, "step": 8976 }, { "epoch": 2.04254835039818, "grad_norm": 0.5797109965203312, "learning_rate": 8.022632689834209e-07, "loss": 0.0085, "step": 8977 }, { "epoch": 2.0427758816837316, "grad_norm": 0.5763357319854734, "learning_rate": 8.021775849335023e-07, "loss": 0.0071, "step": 8978 }, { "epoch": 2.0430034129692833, "grad_norm": 0.3898751609908676, "learning_rate": 8.020918972623983e-07, "loss": 0.003, "step": 8979 }, { "epoch": 2.043230944254835, "grad_norm": 0.6442895083097052, "learning_rate": 8.0200620597186e-07, "loss": 0.0052, "step": 8980 }, { "epoch": 2.043458475540387, "grad_norm": 0.4535279724862919, "learning_rate": 8.019205110636394e-07, "loss": 0.0037, "step": 8981 }, { "epoch": 2.0436860068259386, "grad_norm": 0.928510455854613, "learning_rate": 8.018348125394874e-07, "loss": 0.018, "step": 8982 }, { "epoch": 2.0439135381114903, "grad_norm": 0.7955962332228361, "learning_rate": 8.017491104011557e-07, "loss": 0.0051, "step": 8983 }, { "epoch": 2.044141069397042, "grad_norm": 1.0844193492460144, "learning_rate": 8.016634046503958e-07, "loss": 0.0077, "step": 8984 }, { "epoch": 2.044368600682594, "grad_norm": 0.48679526588275923, "learning_rate": 8.015776952889596e-07, "loss": 0.0062, "step": 8985 }, { "epoch": 2.0445961319681456, "grad_norm": 0.8517562832191604, "learning_rate": 8.014919823185987e-07, "loss": 0.0082, "step": 8986 }, { "epoch": 2.0448236632536974, "grad_norm": 0.3585852442137168, "learning_rate": 8.014062657410648e-07, "loss": 0.0027, "step": 8987 }, { "epoch": 2.045051194539249, "grad_norm": 0.9666705604775362, "learning_rate": 8.0132054555811e-07, "loss": 0.008, "step": 8988 }, { "epoch": 2.045278725824801, "grad_norm": 0.2820816822059884, "learning_rate": 8.012348217714861e-07, "loss": 0.0016, "step": 8989 }, { "epoch": 2.0455062571103526, "grad_norm": 0.6974292898679078, "learning_rate": 8.011490943829451e-07, "loss": 0.0103, "step": 8990 }, { "epoch": 2.0457337883959044, "grad_norm": 1.480846150230487, "learning_rate": 8.010633633942394e-07, "loss": 0.0239, "step": 8991 }, { "epoch": 2.045961319681456, "grad_norm": 0.8994891189070041, "learning_rate": 8.00977628807121e-07, "loss": 0.0125, "step": 8992 }, { "epoch": 2.046188850967008, "grad_norm": 0.36008186965231853, "learning_rate": 8.008918906233421e-07, "loss": 0.0025, "step": 8993 }, { "epoch": 2.0464163822525596, "grad_norm": 0.8791914697678171, "learning_rate": 8.00806148844655e-07, "loss": 0.0069, "step": 8994 }, { "epoch": 2.0466439135381114, "grad_norm": 3.43520789541217, "learning_rate": 8.007204034728123e-07, "loss": 0.0131, "step": 8995 }, { "epoch": 2.046871444823663, "grad_norm": 0.292058078028329, "learning_rate": 8.006346545095664e-07, "loss": 0.0023, "step": 8996 }, { "epoch": 2.047098976109215, "grad_norm": 0.34017235538854296, "learning_rate": 8.005489019566697e-07, "loss": 0.0024, "step": 8997 }, { "epoch": 2.0473265073947666, "grad_norm": 0.4926797661406449, "learning_rate": 8.004631458158749e-07, "loss": 0.002, "step": 8998 }, { "epoch": 2.0475540386803184, "grad_norm": 0.3789956350727742, "learning_rate": 8.00377386088935e-07, "loss": 0.0026, "step": 8999 }, { "epoch": 2.04778156996587, "grad_norm": 1.730755383544156, "learning_rate": 8.002916227776023e-07, "loss": 0.0166, "step": 9000 }, { "epoch": 2.048009101251422, "grad_norm": 0.6316753185358109, "learning_rate": 8.002058558836298e-07, "loss": 0.0051, "step": 9001 }, { "epoch": 2.0482366325369736, "grad_norm": 1.9860353721035442, "learning_rate": 8.001200854087707e-07, "loss": 0.0234, "step": 9002 }, { "epoch": 2.0484641638225254, "grad_norm": 0.81057068106513, "learning_rate": 8.000343113547777e-07, "loss": 0.0103, "step": 9003 }, { "epoch": 2.0486916951080776, "grad_norm": 0.7193106660313205, "learning_rate": 7.999485337234038e-07, "loss": 0.016, "step": 9004 }, { "epoch": 2.0489192263936293, "grad_norm": 0.5644722293497418, "learning_rate": 7.998627525164024e-07, "loss": 0.005, "step": 9005 }, { "epoch": 2.049146757679181, "grad_norm": 1.0815118124610577, "learning_rate": 7.997769677355266e-07, "loss": 0.013, "step": 9006 }, { "epoch": 2.049374288964733, "grad_norm": 0.8002532530964719, "learning_rate": 7.996911793825296e-07, "loss": 0.0101, "step": 9007 }, { "epoch": 2.0496018202502846, "grad_norm": 0.12992878138807132, "learning_rate": 7.996053874591649e-07, "loss": 0.0009, "step": 9008 }, { "epoch": 2.0498293515358363, "grad_norm": 0.911223912401618, "learning_rate": 7.995195919671858e-07, "loss": 0.0116, "step": 9009 }, { "epoch": 2.050056882821388, "grad_norm": 1.0486722371360964, "learning_rate": 7.994337929083458e-07, "loss": 0.0086, "step": 9010 }, { "epoch": 2.05028441410694, "grad_norm": 1.0615929119317695, "learning_rate": 7.993479902843987e-07, "loss": 0.0172, "step": 9011 }, { "epoch": 2.0505119453924916, "grad_norm": 0.5975358571869722, "learning_rate": 7.99262184097098e-07, "loss": 0.0034, "step": 9012 }, { "epoch": 2.0507394766780433, "grad_norm": 1.64169570548523, "learning_rate": 7.991763743481971e-07, "loss": 0.0087, "step": 9013 }, { "epoch": 2.050967007963595, "grad_norm": 1.6125987888060733, "learning_rate": 7.990905610394503e-07, "loss": 0.0134, "step": 9014 }, { "epoch": 2.051194539249147, "grad_norm": 1.2438605249431998, "learning_rate": 7.990047441726114e-07, "loss": 0.0185, "step": 9015 }, { "epoch": 2.0514220705346986, "grad_norm": 1.3056193907972449, "learning_rate": 7.989189237494339e-07, "loss": 0.0207, "step": 9016 }, { "epoch": 2.0516496018202504, "grad_norm": 0.30070481424774803, "learning_rate": 7.988330997716723e-07, "loss": 0.0028, "step": 9017 }, { "epoch": 2.051877133105802, "grad_norm": 1.960063236002895, "learning_rate": 7.987472722410805e-07, "loss": 0.0159, "step": 9018 }, { "epoch": 2.052104664391354, "grad_norm": 1.9691086699401108, "learning_rate": 7.986614411594126e-07, "loss": 0.0143, "step": 9019 }, { "epoch": 2.0523321956769056, "grad_norm": 0.5811928503525017, "learning_rate": 7.98575606528423e-07, "loss": 0.0044, "step": 9020 }, { "epoch": 2.0525597269624574, "grad_norm": 0.30333396807128366, "learning_rate": 7.984897683498658e-07, "loss": 0.0089, "step": 9021 }, { "epoch": 2.052787258248009, "grad_norm": 2.612841603050968, "learning_rate": 7.984039266254955e-07, "loss": 0.0128, "step": 9022 }, { "epoch": 2.053014789533561, "grad_norm": 0.8940541192462828, "learning_rate": 7.983180813570665e-07, "loss": 0.009, "step": 9023 }, { "epoch": 2.0532423208191126, "grad_norm": 0.3836424193297725, "learning_rate": 7.982322325463331e-07, "loss": 0.0032, "step": 9024 }, { "epoch": 2.0534698521046644, "grad_norm": 1.1020748181251936, "learning_rate": 7.981463801950507e-07, "loss": 0.0162, "step": 9025 }, { "epoch": 2.053697383390216, "grad_norm": 0.3686580621761448, "learning_rate": 7.98060524304973e-07, "loss": 0.0023, "step": 9026 }, { "epoch": 2.053924914675768, "grad_norm": 0.463039486142923, "learning_rate": 7.97974664877855e-07, "loss": 0.0065, "step": 9027 }, { "epoch": 2.0541524459613196, "grad_norm": 0.9088558526550047, "learning_rate": 7.978888019154518e-07, "loss": 0.0104, "step": 9028 }, { "epoch": 2.0543799772468714, "grad_norm": 1.4091409433907256, "learning_rate": 7.978029354195182e-07, "loss": 0.0061, "step": 9029 }, { "epoch": 2.054607508532423, "grad_norm": 1.219361089372488, "learning_rate": 7.977170653918088e-07, "loss": 0.0218, "step": 9030 }, { "epoch": 2.054835039817975, "grad_norm": 1.909174154160353, "learning_rate": 7.976311918340792e-07, "loss": 0.0156, "step": 9031 }, { "epoch": 2.0550625711035266, "grad_norm": 1.1829264688338827, "learning_rate": 7.975453147480843e-07, "loss": 0.0219, "step": 9032 }, { "epoch": 2.0552901023890784, "grad_norm": 0.4502657256145903, "learning_rate": 7.974594341355787e-07, "loss": 0.0018, "step": 9033 }, { "epoch": 2.05551763367463, "grad_norm": 0.7235083335627304, "learning_rate": 7.973735499983185e-07, "loss": 0.0063, "step": 9034 }, { "epoch": 2.055745164960182, "grad_norm": 0.9685176320074955, "learning_rate": 7.972876623380585e-07, "loss": 0.0099, "step": 9035 }, { "epoch": 2.0559726962457336, "grad_norm": 1.3658844400261203, "learning_rate": 7.972017711565543e-07, "loss": 0.0175, "step": 9036 }, { "epoch": 2.0562002275312854, "grad_norm": 0.8263557490636182, "learning_rate": 7.971158764555611e-07, "loss": 0.0112, "step": 9037 }, { "epoch": 2.056427758816837, "grad_norm": 0.8469835655904605, "learning_rate": 7.970299782368347e-07, "loss": 0.012, "step": 9038 }, { "epoch": 2.056655290102389, "grad_norm": 0.7231820277605212, "learning_rate": 7.969440765021306e-07, "loss": 0.0066, "step": 9039 }, { "epoch": 2.0568828213879407, "grad_norm": 0.8362930993172825, "learning_rate": 7.968581712532044e-07, "loss": 0.021, "step": 9040 }, { "epoch": 2.0571103526734924, "grad_norm": 0.7421397318368298, "learning_rate": 7.96772262491812e-07, "loss": 0.0162, "step": 9041 }, { "epoch": 2.057337883959044, "grad_norm": 0.5653847698807809, "learning_rate": 7.966863502197092e-07, "loss": 0.004, "step": 9042 }, { "epoch": 2.0575654152445964, "grad_norm": 0.65364731752615, "learning_rate": 7.966004344386517e-07, "loss": 0.0068, "step": 9043 }, { "epoch": 2.057792946530148, "grad_norm": 0.6117471407776484, "learning_rate": 7.965145151503957e-07, "loss": 0.0065, "step": 9044 }, { "epoch": 2.0580204778157, "grad_norm": 1.0405491373096627, "learning_rate": 7.964285923566971e-07, "loss": 0.0138, "step": 9045 }, { "epoch": 2.0582480091012516, "grad_norm": 0.6073237190255709, "learning_rate": 7.963426660593121e-07, "loss": 0.0085, "step": 9046 }, { "epoch": 2.0584755403868034, "grad_norm": 0.5217280099543562, "learning_rate": 7.962567362599965e-07, "loss": 0.0054, "step": 9047 }, { "epoch": 2.058703071672355, "grad_norm": 0.6392511927922102, "learning_rate": 7.961708029605072e-07, "loss": 0.0072, "step": 9048 }, { "epoch": 2.058930602957907, "grad_norm": 1.0489995042804985, "learning_rate": 7.960848661626e-07, "loss": 0.0188, "step": 9049 }, { "epoch": 2.0591581342434586, "grad_norm": 0.8699211663352905, "learning_rate": 7.959989258680314e-07, "loss": 0.0191, "step": 9050 }, { "epoch": 2.0593856655290104, "grad_norm": 0.9463630209342634, "learning_rate": 7.959129820785581e-07, "loss": 0.0047, "step": 9051 }, { "epoch": 2.059613196814562, "grad_norm": 0.7350564549155074, "learning_rate": 7.958270347959365e-07, "loss": 0.0064, "step": 9052 }, { "epoch": 2.059840728100114, "grad_norm": 0.28592973791968845, "learning_rate": 7.95741084021923e-07, "loss": 0.0025, "step": 9053 }, { "epoch": 2.0600682593856656, "grad_norm": 1.1979986828968519, "learning_rate": 7.956551297582744e-07, "loss": 0.0253, "step": 9054 }, { "epoch": 2.0602957906712174, "grad_norm": 0.6510461072413279, "learning_rate": 7.955691720067476e-07, "loss": 0.0177, "step": 9055 }, { "epoch": 2.060523321956769, "grad_norm": 1.3235521566375663, "learning_rate": 7.954832107690994e-07, "loss": 0.0049, "step": 9056 }, { "epoch": 2.060750853242321, "grad_norm": 0.7983932906236514, "learning_rate": 7.953972460470865e-07, "loss": 0.0091, "step": 9057 }, { "epoch": 2.0609783845278726, "grad_norm": 0.6824474435128689, "learning_rate": 7.953112778424658e-07, "loss": 0.0097, "step": 9058 }, { "epoch": 2.0612059158134244, "grad_norm": 0.982846671224294, "learning_rate": 7.952253061569946e-07, "loss": 0.0144, "step": 9059 }, { "epoch": 2.061433447098976, "grad_norm": 0.7138860884269269, "learning_rate": 7.951393309924299e-07, "loss": 0.0087, "step": 9060 }, { "epoch": 2.061660978384528, "grad_norm": 0.1514271661885778, "learning_rate": 7.950533523505288e-07, "loss": 0.0007, "step": 9061 }, { "epoch": 2.0618885096700796, "grad_norm": 0.49698122778317044, "learning_rate": 7.949673702330487e-07, "loss": 0.0033, "step": 9062 }, { "epoch": 2.0621160409556314, "grad_norm": 1.011219866728441, "learning_rate": 7.948813846417469e-07, "loss": 0.015, "step": 9063 }, { "epoch": 2.062343572241183, "grad_norm": 0.94325376844481, "learning_rate": 7.947953955783808e-07, "loss": 0.0074, "step": 9064 }, { "epoch": 2.062571103526735, "grad_norm": 0.43953814787234513, "learning_rate": 7.947094030447077e-07, "loss": 0.0082, "step": 9065 }, { "epoch": 2.0627986348122866, "grad_norm": 0.770624168599187, "learning_rate": 7.946234070424852e-07, "loss": 0.0087, "step": 9066 }, { "epoch": 2.0630261660978384, "grad_norm": 0.7852652186331285, "learning_rate": 7.945374075734706e-07, "loss": 0.0066, "step": 9067 }, { "epoch": 2.06325369738339, "grad_norm": 1.8257160662731842, "learning_rate": 7.944514046394222e-07, "loss": 0.0166, "step": 9068 }, { "epoch": 2.063481228668942, "grad_norm": 0.6943659241212427, "learning_rate": 7.943653982420976e-07, "loss": 0.0091, "step": 9069 }, { "epoch": 2.0637087599544937, "grad_norm": 0.337384829969432, "learning_rate": 7.942793883832541e-07, "loss": 0.0019, "step": 9070 }, { "epoch": 2.0639362912400454, "grad_norm": 0.7104155788709781, "learning_rate": 7.9419337506465e-07, "loss": 0.0095, "step": 9071 }, { "epoch": 2.064163822525597, "grad_norm": 0.5962868363957752, "learning_rate": 7.941073582880431e-07, "loss": 0.0044, "step": 9072 }, { "epoch": 2.064391353811149, "grad_norm": 0.338270468072751, "learning_rate": 7.940213380551918e-07, "loss": 0.0035, "step": 9073 }, { "epoch": 2.0646188850967007, "grad_norm": 0.6378159954038821, "learning_rate": 7.939353143678535e-07, "loss": 0.005, "step": 9074 }, { "epoch": 2.0648464163822524, "grad_norm": 0.6056365879021587, "learning_rate": 7.938492872277872e-07, "loss": 0.0096, "step": 9075 }, { "epoch": 2.065073947667804, "grad_norm": 1.5006441244985853, "learning_rate": 7.937632566367505e-07, "loss": 0.0182, "step": 9076 }, { "epoch": 2.065301478953356, "grad_norm": 0.38261687744743117, "learning_rate": 7.936772225965018e-07, "loss": 0.0024, "step": 9077 }, { "epoch": 2.0655290102389077, "grad_norm": 0.9294557601529759, "learning_rate": 7.935911851087996e-07, "loss": 0.0072, "step": 9078 }, { "epoch": 2.0657565415244594, "grad_norm": 1.479721573100565, "learning_rate": 7.935051441754024e-07, "loss": 0.0264, "step": 9079 }, { "epoch": 2.065984072810011, "grad_norm": 0.6723776429149259, "learning_rate": 7.934190997980687e-07, "loss": 0.0172, "step": 9080 }, { "epoch": 2.066211604095563, "grad_norm": 0.9140317493359092, "learning_rate": 7.933330519785569e-07, "loss": 0.0097, "step": 9081 }, { "epoch": 2.066439135381115, "grad_norm": 0.44542992334864684, "learning_rate": 7.93247000718626e-07, "loss": 0.005, "step": 9082 }, { "epoch": 2.066666666666667, "grad_norm": 0.6753816366432505, "learning_rate": 7.931609460200345e-07, "loss": 0.0086, "step": 9083 }, { "epoch": 2.0668941979522186, "grad_norm": 0.19971118324429432, "learning_rate": 7.930748878845411e-07, "loss": 0.0016, "step": 9084 }, { "epoch": 2.0671217292377704, "grad_norm": 1.0544990680868636, "learning_rate": 7.929888263139049e-07, "loss": 0.0189, "step": 9085 }, { "epoch": 2.067349260523322, "grad_norm": 0.5973469577421473, "learning_rate": 7.929027613098848e-07, "loss": 0.0084, "step": 9086 }, { "epoch": 2.067576791808874, "grad_norm": 0.5179164240056631, "learning_rate": 7.928166928742398e-07, "loss": 0.0093, "step": 9087 }, { "epoch": 2.0678043230944256, "grad_norm": 0.8743029146459123, "learning_rate": 7.927306210087287e-07, "loss": 0.0168, "step": 9088 }, { "epoch": 2.0680318543799774, "grad_norm": 0.6036406868986361, "learning_rate": 7.926445457151111e-07, "loss": 0.0099, "step": 9089 }, { "epoch": 2.068259385665529, "grad_norm": 0.3948224738554559, "learning_rate": 7.925584669951459e-07, "loss": 0.0021, "step": 9090 }, { "epoch": 2.068486916951081, "grad_norm": 0.44922581856779004, "learning_rate": 7.924723848505925e-07, "loss": 0.0028, "step": 9091 }, { "epoch": 2.0687144482366326, "grad_norm": 1.1948292410858585, "learning_rate": 7.923862992832103e-07, "loss": 0.0217, "step": 9092 }, { "epoch": 2.0689419795221844, "grad_norm": 1.100180037321114, "learning_rate": 7.923002102947587e-07, "loss": 0.01, "step": 9093 }, { "epoch": 2.069169510807736, "grad_norm": 0.6937675534728082, "learning_rate": 7.922141178869973e-07, "loss": 0.009, "step": 9094 }, { "epoch": 2.069397042093288, "grad_norm": 0.7752258384880746, "learning_rate": 7.921280220616855e-07, "loss": 0.0152, "step": 9095 }, { "epoch": 2.0696245733788396, "grad_norm": 1.0795686730194802, "learning_rate": 7.920419228205829e-07, "loss": 0.0166, "step": 9096 }, { "epoch": 2.0698521046643914, "grad_norm": 0.607461867352133, "learning_rate": 7.919558201654493e-07, "loss": 0.0086, "step": 9097 }, { "epoch": 2.070079635949943, "grad_norm": 0.3311543160766717, "learning_rate": 7.918697140980447e-07, "loss": 0.0018, "step": 9098 }, { "epoch": 2.070307167235495, "grad_norm": 1.362904988120221, "learning_rate": 7.917836046201285e-07, "loss": 0.0195, "step": 9099 }, { "epoch": 2.0705346985210467, "grad_norm": 0.5205124460602316, "learning_rate": 7.916974917334612e-07, "loss": 0.0042, "step": 9100 }, { "epoch": 2.0707622298065984, "grad_norm": 0.8203663475474079, "learning_rate": 7.916113754398022e-07, "loss": 0.0156, "step": 9101 }, { "epoch": 2.07098976109215, "grad_norm": 1.1847893907735414, "learning_rate": 7.91525255740912e-07, "loss": 0.0231, "step": 9102 }, { "epoch": 2.071217292377702, "grad_norm": 1.242769539345523, "learning_rate": 7.914391326385504e-07, "loss": 0.0294, "step": 9103 }, { "epoch": 2.0714448236632537, "grad_norm": 0.6044568620800408, "learning_rate": 7.913530061344778e-07, "loss": 0.0086, "step": 9104 }, { "epoch": 2.0716723549488054, "grad_norm": 0.8446369320506389, "learning_rate": 7.912668762304544e-07, "loss": 0.0069, "step": 9105 }, { "epoch": 2.071899886234357, "grad_norm": 0.9634276448430097, "learning_rate": 7.911807429282406e-07, "loss": 0.0108, "step": 9106 }, { "epoch": 2.072127417519909, "grad_norm": 0.4255837431208955, "learning_rate": 7.910946062295967e-07, "loss": 0.0026, "step": 9107 }, { "epoch": 2.0723549488054607, "grad_norm": 1.0929442448124784, "learning_rate": 7.910084661362832e-07, "loss": 0.0201, "step": 9108 }, { "epoch": 2.0725824800910124, "grad_norm": 1.0093345207408708, "learning_rate": 7.909223226500609e-07, "loss": 0.0091, "step": 9109 }, { "epoch": 2.072810011376564, "grad_norm": 0.3205405530723011, "learning_rate": 7.908361757726901e-07, "loss": 0.0042, "step": 9110 }, { "epoch": 2.073037542662116, "grad_norm": 0.4914107843164597, "learning_rate": 7.907500255059314e-07, "loss": 0.0057, "step": 9111 }, { "epoch": 2.0732650739476677, "grad_norm": 2.183497169121956, "learning_rate": 7.906638718515458e-07, "loss": 0.0328, "step": 9112 }, { "epoch": 2.0734926052332194, "grad_norm": 0.6287818502880228, "learning_rate": 7.905777148112943e-07, "loss": 0.0043, "step": 9113 }, { "epoch": 2.073720136518771, "grad_norm": 1.770987962266444, "learning_rate": 7.904915543869372e-07, "loss": 0.0161, "step": 9114 }, { "epoch": 2.073947667804323, "grad_norm": 1.6020734134248478, "learning_rate": 7.904053905802362e-07, "loss": 0.0116, "step": 9115 }, { "epoch": 2.0741751990898747, "grad_norm": 0.4170883617995873, "learning_rate": 7.903192233929515e-07, "loss": 0.0026, "step": 9116 }, { "epoch": 2.0744027303754264, "grad_norm": 1.4426736839119412, "learning_rate": 7.90233052826845e-07, "loss": 0.0136, "step": 9117 }, { "epoch": 2.074630261660978, "grad_norm": 0.5364490219390096, "learning_rate": 7.901468788836771e-07, "loss": 0.0125, "step": 9118 }, { "epoch": 2.07485779294653, "grad_norm": 0.8118218292814557, "learning_rate": 7.900607015652101e-07, "loss": 0.0123, "step": 9119 }, { "epoch": 2.0750853242320817, "grad_norm": 0.4720346997920728, "learning_rate": 7.899745208732043e-07, "loss": 0.0038, "step": 9120 }, { "epoch": 2.075312855517634, "grad_norm": 1.1400139505704863, "learning_rate": 7.898883368094213e-07, "loss": 0.0143, "step": 9121 }, { "epoch": 2.0755403868031856, "grad_norm": 1.5162285525990173, "learning_rate": 7.898021493756228e-07, "loss": 0.0179, "step": 9122 }, { "epoch": 2.0757679180887374, "grad_norm": 1.2460561011233084, "learning_rate": 7.897159585735702e-07, "loss": 0.0145, "step": 9123 }, { "epoch": 2.075995449374289, "grad_norm": 0.4314378033216816, "learning_rate": 7.896297644050249e-07, "loss": 0.0031, "step": 9124 }, { "epoch": 2.076222980659841, "grad_norm": 0.5290618219543192, "learning_rate": 7.895435668717488e-07, "loss": 0.0031, "step": 9125 }, { "epoch": 2.0764505119453927, "grad_norm": 0.6912892689024679, "learning_rate": 7.894573659755038e-07, "loss": 0.0139, "step": 9126 }, { "epoch": 2.0766780432309444, "grad_norm": 0.24807223933852632, "learning_rate": 7.89371161718051e-07, "loss": 0.0014, "step": 9127 }, { "epoch": 2.076905574516496, "grad_norm": 0.5503007574869582, "learning_rate": 7.892849541011531e-07, "loss": 0.0063, "step": 9128 }, { "epoch": 2.077133105802048, "grad_norm": 1.195350434972967, "learning_rate": 7.891987431265714e-07, "loss": 0.0073, "step": 9129 }, { "epoch": 2.0773606370875997, "grad_norm": 0.379320486105582, "learning_rate": 7.891125287960682e-07, "loss": 0.0042, "step": 9130 }, { "epoch": 2.0775881683731514, "grad_norm": 1.1603595803509519, "learning_rate": 7.890263111114052e-07, "loss": 0.0126, "step": 9131 }, { "epoch": 2.077815699658703, "grad_norm": 0.46371094308488725, "learning_rate": 7.889400900743452e-07, "loss": 0.0017, "step": 9132 }, { "epoch": 2.078043230944255, "grad_norm": 1.4002252428358113, "learning_rate": 7.888538656866498e-07, "loss": 0.0115, "step": 9133 }, { "epoch": 2.0782707622298067, "grad_norm": 1.2049172434717985, "learning_rate": 7.887676379500814e-07, "loss": 0.01, "step": 9134 }, { "epoch": 2.0784982935153584, "grad_norm": 1.1518502874753174, "learning_rate": 7.886814068664025e-07, "loss": 0.0128, "step": 9135 }, { "epoch": 2.07872582480091, "grad_norm": 0.806150601896166, "learning_rate": 7.885951724373754e-07, "loss": 0.0095, "step": 9136 }, { "epoch": 2.078953356086462, "grad_norm": 0.7296766572310524, "learning_rate": 7.885089346647625e-07, "loss": 0.0052, "step": 9137 }, { "epoch": 2.0791808873720137, "grad_norm": 0.49776046586875056, "learning_rate": 7.884226935503267e-07, "loss": 0.0072, "step": 9138 }, { "epoch": 2.0794084186575654, "grad_norm": 1.029646859030014, "learning_rate": 7.8833644909583e-07, "loss": 0.0119, "step": 9139 }, { "epoch": 2.079635949943117, "grad_norm": 0.9421760038041583, "learning_rate": 7.882502013030358e-07, "loss": 0.0066, "step": 9140 }, { "epoch": 2.079863481228669, "grad_norm": 0.6011728262566824, "learning_rate": 7.881639501737059e-07, "loss": 0.006, "step": 9141 }, { "epoch": 2.0800910125142207, "grad_norm": 0.5032679788716042, "learning_rate": 7.880776957096041e-07, "loss": 0.005, "step": 9142 }, { "epoch": 2.0803185437997724, "grad_norm": 0.25041585957228457, "learning_rate": 7.879914379124928e-07, "loss": 0.002, "step": 9143 }, { "epoch": 2.080546075085324, "grad_norm": 1.3963677037027489, "learning_rate": 7.879051767841351e-07, "loss": 0.023, "step": 9144 }, { "epoch": 2.080773606370876, "grad_norm": 1.1133586768130128, "learning_rate": 7.878189123262937e-07, "loss": 0.0115, "step": 9145 }, { "epoch": 2.0810011376564277, "grad_norm": 0.4853918417871173, "learning_rate": 7.877326445407321e-07, "loss": 0.0074, "step": 9146 }, { "epoch": 2.0812286689419794, "grad_norm": 1.5958592224731931, "learning_rate": 7.876463734292133e-07, "loss": 0.041, "step": 9147 }, { "epoch": 2.081456200227531, "grad_norm": 0.5297580612008737, "learning_rate": 7.875600989935004e-07, "loss": 0.0051, "step": 9148 }, { "epoch": 2.081683731513083, "grad_norm": 0.7536803610859091, "learning_rate": 7.874738212353567e-07, "loss": 0.0045, "step": 9149 }, { "epoch": 2.0819112627986347, "grad_norm": 0.6095081040006934, "learning_rate": 7.873875401565458e-07, "loss": 0.0079, "step": 9150 }, { "epoch": 2.0821387940841865, "grad_norm": 0.6794626976572432, "learning_rate": 7.873012557588309e-07, "loss": 0.0056, "step": 9151 }, { "epoch": 2.082366325369738, "grad_norm": 2.24210983874312, "learning_rate": 7.872149680439756e-07, "loss": 0.0245, "step": 9152 }, { "epoch": 2.08259385665529, "grad_norm": 0.5269261677908896, "learning_rate": 7.871286770137434e-07, "loss": 0.0095, "step": 9153 }, { "epoch": 2.0828213879408417, "grad_norm": 2.916933169794705, "learning_rate": 7.870423826698981e-07, "loss": 0.0797, "step": 9154 }, { "epoch": 2.0830489192263935, "grad_norm": 1.0193821841518744, "learning_rate": 7.869560850142031e-07, "loss": 0.0206, "step": 9155 }, { "epoch": 2.083276450511945, "grad_norm": 1.9213408348971808, "learning_rate": 7.868697840484225e-07, "loss": 0.0402, "step": 9156 }, { "epoch": 2.083503981797497, "grad_norm": 0.7622595400971296, "learning_rate": 7.867834797743199e-07, "loss": 0.0109, "step": 9157 }, { "epoch": 2.0837315130830487, "grad_norm": 1.5515544467481235, "learning_rate": 7.866971721936592e-07, "loss": 0.0065, "step": 9158 }, { "epoch": 2.0839590443686005, "grad_norm": 1.1105111985696028, "learning_rate": 7.866108613082045e-07, "loss": 0.0066, "step": 9159 }, { "epoch": 2.0841865756541527, "grad_norm": 1.0582893467528578, "learning_rate": 7.865245471197198e-07, "loss": 0.0188, "step": 9160 }, { "epoch": 2.0844141069397044, "grad_norm": 1.0595467375317473, "learning_rate": 7.864382296299689e-07, "loss": 0.0216, "step": 9161 }, { "epoch": 2.084641638225256, "grad_norm": 0.4115376271807129, "learning_rate": 7.863519088407164e-07, "loss": 0.0031, "step": 9162 }, { "epoch": 2.084869169510808, "grad_norm": 0.8300257540667818, "learning_rate": 7.862655847537265e-07, "loss": 0.0132, "step": 9163 }, { "epoch": 2.0850967007963597, "grad_norm": 0.9064075749899173, "learning_rate": 7.861792573707634e-07, "loss": 0.0123, "step": 9164 }, { "epoch": 2.0853242320819114, "grad_norm": 0.6521758632997577, "learning_rate": 7.860929266935914e-07, "loss": 0.0052, "step": 9165 }, { "epoch": 2.085551763367463, "grad_norm": 0.27304053836354314, "learning_rate": 7.860065927239752e-07, "loss": 0.0024, "step": 9166 }, { "epoch": 2.085779294653015, "grad_norm": 0.5580775824388217, "learning_rate": 7.859202554636788e-07, "loss": 0.0093, "step": 9167 }, { "epoch": 2.0860068259385667, "grad_norm": 0.7820423391937628, "learning_rate": 7.858339149144671e-07, "loss": 0.0148, "step": 9168 }, { "epoch": 2.0862343572241184, "grad_norm": 0.6776426598903225, "learning_rate": 7.857475710781048e-07, "loss": 0.0099, "step": 9169 }, { "epoch": 2.08646188850967, "grad_norm": 0.9597468223496397, "learning_rate": 7.856612239563568e-07, "loss": 0.012, "step": 9170 }, { "epoch": 2.086689419795222, "grad_norm": 0.5301772798675423, "learning_rate": 7.855748735509873e-07, "loss": 0.004, "step": 9171 }, { "epoch": 2.0869169510807737, "grad_norm": 0.32840689959966574, "learning_rate": 7.854885198637615e-07, "loss": 0.0021, "step": 9172 }, { "epoch": 2.0871444823663254, "grad_norm": 0.8126896608713541, "learning_rate": 7.854021628964445e-07, "loss": 0.0072, "step": 9173 }, { "epoch": 2.087372013651877, "grad_norm": 0.966939750909063, "learning_rate": 7.853158026508009e-07, "loss": 0.0135, "step": 9174 }, { "epoch": 2.087599544937429, "grad_norm": 0.5501365281480977, "learning_rate": 7.852294391285959e-07, "loss": 0.0105, "step": 9175 }, { "epoch": 2.0878270762229807, "grad_norm": 0.8447495657831009, "learning_rate": 7.851430723315946e-07, "loss": 0.0088, "step": 9176 }, { "epoch": 2.0880546075085324, "grad_norm": 0.3759611000792586, "learning_rate": 7.850567022615623e-07, "loss": 0.0025, "step": 9177 }, { "epoch": 2.088282138794084, "grad_norm": 0.7947305000118688, "learning_rate": 7.84970328920264e-07, "loss": 0.0071, "step": 9178 }, { "epoch": 2.088509670079636, "grad_norm": 0.6577083486461792, "learning_rate": 7.848839523094651e-07, "loss": 0.0098, "step": 9179 }, { "epoch": 2.0887372013651877, "grad_norm": 1.7343321429669305, "learning_rate": 7.847975724309312e-07, "loss": 0.014, "step": 9180 }, { "epoch": 2.0889647326507395, "grad_norm": 0.680123524548824, "learning_rate": 7.847111892864275e-07, "loss": 0.0051, "step": 9181 }, { "epoch": 2.089192263936291, "grad_norm": 0.6703674002826029, "learning_rate": 7.846248028777195e-07, "loss": 0.0131, "step": 9182 }, { "epoch": 2.089419795221843, "grad_norm": 1.1807807672416228, "learning_rate": 7.845384132065731e-07, "loss": 0.0181, "step": 9183 }, { "epoch": 2.0896473265073947, "grad_norm": 0.703243120255615, "learning_rate": 7.844520202747534e-07, "loss": 0.0118, "step": 9184 }, { "epoch": 2.0898748577929465, "grad_norm": 1.5320837528615565, "learning_rate": 7.843656240840265e-07, "loss": 0.0247, "step": 9185 }, { "epoch": 2.090102389078498, "grad_norm": 0.5499001837311822, "learning_rate": 7.842792246361583e-07, "loss": 0.0026, "step": 9186 }, { "epoch": 2.09032992036405, "grad_norm": 1.267889632928035, "learning_rate": 7.841928219329144e-07, "loss": 0.0192, "step": 9187 }, { "epoch": 2.0905574516496017, "grad_norm": 0.8294155358531753, "learning_rate": 7.841064159760605e-07, "loss": 0.0061, "step": 9188 }, { "epoch": 2.0907849829351535, "grad_norm": 0.6126934411224282, "learning_rate": 7.840200067673632e-07, "loss": 0.0053, "step": 9189 }, { "epoch": 2.091012514220705, "grad_norm": 0.21523624283281878, "learning_rate": 7.839335943085882e-07, "loss": 0.002, "step": 9190 }, { "epoch": 2.091240045506257, "grad_norm": 0.48695567972763487, "learning_rate": 7.838471786015013e-07, "loss": 0.0095, "step": 9191 }, { "epoch": 2.0914675767918087, "grad_norm": 1.2177457934193316, "learning_rate": 7.837607596478691e-07, "loss": 0.0173, "step": 9192 }, { "epoch": 2.0916951080773605, "grad_norm": 1.0586963752613927, "learning_rate": 7.836743374494579e-07, "loss": 0.01, "step": 9193 }, { "epoch": 2.0919226393629122, "grad_norm": 0.9346759824478303, "learning_rate": 7.835879120080337e-07, "loss": 0.0224, "step": 9194 }, { "epoch": 2.092150170648464, "grad_norm": 0.4713141300371756, "learning_rate": 7.83501483325363e-07, "loss": 0.0049, "step": 9195 }, { "epoch": 2.0923777019340157, "grad_norm": 1.162350148632552, "learning_rate": 7.834150514032124e-07, "loss": 0.0139, "step": 9196 }, { "epoch": 2.0926052332195675, "grad_norm": 0.41157198927050814, "learning_rate": 7.833286162433481e-07, "loss": 0.0028, "step": 9197 }, { "epoch": 2.0928327645051192, "grad_norm": 1.1099539495640738, "learning_rate": 7.832421778475369e-07, "loss": 0.0149, "step": 9198 }, { "epoch": 2.0930602957906714, "grad_norm": 2.1675030701503224, "learning_rate": 7.831557362175454e-07, "loss": 0.0311, "step": 9199 }, { "epoch": 2.093287827076223, "grad_norm": 0.6801671638755284, "learning_rate": 7.830692913551403e-07, "loss": 0.015, "step": 9200 }, { "epoch": 2.093515358361775, "grad_norm": 0.9498273562152529, "learning_rate": 7.829828432620884e-07, "loss": 0.0139, "step": 9201 }, { "epoch": 2.0937428896473267, "grad_norm": 0.17757418969248412, "learning_rate": 7.828963919401567e-07, "loss": 0.0005, "step": 9202 }, { "epoch": 2.0939704209328784, "grad_norm": 0.9529307364560996, "learning_rate": 7.828099373911116e-07, "loss": 0.0206, "step": 9203 }, { "epoch": 2.09419795221843, "grad_norm": 0.4720678151921159, "learning_rate": 7.827234796167206e-07, "loss": 0.0045, "step": 9204 }, { "epoch": 2.094425483503982, "grad_norm": 0.5564923865534581, "learning_rate": 7.826370186187503e-07, "loss": 0.0112, "step": 9205 }, { "epoch": 2.0946530147895337, "grad_norm": 1.1258120369556415, "learning_rate": 7.825505543989682e-07, "loss": 0.0116, "step": 9206 }, { "epoch": 2.0948805460750854, "grad_norm": 1.739664201930335, "learning_rate": 7.824640869591412e-07, "loss": 0.0234, "step": 9207 }, { "epoch": 2.095108077360637, "grad_norm": 0.23151133729253148, "learning_rate": 7.823776163010369e-07, "loss": 0.0022, "step": 9208 }, { "epoch": 2.095335608646189, "grad_norm": 1.3191505337757548, "learning_rate": 7.822911424264222e-07, "loss": 0.0108, "step": 9209 }, { "epoch": 2.0955631399317407, "grad_norm": 1.0201891270344592, "learning_rate": 7.822046653370647e-07, "loss": 0.012, "step": 9210 }, { "epoch": 2.0957906712172925, "grad_norm": 1.1541827816719863, "learning_rate": 7.821181850347316e-07, "loss": 0.0055, "step": 9211 }, { "epoch": 2.096018202502844, "grad_norm": 1.3264770201849603, "learning_rate": 7.820317015211905e-07, "loss": 0.0162, "step": 9212 }, { "epoch": 2.096245733788396, "grad_norm": 0.6640103343814197, "learning_rate": 7.819452147982091e-07, "loss": 0.0064, "step": 9213 }, { "epoch": 2.0964732650739477, "grad_norm": 0.6796595134370279, "learning_rate": 7.81858724867555e-07, "loss": 0.0095, "step": 9214 }, { "epoch": 2.0967007963594995, "grad_norm": 0.5236777498501003, "learning_rate": 7.817722317309958e-07, "loss": 0.0055, "step": 9215 }, { "epoch": 2.096928327645051, "grad_norm": 1.2277357402303708, "learning_rate": 7.816857353902993e-07, "loss": 0.0171, "step": 9216 }, { "epoch": 2.097155858930603, "grad_norm": 0.628137537522774, "learning_rate": 7.815992358472332e-07, "loss": 0.0063, "step": 9217 }, { "epoch": 2.0973833902161547, "grad_norm": 0.14293525239107094, "learning_rate": 7.815127331035656e-07, "loss": 0.0013, "step": 9218 }, { "epoch": 2.0976109215017065, "grad_norm": 1.9126063856709803, "learning_rate": 7.814262271610645e-07, "loss": 0.0361, "step": 9219 }, { "epoch": 2.0978384527872582, "grad_norm": 0.5261794311393453, "learning_rate": 7.813397180214978e-07, "loss": 0.0063, "step": 9220 }, { "epoch": 2.09806598407281, "grad_norm": 0.8997752073865246, "learning_rate": 7.812532056866334e-07, "loss": 0.0157, "step": 9221 }, { "epoch": 2.0982935153583617, "grad_norm": 2.3182064053409372, "learning_rate": 7.811666901582397e-07, "loss": 0.0236, "step": 9222 }, { "epoch": 2.0985210466439135, "grad_norm": 0.6059480274929465, "learning_rate": 7.81080171438085e-07, "loss": 0.0048, "step": 9223 }, { "epoch": 2.0987485779294652, "grad_norm": 0.7420906474117391, "learning_rate": 7.809936495279373e-07, "loss": 0.0043, "step": 9224 }, { "epoch": 2.098976109215017, "grad_norm": 0.7252106301316513, "learning_rate": 7.80907124429565e-07, "loss": 0.013, "step": 9225 }, { "epoch": 2.0992036405005687, "grad_norm": 0.9002092835571386, "learning_rate": 7.808205961447368e-07, "loss": 0.0138, "step": 9226 }, { "epoch": 2.0994311717861205, "grad_norm": 1.2342077987333195, "learning_rate": 7.80734064675221e-07, "loss": 0.0174, "step": 9227 }, { "epoch": 2.0996587030716722, "grad_norm": 1.1037992668025742, "learning_rate": 7.806475300227859e-07, "loss": 0.0192, "step": 9228 }, { "epoch": 2.099886234357224, "grad_norm": 0.786939908331163, "learning_rate": 7.805609921892006e-07, "loss": 0.0056, "step": 9229 }, { "epoch": 2.1001137656427757, "grad_norm": 0.7136016687159851, "learning_rate": 7.804744511762334e-07, "loss": 0.0139, "step": 9230 }, { "epoch": 2.1003412969283275, "grad_norm": 0.2289243124919493, "learning_rate": 7.803879069856532e-07, "loss": 0.0017, "step": 9231 }, { "epoch": 2.1005688282138792, "grad_norm": 1.1413146766634172, "learning_rate": 7.803013596192285e-07, "loss": 0.0216, "step": 9232 }, { "epoch": 2.100796359499431, "grad_norm": 1.0425362067877952, "learning_rate": 7.802148090787289e-07, "loss": 0.0113, "step": 9233 }, { "epoch": 2.1010238907849828, "grad_norm": 1.1714190250184568, "learning_rate": 7.801282553659225e-07, "loss": 0.0261, "step": 9234 }, { "epoch": 2.1012514220705345, "grad_norm": 0.8367732906380727, "learning_rate": 7.800416984825787e-07, "loss": 0.0106, "step": 9235 }, { "epoch": 2.1014789533560863, "grad_norm": 0.542809043926659, "learning_rate": 7.799551384304666e-07, "loss": 0.006, "step": 9236 }, { "epoch": 2.101706484641638, "grad_norm": 0.7628267307241319, "learning_rate": 7.798685752113553e-07, "loss": 0.0055, "step": 9237 }, { "epoch": 2.10193401592719, "grad_norm": 0.955833216060939, "learning_rate": 7.797820088270141e-07, "loss": 0.0156, "step": 9238 }, { "epoch": 2.102161547212742, "grad_norm": 1.0564360349539703, "learning_rate": 7.796954392792119e-07, "loss": 0.0173, "step": 9239 }, { "epoch": 2.1023890784982937, "grad_norm": 0.9030033078460505, "learning_rate": 7.796088665697183e-07, "loss": 0.0129, "step": 9240 }, { "epoch": 2.1026166097838455, "grad_norm": 0.8232700967230857, "learning_rate": 7.795222907003025e-07, "loss": 0.0246, "step": 9241 }, { "epoch": 2.102844141069397, "grad_norm": 0.7206999137991037, "learning_rate": 7.794357116727342e-07, "loss": 0.0091, "step": 9242 }, { "epoch": 2.103071672354949, "grad_norm": 0.9851948857694026, "learning_rate": 7.793491294887827e-07, "loss": 0.0158, "step": 9243 }, { "epoch": 2.1032992036405007, "grad_norm": 2.514647738852726, "learning_rate": 7.792625441502178e-07, "loss": 0.0316, "step": 9244 }, { "epoch": 2.1035267349260525, "grad_norm": 1.0881689113865614, "learning_rate": 7.791759556588088e-07, "loss": 0.0166, "step": 9245 }, { "epoch": 2.103754266211604, "grad_norm": 1.0334434567242292, "learning_rate": 7.790893640163258e-07, "loss": 0.0106, "step": 9246 }, { "epoch": 2.103981797497156, "grad_norm": 0.6278377087522803, "learning_rate": 7.790027692245383e-07, "loss": 0.0035, "step": 9247 }, { "epoch": 2.1042093287827077, "grad_norm": 0.5021948878550339, "learning_rate": 7.789161712852163e-07, "loss": 0.0071, "step": 9248 }, { "epoch": 2.1044368600682595, "grad_norm": 0.8056033221753424, "learning_rate": 7.788295702001296e-07, "loss": 0.0117, "step": 9249 }, { "epoch": 2.1046643913538112, "grad_norm": 0.9880212959773031, "learning_rate": 7.787429659710483e-07, "loss": 0.0133, "step": 9250 }, { "epoch": 2.104891922639363, "grad_norm": 0.4645349011452211, "learning_rate": 7.786563585997422e-07, "loss": 0.0023, "step": 9251 }, { "epoch": 2.1051194539249147, "grad_norm": 1.1823375145594603, "learning_rate": 7.785697480879817e-07, "loss": 0.0082, "step": 9252 }, { "epoch": 2.1053469852104665, "grad_norm": 0.32591765668254413, "learning_rate": 7.784831344375368e-07, "loss": 0.0034, "step": 9253 }, { "epoch": 2.1055745164960182, "grad_norm": 1.2687961743078804, "learning_rate": 7.783965176501776e-07, "loss": 0.0244, "step": 9254 }, { "epoch": 2.10580204778157, "grad_norm": 1.433044307027219, "learning_rate": 7.783098977276746e-07, "loss": 0.0105, "step": 9255 }, { "epoch": 2.1060295790671217, "grad_norm": 1.3256650687936837, "learning_rate": 7.78223274671798e-07, "loss": 0.0312, "step": 9256 }, { "epoch": 2.1062571103526735, "grad_norm": 0.5832669016674589, "learning_rate": 7.781366484843184e-07, "loss": 0.005, "step": 9257 }, { "epoch": 2.1064846416382252, "grad_norm": 0.9472246850407402, "learning_rate": 7.78050019167006e-07, "loss": 0.0058, "step": 9258 }, { "epoch": 2.106712172923777, "grad_norm": 0.7969795958517575, "learning_rate": 7.779633867216316e-07, "loss": 0.0071, "step": 9259 }, { "epoch": 2.1069397042093287, "grad_norm": 0.5809977867579027, "learning_rate": 7.778767511499657e-07, "loss": 0.007, "step": 9260 }, { "epoch": 2.1071672354948805, "grad_norm": 1.376158219884132, "learning_rate": 7.77790112453779e-07, "loss": 0.0161, "step": 9261 }, { "epoch": 2.1073947667804322, "grad_norm": 0.8468462694388563, "learning_rate": 7.77703470634842e-07, "loss": 0.0171, "step": 9262 }, { "epoch": 2.107622298065984, "grad_norm": 1.0779180072248953, "learning_rate": 7.77616825694926e-07, "loss": 0.0151, "step": 9263 }, { "epoch": 2.1078498293515358, "grad_norm": 0.4770232661427211, "learning_rate": 7.775301776358017e-07, "loss": 0.0056, "step": 9264 }, { "epoch": 2.1080773606370875, "grad_norm": 0.6523197760944786, "learning_rate": 7.774435264592396e-07, "loss": 0.0129, "step": 9265 }, { "epoch": 2.1083048919226393, "grad_norm": 0.5892460233849306, "learning_rate": 7.773568721670111e-07, "loss": 0.0075, "step": 9266 }, { "epoch": 2.108532423208191, "grad_norm": 0.1882698375940475, "learning_rate": 7.772702147608873e-07, "loss": 0.0011, "step": 9267 }, { "epoch": 2.1087599544937428, "grad_norm": 0.52604619679456, "learning_rate": 7.771835542426389e-07, "loss": 0.006, "step": 9268 }, { "epoch": 2.1089874857792945, "grad_norm": 1.2801995718752124, "learning_rate": 7.770968906140376e-07, "loss": 0.0178, "step": 9269 }, { "epoch": 2.1092150170648463, "grad_norm": 0.7107149098259525, "learning_rate": 7.770102238768543e-07, "loss": 0.0055, "step": 9270 }, { "epoch": 2.109442548350398, "grad_norm": 0.7205424693820719, "learning_rate": 7.769235540328607e-07, "loss": 0.0082, "step": 9271 }, { "epoch": 2.1096700796359498, "grad_norm": 1.2027000820028442, "learning_rate": 7.768368810838275e-07, "loss": 0.0209, "step": 9272 }, { "epoch": 2.1098976109215015, "grad_norm": 0.39160046850220054, "learning_rate": 7.767502050315266e-07, "loss": 0.0033, "step": 9273 }, { "epoch": 2.1101251422070533, "grad_norm": 0.9445899721010399, "learning_rate": 7.766635258777296e-07, "loss": 0.0133, "step": 9274 }, { "epoch": 2.110352673492605, "grad_norm": 0.5626709367148773, "learning_rate": 7.765768436242077e-07, "loss": 0.002, "step": 9275 }, { "epoch": 2.1105802047781568, "grad_norm": 0.48427462422275985, "learning_rate": 7.764901582727328e-07, "loss": 0.0057, "step": 9276 }, { "epoch": 2.110807736063709, "grad_norm": 1.3310285303812137, "learning_rate": 7.764034698250767e-07, "loss": 0.0311, "step": 9277 }, { "epoch": 2.1110352673492607, "grad_norm": 1.027449064610943, "learning_rate": 7.763167782830107e-07, "loss": 0.0121, "step": 9278 }, { "epoch": 2.1112627986348125, "grad_norm": 0.435579428938178, "learning_rate": 7.762300836483069e-07, "loss": 0.005, "step": 9279 }, { "epoch": 2.1114903299203642, "grad_norm": 0.679395307172523, "learning_rate": 7.761433859227373e-07, "loss": 0.0056, "step": 9280 }, { "epoch": 2.111717861205916, "grad_norm": 0.6020128188175906, "learning_rate": 7.760566851080736e-07, "loss": 0.0032, "step": 9281 }, { "epoch": 2.1119453924914677, "grad_norm": 1.5418885483278937, "learning_rate": 7.759699812060878e-07, "loss": 0.0167, "step": 9282 }, { "epoch": 2.1121729237770195, "grad_norm": 0.44315206311217464, "learning_rate": 7.758832742185523e-07, "loss": 0.0042, "step": 9283 }, { "epoch": 2.1124004550625712, "grad_norm": 0.5147215125707401, "learning_rate": 7.75796564147239e-07, "loss": 0.0078, "step": 9284 }, { "epoch": 2.112627986348123, "grad_norm": 1.0508832902452774, "learning_rate": 7.757098509939198e-07, "loss": 0.0168, "step": 9285 }, { "epoch": 2.1128555176336747, "grad_norm": 0.3557205753864511, "learning_rate": 7.756231347603674e-07, "loss": 0.0024, "step": 9286 }, { "epoch": 2.1130830489192265, "grad_norm": 1.2853552341593915, "learning_rate": 7.755364154483541e-07, "loss": 0.0167, "step": 9287 }, { "epoch": 2.1133105802047782, "grad_norm": 0.7996360698496128, "learning_rate": 7.754496930596521e-07, "loss": 0.0148, "step": 9288 }, { "epoch": 2.11353811149033, "grad_norm": 1.2944159932286945, "learning_rate": 7.753629675960339e-07, "loss": 0.0223, "step": 9289 }, { "epoch": 2.1137656427758817, "grad_norm": 0.5226371406326719, "learning_rate": 7.752762390592721e-07, "loss": 0.0058, "step": 9290 }, { "epoch": 2.1139931740614335, "grad_norm": 1.2825230470735336, "learning_rate": 7.751895074511391e-07, "loss": 0.0235, "step": 9291 }, { "epoch": 2.1142207053469853, "grad_norm": 0.9194008058245837, "learning_rate": 7.751027727734075e-07, "loss": 0.0153, "step": 9292 }, { "epoch": 2.114448236632537, "grad_norm": 0.32574968478101046, "learning_rate": 7.750160350278503e-07, "loss": 0.004, "step": 9293 }, { "epoch": 2.1146757679180888, "grad_norm": 1.0432027991771016, "learning_rate": 7.7492929421624e-07, "loss": 0.0062, "step": 9294 }, { "epoch": 2.1149032992036405, "grad_norm": 0.9357758012635514, "learning_rate": 7.748425503403497e-07, "loss": 0.0177, "step": 9295 }, { "epoch": 2.1151308304891923, "grad_norm": 0.45931328768984875, "learning_rate": 7.747558034019518e-07, "loss": 0.0055, "step": 9296 }, { "epoch": 2.115358361774744, "grad_norm": 0.7789332237598385, "learning_rate": 7.746690534028198e-07, "loss": 0.008, "step": 9297 }, { "epoch": 2.1155858930602958, "grad_norm": 0.585121819859683, "learning_rate": 7.745823003447262e-07, "loss": 0.0028, "step": 9298 }, { "epoch": 2.1158134243458475, "grad_norm": 0.5964394322215167, "learning_rate": 7.744955442294444e-07, "loss": 0.0137, "step": 9299 }, { "epoch": 2.1160409556313993, "grad_norm": 0.3205518841094132, "learning_rate": 7.744087850587476e-07, "loss": 0.0026, "step": 9300 }, { "epoch": 2.116268486916951, "grad_norm": 0.5896827856487226, "learning_rate": 7.743220228344088e-07, "loss": 0.0086, "step": 9301 }, { "epoch": 2.1164960182025028, "grad_norm": 0.49452255381969884, "learning_rate": 7.742352575582014e-07, "loss": 0.0022, "step": 9302 }, { "epoch": 2.1167235494880545, "grad_norm": 0.6386684805696516, "learning_rate": 7.741484892318986e-07, "loss": 0.0065, "step": 9303 }, { "epoch": 2.1169510807736063, "grad_norm": 1.1712198529798117, "learning_rate": 7.740617178572738e-07, "loss": 0.0238, "step": 9304 }, { "epoch": 2.117178612059158, "grad_norm": 1.443578022133474, "learning_rate": 7.739749434361003e-07, "loss": 0.0246, "step": 9305 }, { "epoch": 2.11740614334471, "grad_norm": 1.148306127698227, "learning_rate": 7.738881659701521e-07, "loss": 0.0181, "step": 9306 }, { "epoch": 2.1176336746302615, "grad_norm": 0.5081086380563994, "learning_rate": 7.738013854612023e-07, "loss": 0.003, "step": 9307 }, { "epoch": 2.1178612059158133, "grad_norm": 1.09859413339637, "learning_rate": 7.737146019110247e-07, "loss": 0.0183, "step": 9308 }, { "epoch": 2.118088737201365, "grad_norm": 0.6865986011028383, "learning_rate": 7.736278153213932e-07, "loss": 0.0097, "step": 9309 }, { "epoch": 2.118316268486917, "grad_norm": 0.5334779464452993, "learning_rate": 7.735410256940812e-07, "loss": 0.0013, "step": 9310 }, { "epoch": 2.1185437997724685, "grad_norm": 0.8175878440237826, "learning_rate": 7.734542330308626e-07, "loss": 0.0112, "step": 9311 }, { "epoch": 2.1187713310580203, "grad_norm": 0.4008291179836023, "learning_rate": 7.733674373335112e-07, "loss": 0.006, "step": 9312 }, { "epoch": 2.118998862343572, "grad_norm": 0.7555099706421347, "learning_rate": 7.732806386038013e-07, "loss": 0.0104, "step": 9313 }, { "epoch": 2.119226393629124, "grad_norm": 1.8740139859178238, "learning_rate": 7.731938368435069e-07, "loss": 0.0277, "step": 9314 }, { "epoch": 2.1194539249146755, "grad_norm": 0.4870467351113141, "learning_rate": 7.731070320544017e-07, "loss": 0.0043, "step": 9315 }, { "epoch": 2.1196814562002277, "grad_norm": 0.8886276104915918, "learning_rate": 7.7302022423826e-07, "loss": 0.0145, "step": 9316 }, { "epoch": 2.1199089874857795, "grad_norm": 0.4939635956444076, "learning_rate": 7.72933413396856e-07, "loss": 0.0055, "step": 9317 }, { "epoch": 2.1201365187713312, "grad_norm": 0.8002772576797165, "learning_rate": 7.728465995319641e-07, "loss": 0.009, "step": 9318 }, { "epoch": 2.120364050056883, "grad_norm": 1.5504173847797766, "learning_rate": 7.727597826453583e-07, "loss": 0.0199, "step": 9319 }, { "epoch": 2.1205915813424348, "grad_norm": 0.601728472782442, "learning_rate": 7.726729627388132e-07, "loss": 0.0086, "step": 9320 }, { "epoch": 2.1208191126279865, "grad_norm": 1.2178644264360978, "learning_rate": 7.725861398141032e-07, "loss": 0.0095, "step": 9321 }, { "epoch": 2.1210466439135383, "grad_norm": 0.99613089080032, "learning_rate": 7.724993138730027e-07, "loss": 0.0163, "step": 9322 }, { "epoch": 2.12127417519909, "grad_norm": 0.7812678431584936, "learning_rate": 7.724124849172865e-07, "loss": 0.0155, "step": 9323 }, { "epoch": 2.1215017064846418, "grad_norm": 0.21368210456392514, "learning_rate": 7.723256529487291e-07, "loss": 0.0014, "step": 9324 }, { "epoch": 2.1217292377701935, "grad_norm": 0.5802091982439473, "learning_rate": 7.722388179691051e-07, "loss": 0.0075, "step": 9325 }, { "epoch": 2.1219567690557453, "grad_norm": 1.0755789840402012, "learning_rate": 7.721519799801892e-07, "loss": 0.0273, "step": 9326 }, { "epoch": 2.122184300341297, "grad_norm": 0.8044716006135214, "learning_rate": 7.720651389837566e-07, "loss": 0.0122, "step": 9327 }, { "epoch": 2.1224118316268488, "grad_norm": 0.9656814876357248, "learning_rate": 7.719782949815817e-07, "loss": 0.0107, "step": 9328 }, { "epoch": 2.1226393629124005, "grad_norm": 0.6167739226303055, "learning_rate": 7.718914479754395e-07, "loss": 0.0067, "step": 9329 }, { "epoch": 2.1228668941979523, "grad_norm": 0.6281965997313745, "learning_rate": 7.718045979671054e-07, "loss": 0.007, "step": 9330 }, { "epoch": 2.123094425483504, "grad_norm": 1.2644289472661194, "learning_rate": 7.717177449583541e-07, "loss": 0.0122, "step": 9331 }, { "epoch": 2.1233219567690558, "grad_norm": 0.7532418971080128, "learning_rate": 7.716308889509608e-07, "loss": 0.0134, "step": 9332 }, { "epoch": 2.1235494880546075, "grad_norm": 1.3621156306514859, "learning_rate": 7.715440299467006e-07, "loss": 0.0124, "step": 9333 }, { "epoch": 2.1237770193401593, "grad_norm": 0.4543555285791925, "learning_rate": 7.714571679473489e-07, "loss": 0.0058, "step": 9334 }, { "epoch": 2.124004550625711, "grad_norm": 0.7671567799229786, "learning_rate": 7.713703029546809e-07, "loss": 0.0095, "step": 9335 }, { "epoch": 2.124232081911263, "grad_norm": 0.43296493773638894, "learning_rate": 7.712834349704718e-07, "loss": 0.0038, "step": 9336 }, { "epoch": 2.1244596131968145, "grad_norm": 0.5458144248235711, "learning_rate": 7.711965639964971e-07, "loss": 0.0041, "step": 9337 }, { "epoch": 2.1246871444823663, "grad_norm": 0.42708321909161506, "learning_rate": 7.711096900345327e-07, "loss": 0.0032, "step": 9338 }, { "epoch": 2.124914675767918, "grad_norm": 0.49979807660715214, "learning_rate": 7.710228130863537e-07, "loss": 0.0043, "step": 9339 }, { "epoch": 2.12514220705347, "grad_norm": 0.7726907736551077, "learning_rate": 7.70935933153736e-07, "loss": 0.0184, "step": 9340 }, { "epoch": 2.1253697383390215, "grad_norm": 0.5235571073448476, "learning_rate": 7.70849050238455e-07, "loss": 0.0058, "step": 9341 }, { "epoch": 2.1255972696245733, "grad_norm": 0.7968690006044425, "learning_rate": 7.707621643422862e-07, "loss": 0.0054, "step": 9342 }, { "epoch": 2.125824800910125, "grad_norm": 1.1544453770762437, "learning_rate": 7.706752754670061e-07, "loss": 0.0189, "step": 9343 }, { "epoch": 2.126052332195677, "grad_norm": 1.308513866531729, "learning_rate": 7.7058838361439e-07, "loss": 0.0097, "step": 9344 }, { "epoch": 2.1262798634812285, "grad_norm": 0.6257116060187328, "learning_rate": 7.70501488786214e-07, "loss": 0.0053, "step": 9345 }, { "epoch": 2.1265073947667803, "grad_norm": 0.730755860799214, "learning_rate": 7.70414590984254e-07, "loss": 0.0078, "step": 9346 }, { "epoch": 2.126734926052332, "grad_norm": 1.922894797173844, "learning_rate": 7.703276902102863e-07, "loss": 0.0295, "step": 9347 }, { "epoch": 2.126962457337884, "grad_norm": 1.090601578418276, "learning_rate": 7.702407864660865e-07, "loss": 0.0124, "step": 9348 }, { "epoch": 2.1271899886234356, "grad_norm": 0.8431305968754933, "learning_rate": 7.701538797534311e-07, "loss": 0.0076, "step": 9349 }, { "epoch": 2.1274175199089873, "grad_norm": 0.6170441599759381, "learning_rate": 7.700669700740962e-07, "loss": 0.0038, "step": 9350 }, { "epoch": 2.127645051194539, "grad_norm": 0.6726246066713302, "learning_rate": 7.699800574298582e-07, "loss": 0.0059, "step": 9351 }, { "epoch": 2.127872582480091, "grad_norm": 0.7135448306323023, "learning_rate": 7.698931418224934e-07, "loss": 0.0032, "step": 9352 }, { "epoch": 2.128100113765643, "grad_norm": 0.40119783725346897, "learning_rate": 7.69806223253778e-07, "loss": 0.0032, "step": 9353 }, { "epoch": 2.1283276450511943, "grad_norm": 0.7506531546338718, "learning_rate": 7.697193017254886e-07, "loss": 0.0187, "step": 9354 }, { "epoch": 2.1285551763367465, "grad_norm": 0.7292817188394103, "learning_rate": 7.696323772394018e-07, "loss": 0.009, "step": 9355 }, { "epoch": 2.1287827076222983, "grad_norm": 1.2231787447525568, "learning_rate": 7.69545449797294e-07, "loss": 0.0112, "step": 9356 }, { "epoch": 2.12901023890785, "grad_norm": 1.4078722402329749, "learning_rate": 7.694585194009419e-07, "loss": 0.0248, "step": 9357 }, { "epoch": 2.1292377701934018, "grad_norm": 1.241126896304744, "learning_rate": 7.693715860521224e-07, "loss": 0.0197, "step": 9358 }, { "epoch": 2.1294653014789535, "grad_norm": 0.958743737993354, "learning_rate": 7.69284649752612e-07, "loss": 0.0047, "step": 9359 }, { "epoch": 2.1296928327645053, "grad_norm": 0.3864131477070798, "learning_rate": 7.691977105041876e-07, "loss": 0.003, "step": 9360 }, { "epoch": 2.129920364050057, "grad_norm": 0.5648643571031741, "learning_rate": 7.69110768308626e-07, "loss": 0.0068, "step": 9361 }, { "epoch": 2.1301478953356088, "grad_norm": 0.6800766992493664, "learning_rate": 7.690238231677045e-07, "loss": 0.0108, "step": 9362 }, { "epoch": 2.1303754266211605, "grad_norm": 0.44346652887854204, "learning_rate": 7.689368750831994e-07, "loss": 0.0021, "step": 9363 }, { "epoch": 2.1306029579067123, "grad_norm": 0.7125435430441509, "learning_rate": 7.688499240568883e-07, "loss": 0.0098, "step": 9364 }, { "epoch": 2.130830489192264, "grad_norm": 1.026299881027326, "learning_rate": 7.687629700905485e-07, "loss": 0.0083, "step": 9365 }, { "epoch": 2.131058020477816, "grad_norm": 0.47670964321392373, "learning_rate": 7.686760131859566e-07, "loss": 0.0047, "step": 9366 }, { "epoch": 2.1312855517633675, "grad_norm": 0.6884814697189213, "learning_rate": 7.685890533448901e-07, "loss": 0.0097, "step": 9367 }, { "epoch": 2.1315130830489193, "grad_norm": 0.7542648522616061, "learning_rate": 7.685020905691265e-07, "loss": 0.0086, "step": 9368 }, { "epoch": 2.131740614334471, "grad_norm": 0.7224628820415935, "learning_rate": 7.684151248604428e-07, "loss": 0.0112, "step": 9369 }, { "epoch": 2.131968145620023, "grad_norm": 0.6788415249222343, "learning_rate": 7.683281562206167e-07, "loss": 0.0087, "step": 9370 }, { "epoch": 2.1321956769055745, "grad_norm": 0.7639085976718167, "learning_rate": 7.682411846514258e-07, "loss": 0.0101, "step": 9371 }, { "epoch": 2.1324232081911263, "grad_norm": 1.6322301282997367, "learning_rate": 7.68154210154647e-07, "loss": 0.0231, "step": 9372 }, { "epoch": 2.132650739476678, "grad_norm": 1.2027520015285638, "learning_rate": 7.680672327320586e-07, "loss": 0.0148, "step": 9373 }, { "epoch": 2.13287827076223, "grad_norm": 0.5244073897194097, "learning_rate": 7.679802523854379e-07, "loss": 0.0063, "step": 9374 }, { "epoch": 2.1331058020477816, "grad_norm": 0.7908598612710436, "learning_rate": 7.678932691165628e-07, "loss": 0.0129, "step": 9375 }, { "epoch": 2.1333333333333333, "grad_norm": 0.6648253075564049, "learning_rate": 7.678062829272107e-07, "loss": 0.0093, "step": 9376 }, { "epoch": 2.133560864618885, "grad_norm": 0.9794199826108704, "learning_rate": 7.677192938191599e-07, "loss": 0.0111, "step": 9377 }, { "epoch": 2.133788395904437, "grad_norm": 0.6293850966699488, "learning_rate": 7.676323017941882e-07, "loss": 0.0098, "step": 9378 }, { "epoch": 2.1340159271899886, "grad_norm": 1.644736601726369, "learning_rate": 7.675453068540733e-07, "loss": 0.0348, "step": 9379 }, { "epoch": 2.1342434584755403, "grad_norm": 1.0251249555513355, "learning_rate": 7.674583090005934e-07, "loss": 0.0166, "step": 9380 }, { "epoch": 2.134470989761092, "grad_norm": 0.4773574826145558, "learning_rate": 7.673713082355267e-07, "loss": 0.0041, "step": 9381 }, { "epoch": 2.134698521046644, "grad_norm": 1.3226375035547333, "learning_rate": 7.672843045606511e-07, "loss": 0.0196, "step": 9382 }, { "epoch": 2.1349260523321956, "grad_norm": 0.4583698966991122, "learning_rate": 7.671972979777448e-07, "loss": 0.0053, "step": 9383 }, { "epoch": 2.1351535836177473, "grad_norm": 0.7776463167696871, "learning_rate": 7.671102884885865e-07, "loss": 0.0181, "step": 9384 }, { "epoch": 2.135381114903299, "grad_norm": 1.2305453054299074, "learning_rate": 7.67023276094954e-07, "loss": 0.0112, "step": 9385 }, { "epoch": 2.135608646188851, "grad_norm": 0.48346899208308736, "learning_rate": 7.669362607986256e-07, "loss": 0.0053, "step": 9386 }, { "epoch": 2.1358361774744026, "grad_norm": 0.4860523191118012, "learning_rate": 7.668492426013802e-07, "loss": 0.0053, "step": 9387 }, { "epoch": 2.1360637087599543, "grad_norm": 0.9433358046702833, "learning_rate": 7.667622215049959e-07, "loss": 0.0123, "step": 9388 }, { "epoch": 2.136291240045506, "grad_norm": 0.7274294117541075, "learning_rate": 7.666751975112515e-07, "loss": 0.0095, "step": 9389 }, { "epoch": 2.136518771331058, "grad_norm": 1.0546542084991488, "learning_rate": 7.665881706219256e-07, "loss": 0.0233, "step": 9390 }, { "epoch": 2.1367463026166096, "grad_norm": 0.882509401051961, "learning_rate": 7.665011408387968e-07, "loss": 0.0102, "step": 9391 }, { "epoch": 2.136973833902162, "grad_norm": 0.5578384978853934, "learning_rate": 7.664141081636438e-07, "loss": 0.0079, "step": 9392 }, { "epoch": 2.137201365187713, "grad_norm": 0.3448317807599905, "learning_rate": 7.663270725982452e-07, "loss": 0.0021, "step": 9393 }, { "epoch": 2.1374288964732653, "grad_norm": 3.8326467239135615, "learning_rate": 7.662400341443804e-07, "loss": 0.031, "step": 9394 }, { "epoch": 2.137656427758817, "grad_norm": 0.3455373445440017, "learning_rate": 7.661529928038277e-07, "loss": 0.0031, "step": 9395 }, { "epoch": 2.137883959044369, "grad_norm": 1.7781400920219406, "learning_rate": 7.660659485783667e-07, "loss": 0.0192, "step": 9396 }, { "epoch": 2.1381114903299205, "grad_norm": 0.31969138424410454, "learning_rate": 7.659789014697758e-07, "loss": 0.0038, "step": 9397 }, { "epoch": 2.1383390216154723, "grad_norm": 0.9760789003130663, "learning_rate": 7.658918514798345e-07, "loss": 0.007, "step": 9398 }, { "epoch": 2.138566552901024, "grad_norm": 1.201389914321695, "learning_rate": 7.658047986103215e-07, "loss": 0.0175, "step": 9399 }, { "epoch": 2.138794084186576, "grad_norm": 0.6620998699159607, "learning_rate": 7.657177428630164e-07, "loss": 0.0122, "step": 9400 }, { "epoch": 2.1390216154721275, "grad_norm": 0.5768939443316081, "learning_rate": 7.656306842396985e-07, "loss": 0.0026, "step": 9401 }, { "epoch": 2.1392491467576793, "grad_norm": 0.4747361814852938, "learning_rate": 7.655436227421468e-07, "loss": 0.0087, "step": 9402 }, { "epoch": 2.139476678043231, "grad_norm": 4.4082218686791395, "learning_rate": 7.65456558372141e-07, "loss": 0.0753, "step": 9403 }, { "epoch": 2.139704209328783, "grad_norm": 0.6337172476139666, "learning_rate": 7.653694911314602e-07, "loss": 0.006, "step": 9404 }, { "epoch": 2.1399317406143346, "grad_norm": 0.7778276823461497, "learning_rate": 7.652824210218842e-07, "loss": 0.0044, "step": 9405 }, { "epoch": 2.1401592718998863, "grad_norm": 0.742826462321095, "learning_rate": 7.651953480451923e-07, "loss": 0.0047, "step": 9406 }, { "epoch": 2.140386803185438, "grad_norm": 0.6461920198683603, "learning_rate": 7.651082722031644e-07, "loss": 0.0101, "step": 9407 }, { "epoch": 2.14061433447099, "grad_norm": 0.4830927720119314, "learning_rate": 7.6502119349758e-07, "loss": 0.0058, "step": 9408 }, { "epoch": 2.1408418657565416, "grad_norm": 0.5529198464070699, "learning_rate": 7.649341119302188e-07, "loss": 0.0019, "step": 9409 }, { "epoch": 2.1410693970420933, "grad_norm": 0.39647907236481306, "learning_rate": 7.648470275028607e-07, "loss": 0.0037, "step": 9410 }, { "epoch": 2.141296928327645, "grad_norm": 0.6657325208626473, "learning_rate": 7.647599402172854e-07, "loss": 0.0104, "step": 9411 }, { "epoch": 2.141524459613197, "grad_norm": 2.7179501593018385, "learning_rate": 7.646728500752729e-07, "loss": 0.0165, "step": 9412 }, { "epoch": 2.1417519908987486, "grad_norm": 0.32904954642977885, "learning_rate": 7.645857570786029e-07, "loss": 0.0031, "step": 9413 }, { "epoch": 2.1419795221843003, "grad_norm": 0.5212837342866012, "learning_rate": 7.64498661229056e-07, "loss": 0.0118, "step": 9414 }, { "epoch": 2.142207053469852, "grad_norm": 0.43073017368918903, "learning_rate": 7.644115625284119e-07, "loss": 0.0035, "step": 9415 }, { "epoch": 2.142434584755404, "grad_norm": 0.822263994599402, "learning_rate": 7.643244609784506e-07, "loss": 0.0079, "step": 9416 }, { "epoch": 2.1426621160409556, "grad_norm": 0.27185301200030915, "learning_rate": 7.642373565809527e-07, "loss": 0.0019, "step": 9417 }, { "epoch": 2.1428896473265073, "grad_norm": 0.3921986137982507, "learning_rate": 7.641502493376981e-07, "loss": 0.0025, "step": 9418 }, { "epoch": 2.143117178612059, "grad_norm": 0.7397915172226301, "learning_rate": 7.640631392504673e-07, "loss": 0.0083, "step": 9419 }, { "epoch": 2.143344709897611, "grad_norm": 0.4658356579199857, "learning_rate": 7.639760263210405e-07, "loss": 0.0084, "step": 9420 }, { "epoch": 2.1435722411831626, "grad_norm": 0.7949755028655057, "learning_rate": 7.638889105511983e-07, "loss": 0.0085, "step": 9421 }, { "epoch": 2.1437997724687143, "grad_norm": 0.9932434767294664, "learning_rate": 7.638017919427212e-07, "loss": 0.0153, "step": 9422 }, { "epoch": 2.144027303754266, "grad_norm": 1.0447557791765516, "learning_rate": 7.637146704973897e-07, "loss": 0.0087, "step": 9423 }, { "epoch": 2.144254835039818, "grad_norm": 1.5602769173885682, "learning_rate": 7.636275462169843e-07, "loss": 0.0107, "step": 9424 }, { "epoch": 2.1444823663253696, "grad_norm": 0.8175983751576256, "learning_rate": 7.635404191032858e-07, "loss": 0.0078, "step": 9425 }, { "epoch": 2.1447098976109213, "grad_norm": 0.7162127814674661, "learning_rate": 7.634532891580748e-07, "loss": 0.0085, "step": 9426 }, { "epoch": 2.144937428896473, "grad_norm": 0.435753980256996, "learning_rate": 7.633661563831321e-07, "loss": 0.0063, "step": 9427 }, { "epoch": 2.145164960182025, "grad_norm": 0.4886704090763667, "learning_rate": 7.632790207802388e-07, "loss": 0.0023, "step": 9428 }, { "epoch": 2.1453924914675766, "grad_norm": 0.6109087044132886, "learning_rate": 7.631918823511751e-07, "loss": 0.007, "step": 9429 }, { "epoch": 2.1456200227531284, "grad_norm": 0.3862693252752087, "learning_rate": 7.631047410977227e-07, "loss": 0.004, "step": 9430 }, { "epoch": 2.1458475540386805, "grad_norm": 0.3916971112441679, "learning_rate": 7.630175970216625e-07, "loss": 0.0053, "step": 9431 }, { "epoch": 2.146075085324232, "grad_norm": 1.3442295458084064, "learning_rate": 7.629304501247751e-07, "loss": 0.0105, "step": 9432 }, { "epoch": 2.146302616609784, "grad_norm": 1.5551974523245196, "learning_rate": 7.628433004088419e-07, "loss": 0.0226, "step": 9433 }, { "epoch": 2.146530147895336, "grad_norm": 0.6491097255442313, "learning_rate": 7.627561478756443e-07, "loss": 0.0096, "step": 9434 }, { "epoch": 2.1467576791808876, "grad_norm": 0.28904704944060683, "learning_rate": 7.626689925269633e-07, "loss": 0.0013, "step": 9435 }, { "epoch": 2.1469852104664393, "grad_norm": 0.4286314484617513, "learning_rate": 7.625818343645799e-07, "loss": 0.0022, "step": 9436 }, { "epoch": 2.147212741751991, "grad_norm": 0.9002201977623425, "learning_rate": 7.624946733902762e-07, "loss": 0.0184, "step": 9437 }, { "epoch": 2.147440273037543, "grad_norm": 1.4108767985987805, "learning_rate": 7.624075096058329e-07, "loss": 0.0055, "step": 9438 }, { "epoch": 2.1476678043230946, "grad_norm": 0.5736247065532574, "learning_rate": 7.623203430130319e-07, "loss": 0.0116, "step": 9439 }, { "epoch": 2.1478953356086463, "grad_norm": 0.5929807473323365, "learning_rate": 7.622331736136546e-07, "loss": 0.0043, "step": 9440 }, { "epoch": 2.148122866894198, "grad_norm": 0.7999228166052934, "learning_rate": 7.621460014094825e-07, "loss": 0.0071, "step": 9441 }, { "epoch": 2.14835039817975, "grad_norm": 1.4416999673958641, "learning_rate": 7.620588264022973e-07, "loss": 0.0169, "step": 9442 }, { "epoch": 2.1485779294653016, "grad_norm": 0.7268189408978283, "learning_rate": 7.619716485938805e-07, "loss": 0.0148, "step": 9443 }, { "epoch": 2.1488054607508533, "grad_norm": 1.3340670521142381, "learning_rate": 7.618844679860144e-07, "loss": 0.0115, "step": 9444 }, { "epoch": 2.149032992036405, "grad_norm": 0.8489110923475581, "learning_rate": 7.617972845804802e-07, "loss": 0.007, "step": 9445 }, { "epoch": 2.149260523321957, "grad_norm": 0.33573283808619636, "learning_rate": 7.617100983790603e-07, "loss": 0.0021, "step": 9446 }, { "epoch": 2.1494880546075086, "grad_norm": 0.9083438426522049, "learning_rate": 7.616229093835361e-07, "loss": 0.0089, "step": 9447 }, { "epoch": 2.1497155858930603, "grad_norm": 0.907124212698418, "learning_rate": 7.6153571759569e-07, "loss": 0.0131, "step": 9448 }, { "epoch": 2.149943117178612, "grad_norm": 0.7957411004808628, "learning_rate": 7.61448523017304e-07, "loss": 0.0155, "step": 9449 }, { "epoch": 2.150170648464164, "grad_norm": 1.116334684654861, "learning_rate": 7.613613256501598e-07, "loss": 0.0064, "step": 9450 }, { "epoch": 2.1503981797497156, "grad_norm": 0.781080877759642, "learning_rate": 7.612741254960398e-07, "loss": 0.0048, "step": 9451 }, { "epoch": 2.1506257110352673, "grad_norm": 0.4434069545080247, "learning_rate": 7.611869225567266e-07, "loss": 0.0088, "step": 9452 }, { "epoch": 2.150853242320819, "grad_norm": 0.7341899066169435, "learning_rate": 7.610997168340019e-07, "loss": 0.0106, "step": 9453 }, { "epoch": 2.151080773606371, "grad_norm": 0.4168473038479732, "learning_rate": 7.610125083296482e-07, "loss": 0.0028, "step": 9454 }, { "epoch": 2.1513083048919226, "grad_norm": 0.5004702961294094, "learning_rate": 7.609252970454481e-07, "loss": 0.0072, "step": 9455 }, { "epoch": 2.1515358361774743, "grad_norm": 0.7117155550645661, "learning_rate": 7.608380829831838e-07, "loss": 0.0048, "step": 9456 }, { "epoch": 2.151763367463026, "grad_norm": 0.678480481404826, "learning_rate": 7.607508661446378e-07, "loss": 0.0074, "step": 9457 }, { "epoch": 2.151990898748578, "grad_norm": 21.208307356330284, "learning_rate": 7.606636465315927e-07, "loss": 0.105, "step": 9458 }, { "epoch": 2.1522184300341296, "grad_norm": 0.3955596626341846, "learning_rate": 7.605764241458313e-07, "loss": 0.0029, "step": 9459 }, { "epoch": 2.1524459613196814, "grad_norm": 0.4107454918314581, "learning_rate": 7.604891989891358e-07, "loss": 0.0039, "step": 9460 }, { "epoch": 2.152673492605233, "grad_norm": 0.3899303997658839, "learning_rate": 7.604019710632895e-07, "loss": 0.0024, "step": 9461 }, { "epoch": 2.152901023890785, "grad_norm": 0.7710664044298258, "learning_rate": 7.603147403700746e-07, "loss": 0.0088, "step": 9462 }, { "epoch": 2.1531285551763366, "grad_norm": 0.47473637556937476, "learning_rate": 7.602275069112742e-07, "loss": 0.0027, "step": 9463 }, { "epoch": 2.1533560864618884, "grad_norm": 0.6280469293683831, "learning_rate": 7.601402706886716e-07, "loss": 0.0118, "step": 9464 }, { "epoch": 2.15358361774744, "grad_norm": 2.528224730827772, "learning_rate": 7.60053031704049e-07, "loss": 0.0279, "step": 9465 }, { "epoch": 2.153811149032992, "grad_norm": 0.8247841083927588, "learning_rate": 7.5996578995919e-07, "loss": 0.0075, "step": 9466 }, { "epoch": 2.1540386803185436, "grad_norm": 0.5704115365180658, "learning_rate": 7.598785454558773e-07, "loss": 0.0052, "step": 9467 }, { "epoch": 2.1542662116040954, "grad_norm": 2.3222450091342335, "learning_rate": 7.597912981958943e-07, "loss": 0.0413, "step": 9468 }, { "epoch": 2.154493742889647, "grad_norm": 0.5098409311036662, "learning_rate": 7.597040481810239e-07, "loss": 0.0063, "step": 9469 }, { "epoch": 2.1547212741751993, "grad_norm": 1.2227569097380357, "learning_rate": 7.596167954130493e-07, "loss": 0.0288, "step": 9470 }, { "epoch": 2.1549488054607506, "grad_norm": 0.8736975235883835, "learning_rate": 7.595295398937541e-07, "loss": 0.0079, "step": 9471 }, { "epoch": 2.155176336746303, "grad_norm": 0.4426169456378669, "learning_rate": 7.594422816249217e-07, "loss": 0.0052, "step": 9472 }, { "epoch": 2.1554038680318546, "grad_norm": 0.7438309941974948, "learning_rate": 7.59355020608335e-07, "loss": 0.0122, "step": 9473 }, { "epoch": 2.1556313993174063, "grad_norm": 0.8106342731576935, "learning_rate": 7.592677568457778e-07, "loss": 0.0131, "step": 9474 }, { "epoch": 2.155858930602958, "grad_norm": 0.49358495814785197, "learning_rate": 7.591804903390336e-07, "loss": 0.0048, "step": 9475 }, { "epoch": 2.15608646188851, "grad_norm": 0.4796019135160445, "learning_rate": 7.590932210898859e-07, "loss": 0.0077, "step": 9476 }, { "epoch": 2.1563139931740616, "grad_norm": 1.0830714330161306, "learning_rate": 7.590059491001183e-07, "loss": 0.0048, "step": 9477 }, { "epoch": 2.1565415244596133, "grad_norm": 0.438982850407974, "learning_rate": 7.589186743715146e-07, "loss": 0.005, "step": 9478 }, { "epoch": 2.156769055745165, "grad_norm": 0.9225043810448496, "learning_rate": 7.588313969058584e-07, "loss": 0.01, "step": 9479 }, { "epoch": 2.156996587030717, "grad_norm": 1.43802230176175, "learning_rate": 7.587441167049335e-07, "loss": 0.0192, "step": 9480 }, { "epoch": 2.1572241183162686, "grad_norm": 0.6073054248035636, "learning_rate": 7.586568337705239e-07, "loss": 0.0082, "step": 9481 }, { "epoch": 2.1574516496018203, "grad_norm": 0.8121646376322118, "learning_rate": 7.585695481044133e-07, "loss": 0.0098, "step": 9482 }, { "epoch": 2.157679180887372, "grad_norm": 1.2052420742230894, "learning_rate": 7.584822597083859e-07, "loss": 0.0107, "step": 9483 }, { "epoch": 2.157906712172924, "grad_norm": 0.539829338418699, "learning_rate": 7.583949685842253e-07, "loss": 0.0086, "step": 9484 }, { "epoch": 2.1581342434584756, "grad_norm": 2.1410125457119964, "learning_rate": 7.583076747337162e-07, "loss": 0.0142, "step": 9485 }, { "epoch": 2.1583617747440274, "grad_norm": 1.045962862510021, "learning_rate": 7.582203781586422e-07, "loss": 0.0165, "step": 9486 }, { "epoch": 2.158589306029579, "grad_norm": 0.8260434410402252, "learning_rate": 7.581330788607875e-07, "loss": 0.0109, "step": 9487 }, { "epoch": 2.158816837315131, "grad_norm": 0.56108076832216, "learning_rate": 7.580457768419367e-07, "loss": 0.0061, "step": 9488 }, { "epoch": 2.1590443686006826, "grad_norm": 1.2999028206144865, "learning_rate": 7.579584721038738e-07, "loss": 0.0142, "step": 9489 }, { "epoch": 2.1592718998862344, "grad_norm": 0.9635014287836223, "learning_rate": 7.578711646483832e-07, "loss": 0.0115, "step": 9490 }, { "epoch": 2.159499431171786, "grad_norm": 0.8058367882164231, "learning_rate": 7.577838544772495e-07, "loss": 0.0123, "step": 9491 }, { "epoch": 2.159726962457338, "grad_norm": 1.5356278393841314, "learning_rate": 7.576965415922569e-07, "loss": 0.0188, "step": 9492 }, { "epoch": 2.1599544937428896, "grad_norm": 0.9017611836523003, "learning_rate": 7.576092259951899e-07, "loss": 0.0068, "step": 9493 }, { "epoch": 2.1601820250284414, "grad_norm": 1.0158966242457275, "learning_rate": 7.575219076878332e-07, "loss": 0.0098, "step": 9494 }, { "epoch": 2.160409556313993, "grad_norm": 1.1609277837198713, "learning_rate": 7.574345866719715e-07, "loss": 0.0136, "step": 9495 }, { "epoch": 2.160637087599545, "grad_norm": 1.0316834767383807, "learning_rate": 7.573472629493894e-07, "loss": 0.0051, "step": 9496 }, { "epoch": 2.1608646188850966, "grad_norm": 0.7475939929931336, "learning_rate": 7.572599365218716e-07, "loss": 0.0031, "step": 9497 }, { "epoch": 2.1610921501706484, "grad_norm": 1.674687281291756, "learning_rate": 7.571726073912031e-07, "loss": 0.0347, "step": 9498 }, { "epoch": 2.1613196814562, "grad_norm": 1.7016842706971795, "learning_rate": 7.570852755591683e-07, "loss": 0.0239, "step": 9499 }, { "epoch": 2.161547212741752, "grad_norm": 2.3793445926790495, "learning_rate": 7.569979410275525e-07, "loss": 0.0543, "step": 9500 }, { "epoch": 2.1617747440273036, "grad_norm": 0.42685115386991784, "learning_rate": 7.569106037981403e-07, "loss": 0.0025, "step": 9501 }, { "epoch": 2.1620022753128554, "grad_norm": 0.25006824516898746, "learning_rate": 7.568232638727173e-07, "loss": 0.001, "step": 9502 }, { "epoch": 2.162229806598407, "grad_norm": 0.5397028423357909, "learning_rate": 7.56735921253068e-07, "loss": 0.0052, "step": 9503 }, { "epoch": 2.162457337883959, "grad_norm": 1.272527147351978, "learning_rate": 7.566485759409778e-07, "loss": 0.0226, "step": 9504 }, { "epoch": 2.1626848691695106, "grad_norm": 1.2505588806487877, "learning_rate": 7.565612279382318e-07, "loss": 0.0278, "step": 9505 }, { "epoch": 2.1629124004550624, "grad_norm": 0.860761461786727, "learning_rate": 7.564738772466153e-07, "loss": 0.0241, "step": 9506 }, { "epoch": 2.163139931740614, "grad_norm": 1.0340484413917008, "learning_rate": 7.563865238679133e-07, "loss": 0.0153, "step": 9507 }, { "epoch": 2.163367463026166, "grad_norm": 1.1514085735061357, "learning_rate": 7.562991678039116e-07, "loss": 0.0129, "step": 9508 }, { "epoch": 2.163594994311718, "grad_norm": 0.9495706834279195, "learning_rate": 7.562118090563953e-07, "loss": 0.0123, "step": 9509 }, { "epoch": 2.1638225255972694, "grad_norm": 0.8809677222109957, "learning_rate": 7.5612444762715e-07, "loss": 0.0078, "step": 9510 }, { "epoch": 2.1640500568828216, "grad_norm": 1.538436546868351, "learning_rate": 7.560370835179611e-07, "loss": 0.0144, "step": 9511 }, { "epoch": 2.1642775881683733, "grad_norm": 0.9998239799209021, "learning_rate": 7.559497167306141e-07, "loss": 0.016, "step": 9512 }, { "epoch": 2.164505119453925, "grad_norm": 0.6613338635935855, "learning_rate": 7.558623472668948e-07, "loss": 0.0049, "step": 9513 }, { "epoch": 2.164732650739477, "grad_norm": 0.6066017285747133, "learning_rate": 7.557749751285887e-07, "loss": 0.0053, "step": 9514 }, { "epoch": 2.1649601820250286, "grad_norm": 1.151736591714483, "learning_rate": 7.556876003174816e-07, "loss": 0.027, "step": 9515 }, { "epoch": 2.1651877133105804, "grad_norm": 0.49550111870942964, "learning_rate": 7.556002228353595e-07, "loss": 0.0074, "step": 9516 }, { "epoch": 2.165415244596132, "grad_norm": 0.6363948637670657, "learning_rate": 7.555128426840078e-07, "loss": 0.0058, "step": 9517 }, { "epoch": 2.165642775881684, "grad_norm": 0.4948378226484785, "learning_rate": 7.554254598652127e-07, "loss": 0.0055, "step": 9518 }, { "epoch": 2.1658703071672356, "grad_norm": 1.452716090777511, "learning_rate": 7.553380743807602e-07, "loss": 0.0192, "step": 9519 }, { "epoch": 2.1660978384527874, "grad_norm": 0.8510160858709985, "learning_rate": 7.552506862324358e-07, "loss": 0.0184, "step": 9520 }, { "epoch": 2.166325369738339, "grad_norm": 0.489365539657168, "learning_rate": 7.551632954220263e-07, "loss": 0.0035, "step": 9521 }, { "epoch": 2.166552901023891, "grad_norm": 0.5232426370605646, "learning_rate": 7.550759019513173e-07, "loss": 0.01, "step": 9522 }, { "epoch": 2.1667804323094426, "grad_norm": 0.9718750730295778, "learning_rate": 7.54988505822095e-07, "loss": 0.0196, "step": 9523 }, { "epoch": 2.1670079635949944, "grad_norm": 1.2403730448401704, "learning_rate": 7.549011070361459e-07, "loss": 0.0251, "step": 9524 }, { "epoch": 2.167235494880546, "grad_norm": 0.6693444311484058, "learning_rate": 7.548137055952559e-07, "loss": 0.0066, "step": 9525 }, { "epoch": 2.167463026166098, "grad_norm": 0.22895710946983883, "learning_rate": 7.547263015012116e-07, "loss": 0.0017, "step": 9526 }, { "epoch": 2.1676905574516496, "grad_norm": 1.164643709332353, "learning_rate": 7.54638894755799e-07, "loss": 0.0193, "step": 9527 }, { "epoch": 2.1679180887372014, "grad_norm": 1.420130355190451, "learning_rate": 7.54551485360805e-07, "loss": 0.0135, "step": 9528 }, { "epoch": 2.168145620022753, "grad_norm": 0.7162802351482405, "learning_rate": 7.544640733180161e-07, "loss": 0.0068, "step": 9529 }, { "epoch": 2.168373151308305, "grad_norm": 0.28723169717464375, "learning_rate": 7.543766586292185e-07, "loss": 0.0037, "step": 9530 }, { "epoch": 2.1686006825938566, "grad_norm": 0.566716757776061, "learning_rate": 7.542892412961988e-07, "loss": 0.0073, "step": 9531 }, { "epoch": 2.1688282138794084, "grad_norm": 0.48710835462811136, "learning_rate": 7.54201821320744e-07, "loss": 0.0046, "step": 9532 }, { "epoch": 2.16905574516496, "grad_norm": 0.5591433451852652, "learning_rate": 7.541143987046406e-07, "loss": 0.0052, "step": 9533 }, { "epoch": 2.169283276450512, "grad_norm": 0.2464541059726593, "learning_rate": 7.540269734496751e-07, "loss": 0.002, "step": 9534 }, { "epoch": 2.1695108077360636, "grad_norm": 1.543625162402517, "learning_rate": 7.539395455576348e-07, "loss": 0.0269, "step": 9535 }, { "epoch": 2.1697383390216154, "grad_norm": 1.1466098187169464, "learning_rate": 7.538521150303063e-07, "loss": 0.0252, "step": 9536 }, { "epoch": 2.169965870307167, "grad_norm": 0.9776895216291299, "learning_rate": 7.537646818694764e-07, "loss": 0.016, "step": 9537 }, { "epoch": 2.170193401592719, "grad_norm": 0.8253760441414819, "learning_rate": 7.536772460769324e-07, "loss": 0.0146, "step": 9538 }, { "epoch": 2.1704209328782706, "grad_norm": 0.7606950356821949, "learning_rate": 7.535898076544611e-07, "loss": 0.0136, "step": 9539 }, { "epoch": 2.1706484641638224, "grad_norm": 0.38894234172403025, "learning_rate": 7.535023666038497e-07, "loss": 0.0035, "step": 9540 }, { "epoch": 2.170875995449374, "grad_norm": 2.2465203039467516, "learning_rate": 7.534149229268852e-07, "loss": 0.0208, "step": 9541 }, { "epoch": 2.171103526734926, "grad_norm": 0.6776393549469663, "learning_rate": 7.533274766253548e-07, "loss": 0.0087, "step": 9542 }, { "epoch": 2.1713310580204777, "grad_norm": 2.600125250146677, "learning_rate": 7.532400277010458e-07, "loss": 0.0262, "step": 9543 }, { "epoch": 2.1715585893060294, "grad_norm": 1.0368628318608313, "learning_rate": 7.531525761557454e-07, "loss": 0.0238, "step": 9544 }, { "epoch": 2.171786120591581, "grad_norm": 0.8869741215994206, "learning_rate": 7.530651219912413e-07, "loss": 0.0126, "step": 9545 }, { "epoch": 2.172013651877133, "grad_norm": 1.5295344443470758, "learning_rate": 7.529776652093204e-07, "loss": 0.0175, "step": 9546 }, { "epoch": 2.1722411831626847, "grad_norm": 0.5789423568425607, "learning_rate": 7.528902058117707e-07, "loss": 0.0107, "step": 9547 }, { "epoch": 2.172468714448237, "grad_norm": 3.4095737366223253, "learning_rate": 7.528027438003792e-07, "loss": 0.0236, "step": 9548 }, { "epoch": 2.172696245733788, "grad_norm": 0.4689578531811793, "learning_rate": 7.527152791769338e-07, "loss": 0.0133, "step": 9549 }, { "epoch": 2.1729237770193404, "grad_norm": 0.6528946872508574, "learning_rate": 7.526278119432219e-07, "loss": 0.0065, "step": 9550 }, { "epoch": 2.173151308304892, "grad_norm": 1.2003887222435774, "learning_rate": 7.52540342101031e-07, "loss": 0.0124, "step": 9551 }, { "epoch": 2.173378839590444, "grad_norm": 1.8213001244569549, "learning_rate": 7.524528696521495e-07, "loss": 0.0184, "step": 9552 }, { "epoch": 2.1736063708759956, "grad_norm": 0.8017969114131855, "learning_rate": 7.523653945983645e-07, "loss": 0.0044, "step": 9553 }, { "epoch": 2.1738339021615474, "grad_norm": 0.8195679399688243, "learning_rate": 7.522779169414643e-07, "loss": 0.0193, "step": 9554 }, { "epoch": 2.174061433447099, "grad_norm": 0.4250071114814642, "learning_rate": 7.521904366832365e-07, "loss": 0.0041, "step": 9555 }, { "epoch": 2.174288964732651, "grad_norm": 1.5407280672030532, "learning_rate": 7.521029538254692e-07, "loss": 0.0175, "step": 9556 }, { "epoch": 2.1745164960182026, "grad_norm": 0.5890809727558881, "learning_rate": 7.520154683699501e-07, "loss": 0.0055, "step": 9557 }, { "epoch": 2.1747440273037544, "grad_norm": 0.4782126783491611, "learning_rate": 7.519279803184676e-07, "loss": 0.0048, "step": 9558 }, { "epoch": 2.174971558589306, "grad_norm": 0.9263124914046664, "learning_rate": 7.518404896728096e-07, "loss": 0.0043, "step": 9559 }, { "epoch": 2.175199089874858, "grad_norm": 0.811010556777203, "learning_rate": 7.517529964347642e-07, "loss": 0.0111, "step": 9560 }, { "epoch": 2.1754266211604096, "grad_norm": 0.43401860315958946, "learning_rate": 7.516655006061198e-07, "loss": 0.0073, "step": 9561 }, { "epoch": 2.1756541524459614, "grad_norm": 0.8439321416931025, "learning_rate": 7.515780021886646e-07, "loss": 0.0104, "step": 9562 }, { "epoch": 2.175881683731513, "grad_norm": 0.7400634559362335, "learning_rate": 7.514905011841867e-07, "loss": 0.0027, "step": 9563 }, { "epoch": 2.176109215017065, "grad_norm": 0.2752754746882654, "learning_rate": 7.514029975944746e-07, "loss": 0.0036, "step": 9564 }, { "epoch": 2.1763367463026166, "grad_norm": 0.6620459171025077, "learning_rate": 7.513154914213168e-07, "loss": 0.0097, "step": 9565 }, { "epoch": 2.1765642775881684, "grad_norm": 0.44617721651979925, "learning_rate": 7.512279826665018e-07, "loss": 0.0049, "step": 9566 }, { "epoch": 2.17679180887372, "grad_norm": 1.2066031048324617, "learning_rate": 7.511404713318178e-07, "loss": 0.004, "step": 9567 }, { "epoch": 2.177019340159272, "grad_norm": 1.1304229490260347, "learning_rate": 7.510529574190537e-07, "loss": 0.0339, "step": 9568 }, { "epoch": 2.1772468714448237, "grad_norm": 0.4524987590201581, "learning_rate": 7.50965440929998e-07, "loss": 0.0029, "step": 9569 }, { "epoch": 2.1774744027303754, "grad_norm": 0.5350983992744186, "learning_rate": 7.508779218664395e-07, "loss": 0.0039, "step": 9570 }, { "epoch": 2.177701934015927, "grad_norm": 0.9750713017031654, "learning_rate": 7.507904002301665e-07, "loss": 0.0172, "step": 9571 }, { "epoch": 2.177929465301479, "grad_norm": 0.31495024248764425, "learning_rate": 7.507028760229683e-07, "loss": 0.002, "step": 9572 }, { "epoch": 2.1781569965870307, "grad_norm": 0.9604492360124036, "learning_rate": 7.506153492466337e-07, "loss": 0.0114, "step": 9573 }, { "epoch": 2.1783845278725824, "grad_norm": 0.7212474160748571, "learning_rate": 7.505278199029511e-07, "loss": 0.0068, "step": 9574 }, { "epoch": 2.178612059158134, "grad_norm": 0.8209618033231948, "learning_rate": 7.504402879937098e-07, "loss": 0.0052, "step": 9575 }, { "epoch": 2.178839590443686, "grad_norm": 0.6558409298307322, "learning_rate": 7.503527535206989e-07, "loss": 0.0065, "step": 9576 }, { "epoch": 2.1790671217292377, "grad_norm": 1.3196233115801024, "learning_rate": 7.502652164857072e-07, "loss": 0.0186, "step": 9577 }, { "epoch": 2.1792946530147894, "grad_norm": 0.48778107650934543, "learning_rate": 7.501776768905238e-07, "loss": 0.0039, "step": 9578 }, { "epoch": 2.179522184300341, "grad_norm": 0.6238318071136563, "learning_rate": 7.500901347369382e-07, "loss": 0.0102, "step": 9579 }, { "epoch": 2.179749715585893, "grad_norm": 0.6164717050241055, "learning_rate": 7.500025900267391e-07, "loss": 0.0048, "step": 9580 }, { "epoch": 2.1799772468714447, "grad_norm": 0.5548015201688632, "learning_rate": 7.49915042761716e-07, "loss": 0.0073, "step": 9581 }, { "epoch": 2.1802047781569964, "grad_norm": 1.247205981181567, "learning_rate": 7.498274929436583e-07, "loss": 0.0169, "step": 9582 }, { "epoch": 2.180432309442548, "grad_norm": 1.0737704832054575, "learning_rate": 7.497399405743552e-07, "loss": 0.0136, "step": 9583 }, { "epoch": 2.1806598407281, "grad_norm": 1.5342282126082314, "learning_rate": 7.49652385655596e-07, "loss": 0.0097, "step": 9584 }, { "epoch": 2.1808873720136517, "grad_norm": 1.0253123674011426, "learning_rate": 7.495648281891707e-07, "loss": 0.0074, "step": 9585 }, { "epoch": 2.1811149032992034, "grad_norm": 0.3825181504641467, "learning_rate": 7.494772681768683e-07, "loss": 0.0025, "step": 9586 }, { "epoch": 2.1813424345847556, "grad_norm": 0.4871828783572268, "learning_rate": 7.493897056204784e-07, "loss": 0.0079, "step": 9587 }, { "epoch": 2.181569965870307, "grad_norm": 0.651545542290053, "learning_rate": 7.493021405217907e-07, "loss": 0.0063, "step": 9588 }, { "epoch": 2.181797497155859, "grad_norm": 0.8304489261902582, "learning_rate": 7.49214572882595e-07, "loss": 0.0112, "step": 9589 }, { "epoch": 2.182025028441411, "grad_norm": 0.39866347770388444, "learning_rate": 7.49127002704681e-07, "loss": 0.0022, "step": 9590 }, { "epoch": 2.1822525597269626, "grad_norm": 0.7037762301674517, "learning_rate": 7.490394299898382e-07, "loss": 0.0031, "step": 9591 }, { "epoch": 2.1824800910125144, "grad_norm": 1.1093378758916597, "learning_rate": 7.489518547398568e-07, "loss": 0.0162, "step": 9592 }, { "epoch": 2.182707622298066, "grad_norm": 1.1760891436295915, "learning_rate": 7.488642769565264e-07, "loss": 0.007, "step": 9593 }, { "epoch": 2.182935153583618, "grad_norm": 0.5126123392999782, "learning_rate": 7.48776696641637e-07, "loss": 0.0037, "step": 9594 }, { "epoch": 2.1831626848691696, "grad_norm": 1.2561187293283704, "learning_rate": 7.486891137969786e-07, "loss": 0.0192, "step": 9595 }, { "epoch": 2.1833902161547214, "grad_norm": 0.4428997280141245, "learning_rate": 7.486015284243413e-07, "loss": 0.0043, "step": 9596 }, { "epoch": 2.183617747440273, "grad_norm": 1.2136340385801088, "learning_rate": 7.485139405255151e-07, "loss": 0.0182, "step": 9597 }, { "epoch": 2.183845278725825, "grad_norm": 0.7423522442041067, "learning_rate": 7.484263501022902e-07, "loss": 0.0101, "step": 9598 }, { "epoch": 2.1840728100113767, "grad_norm": 1.647640497082901, "learning_rate": 7.483387571564567e-07, "loss": 0.0212, "step": 9599 }, { "epoch": 2.1843003412969284, "grad_norm": 0.8536106447079324, "learning_rate": 7.48251161689805e-07, "loss": 0.0023, "step": 9600 }, { "epoch": 2.18452787258248, "grad_norm": 0.4079155494938698, "learning_rate": 7.48163563704125e-07, "loss": 0.0045, "step": 9601 }, { "epoch": 2.184755403868032, "grad_norm": 1.1308651387228579, "learning_rate": 7.480759632012074e-07, "loss": 0.0122, "step": 9602 }, { "epoch": 2.1849829351535837, "grad_norm": 0.6120958606576665, "learning_rate": 7.479883601828428e-07, "loss": 0.0026, "step": 9603 }, { "epoch": 2.1852104664391354, "grad_norm": 1.9195083990763984, "learning_rate": 7.479007546508211e-07, "loss": 0.0925, "step": 9604 }, { "epoch": 2.185437997724687, "grad_norm": 0.9168734226255825, "learning_rate": 7.478131466069331e-07, "loss": 0.0079, "step": 9605 }, { "epoch": 2.185665529010239, "grad_norm": 1.7360235232611294, "learning_rate": 7.477255360529695e-07, "loss": 0.0516, "step": 9606 }, { "epoch": 2.1858930602957907, "grad_norm": 0.7931409946825114, "learning_rate": 7.476379229907205e-07, "loss": 0.0179, "step": 9607 }, { "epoch": 2.1861205915813424, "grad_norm": 0.5513116615728468, "learning_rate": 7.475503074219769e-07, "loss": 0.0084, "step": 9608 }, { "epoch": 2.186348122866894, "grad_norm": 1.5527290776544047, "learning_rate": 7.474626893485295e-07, "loss": 0.0244, "step": 9609 }, { "epoch": 2.186575654152446, "grad_norm": 0.9065148771736532, "learning_rate": 7.473750687721692e-07, "loss": 0.0034, "step": 9610 }, { "epoch": 2.1868031854379977, "grad_norm": 1.1387030868166388, "learning_rate": 7.472874456946865e-07, "loss": 0.018, "step": 9611 }, { "epoch": 2.1870307167235494, "grad_norm": 0.9547637556667848, "learning_rate": 7.471998201178724e-07, "loss": 0.0125, "step": 9612 }, { "epoch": 2.187258248009101, "grad_norm": 0.8066593350658329, "learning_rate": 7.471121920435176e-07, "loss": 0.0095, "step": 9613 }, { "epoch": 2.187485779294653, "grad_norm": 0.6157773503488007, "learning_rate": 7.470245614734132e-07, "loss": 0.0068, "step": 9614 }, { "epoch": 2.1877133105802047, "grad_norm": 0.5660108008538516, "learning_rate": 7.469369284093504e-07, "loss": 0.0064, "step": 9615 }, { "epoch": 2.1879408418657564, "grad_norm": 0.45605242358242326, "learning_rate": 7.468492928531201e-07, "loss": 0.0041, "step": 9616 }, { "epoch": 2.188168373151308, "grad_norm": 1.0144865167191068, "learning_rate": 7.467616548065134e-07, "loss": 0.0103, "step": 9617 }, { "epoch": 2.18839590443686, "grad_norm": 1.3562826992877761, "learning_rate": 7.466740142713217e-07, "loss": 0.0272, "step": 9618 }, { "epoch": 2.1886234357224117, "grad_norm": 0.9701025438819655, "learning_rate": 7.465863712493357e-07, "loss": 0.0135, "step": 9619 }, { "epoch": 2.1888509670079634, "grad_norm": 1.306657040530877, "learning_rate": 7.46498725742347e-07, "loss": 0.0214, "step": 9620 }, { "epoch": 2.189078498293515, "grad_norm": 0.42960866613517307, "learning_rate": 7.464110777521467e-07, "loss": 0.003, "step": 9621 }, { "epoch": 2.189306029579067, "grad_norm": 0.665244392318043, "learning_rate": 7.463234272805265e-07, "loss": 0.0114, "step": 9622 }, { "epoch": 2.1895335608646187, "grad_norm": 0.6265393107396235, "learning_rate": 7.462357743292778e-07, "loss": 0.0103, "step": 9623 }, { "epoch": 2.1897610921501705, "grad_norm": 0.7142319456732945, "learning_rate": 7.461481189001917e-07, "loss": 0.0071, "step": 9624 }, { "epoch": 2.189988623435722, "grad_norm": 1.432775949720797, "learning_rate": 7.460604609950599e-07, "loss": 0.0115, "step": 9625 }, { "epoch": 2.1902161547212744, "grad_norm": 0.2710535011303526, "learning_rate": 7.459728006156741e-07, "loss": 0.0017, "step": 9626 }, { "epoch": 2.1904436860068257, "grad_norm": 0.5450555510465152, "learning_rate": 7.458851377638257e-07, "loss": 0.0071, "step": 9627 }, { "epoch": 2.190671217292378, "grad_norm": 0.7651487622447738, "learning_rate": 7.457974724413065e-07, "loss": 0.0146, "step": 9628 }, { "epoch": 2.1908987485779297, "grad_norm": 0.40725306256207244, "learning_rate": 7.457098046499084e-07, "loss": 0.0039, "step": 9629 }, { "epoch": 2.1911262798634814, "grad_norm": 0.8897519703656316, "learning_rate": 7.456221343914228e-07, "loss": 0.0119, "step": 9630 }, { "epoch": 2.191353811149033, "grad_norm": 0.7763470111164008, "learning_rate": 7.455344616676415e-07, "loss": 0.0081, "step": 9631 }, { "epoch": 2.191581342434585, "grad_norm": 0.9182052754974886, "learning_rate": 7.454467864803567e-07, "loss": 0.021, "step": 9632 }, { "epoch": 2.1918088737201367, "grad_norm": 0.5725793457523388, "learning_rate": 7.453591088313603e-07, "loss": 0.0042, "step": 9633 }, { "epoch": 2.1920364050056884, "grad_norm": 1.0484063564453674, "learning_rate": 7.45271428722444e-07, "loss": 0.0064, "step": 9634 }, { "epoch": 2.19226393629124, "grad_norm": 1.064052456490962, "learning_rate": 7.451837461553998e-07, "loss": 0.0082, "step": 9635 }, { "epoch": 2.192491467576792, "grad_norm": 0.5759445364609324, "learning_rate": 7.450960611320204e-07, "loss": 0.0049, "step": 9636 }, { "epoch": 2.1927189988623437, "grad_norm": 1.3167249572075435, "learning_rate": 7.450083736540972e-07, "loss": 0.0097, "step": 9637 }, { "epoch": 2.1929465301478954, "grad_norm": 0.42812996497137096, "learning_rate": 7.449206837234224e-07, "loss": 0.0043, "step": 9638 }, { "epoch": 2.193174061433447, "grad_norm": 0.47389532187756966, "learning_rate": 7.448329913417887e-07, "loss": 0.0021, "step": 9639 }, { "epoch": 2.193401592718999, "grad_norm": 0.5674565541782638, "learning_rate": 7.447452965109882e-07, "loss": 0.0106, "step": 9640 }, { "epoch": 2.1936291240045507, "grad_norm": 0.6660424990854488, "learning_rate": 7.446575992328128e-07, "loss": 0.0086, "step": 9641 }, { "epoch": 2.1938566552901024, "grad_norm": 0.20386544039544735, "learning_rate": 7.445698995090557e-07, "loss": 0.0013, "step": 9642 }, { "epoch": 2.194084186575654, "grad_norm": 0.2099220833545005, "learning_rate": 7.444821973415086e-07, "loss": 0.0017, "step": 9643 }, { "epoch": 2.194311717861206, "grad_norm": 0.3596174077194479, "learning_rate": 7.443944927319641e-07, "loss": 0.0023, "step": 9644 }, { "epoch": 2.1945392491467577, "grad_norm": 0.9224216606352855, "learning_rate": 7.443067856822149e-07, "loss": 0.0129, "step": 9645 }, { "epoch": 2.1947667804323094, "grad_norm": 1.1469534965029151, "learning_rate": 7.442190761940535e-07, "loss": 0.014, "step": 9646 }, { "epoch": 2.194994311717861, "grad_norm": 0.5576731804567137, "learning_rate": 7.441313642692726e-07, "loss": 0.0046, "step": 9647 }, { "epoch": 2.195221843003413, "grad_norm": 1.0981045949315773, "learning_rate": 7.440436499096647e-07, "loss": 0.0199, "step": 9648 }, { "epoch": 2.1954493742889647, "grad_norm": 1.0420832800323911, "learning_rate": 7.439559331170226e-07, "loss": 0.0128, "step": 9649 }, { "epoch": 2.1956769055745164, "grad_norm": 0.626981774480868, "learning_rate": 7.438682138931393e-07, "loss": 0.0099, "step": 9650 }, { "epoch": 2.195904436860068, "grad_norm": 1.5521547661201331, "learning_rate": 7.43780492239807e-07, "loss": 0.0145, "step": 9651 }, { "epoch": 2.19613196814562, "grad_norm": 1.6288646101627964, "learning_rate": 7.436927681588192e-07, "loss": 0.0144, "step": 9652 }, { "epoch": 2.1963594994311717, "grad_norm": 0.8653215956544341, "learning_rate": 7.436050416519687e-07, "loss": 0.0135, "step": 9653 }, { "epoch": 2.1965870307167235, "grad_norm": 1.0809108035517665, "learning_rate": 7.435173127210482e-07, "loss": 0.013, "step": 9654 }, { "epoch": 2.196814562002275, "grad_norm": 2.1424690805717157, "learning_rate": 7.43429581367851e-07, "loss": 0.0073, "step": 9655 }, { "epoch": 2.197042093287827, "grad_norm": 0.9652735610174711, "learning_rate": 7.4334184759417e-07, "loss": 0.0211, "step": 9656 }, { "epoch": 2.1972696245733787, "grad_norm": 0.5538362197707377, "learning_rate": 7.432541114017984e-07, "loss": 0.004, "step": 9657 }, { "epoch": 2.1974971558589305, "grad_norm": 0.8759362034094246, "learning_rate": 7.431663727925293e-07, "loss": 0.0132, "step": 9658 }, { "epoch": 2.197724687144482, "grad_norm": 0.5670103598613527, "learning_rate": 7.430786317681559e-07, "loss": 0.0078, "step": 9659 }, { "epoch": 2.197952218430034, "grad_norm": 0.9198394347608914, "learning_rate": 7.429908883304716e-07, "loss": 0.0124, "step": 9660 }, { "epoch": 2.1981797497155857, "grad_norm": 0.6520971346068454, "learning_rate": 7.429031424812697e-07, "loss": 0.009, "step": 9661 }, { "epoch": 2.1984072810011375, "grad_norm": 1.0629564601389152, "learning_rate": 7.428153942223433e-07, "loss": 0.0188, "step": 9662 }, { "epoch": 2.198634812286689, "grad_norm": 1.1199232548698799, "learning_rate": 7.427276435554861e-07, "loss": 0.0206, "step": 9663 }, { "epoch": 2.198862343572241, "grad_norm": 1.0829610534274123, "learning_rate": 7.426398904824916e-07, "loss": 0.0287, "step": 9664 }, { "epoch": 2.199089874857793, "grad_norm": 0.8314551580354805, "learning_rate": 7.425521350051529e-07, "loss": 0.012, "step": 9665 }, { "epoch": 2.1993174061433445, "grad_norm": 0.8582521213620955, "learning_rate": 7.42464377125264e-07, "loss": 0.0089, "step": 9666 }, { "epoch": 2.1995449374288967, "grad_norm": 1.5212309016840493, "learning_rate": 7.423766168446187e-07, "loss": 0.0084, "step": 9667 }, { "epoch": 2.1997724687144484, "grad_norm": 0.5467873056219017, "learning_rate": 7.422888541650097e-07, "loss": 0.0082, "step": 9668 }, { "epoch": 2.2, "grad_norm": 1.1352258144425396, "learning_rate": 7.422010890882317e-07, "loss": 0.0059, "step": 9669 }, { "epoch": 2.200227531285552, "grad_norm": 0.36437846592903533, "learning_rate": 7.421133216160781e-07, "loss": 0.0022, "step": 9670 }, { "epoch": 2.2004550625711037, "grad_norm": 0.5866144111825328, "learning_rate": 7.420255517503424e-07, "loss": 0.0062, "step": 9671 }, { "epoch": 2.2006825938566554, "grad_norm": 0.8754788877292402, "learning_rate": 7.41937779492819e-07, "loss": 0.0146, "step": 9672 }, { "epoch": 2.200910125142207, "grad_norm": 0.4171831071461107, "learning_rate": 7.418500048453016e-07, "loss": 0.005, "step": 9673 }, { "epoch": 2.201137656427759, "grad_norm": 0.549269081954464, "learning_rate": 7.417622278095838e-07, "loss": 0.0139, "step": 9674 }, { "epoch": 2.2013651877133107, "grad_norm": 1.2626057795914143, "learning_rate": 7.416744483874602e-07, "loss": 0.01, "step": 9675 }, { "epoch": 2.2015927189988624, "grad_norm": 0.7629721611373987, "learning_rate": 7.415866665807245e-07, "loss": 0.0164, "step": 9676 }, { "epoch": 2.201820250284414, "grad_norm": 0.7138412826063927, "learning_rate": 7.414988823911708e-07, "loss": 0.0121, "step": 9677 }, { "epoch": 2.202047781569966, "grad_norm": 0.7896205889847766, "learning_rate": 7.41411095820593e-07, "loss": 0.0088, "step": 9678 }, { "epoch": 2.2022753128555177, "grad_norm": 0.5642157705673803, "learning_rate": 7.41323306870786e-07, "loss": 0.0063, "step": 9679 }, { "epoch": 2.2025028441410694, "grad_norm": 0.9039809924753222, "learning_rate": 7.412355155435437e-07, "loss": 0.005, "step": 9680 }, { "epoch": 2.202730375426621, "grad_norm": 1.0135787829433691, "learning_rate": 7.411477218406602e-07, "loss": 0.0086, "step": 9681 }, { "epoch": 2.202957906712173, "grad_norm": 0.744895167811461, "learning_rate": 7.410599257639299e-07, "loss": 0.0053, "step": 9682 }, { "epoch": 2.2031854379977247, "grad_norm": 1.9278466629666653, "learning_rate": 7.409721273151473e-07, "loss": 0.0215, "step": 9683 }, { "epoch": 2.2034129692832765, "grad_norm": 0.7608888631413969, "learning_rate": 7.408843264961068e-07, "loss": 0.0112, "step": 9684 }, { "epoch": 2.203640500568828, "grad_norm": 0.24319250994984543, "learning_rate": 7.407965233086029e-07, "loss": 0.0019, "step": 9685 }, { "epoch": 2.20386803185438, "grad_norm": 0.10591754608419379, "learning_rate": 7.407087177544304e-07, "loss": 0.0007, "step": 9686 }, { "epoch": 2.2040955631399317, "grad_norm": 0.49157075978222076, "learning_rate": 7.406209098353834e-07, "loss": 0.0035, "step": 9687 }, { "epoch": 2.2043230944254835, "grad_norm": 1.062474610827862, "learning_rate": 7.405330995532566e-07, "loss": 0.0206, "step": 9688 }, { "epoch": 2.204550625711035, "grad_norm": 1.5601721722500297, "learning_rate": 7.40445286909845e-07, "loss": 0.035, "step": 9689 }, { "epoch": 2.204778156996587, "grad_norm": 0.8126677089149624, "learning_rate": 7.403574719069431e-07, "loss": 0.0063, "step": 9690 }, { "epoch": 2.2050056882821387, "grad_norm": 0.31967234686748364, "learning_rate": 7.402696545463458e-07, "loss": 0.0024, "step": 9691 }, { "epoch": 2.2052332195676905, "grad_norm": 0.6296830929918673, "learning_rate": 7.401818348298478e-07, "loss": 0.0053, "step": 9692 }, { "epoch": 2.2054607508532422, "grad_norm": 2.4668394767267454, "learning_rate": 7.400940127592441e-07, "loss": 0.0176, "step": 9693 }, { "epoch": 2.205688282138794, "grad_norm": 1.1958832285426688, "learning_rate": 7.400061883363297e-07, "loss": 0.0074, "step": 9694 }, { "epoch": 2.2059158134243457, "grad_norm": 0.4318878369099174, "learning_rate": 7.399183615628991e-07, "loss": 0.0021, "step": 9695 }, { "epoch": 2.2061433447098975, "grad_norm": 1.2109798418826194, "learning_rate": 7.398305324407479e-07, "loss": 0.009, "step": 9696 }, { "epoch": 2.2063708759954492, "grad_norm": 0.8625508430982195, "learning_rate": 7.397427009716709e-07, "loss": 0.0042, "step": 9697 }, { "epoch": 2.206598407281001, "grad_norm": 0.9448879693822796, "learning_rate": 7.396548671574632e-07, "loss": 0.0156, "step": 9698 }, { "epoch": 2.2068259385665527, "grad_norm": 0.6357788236890357, "learning_rate": 7.395670309999201e-07, "loss": 0.0075, "step": 9699 }, { "epoch": 2.2070534698521045, "grad_norm": 0.4122020439740377, "learning_rate": 7.394791925008366e-07, "loss": 0.0041, "step": 9700 }, { "epoch": 2.2072810011376562, "grad_norm": 0.5639709716365757, "learning_rate": 7.39391351662008e-07, "loss": 0.0075, "step": 9701 }, { "epoch": 2.207508532423208, "grad_norm": 0.9578828855651509, "learning_rate": 7.393035084852296e-07, "loss": 0.0087, "step": 9702 }, { "epoch": 2.2077360637087597, "grad_norm": 0.8353081392030337, "learning_rate": 7.39215662972297e-07, "loss": 0.0079, "step": 9703 }, { "epoch": 2.207963594994312, "grad_norm": 0.6967088628881625, "learning_rate": 7.391278151250053e-07, "loss": 0.0071, "step": 9704 }, { "epoch": 2.2081911262798632, "grad_norm": 0.43785772640606097, "learning_rate": 7.390399649451501e-07, "loss": 0.0047, "step": 9705 }, { "epoch": 2.2084186575654154, "grad_norm": 1.2611023380849133, "learning_rate": 7.389521124345271e-07, "loss": 0.0144, "step": 9706 }, { "epoch": 2.208646188850967, "grad_norm": 0.9869060087712334, "learning_rate": 7.388642575949315e-07, "loss": 0.0128, "step": 9707 }, { "epoch": 2.208873720136519, "grad_norm": 1.1398066210971305, "learning_rate": 7.387764004281588e-07, "loss": 0.0125, "step": 9708 }, { "epoch": 2.2091012514220707, "grad_norm": 0.5568227219345118, "learning_rate": 7.38688540936005e-07, "loss": 0.0073, "step": 9709 }, { "epoch": 2.2093287827076225, "grad_norm": 0.5467974468137079, "learning_rate": 7.386006791202656e-07, "loss": 0.0048, "step": 9710 }, { "epoch": 2.209556313993174, "grad_norm": 0.9673966550728507, "learning_rate": 7.385128149827364e-07, "loss": 0.0111, "step": 9711 }, { "epoch": 2.209783845278726, "grad_norm": 2.183643309598677, "learning_rate": 7.384249485252132e-07, "loss": 0.0245, "step": 9712 }, { "epoch": 2.2100113765642777, "grad_norm": 0.36609886258629293, "learning_rate": 7.383370797494918e-07, "loss": 0.0057, "step": 9713 }, { "epoch": 2.2102389078498295, "grad_norm": 0.55612498845999, "learning_rate": 7.382492086573679e-07, "loss": 0.0044, "step": 9714 }, { "epoch": 2.210466439135381, "grad_norm": 0.8549709760846276, "learning_rate": 7.381613352506376e-07, "loss": 0.0166, "step": 9715 }, { "epoch": 2.210693970420933, "grad_norm": 1.600095099597409, "learning_rate": 7.380734595310969e-07, "loss": 0.0145, "step": 9716 }, { "epoch": 2.2109215017064847, "grad_norm": 1.6694806248049556, "learning_rate": 7.37985581500542e-07, "loss": 0.0295, "step": 9717 }, { "epoch": 2.2111490329920365, "grad_norm": 0.9519877093467191, "learning_rate": 7.378977011607684e-07, "loss": 0.0212, "step": 9718 }, { "epoch": 2.211376564277588, "grad_norm": 0.2321442487803093, "learning_rate": 7.378098185135727e-07, "loss": 0.0011, "step": 9719 }, { "epoch": 2.21160409556314, "grad_norm": 0.9669976328820756, "learning_rate": 7.377219335607509e-07, "loss": 0.0113, "step": 9720 }, { "epoch": 2.2118316268486917, "grad_norm": 0.35874159162276587, "learning_rate": 7.376340463040993e-07, "loss": 0.0035, "step": 9721 }, { "epoch": 2.2120591581342435, "grad_norm": 0.4911321717522839, "learning_rate": 7.375461567454138e-07, "loss": 0.0055, "step": 9722 }, { "epoch": 2.2122866894197952, "grad_norm": 1.6567432890342024, "learning_rate": 7.374582648864912e-07, "loss": 0.0173, "step": 9723 }, { "epoch": 2.212514220705347, "grad_norm": 0.7746999797599043, "learning_rate": 7.373703707291277e-07, "loss": 0.0141, "step": 9724 }, { "epoch": 2.2127417519908987, "grad_norm": 1.4899640213951841, "learning_rate": 7.372824742751194e-07, "loss": 0.0118, "step": 9725 }, { "epoch": 2.2129692832764505, "grad_norm": 1.1306059510750244, "learning_rate": 7.37194575526263e-07, "loss": 0.0192, "step": 9726 }, { "epoch": 2.2131968145620022, "grad_norm": 0.6094722469976073, "learning_rate": 7.371066744843551e-07, "loss": 0.0086, "step": 9727 }, { "epoch": 2.213424345847554, "grad_norm": 0.4183731135562627, "learning_rate": 7.37018771151192e-07, "loss": 0.0023, "step": 9728 }, { "epoch": 2.2136518771331057, "grad_norm": 0.775796211052962, "learning_rate": 7.369308655285702e-07, "loss": 0.007, "step": 9729 }, { "epoch": 2.2138794084186575, "grad_norm": 0.9083986254705477, "learning_rate": 7.368429576182869e-07, "loss": 0.0107, "step": 9730 }, { "epoch": 2.2141069397042092, "grad_norm": 0.5339774729572225, "learning_rate": 7.367550474221381e-07, "loss": 0.006, "step": 9731 }, { "epoch": 2.214334470989761, "grad_norm": 0.6757592444800821, "learning_rate": 7.366671349419207e-07, "loss": 0.0091, "step": 9732 }, { "epoch": 2.2145620022753127, "grad_norm": 0.7867220166681209, "learning_rate": 7.365792201794318e-07, "loss": 0.0128, "step": 9733 }, { "epoch": 2.2147895335608645, "grad_norm": 0.8067592322250232, "learning_rate": 7.364913031364679e-07, "loss": 0.0125, "step": 9734 }, { "epoch": 2.2150170648464163, "grad_norm": 0.8112378359985258, "learning_rate": 7.364033838148258e-07, "loss": 0.0166, "step": 9735 }, { "epoch": 2.215244596131968, "grad_norm": 0.9469113449795432, "learning_rate": 7.363154622163029e-07, "loss": 0.0054, "step": 9736 }, { "epoch": 2.2154721274175198, "grad_norm": 1.00490074369384, "learning_rate": 7.362275383426956e-07, "loss": 0.0174, "step": 9737 }, { "epoch": 2.2156996587030715, "grad_norm": 1.0761337878453314, "learning_rate": 7.361396121958011e-07, "loss": 0.0117, "step": 9738 }, { "epoch": 2.2159271899886233, "grad_norm": 0.5956420441176925, "learning_rate": 7.360516837774165e-07, "loss": 0.0086, "step": 9739 }, { "epoch": 2.216154721274175, "grad_norm": 0.3459081577385382, "learning_rate": 7.359637530893389e-07, "loss": 0.0025, "step": 9740 }, { "epoch": 2.2163822525597268, "grad_norm": 0.6341278133011897, "learning_rate": 7.358758201333654e-07, "loss": 0.0057, "step": 9741 }, { "epoch": 2.2166097838452785, "grad_norm": 1.338725636079328, "learning_rate": 7.357878849112932e-07, "loss": 0.019, "step": 9742 }, { "epoch": 2.2168373151308307, "grad_norm": 0.7306342018789439, "learning_rate": 7.356999474249196e-07, "loss": 0.0044, "step": 9743 }, { "epoch": 2.217064846416382, "grad_norm": 0.5703587106869538, "learning_rate": 7.356120076760417e-07, "loss": 0.0048, "step": 9744 }, { "epoch": 2.217292377701934, "grad_norm": 0.6262614362856112, "learning_rate": 7.35524065666457e-07, "loss": 0.0053, "step": 9745 }, { "epoch": 2.217519908987486, "grad_norm": 0.5066663290483394, "learning_rate": 7.354361213979627e-07, "loss": 0.0071, "step": 9746 }, { "epoch": 2.2177474402730377, "grad_norm": 0.9215848985052789, "learning_rate": 7.353481748723565e-07, "loss": 0.0221, "step": 9747 }, { "epoch": 2.2179749715585895, "grad_norm": 0.33323939733761665, "learning_rate": 7.352602260914356e-07, "loss": 0.0026, "step": 9748 }, { "epoch": 2.218202502844141, "grad_norm": 0.6033511338392381, "learning_rate": 7.351722750569976e-07, "loss": 0.0054, "step": 9749 }, { "epoch": 2.218430034129693, "grad_norm": 0.7854914660549903, "learning_rate": 7.350843217708402e-07, "loss": 0.0059, "step": 9750 }, { "epoch": 2.2186575654152447, "grad_norm": 0.9021074580889269, "learning_rate": 7.349963662347608e-07, "loss": 0.0164, "step": 9751 }, { "epoch": 2.2188850967007965, "grad_norm": 0.8693317117604361, "learning_rate": 7.349084084505569e-07, "loss": 0.0146, "step": 9752 }, { "epoch": 2.2191126279863482, "grad_norm": 0.7937928809056105, "learning_rate": 7.348204484200267e-07, "loss": 0.0129, "step": 9753 }, { "epoch": 2.2193401592719, "grad_norm": 0.30519990948993614, "learning_rate": 7.347324861449676e-07, "loss": 0.0034, "step": 9754 }, { "epoch": 2.2195676905574517, "grad_norm": 0.4962820337660195, "learning_rate": 7.346445216271773e-07, "loss": 0.0094, "step": 9755 }, { "epoch": 2.2197952218430035, "grad_norm": 0.6728902306719448, "learning_rate": 7.34556554868454e-07, "loss": 0.0063, "step": 9756 }, { "epoch": 2.2200227531285552, "grad_norm": 0.5476631883349763, "learning_rate": 7.344685858705952e-07, "loss": 0.0089, "step": 9757 }, { "epoch": 2.220250284414107, "grad_norm": 0.6789322134548239, "learning_rate": 7.343806146353991e-07, "loss": 0.0135, "step": 9758 }, { "epoch": 2.2204778156996587, "grad_norm": 1.327768648701586, "learning_rate": 7.342926411646634e-07, "loss": 0.0146, "step": 9759 }, { "epoch": 2.2207053469852105, "grad_norm": 0.7078627325170037, "learning_rate": 7.342046654601864e-07, "loss": 0.0122, "step": 9760 }, { "epoch": 2.2209328782707622, "grad_norm": 0.41115602276630314, "learning_rate": 7.341166875237661e-07, "loss": 0.0025, "step": 9761 }, { "epoch": 2.221160409556314, "grad_norm": 0.7821378111405003, "learning_rate": 7.340287073572004e-07, "loss": 0.0051, "step": 9762 }, { "epoch": 2.2213879408418657, "grad_norm": 0.7857378323477028, "learning_rate": 7.339407249622878e-07, "loss": 0.0107, "step": 9763 }, { "epoch": 2.2216154721274175, "grad_norm": 0.855086007831799, "learning_rate": 7.33852740340826e-07, "loss": 0.0071, "step": 9764 }, { "epoch": 2.2218430034129693, "grad_norm": 0.6590632272539326, "learning_rate": 7.337647534946137e-07, "loss": 0.0203, "step": 9765 }, { "epoch": 2.222070534698521, "grad_norm": 1.1095541097579438, "learning_rate": 7.33676764425449e-07, "loss": 0.0103, "step": 9766 }, { "epoch": 2.2222980659840728, "grad_norm": 0.3227748470963595, "learning_rate": 7.335887731351303e-07, "loss": 0.0016, "step": 9767 }, { "epoch": 2.2225255972696245, "grad_norm": 0.6449957560967259, "learning_rate": 7.33500779625456e-07, "loss": 0.0125, "step": 9768 }, { "epoch": 2.2227531285551763, "grad_norm": 0.5534057876566227, "learning_rate": 7.334127838982244e-07, "loss": 0.0057, "step": 9769 }, { "epoch": 2.222980659840728, "grad_norm": 1.1213144565575701, "learning_rate": 7.333247859552343e-07, "loss": 0.0237, "step": 9770 }, { "epoch": 2.2232081911262798, "grad_norm": 0.8275619816698846, "learning_rate": 7.332367857982836e-07, "loss": 0.0067, "step": 9771 }, { "epoch": 2.2234357224118315, "grad_norm": 0.8588347708509323, "learning_rate": 7.331487834291712e-07, "loss": 0.0031, "step": 9772 }, { "epoch": 2.2236632536973833, "grad_norm": 0.3094536562946835, "learning_rate": 7.33060778849696e-07, "loss": 0.0019, "step": 9773 }, { "epoch": 2.223890784982935, "grad_norm": 0.8073760403098668, "learning_rate": 7.329727720616564e-07, "loss": 0.0048, "step": 9774 }, { "epoch": 2.2241183162684868, "grad_norm": 1.2548787501600978, "learning_rate": 7.328847630668508e-07, "loss": 0.0192, "step": 9775 }, { "epoch": 2.2243458475540385, "grad_norm": 0.7128602785656115, "learning_rate": 7.327967518670784e-07, "loss": 0.0137, "step": 9776 }, { "epoch": 2.2245733788395903, "grad_norm": 0.4251278673353657, "learning_rate": 7.327087384641378e-07, "loss": 0.0047, "step": 9777 }, { "epoch": 2.224800910125142, "grad_norm": 0.46802974253945123, "learning_rate": 7.326207228598278e-07, "loss": 0.0045, "step": 9778 }, { "epoch": 2.225028441410694, "grad_norm": 0.8979014112760015, "learning_rate": 7.325327050559473e-07, "loss": 0.0112, "step": 9779 }, { "epoch": 2.2252559726962455, "grad_norm": 0.6449635573539817, "learning_rate": 7.324446850542954e-07, "loss": 0.0077, "step": 9780 }, { "epoch": 2.2254835039817973, "grad_norm": 0.8246198874106694, "learning_rate": 7.32356662856671e-07, "loss": 0.0112, "step": 9781 }, { "epoch": 2.2257110352673495, "grad_norm": 1.2500208791308247, "learning_rate": 7.322686384648727e-07, "loss": 0.0149, "step": 9782 }, { "epoch": 2.225938566552901, "grad_norm": 1.2583446061023784, "learning_rate": 7.321806118807e-07, "loss": 0.0129, "step": 9783 }, { "epoch": 2.226166097838453, "grad_norm": 0.9198656547797969, "learning_rate": 7.320925831059519e-07, "loss": 0.0076, "step": 9784 }, { "epoch": 2.2263936291240047, "grad_norm": 0.4272487565003964, "learning_rate": 7.320045521424277e-07, "loss": 0.006, "step": 9785 }, { "epoch": 2.2266211604095565, "grad_norm": 1.0344296558168884, "learning_rate": 7.31916518991926e-07, "loss": 0.0092, "step": 9786 }, { "epoch": 2.2268486916951082, "grad_norm": 1.6360792104942652, "learning_rate": 7.318284836562469e-07, "loss": 0.0207, "step": 9787 }, { "epoch": 2.22707622298066, "grad_norm": 1.0966744290252408, "learning_rate": 7.31740446137189e-07, "loss": 0.006, "step": 9788 }, { "epoch": 2.2273037542662117, "grad_norm": 1.3749354643682972, "learning_rate": 7.31652406436552e-07, "loss": 0.0258, "step": 9789 }, { "epoch": 2.2275312855517635, "grad_norm": 0.47025859751517013, "learning_rate": 7.315643645561351e-07, "loss": 0.0059, "step": 9790 }, { "epoch": 2.2277588168373152, "grad_norm": 0.651972060161316, "learning_rate": 7.314763204977376e-07, "loss": 0.007, "step": 9791 }, { "epoch": 2.227986348122867, "grad_norm": 1.700188115563667, "learning_rate": 7.313882742631594e-07, "loss": 0.0123, "step": 9792 }, { "epoch": 2.2282138794084188, "grad_norm": 0.7057590972401023, "learning_rate": 7.313002258541994e-07, "loss": 0.0051, "step": 9793 }, { "epoch": 2.2284414106939705, "grad_norm": 0.7493382724211597, "learning_rate": 7.312121752726577e-07, "loss": 0.0088, "step": 9794 }, { "epoch": 2.2286689419795223, "grad_norm": 1.2755977608794125, "learning_rate": 7.311241225203336e-07, "loss": 0.0091, "step": 9795 }, { "epoch": 2.228896473265074, "grad_norm": 0.8875300782255566, "learning_rate": 7.310360675990266e-07, "loss": 0.0127, "step": 9796 }, { "epoch": 2.2291240045506258, "grad_norm": 0.4464116210139732, "learning_rate": 7.309480105105368e-07, "loss": 0.0045, "step": 9797 }, { "epoch": 2.2293515358361775, "grad_norm": 0.41302439996092777, "learning_rate": 7.308599512566636e-07, "loss": 0.0043, "step": 9798 }, { "epoch": 2.2295790671217293, "grad_norm": 0.42317329442010415, "learning_rate": 7.30771889839207e-07, "loss": 0.0032, "step": 9799 }, { "epoch": 2.229806598407281, "grad_norm": 0.9520388291511624, "learning_rate": 7.306838262599666e-07, "loss": 0.0161, "step": 9800 }, { "epoch": 2.2300341296928328, "grad_norm": 1.0791193206115408, "learning_rate": 7.305957605207423e-07, "loss": 0.0136, "step": 9801 }, { "epoch": 2.2302616609783845, "grad_norm": 1.0202899053623313, "learning_rate": 7.30507692623334e-07, "loss": 0.0136, "step": 9802 }, { "epoch": 2.2304891922639363, "grad_norm": 1.02029317561812, "learning_rate": 7.304196225695417e-07, "loss": 0.0114, "step": 9803 }, { "epoch": 2.230716723549488, "grad_norm": 0.938555627060815, "learning_rate": 7.303315503611654e-07, "loss": 0.0179, "step": 9804 }, { "epoch": 2.2309442548350398, "grad_norm": 1.2107991875616646, "learning_rate": 7.302434760000053e-07, "loss": 0.0189, "step": 9805 }, { "epoch": 2.2311717861205915, "grad_norm": 0.7699707256139708, "learning_rate": 7.301553994878613e-07, "loss": 0.0136, "step": 9806 }, { "epoch": 2.2313993174061433, "grad_norm": 0.7647305051147182, "learning_rate": 7.300673208265332e-07, "loss": 0.0039, "step": 9807 }, { "epoch": 2.231626848691695, "grad_norm": 0.8948089189331243, "learning_rate": 7.299792400178219e-07, "loss": 0.01, "step": 9808 }, { "epoch": 2.231854379977247, "grad_norm": 0.5862296396402799, "learning_rate": 7.298911570635267e-07, "loss": 0.0046, "step": 9809 }, { "epoch": 2.2320819112627985, "grad_norm": 0.4150383305633549, "learning_rate": 7.298030719654487e-07, "loss": 0.0056, "step": 9810 }, { "epoch": 2.2323094425483503, "grad_norm": 0.3408274369550502, "learning_rate": 7.297149847253877e-07, "loss": 0.003, "step": 9811 }, { "epoch": 2.232536973833902, "grad_norm": 0.8281706591502016, "learning_rate": 7.296268953451443e-07, "loss": 0.0164, "step": 9812 }, { "epoch": 2.232764505119454, "grad_norm": 0.8156682184149247, "learning_rate": 7.295388038265188e-07, "loss": 0.0117, "step": 9813 }, { "epoch": 2.2329920364050055, "grad_norm": 0.31356092314798434, "learning_rate": 7.294507101713115e-07, "loss": 0.0026, "step": 9814 }, { "epoch": 2.2332195676905573, "grad_norm": 0.1722784825797177, "learning_rate": 7.29362614381323e-07, "loss": 0.0007, "step": 9815 }, { "epoch": 2.233447098976109, "grad_norm": 0.9121187741998779, "learning_rate": 7.292745164583537e-07, "loss": 0.0071, "step": 9816 }, { "epoch": 2.233674630261661, "grad_norm": 0.4238101403118168, "learning_rate": 7.291864164042042e-07, "loss": 0.0042, "step": 9817 }, { "epoch": 2.2339021615472126, "grad_norm": 1.3971696239193727, "learning_rate": 7.290983142206756e-07, "loss": 0.0062, "step": 9818 }, { "epoch": 2.2341296928327643, "grad_norm": 2.0004624861085603, "learning_rate": 7.290102099095676e-07, "loss": 0.0171, "step": 9819 }, { "epoch": 2.234357224118316, "grad_norm": 0.7587978882187041, "learning_rate": 7.289221034726816e-07, "loss": 0.0126, "step": 9820 }, { "epoch": 2.2345847554038683, "grad_norm": 1.5659537091049416, "learning_rate": 7.288339949118182e-07, "loss": 0.034, "step": 9821 }, { "epoch": 2.23481228668942, "grad_norm": 0.5574965430835777, "learning_rate": 7.287458842287781e-07, "loss": 0.0121, "step": 9822 }, { "epoch": 2.2350398179749718, "grad_norm": 0.8150748717339137, "learning_rate": 7.286577714253619e-07, "loss": 0.0055, "step": 9823 }, { "epoch": 2.2352673492605235, "grad_norm": 0.9893593702832831, "learning_rate": 7.285696565033711e-07, "loss": 0.013, "step": 9824 }, { "epoch": 2.2354948805460753, "grad_norm": 0.7106957623380815, "learning_rate": 7.284815394646058e-07, "loss": 0.0133, "step": 9825 }, { "epoch": 2.235722411831627, "grad_norm": 0.7503666419101801, "learning_rate": 7.283934203108675e-07, "loss": 0.0083, "step": 9826 }, { "epoch": 2.2359499431171788, "grad_norm": 1.5961350157212673, "learning_rate": 7.283052990439571e-07, "loss": 0.0203, "step": 9827 }, { "epoch": 2.2361774744027305, "grad_norm": 0.9131961315726698, "learning_rate": 7.282171756656756e-07, "loss": 0.0104, "step": 9828 }, { "epoch": 2.2364050056882823, "grad_norm": 0.7092513159299336, "learning_rate": 7.281290501778238e-07, "loss": 0.0083, "step": 9829 }, { "epoch": 2.236632536973834, "grad_norm": 0.6068866460357687, "learning_rate": 7.280409225822033e-07, "loss": 0.0045, "step": 9830 }, { "epoch": 2.2368600682593858, "grad_norm": 1.1164599055298168, "learning_rate": 7.279527928806152e-07, "loss": 0.0176, "step": 9831 }, { "epoch": 2.2370875995449375, "grad_norm": 1.7549284881303862, "learning_rate": 7.278646610748602e-07, "loss": 0.0042, "step": 9832 }, { "epoch": 2.2373151308304893, "grad_norm": 1.2772923637454094, "learning_rate": 7.277765271667402e-07, "loss": 0.0107, "step": 9833 }, { "epoch": 2.237542662116041, "grad_norm": 1.1558772857109452, "learning_rate": 7.276883911580561e-07, "loss": 0.0143, "step": 9834 }, { "epoch": 2.2377701934015928, "grad_norm": 0.13811763035516314, "learning_rate": 7.276002530506094e-07, "loss": 0.0008, "step": 9835 }, { "epoch": 2.2379977246871445, "grad_norm": 1.3838590962790442, "learning_rate": 7.275121128462012e-07, "loss": 0.0155, "step": 9836 }, { "epoch": 2.2382252559726963, "grad_norm": 0.5462770499918175, "learning_rate": 7.274239705466336e-07, "loss": 0.0032, "step": 9837 }, { "epoch": 2.238452787258248, "grad_norm": 0.4650289070672557, "learning_rate": 7.273358261537074e-07, "loss": 0.0077, "step": 9838 }, { "epoch": 2.2386803185438, "grad_norm": 0.7486677476056879, "learning_rate": 7.272476796692242e-07, "loss": 0.006, "step": 9839 }, { "epoch": 2.2389078498293515, "grad_norm": 1.240466182892842, "learning_rate": 7.271595310949858e-07, "loss": 0.0184, "step": 9840 }, { "epoch": 2.2391353811149033, "grad_norm": 1.198743237576172, "learning_rate": 7.270713804327937e-07, "loss": 0.0101, "step": 9841 }, { "epoch": 2.239362912400455, "grad_norm": 0.5698215052417472, "learning_rate": 7.269832276844494e-07, "loss": 0.0054, "step": 9842 }, { "epoch": 2.239590443686007, "grad_norm": 1.056366075940522, "learning_rate": 7.26895072851755e-07, "loss": 0.0158, "step": 9843 }, { "epoch": 2.2398179749715585, "grad_norm": 0.9334759081557071, "learning_rate": 7.268069159365117e-07, "loss": 0.0127, "step": 9844 }, { "epoch": 2.2400455062571103, "grad_norm": 0.3535598005006818, "learning_rate": 7.267187569405215e-07, "loss": 0.0006, "step": 9845 }, { "epoch": 2.240273037542662, "grad_norm": 1.6242627835730075, "learning_rate": 7.266305958655861e-07, "loss": 0.0142, "step": 9846 }, { "epoch": 2.240500568828214, "grad_norm": 0.8513253296568062, "learning_rate": 7.265424327135077e-07, "loss": 0.0057, "step": 9847 }, { "epoch": 2.2407281001137656, "grad_norm": 0.7318184998351438, "learning_rate": 7.264542674860878e-07, "loss": 0.0121, "step": 9848 }, { "epoch": 2.2409556313993173, "grad_norm": 2.563118781437132, "learning_rate": 7.263661001851284e-07, "loss": 0.0207, "step": 9849 }, { "epoch": 2.241183162684869, "grad_norm": 0.8512064646495209, "learning_rate": 7.262779308124317e-07, "loss": 0.0146, "step": 9850 }, { "epoch": 2.241410693970421, "grad_norm": 4.806450743698874, "learning_rate": 7.261897593697995e-07, "loss": 0.0623, "step": 9851 }, { "epoch": 2.2416382252559726, "grad_norm": 1.0142721854804766, "learning_rate": 7.26101585859034e-07, "loss": 0.0125, "step": 9852 }, { "epoch": 2.2418657565415243, "grad_norm": 0.8079928288463585, "learning_rate": 7.26013410281937e-07, "loss": 0.0112, "step": 9853 }, { "epoch": 2.242093287827076, "grad_norm": 0.7888005296122781, "learning_rate": 7.259252326403111e-07, "loss": 0.0047, "step": 9854 }, { "epoch": 2.242320819112628, "grad_norm": 2.24849494075429, "learning_rate": 7.258370529359583e-07, "loss": 0.0158, "step": 9855 }, { "epoch": 2.2425483503981796, "grad_norm": 0.6208958590234988, "learning_rate": 7.257488711706808e-07, "loss": 0.0079, "step": 9856 }, { "epoch": 2.2427758816837313, "grad_norm": 0.9012489777215701, "learning_rate": 7.256606873462808e-07, "loss": 0.0092, "step": 9857 }, { "epoch": 2.243003412969283, "grad_norm": 0.7591352408814183, "learning_rate": 7.255725014645608e-07, "loss": 0.0073, "step": 9858 }, { "epoch": 2.243230944254835, "grad_norm": 0.3850374073560092, "learning_rate": 7.254843135273229e-07, "loss": 0.0037, "step": 9859 }, { "epoch": 2.243458475540387, "grad_norm": 29.04233693615321, "learning_rate": 7.253961235363699e-07, "loss": 0.2345, "step": 9860 }, { "epoch": 2.2436860068259388, "grad_norm": 0.7503943638738817, "learning_rate": 7.253079314935038e-07, "loss": 0.0082, "step": 9861 }, { "epoch": 2.2439135381114905, "grad_norm": 0.6280314704325309, "learning_rate": 7.252197374005273e-07, "loss": 0.0033, "step": 9862 }, { "epoch": 2.2441410693970423, "grad_norm": 0.877531951440151, "learning_rate": 7.251315412592431e-07, "loss": 0.0146, "step": 9863 }, { "epoch": 2.244368600682594, "grad_norm": 1.9113251453059674, "learning_rate": 7.250433430714534e-07, "loss": 0.0267, "step": 9864 }, { "epoch": 2.244596131968146, "grad_norm": 1.5745364735804175, "learning_rate": 7.249551428389612e-07, "loss": 0.0246, "step": 9865 }, { "epoch": 2.2448236632536975, "grad_norm": 0.4839943673425523, "learning_rate": 7.248669405635686e-07, "loss": 0.0058, "step": 9866 }, { "epoch": 2.2450511945392493, "grad_norm": 0.7177519233243725, "learning_rate": 7.247787362470789e-07, "loss": 0.0088, "step": 9867 }, { "epoch": 2.245278725824801, "grad_norm": 1.415176869636246, "learning_rate": 7.246905298912945e-07, "loss": 0.016, "step": 9868 }, { "epoch": 2.245506257110353, "grad_norm": 0.6967365816363168, "learning_rate": 7.246023214980183e-07, "loss": 0.0134, "step": 9869 }, { "epoch": 2.2457337883959045, "grad_norm": 1.0011183121512415, "learning_rate": 7.245141110690529e-07, "loss": 0.0101, "step": 9870 }, { "epoch": 2.2459613196814563, "grad_norm": 0.9370592883686928, "learning_rate": 7.244258986062015e-07, "loss": 0.0093, "step": 9871 }, { "epoch": 2.246188850967008, "grad_norm": 0.6341452038820068, "learning_rate": 7.243376841112668e-07, "loss": 0.0164, "step": 9872 }, { "epoch": 2.24641638225256, "grad_norm": 1.3667640257675806, "learning_rate": 7.242494675860515e-07, "loss": 0.0284, "step": 9873 }, { "epoch": 2.2466439135381115, "grad_norm": 0.8021917566369299, "learning_rate": 7.241612490323591e-07, "loss": 0.0146, "step": 9874 }, { "epoch": 2.2468714448236633, "grad_norm": 0.8883651217319947, "learning_rate": 7.240730284519924e-07, "loss": 0.009, "step": 9875 }, { "epoch": 2.247098976109215, "grad_norm": 2.4725039536714735, "learning_rate": 7.239848058467544e-07, "loss": 0.0698, "step": 9876 }, { "epoch": 2.247326507394767, "grad_norm": 1.4231309129974479, "learning_rate": 7.238965812184482e-07, "loss": 0.0114, "step": 9877 }, { "epoch": 2.2475540386803186, "grad_norm": 1.3068513930464432, "learning_rate": 7.23808354568877e-07, "loss": 0.0209, "step": 9878 }, { "epoch": 2.2477815699658703, "grad_norm": 1.0823713349372646, "learning_rate": 7.23720125899844e-07, "loss": 0.0202, "step": 9879 }, { "epoch": 2.248009101251422, "grad_norm": 0.5500138347976872, "learning_rate": 7.236318952131524e-07, "loss": 0.0046, "step": 9880 }, { "epoch": 2.248236632536974, "grad_norm": 0.37886665958519683, "learning_rate": 7.235436625106057e-07, "loss": 0.005, "step": 9881 }, { "epoch": 2.2484641638225256, "grad_norm": 0.3946397254837047, "learning_rate": 7.234554277940067e-07, "loss": 0.0048, "step": 9882 }, { "epoch": 2.2486916951080773, "grad_norm": 0.3939158732248495, "learning_rate": 7.233671910651592e-07, "loss": 0.0033, "step": 9883 }, { "epoch": 2.248919226393629, "grad_norm": 0.8027707642637251, "learning_rate": 7.232789523258665e-07, "loss": 0.0154, "step": 9884 }, { "epoch": 2.249146757679181, "grad_norm": 1.937385341864222, "learning_rate": 7.23190711577932e-07, "loss": 0.0064, "step": 9885 }, { "epoch": 2.2493742889647326, "grad_norm": 0.6952305635914994, "learning_rate": 7.23102468823159e-07, "loss": 0.008, "step": 9886 }, { "epoch": 2.2496018202502843, "grad_norm": 1.6963099035620435, "learning_rate": 7.230142240633515e-07, "loss": 0.0236, "step": 9887 }, { "epoch": 2.249829351535836, "grad_norm": 1.4712782075428819, "learning_rate": 7.229259773003128e-07, "loss": 0.0067, "step": 9888 }, { "epoch": 2.250056882821388, "grad_norm": 1.4019973115143842, "learning_rate": 7.228377285358461e-07, "loss": 0.035, "step": 9889 }, { "epoch": 2.2502844141069396, "grad_norm": 0.8709636448921553, "learning_rate": 7.227494777717555e-07, "loss": 0.0216, "step": 9890 }, { "epoch": 2.2505119453924913, "grad_norm": 0.7050296491753442, "learning_rate": 7.226612250098449e-07, "loss": 0.0078, "step": 9891 }, { "epoch": 2.250739476678043, "grad_norm": 0.9119132940087451, "learning_rate": 7.225729702519174e-07, "loss": 0.0081, "step": 9892 }, { "epoch": 2.250967007963595, "grad_norm": 1.0967216519794032, "learning_rate": 7.224847134997772e-07, "loss": 0.0165, "step": 9893 }, { "epoch": 2.2511945392491466, "grad_norm": 3.341890355154279, "learning_rate": 7.223964547552281e-07, "loss": 0.042, "step": 9894 }, { "epoch": 2.2514220705346983, "grad_norm": 0.815166521141191, "learning_rate": 7.223081940200738e-07, "loss": 0.0118, "step": 9895 }, { "epoch": 2.25164960182025, "grad_norm": 0.7029474873706334, "learning_rate": 7.22219931296118e-07, "loss": 0.0091, "step": 9896 }, { "epoch": 2.2518771331058023, "grad_norm": 0.4637350032709855, "learning_rate": 7.22131666585165e-07, "loss": 0.0049, "step": 9897 }, { "epoch": 2.2521046643913536, "grad_norm": 0.8241535144472323, "learning_rate": 7.220433998890188e-07, "loss": 0.0112, "step": 9898 }, { "epoch": 2.252332195676906, "grad_norm": 0.20136236788487277, "learning_rate": 7.21955131209483e-07, "loss": 0.0011, "step": 9899 }, { "epoch": 2.252559726962457, "grad_norm": 2.2955301241086192, "learning_rate": 7.21866860548362e-07, "loss": 0.09, "step": 9900 }, { "epoch": 2.2527872582480093, "grad_norm": 0.5101507391858183, "learning_rate": 7.217785879074597e-07, "loss": 0.0075, "step": 9901 }, { "epoch": 2.253014789533561, "grad_norm": 1.111182269507313, "learning_rate": 7.216903132885803e-07, "loss": 0.0163, "step": 9902 }, { "epoch": 2.253242320819113, "grad_norm": 1.06050499482352, "learning_rate": 7.216020366935279e-07, "loss": 0.01, "step": 9903 }, { "epoch": 2.2534698521046646, "grad_norm": 1.1864788126580075, "learning_rate": 7.215137581241071e-07, "loss": 0.0178, "step": 9904 }, { "epoch": 2.2536973833902163, "grad_norm": 0.19975498662253313, "learning_rate": 7.214254775821216e-07, "loss": 0.0016, "step": 9905 }, { "epoch": 2.253924914675768, "grad_norm": 0.9805863537396342, "learning_rate": 7.213371950693759e-07, "loss": 0.016, "step": 9906 }, { "epoch": 2.25415244596132, "grad_norm": 0.5856181787788934, "learning_rate": 7.212489105876745e-07, "loss": 0.004, "step": 9907 }, { "epoch": 2.2543799772468716, "grad_norm": 0.2733027188406292, "learning_rate": 7.211606241388217e-07, "loss": 0.0018, "step": 9908 }, { "epoch": 2.2546075085324233, "grad_norm": 0.8297275694957768, "learning_rate": 7.210723357246216e-07, "loss": 0.0073, "step": 9909 }, { "epoch": 2.254835039817975, "grad_norm": 1.4004828171199504, "learning_rate": 7.20984045346879e-07, "loss": 0.0118, "step": 9910 }, { "epoch": 2.255062571103527, "grad_norm": 0.9866359770683436, "learning_rate": 7.208957530073983e-07, "loss": 0.0146, "step": 9911 }, { "epoch": 2.2552901023890786, "grad_norm": 0.4227327466029483, "learning_rate": 7.208074587079841e-07, "loss": 0.0039, "step": 9912 }, { "epoch": 2.2555176336746303, "grad_norm": 0.4973724222704189, "learning_rate": 7.207191624504409e-07, "loss": 0.0086, "step": 9913 }, { "epoch": 2.255745164960182, "grad_norm": 1.229368217025756, "learning_rate": 7.206308642365733e-07, "loss": 0.0071, "step": 9914 }, { "epoch": 2.255972696245734, "grad_norm": 0.6723357562159281, "learning_rate": 7.20542564068186e-07, "loss": 0.0085, "step": 9915 }, { "epoch": 2.2562002275312856, "grad_norm": 1.0110097193814493, "learning_rate": 7.204542619470837e-07, "loss": 0.0183, "step": 9916 }, { "epoch": 2.2564277588168373, "grad_norm": 0.8974325972903089, "learning_rate": 7.203659578750709e-07, "loss": 0.0197, "step": 9917 }, { "epoch": 2.256655290102389, "grad_norm": 0.394039393446717, "learning_rate": 7.202776518539527e-07, "loss": 0.0025, "step": 9918 }, { "epoch": 2.256882821387941, "grad_norm": 0.4378212666472128, "learning_rate": 7.20189343885534e-07, "loss": 0.0031, "step": 9919 }, { "epoch": 2.2571103526734926, "grad_norm": 1.841281636521212, "learning_rate": 7.201010339716191e-07, "loss": 0.0246, "step": 9920 }, { "epoch": 2.2573378839590443, "grad_norm": 0.39720051181356536, "learning_rate": 7.200127221140134e-07, "loss": 0.003, "step": 9921 }, { "epoch": 2.257565415244596, "grad_norm": 0.5699912064843484, "learning_rate": 7.199244083145217e-07, "loss": 0.006, "step": 9922 }, { "epoch": 2.257792946530148, "grad_norm": 1.1717119799885392, "learning_rate": 7.198360925749488e-07, "loss": 0.0049, "step": 9923 }, { "epoch": 2.2580204778156996, "grad_norm": 0.9444660239647007, "learning_rate": 7.197477748971e-07, "loss": 0.0081, "step": 9924 }, { "epoch": 2.2582480091012513, "grad_norm": 0.9901137476612132, "learning_rate": 7.196594552827803e-07, "loss": 0.0068, "step": 9925 }, { "epoch": 2.258475540386803, "grad_norm": 0.30968324441541073, "learning_rate": 7.195711337337943e-07, "loss": 0.0011, "step": 9926 }, { "epoch": 2.258703071672355, "grad_norm": 0.8325270530872885, "learning_rate": 7.194828102519479e-07, "loss": 0.0071, "step": 9927 }, { "epoch": 2.2589306029579066, "grad_norm": 0.19973341876397938, "learning_rate": 7.193944848390458e-07, "loss": 0.0011, "step": 9928 }, { "epoch": 2.2591581342434583, "grad_norm": 0.4684436667323531, "learning_rate": 7.193061574968932e-07, "loss": 0.0038, "step": 9929 }, { "epoch": 2.25938566552901, "grad_norm": 1.3140170788234653, "learning_rate": 7.192178282272955e-07, "loss": 0.0173, "step": 9930 }, { "epoch": 2.259613196814562, "grad_norm": 0.7982829634078386, "learning_rate": 7.191294970320581e-07, "loss": 0.0125, "step": 9931 }, { "epoch": 2.2598407281001136, "grad_norm": 0.4916575563622796, "learning_rate": 7.19041163912986e-07, "loss": 0.0032, "step": 9932 }, { "epoch": 2.2600682593856654, "grad_norm": 0.6790168057763974, "learning_rate": 7.189528288718846e-07, "loss": 0.0043, "step": 9933 }, { "epoch": 2.260295790671217, "grad_norm": 0.710603316218467, "learning_rate": 7.188644919105597e-07, "loss": 0.0109, "step": 9934 }, { "epoch": 2.260523321956769, "grad_norm": 0.5574884681160212, "learning_rate": 7.187761530308164e-07, "loss": 0.0056, "step": 9935 }, { "epoch": 2.260750853242321, "grad_norm": 0.15545496798330738, "learning_rate": 7.186878122344602e-07, "loss": 0.0008, "step": 9936 }, { "epoch": 2.2609783845278724, "grad_norm": 1.0354566137465173, "learning_rate": 7.185994695232967e-07, "loss": 0.0194, "step": 9937 }, { "epoch": 2.2612059158134246, "grad_norm": 1.225411334353908, "learning_rate": 7.185111248991318e-07, "loss": 0.0101, "step": 9938 }, { "epoch": 2.261433447098976, "grad_norm": 1.0853192029390262, "learning_rate": 7.184227783637705e-07, "loss": 0.0132, "step": 9939 }, { "epoch": 2.261660978384528, "grad_norm": 0.997677365905451, "learning_rate": 7.183344299190186e-07, "loss": 0.0106, "step": 9940 }, { "epoch": 2.26188850967008, "grad_norm": 1.0530305107415374, "learning_rate": 7.182460795666821e-07, "loss": 0.0058, "step": 9941 }, { "epoch": 2.2621160409556316, "grad_norm": 0.5645944290761391, "learning_rate": 7.181577273085663e-07, "loss": 0.0028, "step": 9942 }, { "epoch": 2.2623435722411833, "grad_norm": 0.9816035838856915, "learning_rate": 7.180693731464773e-07, "loss": 0.0067, "step": 9943 }, { "epoch": 2.262571103526735, "grad_norm": 1.1825829184757581, "learning_rate": 7.179810170822208e-07, "loss": 0.0172, "step": 9944 }, { "epoch": 2.262798634812287, "grad_norm": 1.3190733706115991, "learning_rate": 7.178926591176025e-07, "loss": 0.0113, "step": 9945 }, { "epoch": 2.2630261660978386, "grad_norm": 0.41854947979742385, "learning_rate": 7.178042992544284e-07, "loss": 0.0037, "step": 9946 }, { "epoch": 2.2632536973833903, "grad_norm": 1.0135387984521642, "learning_rate": 7.177159374945043e-07, "loss": 0.0149, "step": 9947 }, { "epoch": 2.263481228668942, "grad_norm": 1.3368543905703725, "learning_rate": 7.176275738396363e-07, "loss": 0.0214, "step": 9948 }, { "epoch": 2.263708759954494, "grad_norm": 0.2887938541541053, "learning_rate": 7.175392082916305e-07, "loss": 0.0023, "step": 9949 }, { "epoch": 2.2639362912400456, "grad_norm": 1.15649587646679, "learning_rate": 7.174508408522926e-07, "loss": 0.0218, "step": 9950 }, { "epoch": 2.2641638225255973, "grad_norm": 1.0800510285096194, "learning_rate": 7.173624715234288e-07, "loss": 0.0147, "step": 9951 }, { "epoch": 2.264391353811149, "grad_norm": 0.6160351854072771, "learning_rate": 7.172741003068454e-07, "loss": 0.0024, "step": 9952 }, { "epoch": 2.264618885096701, "grad_norm": 0.46827063444077116, "learning_rate": 7.171857272043481e-07, "loss": 0.0028, "step": 9953 }, { "epoch": 2.2648464163822526, "grad_norm": 0.5611835041131369, "learning_rate": 7.170973522177435e-07, "loss": 0.0025, "step": 9954 }, { "epoch": 2.2650739476678043, "grad_norm": 0.48161928840126583, "learning_rate": 7.170089753488378e-07, "loss": 0.0038, "step": 9955 }, { "epoch": 2.265301478953356, "grad_norm": 1.2060593955214094, "learning_rate": 7.169205965994371e-07, "loss": 0.0152, "step": 9956 }, { "epoch": 2.265529010238908, "grad_norm": 0.5609727284015287, "learning_rate": 7.168322159713477e-07, "loss": 0.007, "step": 9957 }, { "epoch": 2.2657565415244596, "grad_norm": 0.9866672053615072, "learning_rate": 7.16743833466376e-07, "loss": 0.0048, "step": 9958 }, { "epoch": 2.2659840728100114, "grad_norm": 0.37810180326163845, "learning_rate": 7.166554490863283e-07, "loss": 0.0039, "step": 9959 }, { "epoch": 2.266211604095563, "grad_norm": 0.1938911177720205, "learning_rate": 7.165670628330112e-07, "loss": 0.0013, "step": 9960 }, { "epoch": 2.266439135381115, "grad_norm": 0.4583894174535528, "learning_rate": 7.16478674708231e-07, "loss": 0.003, "step": 9961 }, { "epoch": 2.2666666666666666, "grad_norm": 1.3040987284773864, "learning_rate": 7.163902847137942e-07, "loss": 0.0091, "step": 9962 }, { "epoch": 2.2668941979522184, "grad_norm": 0.7712497233194445, "learning_rate": 7.163018928515074e-07, "loss": 0.0123, "step": 9963 }, { "epoch": 2.26712172923777, "grad_norm": 1.3301068153214746, "learning_rate": 7.16213499123177e-07, "loss": 0.014, "step": 9964 }, { "epoch": 2.267349260523322, "grad_norm": 2.8241366237585894, "learning_rate": 7.161251035306099e-07, "loss": 0.0132, "step": 9965 }, { "epoch": 2.2675767918088736, "grad_norm": 0.801120085816114, "learning_rate": 7.160367060756125e-07, "loss": 0.0088, "step": 9966 }, { "epoch": 2.2678043230944254, "grad_norm": 0.9044674800670061, "learning_rate": 7.159483067599913e-07, "loss": 0.0144, "step": 9967 }, { "epoch": 2.268031854379977, "grad_norm": 0.6123545477064796, "learning_rate": 7.158599055855536e-07, "loss": 0.007, "step": 9968 }, { "epoch": 2.268259385665529, "grad_norm": 0.819322434683557, "learning_rate": 7.157715025541059e-07, "loss": 0.0158, "step": 9969 }, { "epoch": 2.2684869169510806, "grad_norm": 0.7630562241053435, "learning_rate": 7.156830976674547e-07, "loss": 0.0108, "step": 9970 }, { "epoch": 2.2687144482366324, "grad_norm": 0.9506425770060405, "learning_rate": 7.155946909274071e-07, "loss": 0.0143, "step": 9971 }, { "epoch": 2.268941979522184, "grad_norm": 0.8054458430852172, "learning_rate": 7.1550628233577e-07, "loss": 0.0123, "step": 9972 }, { "epoch": 2.269169510807736, "grad_norm": 0.7467245350913407, "learning_rate": 7.154178718943502e-07, "loss": 0.0089, "step": 9973 }, { "epoch": 2.2693970420932876, "grad_norm": 0.378769776387463, "learning_rate": 7.153294596049547e-07, "loss": 0.0021, "step": 9974 }, { "epoch": 2.26962457337884, "grad_norm": 0.71241206876713, "learning_rate": 7.152410454693905e-07, "loss": 0.0035, "step": 9975 }, { "epoch": 2.269852104664391, "grad_norm": 0.7202281998843869, "learning_rate": 7.151526294894646e-07, "loss": 0.0053, "step": 9976 }, { "epoch": 2.2700796359499433, "grad_norm": 1.0591483680969398, "learning_rate": 7.150642116669839e-07, "loss": 0.0138, "step": 9977 }, { "epoch": 2.2703071672354946, "grad_norm": 1.2565068895872433, "learning_rate": 7.149757920037558e-07, "loss": 0.0067, "step": 9978 }, { "epoch": 2.270534698521047, "grad_norm": 1.9560005314266655, "learning_rate": 7.148873705015875e-07, "loss": 0.0256, "step": 9979 }, { "epoch": 2.2707622298065986, "grad_norm": 1.2639553498840532, "learning_rate": 7.147989471622856e-07, "loss": 0.0113, "step": 9980 }, { "epoch": 2.2709897610921503, "grad_norm": 0.8022394009151615, "learning_rate": 7.147105219876578e-07, "loss": 0.0106, "step": 9981 }, { "epoch": 2.271217292377702, "grad_norm": 1.279081961574699, "learning_rate": 7.146220949795114e-07, "loss": 0.0081, "step": 9982 }, { "epoch": 2.271444823663254, "grad_norm": 0.4727614962938117, "learning_rate": 7.145336661396532e-07, "loss": 0.0082, "step": 9983 }, { "epoch": 2.2716723549488056, "grad_norm": 1.579355647620252, "learning_rate": 7.144452354698911e-07, "loss": 0.0167, "step": 9984 }, { "epoch": 2.2718998862343573, "grad_norm": 0.3479168390911897, "learning_rate": 7.143568029720321e-07, "loss": 0.0033, "step": 9985 }, { "epoch": 2.272127417519909, "grad_norm": 0.38954071165498017, "learning_rate": 7.142683686478838e-07, "loss": 0.0027, "step": 9986 }, { "epoch": 2.272354948805461, "grad_norm": 1.5444954312199928, "learning_rate": 7.141799324992532e-07, "loss": 0.0369, "step": 9987 }, { "epoch": 2.2725824800910126, "grad_norm": 1.2367218560853073, "learning_rate": 7.140914945279486e-07, "loss": 0.0103, "step": 9988 }, { "epoch": 2.2728100113765644, "grad_norm": 1.8050517650851405, "learning_rate": 7.140030547357768e-07, "loss": 0.0247, "step": 9989 }, { "epoch": 2.273037542662116, "grad_norm": 1.1044644885041117, "learning_rate": 7.139146131245453e-07, "loss": 0.0082, "step": 9990 }, { "epoch": 2.273265073947668, "grad_norm": 0.7605022836195167, "learning_rate": 7.138261696960624e-07, "loss": 0.0074, "step": 9991 }, { "epoch": 2.2734926052332196, "grad_norm": 0.4662295929763949, "learning_rate": 7.137377244521348e-07, "loss": 0.0038, "step": 9992 }, { "epoch": 2.2737201365187714, "grad_norm": 0.4265516268164441, "learning_rate": 7.136492773945711e-07, "loss": 0.0072, "step": 9993 }, { "epoch": 2.273947667804323, "grad_norm": 0.9847251672394649, "learning_rate": 7.135608285251782e-07, "loss": 0.0192, "step": 9994 }, { "epoch": 2.274175199089875, "grad_norm": 0.7080968199697603, "learning_rate": 7.134723778457643e-07, "loss": 0.0201, "step": 9995 }, { "epoch": 2.2744027303754266, "grad_norm": 0.5598944897085955, "learning_rate": 7.13383925358137e-07, "loss": 0.0104, "step": 9996 }, { "epoch": 2.2746302616609784, "grad_norm": 0.45357930323991125, "learning_rate": 7.13295471064104e-07, "loss": 0.0037, "step": 9997 }, { "epoch": 2.27485779294653, "grad_norm": 0.6606402633592999, "learning_rate": 7.132070149654734e-07, "loss": 0.01, "step": 9998 }, { "epoch": 2.275085324232082, "grad_norm": 0.6040652589663185, "learning_rate": 7.131185570640529e-07, "loss": 0.0082, "step": 9999 }, { "epoch": 2.2753128555176336, "grad_norm": 0.48361786905756926, "learning_rate": 7.130300973616506e-07, "loss": 0.0096, "step": 10000 }, { "epoch": 2.2755403868031854, "grad_norm": 0.9217641953439821, "learning_rate": 7.129416358600742e-07, "loss": 0.0105, "step": 10001 }, { "epoch": 2.275767918088737, "grad_norm": 0.7915611281638778, "learning_rate": 7.12853172561132e-07, "loss": 0.0157, "step": 10002 }, { "epoch": 2.275995449374289, "grad_norm": 0.4006305306799819, "learning_rate": 7.127647074666316e-07, "loss": 0.0035, "step": 10003 }, { "epoch": 2.2762229806598406, "grad_norm": 0.6599102444683717, "learning_rate": 7.126762405783813e-07, "loss": 0.0072, "step": 10004 }, { "epoch": 2.2764505119453924, "grad_norm": 0.4386346150502673, "learning_rate": 7.125877718981894e-07, "loss": 0.0031, "step": 10005 }, { "epoch": 2.276678043230944, "grad_norm": 0.8895049122922013, "learning_rate": 7.124993014278639e-07, "loss": 0.0076, "step": 10006 }, { "epoch": 2.276905574516496, "grad_norm": 1.949633130222309, "learning_rate": 7.124108291692128e-07, "loss": 0.0208, "step": 10007 }, { "epoch": 2.2771331058020476, "grad_norm": 1.5142702596420026, "learning_rate": 7.123223551240445e-07, "loss": 0.034, "step": 10008 }, { "epoch": 2.2773606370875994, "grad_norm": 0.23044368180313446, "learning_rate": 7.122338792941671e-07, "loss": 0.0009, "step": 10009 }, { "epoch": 2.277588168373151, "grad_norm": 0.4780865092319979, "learning_rate": 7.121454016813889e-07, "loss": 0.0027, "step": 10010 }, { "epoch": 2.277815699658703, "grad_norm": 1.0151956937970705, "learning_rate": 7.120569222875184e-07, "loss": 0.0146, "step": 10011 }, { "epoch": 2.2780432309442546, "grad_norm": 1.0654937922067822, "learning_rate": 7.119684411143638e-07, "loss": 0.0173, "step": 10012 }, { "epoch": 2.2782707622298064, "grad_norm": 0.883401079623509, "learning_rate": 7.118799581637336e-07, "loss": 0.0164, "step": 10013 }, { "epoch": 2.2784982935153586, "grad_norm": 0.887390364713064, "learning_rate": 7.117914734374362e-07, "loss": 0.0067, "step": 10014 }, { "epoch": 2.27872582480091, "grad_norm": 0.3933398139009879, "learning_rate": 7.1170298693728e-07, "loss": 0.0026, "step": 10015 }, { "epoch": 2.278953356086462, "grad_norm": 1.9168196349046818, "learning_rate": 7.116144986650736e-07, "loss": 0.0111, "step": 10016 }, { "epoch": 2.2791808873720134, "grad_norm": 0.6502155931457713, "learning_rate": 7.115260086226253e-07, "loss": 0.0101, "step": 10017 }, { "epoch": 2.2794084186575656, "grad_norm": 1.4156317497899846, "learning_rate": 7.114375168117439e-07, "loss": 0.0234, "step": 10018 }, { "epoch": 2.2796359499431174, "grad_norm": 1.5539585486409775, "learning_rate": 7.113490232342381e-07, "loss": 0.0088, "step": 10019 }, { "epoch": 2.279863481228669, "grad_norm": 0.6150713350429942, "learning_rate": 7.112605278919163e-07, "loss": 0.0093, "step": 10020 }, { "epoch": 2.280091012514221, "grad_norm": 1.8471733426092465, "learning_rate": 7.111720307865872e-07, "loss": 0.0144, "step": 10021 }, { "epoch": 2.2803185437997726, "grad_norm": 0.6624382926463065, "learning_rate": 7.110835319200598e-07, "loss": 0.0078, "step": 10022 }, { "epoch": 2.2805460750853244, "grad_norm": 1.510100730103115, "learning_rate": 7.109950312941426e-07, "loss": 0.0256, "step": 10023 }, { "epoch": 2.280773606370876, "grad_norm": 0.5726783888687142, "learning_rate": 7.109065289106443e-07, "loss": 0.0076, "step": 10024 }, { "epoch": 2.281001137656428, "grad_norm": 0.8368819129223598, "learning_rate": 7.10818024771374e-07, "loss": 0.0036, "step": 10025 }, { "epoch": 2.2812286689419796, "grad_norm": 0.557860067095149, "learning_rate": 7.107295188781406e-07, "loss": 0.0107, "step": 10026 }, { "epoch": 2.2814562002275314, "grad_norm": 0.8697909466284374, "learning_rate": 7.106410112327526e-07, "loss": 0.0044, "step": 10027 }, { "epoch": 2.281683731513083, "grad_norm": 0.5414159230107753, "learning_rate": 7.105525018370193e-07, "loss": 0.0062, "step": 10028 }, { "epoch": 2.281911262798635, "grad_norm": 0.5059306646032439, "learning_rate": 7.104639906927495e-07, "loss": 0.0056, "step": 10029 }, { "epoch": 2.2821387940841866, "grad_norm": 0.6148837517707874, "learning_rate": 7.103754778017522e-07, "loss": 0.0076, "step": 10030 }, { "epoch": 2.2823663253697384, "grad_norm": 0.9240720387550072, "learning_rate": 7.102869631658366e-07, "loss": 0.0101, "step": 10031 }, { "epoch": 2.28259385665529, "grad_norm": 0.5819860623595451, "learning_rate": 7.101984467868117e-07, "loss": 0.0101, "step": 10032 }, { "epoch": 2.282821387940842, "grad_norm": 1.341054224471248, "learning_rate": 7.101099286664864e-07, "loss": 0.0151, "step": 10033 }, { "epoch": 2.2830489192263936, "grad_norm": 0.7666014608898726, "learning_rate": 7.100214088066701e-07, "loss": 0.0046, "step": 10034 }, { "epoch": 2.2832764505119454, "grad_norm": 1.2457918196046487, "learning_rate": 7.09932887209172e-07, "loss": 0.0118, "step": 10035 }, { "epoch": 2.283503981797497, "grad_norm": 0.6767802912441624, "learning_rate": 7.098443638758011e-07, "loss": 0.0074, "step": 10036 }, { "epoch": 2.283731513083049, "grad_norm": 1.963142095084728, "learning_rate": 7.097558388083669e-07, "loss": 0.0053, "step": 10037 }, { "epoch": 2.2839590443686006, "grad_norm": 0.45130274689490363, "learning_rate": 7.096673120086786e-07, "loss": 0.0038, "step": 10038 }, { "epoch": 2.2841865756541524, "grad_norm": 0.4999273557273774, "learning_rate": 7.095787834785454e-07, "loss": 0.0062, "step": 10039 }, { "epoch": 2.284414106939704, "grad_norm": 0.4800370083296574, "learning_rate": 7.094902532197768e-07, "loss": 0.0041, "step": 10040 }, { "epoch": 2.284641638225256, "grad_norm": 0.6073124811289312, "learning_rate": 7.094017212341821e-07, "loss": 0.0038, "step": 10041 }, { "epoch": 2.2848691695108077, "grad_norm": 0.47795522512049377, "learning_rate": 7.093131875235709e-07, "loss": 0.004, "step": 10042 }, { "epoch": 2.2850967007963594, "grad_norm": 0.5144374892997239, "learning_rate": 7.092246520897525e-07, "loss": 0.0067, "step": 10043 }, { "epoch": 2.285324232081911, "grad_norm": 1.2837917218462427, "learning_rate": 7.091361149345364e-07, "loss": 0.0275, "step": 10044 }, { "epoch": 2.285551763367463, "grad_norm": 0.729657838051333, "learning_rate": 7.090475760597323e-07, "loss": 0.006, "step": 10045 }, { "epoch": 2.2857792946530147, "grad_norm": 0.9888589220408079, "learning_rate": 7.089590354671496e-07, "loss": 0.0126, "step": 10046 }, { "epoch": 2.2860068259385664, "grad_norm": 0.8685462125690027, "learning_rate": 7.088704931585981e-07, "loss": 0.0046, "step": 10047 }, { "epoch": 2.286234357224118, "grad_norm": 0.9617253633105406, "learning_rate": 7.087819491358871e-07, "loss": 0.0117, "step": 10048 }, { "epoch": 2.28646188850967, "grad_norm": 1.006784982768661, "learning_rate": 7.086934034008268e-07, "loss": 0.0098, "step": 10049 }, { "epoch": 2.2866894197952217, "grad_norm": 0.9622839903925858, "learning_rate": 7.086048559552265e-07, "loss": 0.0167, "step": 10050 }, { "epoch": 2.2869169510807734, "grad_norm": 0.6703375723411117, "learning_rate": 7.08516306800896e-07, "loss": 0.0078, "step": 10051 }, { "epoch": 2.287144482366325, "grad_norm": 1.5503214542884183, "learning_rate": 7.084277559396452e-07, "loss": 0.0055, "step": 10052 }, { "epoch": 2.2873720136518774, "grad_norm": 1.1399657101169642, "learning_rate": 7.083392033732839e-07, "loss": 0.0149, "step": 10053 }, { "epoch": 2.2875995449374287, "grad_norm": 0.7503776422936881, "learning_rate": 7.082506491036216e-07, "loss": 0.0084, "step": 10054 }, { "epoch": 2.287827076222981, "grad_norm": 1.069962738667487, "learning_rate": 7.081620931324687e-07, "loss": 0.0089, "step": 10055 }, { "epoch": 2.288054607508532, "grad_norm": 0.8702126286675769, "learning_rate": 7.080735354616349e-07, "loss": 0.0181, "step": 10056 }, { "epoch": 2.2882821387940844, "grad_norm": 0.4913055841399376, "learning_rate": 7.079849760929304e-07, "loss": 0.006, "step": 10057 }, { "epoch": 2.288509670079636, "grad_norm": 0.4829282057235933, "learning_rate": 7.078964150281647e-07, "loss": 0.0078, "step": 10058 }, { "epoch": 2.288737201365188, "grad_norm": 0.901034969179354, "learning_rate": 7.078078522691482e-07, "loss": 0.0131, "step": 10059 }, { "epoch": 2.2889647326507396, "grad_norm": 0.6044249662432829, "learning_rate": 7.07719287817691e-07, "loss": 0.0091, "step": 10060 }, { "epoch": 2.2891922639362914, "grad_norm": 0.7203482555224818, "learning_rate": 7.076307216756028e-07, "loss": 0.0126, "step": 10061 }, { "epoch": 2.289419795221843, "grad_norm": 0.8454883651236138, "learning_rate": 7.07542153844694e-07, "loss": 0.0068, "step": 10062 }, { "epoch": 2.289647326507395, "grad_norm": 0.6557860607707027, "learning_rate": 7.074535843267749e-07, "loss": 0.0067, "step": 10063 }, { "epoch": 2.2898748577929466, "grad_norm": 0.32997153315314165, "learning_rate": 7.073650131236555e-07, "loss": 0.0025, "step": 10064 }, { "epoch": 2.2901023890784984, "grad_norm": 1.3921964732779324, "learning_rate": 7.07276440237146e-07, "loss": 0.0197, "step": 10065 }, { "epoch": 2.29032992036405, "grad_norm": 1.1680571691740689, "learning_rate": 7.071878656690567e-07, "loss": 0.0251, "step": 10066 }, { "epoch": 2.290557451649602, "grad_norm": 0.8566700209537361, "learning_rate": 7.070992894211981e-07, "loss": 0.0087, "step": 10067 }, { "epoch": 2.2907849829351536, "grad_norm": 1.3040121528607698, "learning_rate": 7.070107114953802e-07, "loss": 0.018, "step": 10068 }, { "epoch": 2.2910125142207054, "grad_norm": 0.2540897041326507, "learning_rate": 7.069221318934137e-07, "loss": 0.002, "step": 10069 }, { "epoch": 2.291240045506257, "grad_norm": 0.8418795648675906, "learning_rate": 7.068335506171089e-07, "loss": 0.0071, "step": 10070 }, { "epoch": 2.291467576791809, "grad_norm": 0.9765659872361724, "learning_rate": 7.067449676682761e-07, "loss": 0.0136, "step": 10071 }, { "epoch": 2.2916951080773607, "grad_norm": 1.0208987881110032, "learning_rate": 7.066563830487259e-07, "loss": 0.0295, "step": 10072 }, { "epoch": 2.2919226393629124, "grad_norm": 1.222408739546565, "learning_rate": 7.065677967602688e-07, "loss": 0.027, "step": 10073 }, { "epoch": 2.292150170648464, "grad_norm": 0.9982472069670923, "learning_rate": 7.064792088047151e-07, "loss": 0.016, "step": 10074 }, { "epoch": 2.292377701934016, "grad_norm": 1.6662994652817027, "learning_rate": 7.063906191838758e-07, "loss": 0.0144, "step": 10075 }, { "epoch": 2.2926052332195677, "grad_norm": 2.2127886112104265, "learning_rate": 7.063020278995615e-07, "loss": 0.0201, "step": 10076 }, { "epoch": 2.2928327645051194, "grad_norm": 0.4132904621812425, "learning_rate": 7.062134349535822e-07, "loss": 0.004, "step": 10077 }, { "epoch": 2.293060295790671, "grad_norm": 1.1242793590573408, "learning_rate": 7.061248403477493e-07, "loss": 0.0171, "step": 10078 }, { "epoch": 2.293287827076223, "grad_norm": 0.9270308427621402, "learning_rate": 7.060362440838732e-07, "loss": 0.0157, "step": 10079 }, { "epoch": 2.2935153583617747, "grad_norm": 0.8117483513548707, "learning_rate": 7.059476461637647e-07, "loss": 0.0131, "step": 10080 }, { "epoch": 2.2937428896473264, "grad_norm": 1.12705975126284, "learning_rate": 7.058590465892344e-07, "loss": 0.0255, "step": 10081 }, { "epoch": 2.293970420932878, "grad_norm": 0.6534125670998943, "learning_rate": 7.057704453620934e-07, "loss": 0.009, "step": 10082 }, { "epoch": 2.29419795221843, "grad_norm": 0.9899153515591961, "learning_rate": 7.056818424841526e-07, "loss": 0.0066, "step": 10083 }, { "epoch": 2.2944254835039817, "grad_norm": 0.641197098664469, "learning_rate": 7.055932379572225e-07, "loss": 0.0041, "step": 10084 }, { "epoch": 2.2946530147895334, "grad_norm": 1.175071144252044, "learning_rate": 7.055046317831142e-07, "loss": 0.0127, "step": 10085 }, { "epoch": 2.294880546075085, "grad_norm": 3.1059674284137944, "learning_rate": 7.054160239636387e-07, "loss": 0.0341, "step": 10086 }, { "epoch": 2.295108077360637, "grad_norm": 0.8812074684214019, "learning_rate": 7.05327414500607e-07, "loss": 0.0114, "step": 10087 }, { "epoch": 2.2953356086461887, "grad_norm": 1.0115692884554024, "learning_rate": 7.052388033958299e-07, "loss": 0.0181, "step": 10088 }, { "epoch": 2.2955631399317404, "grad_norm": 1.1972164747898673, "learning_rate": 7.051501906511189e-07, "loss": 0.0126, "step": 10089 }, { "epoch": 2.295790671217292, "grad_norm": 0.9623762135002488, "learning_rate": 7.050615762682845e-07, "loss": 0.0086, "step": 10090 }, { "epoch": 2.296018202502844, "grad_norm": 1.2698297857456664, "learning_rate": 7.049729602491381e-07, "loss": 0.017, "step": 10091 }, { "epoch": 2.296245733788396, "grad_norm": 0.6088258021036096, "learning_rate": 7.048843425954911e-07, "loss": 0.0053, "step": 10092 }, { "epoch": 2.2964732650739474, "grad_norm": 0.6963938748800188, "learning_rate": 7.047957233091543e-07, "loss": 0.0083, "step": 10093 }, { "epoch": 2.2967007963594996, "grad_norm": 1.6367487174096638, "learning_rate": 7.047071023919391e-07, "loss": 0.0172, "step": 10094 }, { "epoch": 2.296928327645051, "grad_norm": 0.4952579066680398, "learning_rate": 7.046184798456566e-07, "loss": 0.0052, "step": 10095 }, { "epoch": 2.297155858930603, "grad_norm": 0.4342389959114885, "learning_rate": 7.045298556721184e-07, "loss": 0.0071, "step": 10096 }, { "epoch": 2.297383390216155, "grad_norm": 1.6343953773185733, "learning_rate": 7.044412298731354e-07, "loss": 0.0037, "step": 10097 }, { "epoch": 2.2976109215017066, "grad_norm": 0.5992190661692139, "learning_rate": 7.043526024505191e-07, "loss": 0.0052, "step": 10098 }, { "epoch": 2.2978384527872584, "grad_norm": 0.6106168450315481, "learning_rate": 7.042639734060811e-07, "loss": 0.0068, "step": 10099 }, { "epoch": 2.29806598407281, "grad_norm": 0.7723000392882742, "learning_rate": 7.041753427416326e-07, "loss": 0.0141, "step": 10100 }, { "epoch": 2.298293515358362, "grad_norm": 0.9459950054210727, "learning_rate": 7.040867104589852e-07, "loss": 0.0095, "step": 10101 }, { "epoch": 2.2985210466439137, "grad_norm": 0.5395201244383016, "learning_rate": 7.0399807655995e-07, "loss": 0.0056, "step": 10102 }, { "epoch": 2.2987485779294654, "grad_norm": 0.7886853107634966, "learning_rate": 7.03909441046339e-07, "loss": 0.0115, "step": 10103 }, { "epoch": 2.298976109215017, "grad_norm": 0.5424232591146029, "learning_rate": 7.038208039199634e-07, "loss": 0.0055, "step": 10104 }, { "epoch": 2.299203640500569, "grad_norm": 0.5780576114855146, "learning_rate": 7.037321651826351e-07, "loss": 0.005, "step": 10105 }, { "epoch": 2.2994311717861207, "grad_norm": 0.10870746903202896, "learning_rate": 7.036435248361655e-07, "loss": 0.0004, "step": 10106 }, { "epoch": 2.2996587030716724, "grad_norm": 1.283048427785034, "learning_rate": 7.035548828823662e-07, "loss": 0.0158, "step": 10107 }, { "epoch": 2.299886234357224, "grad_norm": 0.8126385685064367, "learning_rate": 7.03466239323049e-07, "loss": 0.0068, "step": 10108 }, { "epoch": 2.300113765642776, "grad_norm": 2.652185567482076, "learning_rate": 7.033775941600257e-07, "loss": 0.0231, "step": 10109 }, { "epoch": 2.3003412969283277, "grad_norm": 0.8459085633109352, "learning_rate": 7.032889473951078e-07, "loss": 0.0087, "step": 10110 }, { "epoch": 2.3005688282138794, "grad_norm": 0.8166064686087698, "learning_rate": 7.032002990301071e-07, "loss": 0.0042, "step": 10111 }, { "epoch": 2.300796359499431, "grad_norm": 0.18571971530877573, "learning_rate": 7.031116490668355e-07, "loss": 0.0006, "step": 10112 }, { "epoch": 2.301023890784983, "grad_norm": 3.394120011385526, "learning_rate": 7.030229975071049e-07, "loss": 0.0371, "step": 10113 }, { "epoch": 2.3012514220705347, "grad_norm": 0.3221317015885059, "learning_rate": 7.029343443527273e-07, "loss": 0.0022, "step": 10114 }, { "epoch": 2.3014789533560864, "grad_norm": 0.6371287752828788, "learning_rate": 7.028456896055143e-07, "loss": 0.0045, "step": 10115 }, { "epoch": 2.301706484641638, "grad_norm": 0.9718614116039644, "learning_rate": 7.02757033267278e-07, "loss": 0.018, "step": 10116 }, { "epoch": 2.30193401592719, "grad_norm": 1.121249078570174, "learning_rate": 7.026683753398303e-07, "loss": 0.0109, "step": 10117 }, { "epoch": 2.3021615472127417, "grad_norm": 1.1162039781294801, "learning_rate": 7.02579715824983e-07, "loss": 0.0174, "step": 10118 }, { "epoch": 2.3023890784982934, "grad_norm": 0.4645536786622632, "learning_rate": 7.024910547245488e-07, "loss": 0.0028, "step": 10119 }, { "epoch": 2.302616609783845, "grad_norm": 1.6422586585063792, "learning_rate": 7.024023920403395e-07, "loss": 0.0171, "step": 10120 }, { "epoch": 2.302844141069397, "grad_norm": 1.0189572458925067, "learning_rate": 7.023137277741665e-07, "loss": 0.0076, "step": 10121 }, { "epoch": 2.3030716723549487, "grad_norm": 0.9210814203622263, "learning_rate": 7.022250619278428e-07, "loss": 0.0129, "step": 10122 }, { "epoch": 2.3032992036405004, "grad_norm": 1.56662707709944, "learning_rate": 7.021363945031804e-07, "loss": 0.0376, "step": 10123 }, { "epoch": 2.303526734926052, "grad_norm": 0.30620578728678866, "learning_rate": 7.020477255019911e-07, "loss": 0.0035, "step": 10124 }, { "epoch": 2.303754266211604, "grad_norm": 0.8454740613277052, "learning_rate": 7.019590549260874e-07, "loss": 0.0054, "step": 10125 }, { "epoch": 2.3039817974971557, "grad_norm": 0.8940448341119864, "learning_rate": 7.018703827772816e-07, "loss": 0.0088, "step": 10126 }, { "epoch": 2.3042093287827075, "grad_norm": 0.3611611108114472, "learning_rate": 7.017817090573863e-07, "loss": 0.0019, "step": 10127 }, { "epoch": 2.304436860068259, "grad_norm": 1.6106196842014078, "learning_rate": 7.016930337682131e-07, "loss": 0.0198, "step": 10128 }, { "epoch": 2.304664391353811, "grad_norm": 0.6589733849992382, "learning_rate": 7.016043569115747e-07, "loss": 0.0056, "step": 10129 }, { "epoch": 2.3048919226393627, "grad_norm": 0.8576267433273903, "learning_rate": 7.015156784892838e-07, "loss": 0.0094, "step": 10130 }, { "epoch": 2.305119453924915, "grad_norm": 1.165521261140456, "learning_rate": 7.014269985031523e-07, "loss": 0.0142, "step": 10131 }, { "epoch": 2.305346985210466, "grad_norm": 0.8231983314887558, "learning_rate": 7.01338316954993e-07, "loss": 0.0087, "step": 10132 }, { "epoch": 2.3055745164960184, "grad_norm": 0.4618300841413346, "learning_rate": 7.012496338466186e-07, "loss": 0.0057, "step": 10133 }, { "epoch": 2.3058020477815697, "grad_norm": 0.8766454004594978, "learning_rate": 7.01160949179841e-07, "loss": 0.0076, "step": 10134 }, { "epoch": 2.306029579067122, "grad_norm": 0.6043832004793516, "learning_rate": 7.010722629564732e-07, "loss": 0.0055, "step": 10135 }, { "epoch": 2.3062571103526737, "grad_norm": 2.1617953189932404, "learning_rate": 7.009835751783277e-07, "loss": 0.0192, "step": 10136 }, { "epoch": 2.3064846416382254, "grad_norm": 1.425843683424515, "learning_rate": 7.00894885847217e-07, "loss": 0.0157, "step": 10137 }, { "epoch": 2.306712172923777, "grad_norm": 0.5016052473696108, "learning_rate": 7.008061949649537e-07, "loss": 0.003, "step": 10138 }, { "epoch": 2.306939704209329, "grad_norm": 1.408428623100655, "learning_rate": 7.00717502533351e-07, "loss": 0.0298, "step": 10139 }, { "epoch": 2.3071672354948807, "grad_norm": 0.6137112937534689, "learning_rate": 7.00628808554221e-07, "loss": 0.013, "step": 10140 }, { "epoch": 2.3073947667804324, "grad_norm": 1.035944879790626, "learning_rate": 7.005401130293765e-07, "loss": 0.009, "step": 10141 }, { "epoch": 2.307622298065984, "grad_norm": 0.4448797865540451, "learning_rate": 7.004514159606307e-07, "loss": 0.0037, "step": 10142 }, { "epoch": 2.307849829351536, "grad_norm": 0.6633311086212903, "learning_rate": 7.00362717349796e-07, "loss": 0.0099, "step": 10143 }, { "epoch": 2.3080773606370877, "grad_norm": 0.9294502278268252, "learning_rate": 7.002740171986853e-07, "loss": 0.0115, "step": 10144 }, { "epoch": 2.3083048919226394, "grad_norm": 0.5863164660502906, "learning_rate": 7.001853155091117e-07, "loss": 0.007, "step": 10145 }, { "epoch": 2.308532423208191, "grad_norm": 1.099845087069409, "learning_rate": 7.00096612282888e-07, "loss": 0.0151, "step": 10146 }, { "epoch": 2.308759954493743, "grad_norm": 0.460113899733205, "learning_rate": 7.000079075218269e-07, "loss": 0.0049, "step": 10147 }, { "epoch": 2.3089874857792947, "grad_norm": 1.2032526281118214, "learning_rate": 6.999192012277416e-07, "loss": 0.0041, "step": 10148 }, { "epoch": 2.3092150170648464, "grad_norm": 0.841874313725959, "learning_rate": 6.99830493402445e-07, "loss": 0.0099, "step": 10149 }, { "epoch": 2.309442548350398, "grad_norm": 1.0973545092456485, "learning_rate": 6.997417840477502e-07, "loss": 0.0133, "step": 10150 }, { "epoch": 2.30967007963595, "grad_norm": 0.5777564289645852, "learning_rate": 6.996530731654704e-07, "loss": 0.0047, "step": 10151 }, { "epoch": 2.3098976109215017, "grad_norm": 0.7664781101966082, "learning_rate": 6.995643607574182e-07, "loss": 0.0097, "step": 10152 }, { "epoch": 2.3101251422070535, "grad_norm": 1.121609807780237, "learning_rate": 6.994756468254072e-07, "loss": 0.0226, "step": 10153 }, { "epoch": 2.310352673492605, "grad_norm": 0.6245775591143798, "learning_rate": 6.993869313712504e-07, "loss": 0.0098, "step": 10154 }, { "epoch": 2.310580204778157, "grad_norm": 0.5144413689256983, "learning_rate": 6.992982143967607e-07, "loss": 0.0038, "step": 10155 }, { "epoch": 2.3108077360637087, "grad_norm": 0.6268806686017621, "learning_rate": 6.992094959037518e-07, "loss": 0.0126, "step": 10156 }, { "epoch": 2.3110352673492605, "grad_norm": 0.736963761483954, "learning_rate": 6.991207758940367e-07, "loss": 0.0106, "step": 10157 }, { "epoch": 2.311262798634812, "grad_norm": 0.6445481733186926, "learning_rate": 6.990320543694287e-07, "loss": 0.0069, "step": 10158 }, { "epoch": 2.311490329920364, "grad_norm": 1.00092369210761, "learning_rate": 6.98943331331741e-07, "loss": 0.0217, "step": 10159 }, { "epoch": 2.3117178612059157, "grad_norm": 1.5579453570688413, "learning_rate": 6.988546067827872e-07, "loss": 0.0067, "step": 10160 }, { "epoch": 2.3119453924914675, "grad_norm": 0.5818854259944063, "learning_rate": 6.987658807243803e-07, "loss": 0.0066, "step": 10161 }, { "epoch": 2.312172923777019, "grad_norm": 0.5041380977550439, "learning_rate": 6.986771531583339e-07, "loss": 0.0054, "step": 10162 }, { "epoch": 2.312400455062571, "grad_norm": 0.736455366272796, "learning_rate": 6.985884240864614e-07, "loss": 0.0069, "step": 10163 }, { "epoch": 2.3126279863481227, "grad_norm": 0.6408263610707731, "learning_rate": 6.984996935105765e-07, "loss": 0.0101, "step": 10164 }, { "epoch": 2.3128555176336745, "grad_norm": 0.6972823910650949, "learning_rate": 6.984109614324923e-07, "loss": 0.0122, "step": 10165 }, { "epoch": 2.3130830489192262, "grad_norm": 1.7093318432235183, "learning_rate": 6.983222278540225e-07, "loss": 0.0345, "step": 10166 }, { "epoch": 2.313310580204778, "grad_norm": 1.134717732264216, "learning_rate": 6.982334927769807e-07, "loss": 0.0158, "step": 10167 }, { "epoch": 2.3135381114903297, "grad_norm": 3.4772368491534755, "learning_rate": 6.981447562031804e-07, "loss": 0.0409, "step": 10168 }, { "epoch": 2.3137656427758815, "grad_norm": 0.8053655230672714, "learning_rate": 6.980560181344352e-07, "loss": 0.0097, "step": 10169 }, { "epoch": 2.3139931740614337, "grad_norm": 0.5321463364019337, "learning_rate": 6.979672785725588e-07, "loss": 0.0057, "step": 10170 }, { "epoch": 2.314220705346985, "grad_norm": 0.2658758189926208, "learning_rate": 6.97878537519365e-07, "loss": 0.0018, "step": 10171 }, { "epoch": 2.314448236632537, "grad_norm": 0.3637818612105649, "learning_rate": 6.977897949766673e-07, "loss": 0.0076, "step": 10172 }, { "epoch": 2.3146757679180885, "grad_norm": 0.775098776639897, "learning_rate": 6.977010509462795e-07, "loss": 0.0069, "step": 10173 }, { "epoch": 2.3149032992036407, "grad_norm": 0.9435719851633139, "learning_rate": 6.976123054300153e-07, "loss": 0.0111, "step": 10174 }, { "epoch": 2.3151308304891924, "grad_norm": 0.7533302737117131, "learning_rate": 6.975235584296884e-07, "loss": 0.0181, "step": 10175 }, { "epoch": 2.315358361774744, "grad_norm": 1.104769907405495, "learning_rate": 6.97434809947113e-07, "loss": 0.0184, "step": 10176 }, { "epoch": 2.315585893060296, "grad_norm": 0.5451622658901014, "learning_rate": 6.973460599841029e-07, "loss": 0.0055, "step": 10177 }, { "epoch": 2.3158134243458477, "grad_norm": 0.8063769124343447, "learning_rate": 6.972573085424715e-07, "loss": 0.0135, "step": 10178 }, { "epoch": 2.3160409556313994, "grad_norm": 1.308678739721527, "learning_rate": 6.971685556240331e-07, "loss": 0.0176, "step": 10179 }, { "epoch": 2.316268486916951, "grad_norm": 0.7362767112801626, "learning_rate": 6.970798012306018e-07, "loss": 0.0162, "step": 10180 }, { "epoch": 2.316496018202503, "grad_norm": 0.5408181247226096, "learning_rate": 6.969910453639912e-07, "loss": 0.0032, "step": 10181 }, { "epoch": 2.3167235494880547, "grad_norm": 0.5028201237220994, "learning_rate": 6.969022880260155e-07, "loss": 0.0079, "step": 10182 }, { "epoch": 2.3169510807736065, "grad_norm": 0.29309546332648656, "learning_rate": 6.968135292184889e-07, "loss": 0.0024, "step": 10183 }, { "epoch": 2.317178612059158, "grad_norm": 0.6156454907724597, "learning_rate": 6.967247689432252e-07, "loss": 0.0058, "step": 10184 }, { "epoch": 2.31740614334471, "grad_norm": 1.1042308150028102, "learning_rate": 6.966360072020384e-07, "loss": 0.0083, "step": 10185 }, { "epoch": 2.3176336746302617, "grad_norm": 0.7049634194266385, "learning_rate": 6.965472439967428e-07, "loss": 0.0165, "step": 10186 }, { "epoch": 2.3178612059158135, "grad_norm": 0.664428666276751, "learning_rate": 6.964584793291527e-07, "loss": 0.0068, "step": 10187 }, { "epoch": 2.318088737201365, "grad_norm": 2.0273266740651597, "learning_rate": 6.963697132010822e-07, "loss": 0.023, "step": 10188 }, { "epoch": 2.318316268486917, "grad_norm": 1.1976622159938362, "learning_rate": 6.962809456143453e-07, "loss": 0.0144, "step": 10189 }, { "epoch": 2.3185437997724687, "grad_norm": 1.0814450619532463, "learning_rate": 6.961921765707567e-07, "loss": 0.0085, "step": 10190 }, { "epoch": 2.3187713310580205, "grad_norm": 0.9038190807401713, "learning_rate": 6.961034060721303e-07, "loss": 0.0179, "step": 10191 }, { "epoch": 2.318998862343572, "grad_norm": 0.3998709194199702, "learning_rate": 6.960146341202802e-07, "loss": 0.0059, "step": 10192 }, { "epoch": 2.319226393629124, "grad_norm": 1.001102458506643, "learning_rate": 6.959258607170213e-07, "loss": 0.0085, "step": 10193 }, { "epoch": 2.3194539249146757, "grad_norm": 0.6807175571911053, "learning_rate": 6.958370858641676e-07, "loss": 0.0062, "step": 10194 }, { "epoch": 2.3196814562002275, "grad_norm": 0.7363590450613621, "learning_rate": 6.957483095635335e-07, "loss": 0.0125, "step": 10195 }, { "epoch": 2.3199089874857792, "grad_norm": 0.7298814194853614, "learning_rate": 6.956595318169339e-07, "loss": 0.0105, "step": 10196 }, { "epoch": 2.320136518771331, "grad_norm": 2.2114499032849704, "learning_rate": 6.955707526261826e-07, "loss": 0.0345, "step": 10197 }, { "epoch": 2.3203640500568827, "grad_norm": 0.6745303666083613, "learning_rate": 6.954819719930944e-07, "loss": 0.0084, "step": 10198 }, { "epoch": 2.3205915813424345, "grad_norm": 0.9558309626109285, "learning_rate": 6.953931899194838e-07, "loss": 0.0118, "step": 10199 }, { "epoch": 2.3208191126279862, "grad_norm": 17.14208979132511, "learning_rate": 6.953044064071653e-07, "loss": 0.1309, "step": 10200 }, { "epoch": 2.321046643913538, "grad_norm": 0.39353173155384846, "learning_rate": 6.952156214579535e-07, "loss": 0.0027, "step": 10201 }, { "epoch": 2.3212741751990897, "grad_norm": 0.38007358116883855, "learning_rate": 6.95126835073663e-07, "loss": 0.0033, "step": 10202 }, { "epoch": 2.3215017064846415, "grad_norm": 0.05835751772407707, "learning_rate": 6.950380472561084e-07, "loss": 0.0003, "step": 10203 }, { "epoch": 2.3217292377701932, "grad_norm": 1.3619532891257295, "learning_rate": 6.949492580071044e-07, "loss": 0.0334, "step": 10204 }, { "epoch": 2.321956769055745, "grad_norm": 1.4111880980656355, "learning_rate": 6.948604673284655e-07, "loss": 0.0303, "step": 10205 }, { "epoch": 2.3221843003412967, "grad_norm": 1.538134694969402, "learning_rate": 6.947716752220069e-07, "loss": 0.0208, "step": 10206 }, { "epoch": 2.3224118316268485, "grad_norm": 0.1636886691166144, "learning_rate": 6.946828816895428e-07, "loss": 0.0007, "step": 10207 }, { "epoch": 2.3226393629124003, "grad_norm": 0.6273013839702676, "learning_rate": 6.945940867328883e-07, "loss": 0.0062, "step": 10208 }, { "epoch": 2.3228668941979524, "grad_norm": 0.8405334127317614, "learning_rate": 6.945052903538582e-07, "loss": 0.0163, "step": 10209 }, { "epoch": 2.3230944254835038, "grad_norm": 1.4563481750068972, "learning_rate": 6.944164925542672e-07, "loss": 0.0474, "step": 10210 }, { "epoch": 2.323321956769056, "grad_norm": 0.442823649886996, "learning_rate": 6.943276933359302e-07, "loss": 0.002, "step": 10211 }, { "epoch": 2.3235494880546073, "grad_norm": 0.33468952020815773, "learning_rate": 6.94238892700662e-07, "loss": 0.0026, "step": 10212 }, { "epoch": 2.3237770193401595, "grad_norm": 0.4656391228655939, "learning_rate": 6.941500906502778e-07, "loss": 0.0015, "step": 10213 }, { "epoch": 2.324004550625711, "grad_norm": 1.2902371638777126, "learning_rate": 6.940612871865924e-07, "loss": 0.0072, "step": 10214 }, { "epoch": 2.324232081911263, "grad_norm": 0.5821008230735917, "learning_rate": 6.939724823114206e-07, "loss": 0.0116, "step": 10215 }, { "epoch": 2.3244596131968147, "grad_norm": 2.3076356132267137, "learning_rate": 6.938836760265778e-07, "loss": 0.0078, "step": 10216 }, { "epoch": 2.3246871444823665, "grad_norm": 0.754135142158919, "learning_rate": 6.937948683338787e-07, "loss": 0.0053, "step": 10217 }, { "epoch": 2.324914675767918, "grad_norm": 1.1146662012035387, "learning_rate": 6.937060592351386e-07, "loss": 0.0148, "step": 10218 }, { "epoch": 2.32514220705347, "grad_norm": 1.1014436082027805, "learning_rate": 6.936172487321722e-07, "loss": 0.0204, "step": 10219 }, { "epoch": 2.3253697383390217, "grad_norm": 0.6589151916281193, "learning_rate": 6.935284368267951e-07, "loss": 0.0069, "step": 10220 }, { "epoch": 2.3255972696245735, "grad_norm": 1.7051667028208575, "learning_rate": 6.934396235208224e-07, "loss": 0.0152, "step": 10221 }, { "epoch": 2.3258248009101252, "grad_norm": 0.8616129889509784, "learning_rate": 6.933508088160689e-07, "loss": 0.0121, "step": 10222 }, { "epoch": 2.326052332195677, "grad_norm": 2.1315978615194444, "learning_rate": 6.932619927143501e-07, "loss": 0.0125, "step": 10223 }, { "epoch": 2.3262798634812287, "grad_norm": 0.6118079787131045, "learning_rate": 6.931731752174813e-07, "loss": 0.0053, "step": 10224 }, { "epoch": 2.3265073947667805, "grad_norm": 0.4796241597347531, "learning_rate": 6.930843563272774e-07, "loss": 0.004, "step": 10225 }, { "epoch": 2.3267349260523322, "grad_norm": 1.7335612334468335, "learning_rate": 6.929955360455542e-07, "loss": 0.0174, "step": 10226 }, { "epoch": 2.326962457337884, "grad_norm": 0.4318420488238688, "learning_rate": 6.929067143741267e-07, "loss": 0.0025, "step": 10227 }, { "epoch": 2.3271899886234357, "grad_norm": 0.8004250612131438, "learning_rate": 6.928178913148101e-07, "loss": 0.0201, "step": 10228 }, { "epoch": 2.3274175199089875, "grad_norm": 2.0647006695741945, "learning_rate": 6.927290668694203e-07, "loss": 0.0188, "step": 10229 }, { "epoch": 2.3276450511945392, "grad_norm": 0.7545750591533665, "learning_rate": 6.926402410397723e-07, "loss": 0.0101, "step": 10230 }, { "epoch": 2.327872582480091, "grad_norm": 0.7301237595193207, "learning_rate": 6.925514138276816e-07, "loss": 0.0142, "step": 10231 }, { "epoch": 2.3281001137656427, "grad_norm": 0.7215672144793023, "learning_rate": 6.924625852349637e-07, "loss": 0.0096, "step": 10232 }, { "epoch": 2.3283276450511945, "grad_norm": 0.9121978048079189, "learning_rate": 6.923737552634342e-07, "loss": 0.0202, "step": 10233 }, { "epoch": 2.3285551763367462, "grad_norm": 0.15744665419343642, "learning_rate": 6.922849239149087e-07, "loss": 0.0009, "step": 10234 }, { "epoch": 2.328782707622298, "grad_norm": 0.6050048738417826, "learning_rate": 6.921960911912024e-07, "loss": 0.0047, "step": 10235 }, { "epoch": 2.3290102389078498, "grad_norm": 0.3321319718559836, "learning_rate": 6.921072570941311e-07, "loss": 0.0029, "step": 10236 }, { "epoch": 2.3292377701934015, "grad_norm": 0.8703247264688811, "learning_rate": 6.920184216255102e-07, "loss": 0.0154, "step": 10237 }, { "epoch": 2.3294653014789533, "grad_norm": 0.7703988075504749, "learning_rate": 6.919295847871557e-07, "loss": 0.0092, "step": 10238 }, { "epoch": 2.329692832764505, "grad_norm": 0.897548658838121, "learning_rate": 6.918407465808828e-07, "loss": 0.0046, "step": 10239 }, { "epoch": 2.3299203640500568, "grad_norm": 1.1722249192531569, "learning_rate": 6.917519070085078e-07, "loss": 0.0253, "step": 10240 }, { "epoch": 2.3301478953356085, "grad_norm": 0.5420868005715489, "learning_rate": 6.91663066071846e-07, "loss": 0.0079, "step": 10241 }, { "epoch": 2.3303754266211603, "grad_norm": 0.6573127145842004, "learning_rate": 6.91574223772713e-07, "loss": 0.0059, "step": 10242 }, { "epoch": 2.330602957906712, "grad_norm": 0.7953931170852774, "learning_rate": 6.914853801129249e-07, "loss": 0.0094, "step": 10243 }, { "epoch": 2.3308304891922638, "grad_norm": 0.6057159042072666, "learning_rate": 6.913965350942975e-07, "loss": 0.0068, "step": 10244 }, { "epoch": 2.3310580204778155, "grad_norm": 0.3843344025693635, "learning_rate": 6.913076887186465e-07, "loss": 0.0027, "step": 10245 }, { "epoch": 2.3312855517633673, "grad_norm": 0.63514341438768, "learning_rate": 6.912188409877876e-07, "loss": 0.0142, "step": 10246 }, { "epoch": 2.331513083048919, "grad_norm": 0.5755471871418311, "learning_rate": 6.91129991903537e-07, "loss": 0.0099, "step": 10247 }, { "epoch": 2.331740614334471, "grad_norm": 0.5705863213767163, "learning_rate": 6.910411414677105e-07, "loss": 0.0098, "step": 10248 }, { "epoch": 2.3319681456200225, "grad_norm": 0.6345200474726558, "learning_rate": 6.909522896821239e-07, "loss": 0.0052, "step": 10249 }, { "epoch": 2.3321956769055747, "grad_norm": 0.35648860065748184, "learning_rate": 6.908634365485933e-07, "loss": 0.0027, "step": 10250 }, { "epoch": 2.3324232081911265, "grad_norm": 4.640973194660284, "learning_rate": 6.907745820689349e-07, "loss": 0.008, "step": 10251 }, { "epoch": 2.3326507394766782, "grad_norm": 1.4770508940217657, "learning_rate": 6.906857262449642e-07, "loss": 0.0229, "step": 10252 }, { "epoch": 2.33287827076223, "grad_norm": 0.8721902722513187, "learning_rate": 6.905968690784978e-07, "loss": 0.0106, "step": 10253 }, { "epoch": 2.3331058020477817, "grad_norm": 1.3227354683976476, "learning_rate": 6.905080105713514e-07, "loss": 0.0186, "step": 10254 }, { "epoch": 2.3333333333333335, "grad_norm": 1.139734824221267, "learning_rate": 6.904191507253412e-07, "loss": 0.0127, "step": 10255 }, { "epoch": 2.3335608646188852, "grad_norm": 0.785382871561001, "learning_rate": 6.903302895422835e-07, "loss": 0.013, "step": 10256 }, { "epoch": 2.333788395904437, "grad_norm": 1.3987309910321224, "learning_rate": 6.902414270239942e-07, "loss": 0.0117, "step": 10257 }, { "epoch": 2.3340159271899887, "grad_norm": 0.3358594402059059, "learning_rate": 6.901525631722896e-07, "loss": 0.0034, "step": 10258 }, { "epoch": 2.3342434584755405, "grad_norm": 1.1847550517300136, "learning_rate": 6.900636979889861e-07, "loss": 0.0307, "step": 10259 }, { "epoch": 2.3344709897610922, "grad_norm": 0.7342836329408435, "learning_rate": 6.899748314758997e-07, "loss": 0.003, "step": 10260 }, { "epoch": 2.334698521046644, "grad_norm": 1.044738686169953, "learning_rate": 6.898859636348468e-07, "loss": 0.0113, "step": 10261 }, { "epoch": 2.3349260523321957, "grad_norm": 0.8306027566417524, "learning_rate": 6.897970944676434e-07, "loss": 0.0038, "step": 10262 }, { "epoch": 2.3351535836177475, "grad_norm": 1.480957922340298, "learning_rate": 6.897082239761063e-07, "loss": 0.0327, "step": 10263 }, { "epoch": 2.3353811149032992, "grad_norm": 0.8188793876713951, "learning_rate": 6.896193521620514e-07, "loss": 0.0088, "step": 10264 }, { "epoch": 2.335608646188851, "grad_norm": 0.7069039583065618, "learning_rate": 6.895304790272956e-07, "loss": 0.0035, "step": 10265 }, { "epoch": 2.3358361774744028, "grad_norm": 1.2611831389532338, "learning_rate": 6.894416045736547e-07, "loss": 0.0167, "step": 10266 }, { "epoch": 2.3360637087599545, "grad_norm": 0.8398558176243138, "learning_rate": 6.893527288029456e-07, "loss": 0.0076, "step": 10267 }, { "epoch": 2.3362912400455063, "grad_norm": 2.1564711125623646, "learning_rate": 6.892638517169844e-07, "loss": 0.027, "step": 10268 }, { "epoch": 2.336518771331058, "grad_norm": 0.5300608441738138, "learning_rate": 6.891749733175879e-07, "loss": 0.0045, "step": 10269 }, { "epoch": 2.3367463026166098, "grad_norm": 0.6427242730231706, "learning_rate": 6.890860936065724e-07, "loss": 0.005, "step": 10270 }, { "epoch": 2.3369738339021615, "grad_norm": 0.432003278642133, "learning_rate": 6.889972125857547e-07, "loss": 0.0041, "step": 10271 }, { "epoch": 2.3372013651877133, "grad_norm": 0.8049855501382505, "learning_rate": 6.889083302569511e-07, "loss": 0.0083, "step": 10272 }, { "epoch": 2.337428896473265, "grad_norm": 0.8995613142175508, "learning_rate": 6.88819446621978e-07, "loss": 0.0089, "step": 10273 }, { "epoch": 2.3376564277588168, "grad_norm": 1.2693316381336106, "learning_rate": 6.887305616826525e-07, "loss": 0.0205, "step": 10274 }, { "epoch": 2.3378839590443685, "grad_norm": 0.9981056048290965, "learning_rate": 6.886416754407912e-07, "loss": 0.0089, "step": 10275 }, { "epoch": 2.3381114903299203, "grad_norm": 0.3100169796438469, "learning_rate": 6.885527878982103e-07, "loss": 0.0018, "step": 10276 }, { "epoch": 2.338339021615472, "grad_norm": 0.7292387907711666, "learning_rate": 6.88463899056727e-07, "loss": 0.015, "step": 10277 }, { "epoch": 2.3385665529010238, "grad_norm": 0.840870770785405, "learning_rate": 6.883750089181579e-07, "loss": 0.0052, "step": 10278 }, { "epoch": 2.3387940841865755, "grad_norm": 2.2981608484954945, "learning_rate": 6.882861174843194e-07, "loss": 0.0048, "step": 10279 }, { "epoch": 2.3390216154721273, "grad_norm": 0.6808529105694683, "learning_rate": 6.881972247570288e-07, "loss": 0.012, "step": 10280 }, { "epoch": 2.339249146757679, "grad_norm": 0.5616663039888988, "learning_rate": 6.881083307381026e-07, "loss": 0.0076, "step": 10281 }, { "epoch": 2.339476678043231, "grad_norm": 0.8578194579822541, "learning_rate": 6.880194354293577e-07, "loss": 0.0183, "step": 10282 }, { "epoch": 2.3397042093287825, "grad_norm": 0.859526909919855, "learning_rate": 6.879305388326109e-07, "loss": 0.0122, "step": 10283 }, { "epoch": 2.3399317406143343, "grad_norm": 1.0653542604877544, "learning_rate": 6.878416409496793e-07, "loss": 0.0148, "step": 10284 }, { "epoch": 2.3401592718998865, "grad_norm": 0.9077495968408823, "learning_rate": 6.877527417823795e-07, "loss": 0.0121, "step": 10285 }, { "epoch": 2.340386803185438, "grad_norm": 1.4335131513131403, "learning_rate": 6.876638413325286e-07, "loss": 0.0167, "step": 10286 }, { "epoch": 2.34061433447099, "grad_norm": 2.390299357673188, "learning_rate": 6.875749396019435e-07, "loss": 0.0313, "step": 10287 }, { "epoch": 2.3408418657565413, "grad_norm": 0.8590725667747523, "learning_rate": 6.874860365924415e-07, "loss": 0.011, "step": 10288 }, { "epoch": 2.3410693970420935, "grad_norm": 0.4425251076115152, "learning_rate": 6.873971323058389e-07, "loss": 0.0049, "step": 10289 }, { "epoch": 2.3412969283276452, "grad_norm": 1.0390028890503968, "learning_rate": 6.873082267439536e-07, "loss": 0.0095, "step": 10290 }, { "epoch": 2.341524459613197, "grad_norm": 1.7446248769943498, "learning_rate": 6.87219319908602e-07, "loss": 0.021, "step": 10291 }, { "epoch": 2.3417519908987487, "grad_norm": 1.017407687799404, "learning_rate": 6.871304118016015e-07, "loss": 0.0101, "step": 10292 }, { "epoch": 2.3419795221843005, "grad_norm": 1.0825832419917973, "learning_rate": 6.870415024247693e-07, "loss": 0.0088, "step": 10293 }, { "epoch": 2.3422070534698523, "grad_norm": 0.5640255416882559, "learning_rate": 6.869525917799223e-07, "loss": 0.0065, "step": 10294 }, { "epoch": 2.342434584755404, "grad_norm": 0.4134369884298572, "learning_rate": 6.868636798688778e-07, "loss": 0.0043, "step": 10295 }, { "epoch": 2.3426621160409558, "grad_norm": 0.784254583971471, "learning_rate": 6.867747666934529e-07, "loss": 0.0081, "step": 10296 }, { "epoch": 2.3428896473265075, "grad_norm": 0.8697854972382616, "learning_rate": 6.866858522554652e-07, "loss": 0.0068, "step": 10297 }, { "epoch": 2.3431171786120593, "grad_norm": 0.8035750513071476, "learning_rate": 6.865969365567314e-07, "loss": 0.0135, "step": 10298 }, { "epoch": 2.343344709897611, "grad_norm": 1.0448343065399286, "learning_rate": 6.865080195990691e-07, "loss": 0.0191, "step": 10299 }, { "epoch": 2.3435722411831628, "grad_norm": 0.5281502527499884, "learning_rate": 6.864191013842955e-07, "loss": 0.0114, "step": 10300 }, { "epoch": 2.3437997724687145, "grad_norm": 0.6396143284216406, "learning_rate": 6.863301819142279e-07, "loss": 0.0076, "step": 10301 }, { "epoch": 2.3440273037542663, "grad_norm": 0.906611104845208, "learning_rate": 6.862412611906838e-07, "loss": 0.007, "step": 10302 }, { "epoch": 2.344254835039818, "grad_norm": 0.9216250022272794, "learning_rate": 6.861523392154805e-07, "loss": 0.0097, "step": 10303 }, { "epoch": 2.3444823663253698, "grad_norm": 0.8559525528957445, "learning_rate": 6.860634159904354e-07, "loss": 0.0057, "step": 10304 }, { "epoch": 2.3447098976109215, "grad_norm": 1.2405227219104527, "learning_rate": 6.859744915173658e-07, "loss": 0.0193, "step": 10305 }, { "epoch": 2.3449374288964733, "grad_norm": 1.513291843283628, "learning_rate": 6.858855657980891e-07, "loss": 0.0192, "step": 10306 }, { "epoch": 2.345164960182025, "grad_norm": 0.8796671760088395, "learning_rate": 6.857966388344232e-07, "loss": 0.0197, "step": 10307 }, { "epoch": 2.345392491467577, "grad_norm": 1.132256743394204, "learning_rate": 6.857077106281852e-07, "loss": 0.0109, "step": 10308 }, { "epoch": 2.3456200227531285, "grad_norm": 0.7920261844405083, "learning_rate": 6.856187811811929e-07, "loss": 0.0038, "step": 10309 }, { "epoch": 2.3458475540386803, "grad_norm": 0.5834708019986359, "learning_rate": 6.855298504952637e-07, "loss": 0.0039, "step": 10310 }, { "epoch": 2.346075085324232, "grad_norm": 0.27868920835756295, "learning_rate": 6.854409185722152e-07, "loss": 0.0027, "step": 10311 }, { "epoch": 2.346302616609784, "grad_norm": 0.6863710360087306, "learning_rate": 6.853519854138652e-07, "loss": 0.0094, "step": 10312 }, { "epoch": 2.3465301478953355, "grad_norm": 1.2160370814863346, "learning_rate": 6.852630510220308e-07, "loss": 0.0264, "step": 10313 }, { "epoch": 2.3467576791808873, "grad_norm": 0.5921115884296834, "learning_rate": 6.851741153985302e-07, "loss": 0.0033, "step": 10314 }, { "epoch": 2.346985210466439, "grad_norm": 1.0942713859179245, "learning_rate": 6.850851785451809e-07, "loss": 0.016, "step": 10315 }, { "epoch": 2.347212741751991, "grad_norm": 1.135274947985809, "learning_rate": 6.849962404638005e-07, "loss": 0.0143, "step": 10316 }, { "epoch": 2.3474402730375425, "grad_norm": 0.8495480052728345, "learning_rate": 6.849073011562069e-07, "loss": 0.0098, "step": 10317 }, { "epoch": 2.3476678043230943, "grad_norm": 1.039150228036813, "learning_rate": 6.848183606242177e-07, "loss": 0.0096, "step": 10318 }, { "epoch": 2.347895335608646, "grad_norm": 0.43985591469961616, "learning_rate": 6.847294188696507e-07, "loss": 0.0052, "step": 10319 }, { "epoch": 2.348122866894198, "grad_norm": 1.2154202768207871, "learning_rate": 6.84640475894324e-07, "loss": 0.0148, "step": 10320 }, { "epoch": 2.3483503981797496, "grad_norm": 1.0921594435928292, "learning_rate": 6.845515317000551e-07, "loss": 0.0095, "step": 10321 }, { "epoch": 2.3485779294653013, "grad_norm": 0.6966589458346292, "learning_rate": 6.844625862886618e-07, "loss": 0.0084, "step": 10322 }, { "epoch": 2.348805460750853, "grad_norm": 1.050875591519738, "learning_rate": 6.843736396619622e-07, "loss": 0.008, "step": 10323 }, { "epoch": 2.3490329920364053, "grad_norm": 0.6128421753987343, "learning_rate": 6.842846918217743e-07, "loss": 0.0033, "step": 10324 }, { "epoch": 2.3492605233219566, "grad_norm": 0.621893101992011, "learning_rate": 6.841957427699158e-07, "loss": 0.01, "step": 10325 }, { "epoch": 2.3494880546075088, "grad_norm": 0.7920982959054942, "learning_rate": 6.841067925082046e-07, "loss": 0.0143, "step": 10326 }, { "epoch": 2.34971558589306, "grad_norm": 1.7468957589196497, "learning_rate": 6.840178410384591e-07, "loss": 0.0498, "step": 10327 }, { "epoch": 2.3499431171786123, "grad_norm": 0.7807163066139508, "learning_rate": 6.839288883624969e-07, "loss": 0.0182, "step": 10328 }, { "epoch": 2.350170648464164, "grad_norm": 0.5153460775295234, "learning_rate": 6.838399344821359e-07, "loss": 0.0029, "step": 10329 }, { "epoch": 2.3503981797497158, "grad_norm": 1.1450329929990304, "learning_rate": 6.837509793991946e-07, "loss": 0.0174, "step": 10330 }, { "epoch": 2.3506257110352675, "grad_norm": 0.6153157617558511, "learning_rate": 6.836620231154908e-07, "loss": 0.0091, "step": 10331 }, { "epoch": 2.3508532423208193, "grad_norm": 4.1219356279842225, "learning_rate": 6.835730656328429e-07, "loss": 0.0636, "step": 10332 }, { "epoch": 2.351080773606371, "grad_norm": 0.6971298007119479, "learning_rate": 6.834841069530686e-07, "loss": 0.0048, "step": 10333 }, { "epoch": 2.3513083048919228, "grad_norm": 0.8482873024282528, "learning_rate": 6.833951470779864e-07, "loss": 0.012, "step": 10334 }, { "epoch": 2.3515358361774745, "grad_norm": 0.9782559907511348, "learning_rate": 6.833061860094142e-07, "loss": 0.0176, "step": 10335 }, { "epoch": 2.3517633674630263, "grad_norm": 0.6598443402198825, "learning_rate": 6.832172237491703e-07, "loss": 0.0103, "step": 10336 }, { "epoch": 2.351990898748578, "grad_norm": 9.880256145104966, "learning_rate": 6.831282602990731e-07, "loss": 0.0155, "step": 10337 }, { "epoch": 2.35221843003413, "grad_norm": 1.7176831455270816, "learning_rate": 6.830392956609406e-07, "loss": 0.0206, "step": 10338 }, { "epoch": 2.3524459613196815, "grad_norm": 3.791972693388024, "learning_rate": 6.829503298365913e-07, "loss": 0.0125, "step": 10339 }, { "epoch": 2.3526734926052333, "grad_norm": 0.6052947601878111, "learning_rate": 6.828613628278433e-07, "loss": 0.0076, "step": 10340 }, { "epoch": 2.352901023890785, "grad_norm": 1.1374801748186376, "learning_rate": 6.827723946365153e-07, "loss": 0.0166, "step": 10341 }, { "epoch": 2.353128555176337, "grad_norm": 0.7153091286483448, "learning_rate": 6.826834252644251e-07, "loss": 0.0062, "step": 10342 }, { "epoch": 2.3533560864618885, "grad_norm": 0.4077784398472029, "learning_rate": 6.825944547133913e-07, "loss": 0.0043, "step": 10343 }, { "epoch": 2.3535836177474403, "grad_norm": 0.4724706322375158, "learning_rate": 6.825054829852323e-07, "loss": 0.0079, "step": 10344 }, { "epoch": 2.353811149032992, "grad_norm": 0.7561406578591986, "learning_rate": 6.824165100817667e-07, "loss": 0.0069, "step": 10345 }, { "epoch": 2.354038680318544, "grad_norm": 0.689871374278366, "learning_rate": 6.823275360048126e-07, "loss": 0.0059, "step": 10346 }, { "epoch": 2.3542662116040955, "grad_norm": 0.40336764543273756, "learning_rate": 6.822385607561889e-07, "loss": 0.0036, "step": 10347 }, { "epoch": 2.3544937428896473, "grad_norm": 0.8444264993624585, "learning_rate": 6.821495843377138e-07, "loss": 0.0099, "step": 10348 }, { "epoch": 2.354721274175199, "grad_norm": 0.9080815501148358, "learning_rate": 6.820606067512056e-07, "loss": 0.0108, "step": 10349 }, { "epoch": 2.354948805460751, "grad_norm": 0.836620904037802, "learning_rate": 6.819716279984833e-07, "loss": 0.0043, "step": 10350 }, { "epoch": 2.3551763367463026, "grad_norm": 0.8235354156844404, "learning_rate": 6.818826480813652e-07, "loss": 0.0079, "step": 10351 }, { "epoch": 2.3554038680318543, "grad_norm": 1.7303594386719274, "learning_rate": 6.8179366700167e-07, "loss": 0.0141, "step": 10352 }, { "epoch": 2.355631399317406, "grad_norm": 1.3227434170206829, "learning_rate": 6.817046847612164e-07, "loss": 0.0183, "step": 10353 }, { "epoch": 2.355858930602958, "grad_norm": 1.091096637588967, "learning_rate": 6.816157013618227e-07, "loss": 0.0205, "step": 10354 }, { "epoch": 2.3560864618885096, "grad_norm": 0.5099361957874957, "learning_rate": 6.815267168053078e-07, "loss": 0.0042, "step": 10355 }, { "epoch": 2.3563139931740613, "grad_norm": 0.904374869024215, "learning_rate": 6.814377310934901e-07, "loss": 0.0104, "step": 10356 }, { "epoch": 2.356541524459613, "grad_norm": 0.577842384058945, "learning_rate": 6.813487442281888e-07, "loss": 0.0096, "step": 10357 }, { "epoch": 2.356769055745165, "grad_norm": 1.6992429643870268, "learning_rate": 6.812597562112223e-07, "loss": 0.0196, "step": 10358 }, { "epoch": 2.3569965870307166, "grad_norm": 0.9530481843734885, "learning_rate": 6.811707670444095e-07, "loss": 0.0182, "step": 10359 }, { "epoch": 2.3572241183162683, "grad_norm": 1.4642992225831375, "learning_rate": 6.810817767295691e-07, "loss": 0.0165, "step": 10360 }, { "epoch": 2.35745164960182, "grad_norm": 1.7349664337916957, "learning_rate": 6.809927852685198e-07, "loss": 0.0304, "step": 10361 }, { "epoch": 2.357679180887372, "grad_norm": 0.3161990932584514, "learning_rate": 6.809037926630806e-07, "loss": 0.0022, "step": 10362 }, { "epoch": 2.357906712172924, "grad_norm": 1.1435751465816453, "learning_rate": 6.808147989150701e-07, "loss": 0.0246, "step": 10363 }, { "epoch": 2.3581342434584753, "grad_norm": 1.1573282974975814, "learning_rate": 6.807258040263075e-07, "loss": 0.0226, "step": 10364 }, { "epoch": 2.3583617747440275, "grad_norm": 0.6903794550878165, "learning_rate": 6.806368079986114e-07, "loss": 0.0041, "step": 10365 }, { "epoch": 2.358589306029579, "grad_norm": 0.8921949005942351, "learning_rate": 6.805478108338009e-07, "loss": 0.0067, "step": 10366 }, { "epoch": 2.358816837315131, "grad_norm": 0.6492011344615265, "learning_rate": 6.80458812533695e-07, "loss": 0.0066, "step": 10367 }, { "epoch": 2.359044368600683, "grad_norm": 1.608546697023726, "learning_rate": 6.803698131001124e-07, "loss": 0.0242, "step": 10368 }, { "epoch": 2.3592718998862345, "grad_norm": 0.7307925236236682, "learning_rate": 6.802808125348722e-07, "loss": 0.0071, "step": 10369 }, { "epoch": 2.3594994311717863, "grad_norm": 0.3714417088555214, "learning_rate": 6.801918108397934e-07, "loss": 0.0047, "step": 10370 }, { "epoch": 2.359726962457338, "grad_norm": 0.8409280642699111, "learning_rate": 6.801028080166952e-07, "loss": 0.0068, "step": 10371 }, { "epoch": 2.35995449374289, "grad_norm": 1.2678940373199323, "learning_rate": 6.800138040673964e-07, "loss": 0.011, "step": 10372 }, { "epoch": 2.3601820250284415, "grad_norm": 0.79958908390192, "learning_rate": 6.799247989937163e-07, "loss": 0.006, "step": 10373 }, { "epoch": 2.3604095563139933, "grad_norm": 0.6593139628149355, "learning_rate": 6.798357927974739e-07, "loss": 0.0174, "step": 10374 }, { "epoch": 2.360637087599545, "grad_norm": 0.8293602751263836, "learning_rate": 6.797467854804884e-07, "loss": 0.0139, "step": 10375 }, { "epoch": 2.360864618885097, "grad_norm": 1.3677978076252104, "learning_rate": 6.796577770445785e-07, "loss": 0.0178, "step": 10376 }, { "epoch": 2.3610921501706486, "grad_norm": 0.61782041716701, "learning_rate": 6.795687674915639e-07, "loss": 0.0098, "step": 10377 }, { "epoch": 2.3613196814562003, "grad_norm": 0.2708959103753517, "learning_rate": 6.794797568232639e-07, "loss": 0.0043, "step": 10378 }, { "epoch": 2.361547212741752, "grad_norm": 0.6321021654765626, "learning_rate": 6.793907450414972e-07, "loss": 0.0054, "step": 10379 }, { "epoch": 2.361774744027304, "grad_norm": 1.4057991527111133, "learning_rate": 6.793017321480834e-07, "loss": 0.026, "step": 10380 }, { "epoch": 2.3620022753128556, "grad_norm": 0.6736111951964856, "learning_rate": 6.792127181448415e-07, "loss": 0.0105, "step": 10381 }, { "epoch": 2.3622298065984073, "grad_norm": 0.5292460207716454, "learning_rate": 6.791237030335911e-07, "loss": 0.0063, "step": 10382 }, { "epoch": 2.362457337883959, "grad_norm": 2.1904801460557137, "learning_rate": 6.790346868161511e-07, "loss": 0.0234, "step": 10383 }, { "epoch": 2.362684869169511, "grad_norm": 1.0840868436262017, "learning_rate": 6.789456694943413e-07, "loss": 0.0206, "step": 10384 }, { "epoch": 2.3629124004550626, "grad_norm": 0.5608874388577607, "learning_rate": 6.788566510699808e-07, "loss": 0.0059, "step": 10385 }, { "epoch": 2.3631399317406143, "grad_norm": 0.984037688855531, "learning_rate": 6.787676315448887e-07, "loss": 0.0108, "step": 10386 }, { "epoch": 2.363367463026166, "grad_norm": 0.42569002708673764, "learning_rate": 6.78678610920885e-07, "loss": 0.0037, "step": 10387 }, { "epoch": 2.363594994311718, "grad_norm": 1.2700835025653037, "learning_rate": 6.785895891997887e-07, "loss": 0.0075, "step": 10388 }, { "epoch": 2.3638225255972696, "grad_norm": 0.5912364200157575, "learning_rate": 6.785005663834193e-07, "loss": 0.0118, "step": 10389 }, { "epoch": 2.3640500568828213, "grad_norm": 0.8160513537400322, "learning_rate": 6.784115424735962e-07, "loss": 0.0131, "step": 10390 }, { "epoch": 2.364277588168373, "grad_norm": 1.2033059986235666, "learning_rate": 6.783225174721393e-07, "loss": 0.0173, "step": 10391 }, { "epoch": 2.364505119453925, "grad_norm": 0.8448902707113899, "learning_rate": 6.782334913808678e-07, "loss": 0.0145, "step": 10392 }, { "epoch": 2.3647326507394766, "grad_norm": 0.7997562160201219, "learning_rate": 6.781444642016008e-07, "loss": 0.0106, "step": 10393 }, { "epoch": 2.3649601820250283, "grad_norm": 0.28102004234133715, "learning_rate": 6.780554359361585e-07, "loss": 0.0024, "step": 10394 }, { "epoch": 2.36518771331058, "grad_norm": 0.5721330368890247, "learning_rate": 6.779664065863605e-07, "loss": 0.008, "step": 10395 }, { "epoch": 2.365415244596132, "grad_norm": 0.6603591321583218, "learning_rate": 6.778773761540258e-07, "loss": 0.0146, "step": 10396 }, { "epoch": 2.3656427758816836, "grad_norm": 1.3041399585668558, "learning_rate": 6.777883446409746e-07, "loss": 0.0165, "step": 10397 }, { "epoch": 2.3658703071672353, "grad_norm": 0.6213102641001365, "learning_rate": 6.776993120490262e-07, "loss": 0.0057, "step": 10398 }, { "epoch": 2.366097838452787, "grad_norm": 1.7675832856898075, "learning_rate": 6.776102783800005e-07, "loss": 0.0276, "step": 10399 }, { "epoch": 2.366325369738339, "grad_norm": 0.3494876236447349, "learning_rate": 6.775212436357167e-07, "loss": 0.0024, "step": 10400 }, { "epoch": 2.3665529010238906, "grad_norm": 0.33226803959414797, "learning_rate": 6.774322078179953e-07, "loss": 0.0025, "step": 10401 }, { "epoch": 2.366780432309443, "grad_norm": 1.0644202310301152, "learning_rate": 6.773431709286554e-07, "loss": 0.0132, "step": 10402 }, { "epoch": 2.367007963594994, "grad_norm": 0.9991809754112262, "learning_rate": 6.77254132969517e-07, "loss": 0.0165, "step": 10403 }, { "epoch": 2.3672354948805463, "grad_norm": 0.6026879489359328, "learning_rate": 6.771650939423999e-07, "loss": 0.0082, "step": 10404 }, { "epoch": 2.3674630261660976, "grad_norm": 1.0391940550651553, "learning_rate": 6.770760538491236e-07, "loss": 0.0284, "step": 10405 }, { "epoch": 2.36769055745165, "grad_norm": 0.4852558939090991, "learning_rate": 6.769870126915083e-07, "loss": 0.0032, "step": 10406 }, { "epoch": 2.3679180887372016, "grad_norm": 1.3834321505738552, "learning_rate": 6.768979704713735e-07, "loss": 0.0266, "step": 10407 }, { "epoch": 2.3681456200227533, "grad_norm": 0.5168227245092136, "learning_rate": 6.768089271905394e-07, "loss": 0.0044, "step": 10408 }, { "epoch": 2.368373151308305, "grad_norm": 0.735837316715088, "learning_rate": 6.767198828508257e-07, "loss": 0.0104, "step": 10409 }, { "epoch": 2.368600682593857, "grad_norm": 0.7774304411768346, "learning_rate": 6.766308374540523e-07, "loss": 0.0129, "step": 10410 }, { "epoch": 2.3688282138794086, "grad_norm": 0.7594503654050483, "learning_rate": 6.76541791002039e-07, "loss": 0.0096, "step": 10411 }, { "epoch": 2.3690557451649603, "grad_norm": 0.8019703120963613, "learning_rate": 6.76452743496606e-07, "loss": 0.0089, "step": 10412 }, { "epoch": 2.369283276450512, "grad_norm": 0.625540600290023, "learning_rate": 6.763636949395731e-07, "loss": 0.0087, "step": 10413 }, { "epoch": 2.369510807736064, "grad_norm": 0.6989544173709423, "learning_rate": 6.762746453327604e-07, "loss": 0.0102, "step": 10414 }, { "epoch": 2.3697383390216156, "grad_norm": 1.4135562582184766, "learning_rate": 6.761855946779879e-07, "loss": 0.0137, "step": 10415 }, { "epoch": 2.3699658703071673, "grad_norm": 0.8685160598533576, "learning_rate": 6.760965429770755e-07, "loss": 0.0088, "step": 10416 }, { "epoch": 2.370193401592719, "grad_norm": 1.053687994486769, "learning_rate": 6.760074902318435e-07, "loss": 0.0208, "step": 10417 }, { "epoch": 2.370420932878271, "grad_norm": 1.3626392563627565, "learning_rate": 6.759184364441117e-07, "loss": 0.0143, "step": 10418 }, { "epoch": 2.3706484641638226, "grad_norm": 1.1219283935429138, "learning_rate": 6.758293816157003e-07, "loss": 0.0123, "step": 10419 }, { "epoch": 2.3708759954493743, "grad_norm": 0.758804604750528, "learning_rate": 6.757403257484293e-07, "loss": 0.0128, "step": 10420 }, { "epoch": 2.371103526734926, "grad_norm": 0.5276953906634207, "learning_rate": 6.756512688441191e-07, "loss": 0.0074, "step": 10421 }, { "epoch": 2.371331058020478, "grad_norm": 0.18295630632555668, "learning_rate": 6.7556221090459e-07, "loss": 0.0015, "step": 10422 }, { "epoch": 2.3715585893060296, "grad_norm": 1.3728595308032756, "learning_rate": 6.754731519316615e-07, "loss": 0.0183, "step": 10423 }, { "epoch": 2.3717861205915813, "grad_norm": 1.397696409103075, "learning_rate": 6.753840919271542e-07, "loss": 0.0106, "step": 10424 }, { "epoch": 2.372013651877133, "grad_norm": 1.1531768466755141, "learning_rate": 6.752950308928887e-07, "loss": 0.0184, "step": 10425 }, { "epoch": 2.372241183162685, "grad_norm": 0.861658381945885, "learning_rate": 6.752059688306846e-07, "loss": 0.0083, "step": 10426 }, { "epoch": 2.3724687144482366, "grad_norm": 0.888558429187069, "learning_rate": 6.751169057423625e-07, "loss": 0.0072, "step": 10427 }, { "epoch": 2.3726962457337883, "grad_norm": 1.0678443494608738, "learning_rate": 6.750278416297426e-07, "loss": 0.0156, "step": 10428 }, { "epoch": 2.37292377701934, "grad_norm": 0.697708598161338, "learning_rate": 6.749387764946454e-07, "loss": 0.0087, "step": 10429 }, { "epoch": 2.373151308304892, "grad_norm": 0.7912114990429278, "learning_rate": 6.748497103388908e-07, "loss": 0.006, "step": 10430 }, { "epoch": 2.3733788395904436, "grad_norm": 0.5128699466354705, "learning_rate": 6.747606431642996e-07, "loss": 0.0061, "step": 10431 }, { "epoch": 2.3736063708759954, "grad_norm": 1.339271305202392, "learning_rate": 6.746715749726921e-07, "loss": 0.0151, "step": 10432 }, { "epoch": 2.373833902161547, "grad_norm": 0.509893308083682, "learning_rate": 6.745825057658884e-07, "loss": 0.0057, "step": 10433 }, { "epoch": 2.374061433447099, "grad_norm": 0.5782531772594113, "learning_rate": 6.744934355457089e-07, "loss": 0.007, "step": 10434 }, { "epoch": 2.3742889647326506, "grad_norm": 0.5788267731286362, "learning_rate": 6.744043643139746e-07, "loss": 0.0127, "step": 10435 }, { "epoch": 2.3745164960182024, "grad_norm": 0.8723998110146735, "learning_rate": 6.743152920725054e-07, "loss": 0.0129, "step": 10436 }, { "epoch": 2.374744027303754, "grad_norm": 1.9100544848840797, "learning_rate": 6.742262188231219e-07, "loss": 0.0128, "step": 10437 }, { "epoch": 2.374971558589306, "grad_norm": 1.3620260213110171, "learning_rate": 6.741371445676448e-07, "loss": 0.0405, "step": 10438 }, { "epoch": 2.3751990898748576, "grad_norm": 0.4296088347716746, "learning_rate": 6.740480693078944e-07, "loss": 0.0039, "step": 10439 }, { "epoch": 2.3754266211604094, "grad_norm": 0.6664852967863667, "learning_rate": 6.739589930456911e-07, "loss": 0.0105, "step": 10440 }, { "epoch": 2.3756541524459616, "grad_norm": 1.9304744080349732, "learning_rate": 6.738699157828558e-07, "loss": 0.0226, "step": 10441 }, { "epoch": 2.375881683731513, "grad_norm": 0.693784540140346, "learning_rate": 6.737808375212091e-07, "loss": 0.0077, "step": 10442 }, { "epoch": 2.376109215017065, "grad_norm": 0.5166717993273834, "learning_rate": 6.73691758262571e-07, "loss": 0.0029, "step": 10443 }, { "epoch": 2.3763367463026164, "grad_norm": 0.8375864283024972, "learning_rate": 6.736026780087627e-07, "loss": 0.0053, "step": 10444 }, { "epoch": 2.3765642775881686, "grad_norm": 1.0378435920154334, "learning_rate": 6.735135967616048e-07, "loss": 0.0063, "step": 10445 }, { "epoch": 2.3767918088737203, "grad_norm": 1.1826817721529774, "learning_rate": 6.734245145229179e-07, "loss": 0.017, "step": 10446 }, { "epoch": 2.377019340159272, "grad_norm": 1.1497889875630931, "learning_rate": 6.733354312945223e-07, "loss": 0.011, "step": 10447 }, { "epoch": 2.377246871444824, "grad_norm": 0.5899571336847758, "learning_rate": 6.732463470782394e-07, "loss": 0.0084, "step": 10448 }, { "epoch": 2.3774744027303756, "grad_norm": 0.9766972469948964, "learning_rate": 6.731572618758893e-07, "loss": 0.0102, "step": 10449 }, { "epoch": 2.3777019340159273, "grad_norm": 0.5092569107578636, "learning_rate": 6.730681756892929e-07, "loss": 0.0052, "step": 10450 }, { "epoch": 2.377929465301479, "grad_norm": 0.9032342938431787, "learning_rate": 6.729790885202712e-07, "loss": 0.0032, "step": 10451 }, { "epoch": 2.378156996587031, "grad_norm": 1.0651255062476255, "learning_rate": 6.728900003706446e-07, "loss": 0.0068, "step": 10452 }, { "epoch": 2.3783845278725826, "grad_norm": 0.7870426288448875, "learning_rate": 6.728009112422341e-07, "loss": 0.0107, "step": 10453 }, { "epoch": 2.3786120591581343, "grad_norm": 1.7541520617502695, "learning_rate": 6.727118211368607e-07, "loss": 0.0162, "step": 10454 }, { "epoch": 2.378839590443686, "grad_norm": 0.9309792708459591, "learning_rate": 6.72622730056345e-07, "loss": 0.0194, "step": 10455 }, { "epoch": 2.379067121729238, "grad_norm": 0.6969203246473327, "learning_rate": 6.725336380025078e-07, "loss": 0.0073, "step": 10456 }, { "epoch": 2.3792946530147896, "grad_norm": 0.2726682552593126, "learning_rate": 6.724445449771702e-07, "loss": 0.0018, "step": 10457 }, { "epoch": 2.3795221843003413, "grad_norm": 0.6307155904472416, "learning_rate": 6.72355450982153e-07, "loss": 0.0078, "step": 10458 }, { "epoch": 2.379749715585893, "grad_norm": 2.023766973686841, "learning_rate": 6.72266356019277e-07, "loss": 0.0434, "step": 10459 }, { "epoch": 2.379977246871445, "grad_norm": 0.8460059301893675, "learning_rate": 6.721772600903634e-07, "loss": 0.0168, "step": 10460 }, { "epoch": 2.3802047781569966, "grad_norm": 0.18116310785672052, "learning_rate": 6.720881631972328e-07, "loss": 0.001, "step": 10461 }, { "epoch": 2.3804323094425484, "grad_norm": 0.4683672504480534, "learning_rate": 6.719990653417066e-07, "loss": 0.006, "step": 10462 }, { "epoch": 2.3806598407281, "grad_norm": 0.3500416042983809, "learning_rate": 6.719099665256056e-07, "loss": 0.0034, "step": 10463 }, { "epoch": 2.380887372013652, "grad_norm": 0.6933613755911345, "learning_rate": 6.718208667507506e-07, "loss": 0.0108, "step": 10464 }, { "epoch": 2.3811149032992036, "grad_norm": 1.0532455631093067, "learning_rate": 6.717317660189629e-07, "loss": 0.0083, "step": 10465 }, { "epoch": 2.3813424345847554, "grad_norm": 1.0253922678804737, "learning_rate": 6.716426643320635e-07, "loss": 0.0113, "step": 10466 }, { "epoch": 2.381569965870307, "grad_norm": 1.4657264301007844, "learning_rate": 6.715535616918735e-07, "loss": 0.0106, "step": 10467 }, { "epoch": 2.381797497155859, "grad_norm": 0.700323283997074, "learning_rate": 6.714644581002139e-07, "loss": 0.0086, "step": 10468 }, { "epoch": 2.3820250284414106, "grad_norm": 0.3791075456464287, "learning_rate": 6.71375353558906e-07, "loss": 0.0032, "step": 10469 }, { "epoch": 2.3822525597269624, "grad_norm": 0.9086353618252871, "learning_rate": 6.712862480697705e-07, "loss": 0.0072, "step": 10470 }, { "epoch": 2.382480091012514, "grad_norm": 0.6012041916166788, "learning_rate": 6.711971416346291e-07, "loss": 0.0077, "step": 10471 }, { "epoch": 2.382707622298066, "grad_norm": 0.6462433497204293, "learning_rate": 6.711080342553027e-07, "loss": 0.0085, "step": 10472 }, { "epoch": 2.3829351535836176, "grad_norm": 0.9820342443277711, "learning_rate": 6.710189259336125e-07, "loss": 0.0128, "step": 10473 }, { "epoch": 2.3831626848691694, "grad_norm": 2.5054827415467438, "learning_rate": 6.709298166713799e-07, "loss": 0.0561, "step": 10474 }, { "epoch": 2.383390216154721, "grad_norm": 1.5787614639891616, "learning_rate": 6.708407064704258e-07, "loss": 0.0175, "step": 10475 }, { "epoch": 2.383617747440273, "grad_norm": 1.297834388160344, "learning_rate": 6.707515953325716e-07, "loss": 0.0073, "step": 10476 }, { "epoch": 2.3838452787258246, "grad_norm": 0.6228348826762886, "learning_rate": 6.706624832596385e-07, "loss": 0.0053, "step": 10477 }, { "epoch": 2.3840728100113764, "grad_norm": 0.8905813526193781, "learning_rate": 6.70573370253448e-07, "loss": 0.0065, "step": 10478 }, { "epoch": 2.384300341296928, "grad_norm": 1.9778914968442138, "learning_rate": 6.704842563158214e-07, "loss": 0.0307, "step": 10479 }, { "epoch": 2.3845278725824803, "grad_norm": 1.2062523850900597, "learning_rate": 6.703951414485796e-07, "loss": 0.0079, "step": 10480 }, { "epoch": 2.3847554038680316, "grad_norm": 1.3994248329957228, "learning_rate": 6.703060256535445e-07, "loss": 0.012, "step": 10481 }, { "epoch": 2.384982935153584, "grad_norm": 0.46572957577022844, "learning_rate": 6.702169089325371e-07, "loss": 0.0044, "step": 10482 }, { "epoch": 2.385210466439135, "grad_norm": 0.6336414266533823, "learning_rate": 6.70127791287379e-07, "loss": 0.0052, "step": 10483 }, { "epoch": 2.3854379977246873, "grad_norm": 0.5523313010077782, "learning_rate": 6.700386727198911e-07, "loss": 0.0066, "step": 10484 }, { "epoch": 2.385665529010239, "grad_norm": 1.33325474917937, "learning_rate": 6.699495532318957e-07, "loss": 0.0164, "step": 10485 }, { "epoch": 2.385893060295791, "grad_norm": 1.814141801167993, "learning_rate": 6.698604328252137e-07, "loss": 0.0296, "step": 10486 }, { "epoch": 2.3861205915813426, "grad_norm": 3.2139154004946655, "learning_rate": 6.697713115016663e-07, "loss": 0.0284, "step": 10487 }, { "epoch": 2.3863481228668944, "grad_norm": 0.7942462926026592, "learning_rate": 6.696821892630754e-07, "loss": 0.005, "step": 10488 }, { "epoch": 2.386575654152446, "grad_norm": 0.6941270213608137, "learning_rate": 6.695930661112625e-07, "loss": 0.0047, "step": 10489 }, { "epoch": 2.386803185437998, "grad_norm": 0.9132018713699201, "learning_rate": 6.69503942048049e-07, "loss": 0.0068, "step": 10490 }, { "epoch": 2.3870307167235496, "grad_norm": 0.7196783795209714, "learning_rate": 6.694148170752562e-07, "loss": 0.0099, "step": 10491 }, { "epoch": 2.3872582480091014, "grad_norm": 0.8734476710286856, "learning_rate": 6.693256911947063e-07, "loss": 0.0151, "step": 10492 }, { "epoch": 2.387485779294653, "grad_norm": 0.8747373723409781, "learning_rate": 6.692365644082202e-07, "loss": 0.009, "step": 10493 }, { "epoch": 2.387713310580205, "grad_norm": 0.33285227786973626, "learning_rate": 6.691474367176195e-07, "loss": 0.0026, "step": 10494 }, { "epoch": 2.3879408418657566, "grad_norm": 1.0672360926767652, "learning_rate": 6.690583081247264e-07, "loss": 0.0122, "step": 10495 }, { "epoch": 2.3881683731513084, "grad_norm": 0.6627312748300798, "learning_rate": 6.68969178631362e-07, "loss": 0.0021, "step": 10496 }, { "epoch": 2.38839590443686, "grad_norm": 0.7727177843649681, "learning_rate": 6.688800482393481e-07, "loss": 0.0082, "step": 10497 }, { "epoch": 2.388623435722412, "grad_norm": 0.9950620121959026, "learning_rate": 6.687909169505066e-07, "loss": 0.0114, "step": 10498 }, { "epoch": 2.3888509670079636, "grad_norm": 1.3565115586085073, "learning_rate": 6.687017847666588e-07, "loss": 0.0237, "step": 10499 }, { "epoch": 2.3890784982935154, "grad_norm": 0.6185302318962483, "learning_rate": 6.686126516896266e-07, "loss": 0.0125, "step": 10500 }, { "epoch": 2.389306029579067, "grad_norm": 0.507350634173346, "learning_rate": 6.685235177212315e-07, "loss": 0.0087, "step": 10501 }, { "epoch": 2.389533560864619, "grad_norm": 0.44214598697912777, "learning_rate": 6.684343828632957e-07, "loss": 0.0074, "step": 10502 }, { "epoch": 2.3897610921501706, "grad_norm": 1.053522817358322, "learning_rate": 6.683452471176405e-07, "loss": 0.0181, "step": 10503 }, { "epoch": 2.3899886234357224, "grad_norm": 0.5457894784730959, "learning_rate": 6.682561104860878e-07, "loss": 0.0077, "step": 10504 }, { "epoch": 2.390216154721274, "grad_norm": 0.9252811742464548, "learning_rate": 6.681669729704595e-07, "loss": 0.0173, "step": 10505 }, { "epoch": 2.390443686006826, "grad_norm": 0.5313298270872074, "learning_rate": 6.680778345725773e-07, "loss": 0.0064, "step": 10506 }, { "epoch": 2.3906712172923776, "grad_norm": 1.4175736693664631, "learning_rate": 6.679886952942629e-07, "loss": 0.037, "step": 10507 }, { "epoch": 2.3908987485779294, "grad_norm": 0.56414969665424, "learning_rate": 6.678995551373385e-07, "loss": 0.0046, "step": 10508 }, { "epoch": 2.391126279863481, "grad_norm": 2.3557190617925325, "learning_rate": 6.678104141036257e-07, "loss": 0.0171, "step": 10509 }, { "epoch": 2.391353811149033, "grad_norm": 0.5962327398025546, "learning_rate": 6.677212721949464e-07, "loss": 0.0055, "step": 10510 }, { "epoch": 2.3915813424345846, "grad_norm": 1.3588870104122868, "learning_rate": 6.676321294131226e-07, "loss": 0.0252, "step": 10511 }, { "epoch": 2.3918088737201364, "grad_norm": 1.1732559538784868, "learning_rate": 6.675429857599762e-07, "loss": 0.0325, "step": 10512 }, { "epoch": 2.392036405005688, "grad_norm": 0.7119189406693635, "learning_rate": 6.674538412373289e-07, "loss": 0.0078, "step": 10513 }, { "epoch": 2.39226393629124, "grad_norm": 1.9297301672810434, "learning_rate": 6.673646958470029e-07, "loss": 0.0189, "step": 10514 }, { "epoch": 2.3924914675767917, "grad_norm": 0.6547908522329773, "learning_rate": 6.672755495908202e-07, "loss": 0.0051, "step": 10515 }, { "epoch": 2.3927189988623434, "grad_norm": 0.766973502234432, "learning_rate": 6.671864024706027e-07, "loss": 0.0088, "step": 10516 }, { "epoch": 2.392946530147895, "grad_norm": 3.8431367912874177, "learning_rate": 6.670972544881723e-07, "loss": 0.0179, "step": 10517 }, { "epoch": 2.393174061433447, "grad_norm": 1.9724269057934356, "learning_rate": 6.670081056453512e-07, "loss": 0.0654, "step": 10518 }, { "epoch": 2.393401592718999, "grad_norm": 0.9960761843592146, "learning_rate": 6.669189559439613e-07, "loss": 0.0127, "step": 10519 }, { "epoch": 2.3936291240045504, "grad_norm": 1.3622404765559109, "learning_rate": 6.668298053858248e-07, "loss": 0.0136, "step": 10520 }, { "epoch": 2.3938566552901026, "grad_norm": 1.5813639835733582, "learning_rate": 6.667406539727634e-07, "loss": 0.0179, "step": 10521 }, { "epoch": 2.394084186575654, "grad_norm": 0.9636799376453943, "learning_rate": 6.666515017065997e-07, "loss": 0.0078, "step": 10522 }, { "epoch": 2.394311717861206, "grad_norm": 0.6290185628393714, "learning_rate": 6.665623485891558e-07, "loss": 0.0054, "step": 10523 }, { "epoch": 2.394539249146758, "grad_norm": 0.7150614502135193, "learning_rate": 6.664731946222531e-07, "loss": 0.004, "step": 10524 }, { "epoch": 2.3947667804323096, "grad_norm": 0.7681173261445071, "learning_rate": 6.663840398077146e-07, "loss": 0.0062, "step": 10525 }, { "epoch": 2.3949943117178614, "grad_norm": 0.6902986783217506, "learning_rate": 6.662948841473621e-07, "loss": 0.007, "step": 10526 }, { "epoch": 2.395221843003413, "grad_norm": 1.7406677645515236, "learning_rate": 6.662057276430179e-07, "loss": 0.0417, "step": 10527 }, { "epoch": 2.395449374288965, "grad_norm": 0.9553025117359835, "learning_rate": 6.661165702965037e-07, "loss": 0.0077, "step": 10528 }, { "epoch": 2.3956769055745166, "grad_norm": 0.43844482453627254, "learning_rate": 6.660274121096425e-07, "loss": 0.0043, "step": 10529 }, { "epoch": 2.3959044368600684, "grad_norm": 1.6110091857105604, "learning_rate": 6.659382530842561e-07, "loss": 0.0113, "step": 10530 }, { "epoch": 2.39613196814562, "grad_norm": 1.0102648925924103, "learning_rate": 6.658490932221664e-07, "loss": 0.0131, "step": 10531 }, { "epoch": 2.396359499431172, "grad_norm": 0.7116059233014349, "learning_rate": 6.657599325251964e-07, "loss": 0.0079, "step": 10532 }, { "epoch": 2.3965870307167236, "grad_norm": 1.6995235728559506, "learning_rate": 6.656707709951679e-07, "loss": 0.0168, "step": 10533 }, { "epoch": 2.3968145620022754, "grad_norm": 0.8552693964499672, "learning_rate": 6.655816086339033e-07, "loss": 0.0141, "step": 10534 }, { "epoch": 2.397042093287827, "grad_norm": 0.9894035361645778, "learning_rate": 6.654924454432251e-07, "loss": 0.0173, "step": 10535 }, { "epoch": 2.397269624573379, "grad_norm": 0.6000315574292535, "learning_rate": 6.654032814249554e-07, "loss": 0.0141, "step": 10536 }, { "epoch": 2.3974971558589306, "grad_norm": 1.58663756844954, "learning_rate": 6.653141165809166e-07, "loss": 0.0098, "step": 10537 }, { "epoch": 2.3977246871444824, "grad_norm": 0.43547609285235916, "learning_rate": 6.65224950912931e-07, "loss": 0.0048, "step": 10538 }, { "epoch": 2.397952218430034, "grad_norm": 0.4537542116901841, "learning_rate": 6.651357844228213e-07, "loss": 0.0048, "step": 10539 }, { "epoch": 2.398179749715586, "grad_norm": 0.29056422290512074, "learning_rate": 6.650466171124094e-07, "loss": 0.0028, "step": 10540 }, { "epoch": 2.3984072810011376, "grad_norm": 1.8730610384258652, "learning_rate": 6.649574489835181e-07, "loss": 0.0142, "step": 10541 }, { "epoch": 2.3986348122866894, "grad_norm": 0.807741093439397, "learning_rate": 6.648682800379698e-07, "loss": 0.0092, "step": 10542 }, { "epoch": 2.398862343572241, "grad_norm": 0.5411482090387322, "learning_rate": 6.647791102775869e-07, "loss": 0.0122, "step": 10543 }, { "epoch": 2.399089874857793, "grad_norm": 0.881167329723324, "learning_rate": 6.646899397041915e-07, "loss": 0.019, "step": 10544 }, { "epoch": 2.3993174061433447, "grad_norm": 0.9168476238340052, "learning_rate": 6.646007683196068e-07, "loss": 0.0157, "step": 10545 }, { "epoch": 2.3995449374288964, "grad_norm": 0.436657861510665, "learning_rate": 6.645115961256549e-07, "loss": 0.0029, "step": 10546 }, { "epoch": 2.399772468714448, "grad_norm": 0.8669938052061428, "learning_rate": 6.644224231241582e-07, "loss": 0.0108, "step": 10547 }, { "epoch": 2.4, "grad_norm": 0.6118486709479505, "learning_rate": 6.643332493169393e-07, "loss": 0.0068, "step": 10548 }, { "epoch": 2.4002275312855517, "grad_norm": 1.0320899136646215, "learning_rate": 6.642440747058209e-07, "loss": 0.0166, "step": 10549 }, { "epoch": 2.4004550625711034, "grad_norm": 0.6225354846502709, "learning_rate": 6.641548992926256e-07, "loss": 0.0099, "step": 10550 }, { "epoch": 2.400682593856655, "grad_norm": 0.5728917739900032, "learning_rate": 6.640657230791757e-07, "loss": 0.0058, "step": 10551 }, { "epoch": 2.400910125142207, "grad_norm": 0.3848125119212441, "learning_rate": 6.639765460672941e-07, "loss": 0.0023, "step": 10552 }, { "epoch": 2.4011376564277587, "grad_norm": 0.45669209069704314, "learning_rate": 6.638873682588032e-07, "loss": 0.0051, "step": 10553 }, { "epoch": 2.4013651877133104, "grad_norm": 1.1143863308089785, "learning_rate": 6.637981896555257e-07, "loss": 0.0176, "step": 10554 }, { "epoch": 2.401592718998862, "grad_norm": 0.6566798746352321, "learning_rate": 6.637090102592843e-07, "loss": 0.0105, "step": 10555 }, { "epoch": 2.401820250284414, "grad_norm": 1.8530407153185289, "learning_rate": 6.636198300719017e-07, "loss": 0.02, "step": 10556 }, { "epoch": 2.4020477815699657, "grad_norm": 1.0507927115994748, "learning_rate": 6.635306490952003e-07, "loss": 0.0201, "step": 10557 }, { "epoch": 2.402275312855518, "grad_norm": 1.0918320751666546, "learning_rate": 6.63441467331003e-07, "loss": 0.0231, "step": 10558 }, { "epoch": 2.402502844141069, "grad_norm": 0.424457724547813, "learning_rate": 6.633522847811327e-07, "loss": 0.0056, "step": 10559 }, { "epoch": 2.4027303754266214, "grad_norm": 0.6090392300834907, "learning_rate": 6.63263101447412e-07, "loss": 0.0042, "step": 10560 }, { "epoch": 2.4029579067121727, "grad_norm": 0.4036569273511563, "learning_rate": 6.631739173316634e-07, "loss": 0.0049, "step": 10561 }, { "epoch": 2.403185437997725, "grad_norm": 1.493195967787498, "learning_rate": 6.630847324357098e-07, "loss": 0.0239, "step": 10562 }, { "epoch": 2.4034129692832766, "grad_norm": 0.6749520146040353, "learning_rate": 6.629955467613741e-07, "loss": 0.0043, "step": 10563 }, { "epoch": 2.4036405005688284, "grad_norm": 1.603124442728324, "learning_rate": 6.629063603104789e-07, "loss": 0.0144, "step": 10564 }, { "epoch": 2.40386803185438, "grad_norm": 1.0661818619571983, "learning_rate": 6.628171730848474e-07, "loss": 0.0207, "step": 10565 }, { "epoch": 2.404095563139932, "grad_norm": 0.9145262952949705, "learning_rate": 6.62727985086302e-07, "loss": 0.0097, "step": 10566 }, { "epoch": 2.4043230944254836, "grad_norm": 1.1871120968584317, "learning_rate": 6.626387963166655e-07, "loss": 0.0149, "step": 10567 }, { "epoch": 2.4045506257110354, "grad_norm": 0.46229083520751924, "learning_rate": 6.625496067777612e-07, "loss": 0.0042, "step": 10568 }, { "epoch": 2.404778156996587, "grad_norm": 1.9790609673096478, "learning_rate": 6.624604164714115e-07, "loss": 0.0222, "step": 10569 }, { "epoch": 2.405005688282139, "grad_norm": 0.9703198289970115, "learning_rate": 6.623712253994397e-07, "loss": 0.0118, "step": 10570 }, { "epoch": 2.4052332195676907, "grad_norm": 0.8872350998596833, "learning_rate": 6.622820335636683e-07, "loss": 0.016, "step": 10571 }, { "epoch": 2.4054607508532424, "grad_norm": 0.4097210873789789, "learning_rate": 6.621928409659204e-07, "loss": 0.0033, "step": 10572 }, { "epoch": 2.405688282138794, "grad_norm": 1.4541634955037377, "learning_rate": 6.621036476080191e-07, "loss": 0.0278, "step": 10573 }, { "epoch": 2.405915813424346, "grad_norm": 0.774940480502151, "learning_rate": 6.620144534917872e-07, "loss": 0.0104, "step": 10574 }, { "epoch": 2.4061433447098977, "grad_norm": 0.48907352571221585, "learning_rate": 6.619252586190477e-07, "loss": 0.0062, "step": 10575 }, { "epoch": 2.4063708759954494, "grad_norm": 0.5662053979584578, "learning_rate": 6.618360629916233e-07, "loss": 0.0033, "step": 10576 }, { "epoch": 2.406598407281001, "grad_norm": 0.5649428546034507, "learning_rate": 6.617468666113375e-07, "loss": 0.0093, "step": 10577 }, { "epoch": 2.406825938566553, "grad_norm": 1.3346146603162525, "learning_rate": 6.616576694800126e-07, "loss": 0.0277, "step": 10578 }, { "epoch": 2.4070534698521047, "grad_norm": 0.29809748747175163, "learning_rate": 6.615684715994725e-07, "loss": 0.0038, "step": 10579 }, { "epoch": 2.4072810011376564, "grad_norm": 1.2905363856712393, "learning_rate": 6.614792729715398e-07, "loss": 0.0186, "step": 10580 }, { "epoch": 2.407508532423208, "grad_norm": 0.2936743154122565, "learning_rate": 6.613900735980374e-07, "loss": 0.0023, "step": 10581 }, { "epoch": 2.40773606370876, "grad_norm": 1.1220857084344364, "learning_rate": 6.613008734807886e-07, "loss": 0.0065, "step": 10582 }, { "epoch": 2.4079635949943117, "grad_norm": 0.9261446941656838, "learning_rate": 6.612116726216164e-07, "loss": 0.0088, "step": 10583 }, { "epoch": 2.4081911262798634, "grad_norm": 0.5234584382681744, "learning_rate": 6.611224710223441e-07, "loss": 0.006, "step": 10584 }, { "epoch": 2.408418657565415, "grad_norm": 0.7289985932084123, "learning_rate": 6.610332686847944e-07, "loss": 0.0132, "step": 10585 }, { "epoch": 2.408646188850967, "grad_norm": 0.22855192179213, "learning_rate": 6.609440656107909e-07, "loss": 0.0019, "step": 10586 }, { "epoch": 2.4088737201365187, "grad_norm": 0.9305394699444485, "learning_rate": 6.608548618021563e-07, "loss": 0.008, "step": 10587 }, { "epoch": 2.4091012514220704, "grad_norm": 0.6491683781261206, "learning_rate": 6.60765657260714e-07, "loss": 0.0078, "step": 10588 }, { "epoch": 2.409328782707622, "grad_norm": 0.885599743289047, "learning_rate": 6.606764519882874e-07, "loss": 0.0101, "step": 10589 }, { "epoch": 2.409556313993174, "grad_norm": 1.5197909875824962, "learning_rate": 6.605872459866993e-07, "loss": 0.0097, "step": 10590 }, { "epoch": 2.4097838452787257, "grad_norm": 0.6735449988757173, "learning_rate": 6.604980392577729e-07, "loss": 0.0061, "step": 10591 }, { "epoch": 2.4100113765642774, "grad_norm": 0.6552591185198129, "learning_rate": 6.604088318033321e-07, "loss": 0.0074, "step": 10592 }, { "epoch": 2.410238907849829, "grad_norm": 0.8331754789037994, "learning_rate": 6.603196236251993e-07, "loss": 0.0092, "step": 10593 }, { "epoch": 2.410466439135381, "grad_norm": 0.8989376955810195, "learning_rate": 6.60230414725198e-07, "loss": 0.0083, "step": 10594 }, { "epoch": 2.4106939704209327, "grad_norm": 1.319891006209917, "learning_rate": 6.601412051051516e-07, "loss": 0.0181, "step": 10595 }, { "epoch": 2.4109215017064844, "grad_norm": 1.0137278019727531, "learning_rate": 6.600519947668835e-07, "loss": 0.0081, "step": 10596 }, { "epoch": 2.4111490329920366, "grad_norm": 1.3238759878349928, "learning_rate": 6.599627837122167e-07, "loss": 0.0104, "step": 10597 }, { "epoch": 2.411376564277588, "grad_norm": 1.0860762067436107, "learning_rate": 6.598735719429744e-07, "loss": 0.0075, "step": 10598 }, { "epoch": 2.41160409556314, "grad_norm": 0.6756901458377977, "learning_rate": 6.597843594609806e-07, "loss": 0.0068, "step": 10599 }, { "epoch": 2.4118316268486915, "grad_norm": 0.8731094511506113, "learning_rate": 6.59695146268058e-07, "loss": 0.0162, "step": 10600 }, { "epoch": 2.4120591581342437, "grad_norm": 1.4435778929199266, "learning_rate": 6.596059323660299e-07, "loss": 0.0261, "step": 10601 }, { "epoch": 2.4122866894197954, "grad_norm": 1.1697380089434815, "learning_rate": 6.595167177567203e-07, "loss": 0.0117, "step": 10602 }, { "epoch": 2.412514220705347, "grad_norm": 1.2259909902848922, "learning_rate": 6.594275024419521e-07, "loss": 0.017, "step": 10603 }, { "epoch": 2.412741751990899, "grad_norm": 0.4305303475159498, "learning_rate": 6.593382864235487e-07, "loss": 0.0037, "step": 10604 }, { "epoch": 2.4129692832764507, "grad_norm": 0.5873593964903512, "learning_rate": 6.592490697033337e-07, "loss": 0.011, "step": 10605 }, { "epoch": 2.4131968145620024, "grad_norm": 0.6957884192700555, "learning_rate": 6.591598522831303e-07, "loss": 0.0119, "step": 10606 }, { "epoch": 2.413424345847554, "grad_norm": 0.6850591097585259, "learning_rate": 6.590706341647623e-07, "loss": 0.0123, "step": 10607 }, { "epoch": 2.413651877133106, "grad_norm": 0.850318899910111, "learning_rate": 6.589814153500527e-07, "loss": 0.0188, "step": 10608 }, { "epoch": 2.4138794084186577, "grad_norm": 0.7508155808098737, "learning_rate": 6.588921958408254e-07, "loss": 0.0042, "step": 10609 }, { "epoch": 2.4141069397042094, "grad_norm": 0.6754166370596343, "learning_rate": 6.588029756389037e-07, "loss": 0.0063, "step": 10610 }, { "epoch": 2.414334470989761, "grad_norm": 0.6451368042561455, "learning_rate": 6.587137547461108e-07, "loss": 0.0088, "step": 10611 }, { "epoch": 2.414562002275313, "grad_norm": 0.81082467294428, "learning_rate": 6.586245331642707e-07, "loss": 0.0091, "step": 10612 }, { "epoch": 2.4147895335608647, "grad_norm": 1.0132478954860449, "learning_rate": 6.585353108952068e-07, "loss": 0.0118, "step": 10613 }, { "epoch": 2.4150170648464164, "grad_norm": 0.9226915670370973, "learning_rate": 6.584460879407425e-07, "loss": 0.0227, "step": 10614 }, { "epoch": 2.415244596131968, "grad_norm": 0.49618699526750243, "learning_rate": 6.583568643027012e-07, "loss": 0.0058, "step": 10615 }, { "epoch": 2.41547212741752, "grad_norm": 0.42436792908094667, "learning_rate": 6.58267639982907e-07, "loss": 0.0035, "step": 10616 }, { "epoch": 2.4156996587030717, "grad_norm": 1.096088403453783, "learning_rate": 6.58178414983183e-07, "loss": 0.0118, "step": 10617 }, { "epoch": 2.4159271899886234, "grad_norm": 0.39255606422891076, "learning_rate": 6.58089189305353e-07, "loss": 0.0046, "step": 10618 }, { "epoch": 2.416154721274175, "grad_norm": 0.7463169266605767, "learning_rate": 6.579999629512407e-07, "loss": 0.0074, "step": 10619 }, { "epoch": 2.416382252559727, "grad_norm": 0.6727033736340391, "learning_rate": 6.579107359226695e-07, "loss": 0.0063, "step": 10620 }, { "epoch": 2.4166097838452787, "grad_norm": 0.5841619038698073, "learning_rate": 6.578215082214629e-07, "loss": 0.0055, "step": 10621 }, { "epoch": 2.4168373151308304, "grad_norm": 0.4047312617949801, "learning_rate": 6.577322798494449e-07, "loss": 0.0045, "step": 10622 }, { "epoch": 2.417064846416382, "grad_norm": 0.8465208916183006, "learning_rate": 6.576430508084393e-07, "loss": 0.0077, "step": 10623 }, { "epoch": 2.417292377701934, "grad_norm": 0.4914160327505205, "learning_rate": 6.575538211002693e-07, "loss": 0.007, "step": 10624 }, { "epoch": 2.4175199089874857, "grad_norm": 0.8797823300394363, "learning_rate": 6.57464590726759e-07, "loss": 0.015, "step": 10625 }, { "epoch": 2.4177474402730375, "grad_norm": 1.0250898207429242, "learning_rate": 6.573753596897318e-07, "loss": 0.013, "step": 10626 }, { "epoch": 2.417974971558589, "grad_norm": 2.511959751237718, "learning_rate": 6.572861279910114e-07, "loss": 0.0258, "step": 10627 }, { "epoch": 2.418202502844141, "grad_norm": 0.7446878747497276, "learning_rate": 6.571968956324218e-07, "loss": 0.0077, "step": 10628 }, { "epoch": 2.4184300341296927, "grad_norm": 1.631426303645607, "learning_rate": 6.571076626157866e-07, "loss": 0.0106, "step": 10629 }, { "epoch": 2.4186575654152445, "grad_norm": 1.1472408901521858, "learning_rate": 6.570184289429297e-07, "loss": 0.0108, "step": 10630 }, { "epoch": 2.418885096700796, "grad_norm": 0.9089386762170139, "learning_rate": 6.569291946156746e-07, "loss": 0.0093, "step": 10631 }, { "epoch": 2.419112627986348, "grad_norm": 0.4477665847558512, "learning_rate": 6.568399596358453e-07, "loss": 0.0039, "step": 10632 }, { "epoch": 2.4193401592718997, "grad_norm": 0.4825094712532907, "learning_rate": 6.567507240052655e-07, "loss": 0.0028, "step": 10633 }, { "epoch": 2.4195676905574515, "grad_norm": 0.5957139365017673, "learning_rate": 6.566614877257591e-07, "loss": 0.0088, "step": 10634 }, { "epoch": 2.419795221843003, "grad_norm": 0.989404092724913, "learning_rate": 6.565722507991497e-07, "loss": 0.0208, "step": 10635 }, { "epoch": 2.4200227531285554, "grad_norm": 0.7216620759146102, "learning_rate": 6.564830132272617e-07, "loss": 0.0108, "step": 10636 }, { "epoch": 2.4202502844141067, "grad_norm": 1.8679361360567885, "learning_rate": 6.563937750119183e-07, "loss": 0.0299, "step": 10637 }, { "epoch": 2.420477815699659, "grad_norm": 0.7290596424358633, "learning_rate": 6.563045361549436e-07, "loss": 0.0103, "step": 10638 }, { "epoch": 2.4207053469852102, "grad_norm": 0.7308251301549937, "learning_rate": 6.562152966581615e-07, "loss": 0.0137, "step": 10639 }, { "epoch": 2.4209328782707624, "grad_norm": 0.9356482423569612, "learning_rate": 6.561260565233961e-07, "loss": 0.0175, "step": 10640 }, { "epoch": 2.421160409556314, "grad_norm": 0.3214467601951361, "learning_rate": 6.56036815752471e-07, "loss": 0.0015, "step": 10641 }, { "epoch": 2.421387940841866, "grad_norm": 0.45474543094916675, "learning_rate": 6.559475743472101e-07, "loss": 0.0032, "step": 10642 }, { "epoch": 2.4216154721274177, "grad_norm": 1.5987415282461066, "learning_rate": 6.558583323094378e-07, "loss": 0.0184, "step": 10643 }, { "epoch": 2.4218430034129694, "grad_norm": 1.0570589083799375, "learning_rate": 6.557690896409774e-07, "loss": 0.0256, "step": 10644 }, { "epoch": 2.422070534698521, "grad_norm": 0.9224074920737387, "learning_rate": 6.556798463436531e-07, "loss": 0.007, "step": 10645 }, { "epoch": 2.422298065984073, "grad_norm": 1.1757270322600974, "learning_rate": 6.555906024192892e-07, "loss": 0.0169, "step": 10646 }, { "epoch": 2.4225255972696247, "grad_norm": 1.0839594188119255, "learning_rate": 6.555013578697092e-07, "loss": 0.0073, "step": 10647 }, { "epoch": 2.4227531285551764, "grad_norm": 1.8310875568229699, "learning_rate": 6.554121126967375e-07, "loss": 0.0525, "step": 10648 }, { "epoch": 2.422980659840728, "grad_norm": 0.8406920458656837, "learning_rate": 6.553228669021977e-07, "loss": 0.0124, "step": 10649 }, { "epoch": 2.42320819112628, "grad_norm": 1.4981800619370553, "learning_rate": 6.552336204879142e-07, "loss": 0.0113, "step": 10650 }, { "epoch": 2.4234357224118317, "grad_norm": 0.6641085168002532, "learning_rate": 6.551443734557108e-07, "loss": 0.0064, "step": 10651 }, { "epoch": 2.4236632536973834, "grad_norm": 0.48276469829749635, "learning_rate": 6.550551258074115e-07, "loss": 0.0049, "step": 10652 }, { "epoch": 2.423890784982935, "grad_norm": 1.224961864694724, "learning_rate": 6.549658775448406e-07, "loss": 0.0179, "step": 10653 }, { "epoch": 2.424118316268487, "grad_norm": 1.0333122264237222, "learning_rate": 6.54876628669822e-07, "loss": 0.0163, "step": 10654 }, { "epoch": 2.4243458475540387, "grad_norm": 0.7930712334709034, "learning_rate": 6.547873791841799e-07, "loss": 0.0055, "step": 10655 }, { "epoch": 2.4245733788395905, "grad_norm": 0.5090942026790196, "learning_rate": 6.546981290897383e-07, "loss": 0.0046, "step": 10656 }, { "epoch": 2.424800910125142, "grad_norm": 0.3108007244044243, "learning_rate": 6.546088783883215e-07, "loss": 0.0042, "step": 10657 }, { "epoch": 2.425028441410694, "grad_norm": 0.9889063077636431, "learning_rate": 6.545196270817531e-07, "loss": 0.0123, "step": 10658 }, { "epoch": 2.4252559726962457, "grad_norm": 0.9199432011563206, "learning_rate": 6.544303751718577e-07, "loss": 0.016, "step": 10659 }, { "epoch": 2.4254835039817975, "grad_norm": 0.8061211286664101, "learning_rate": 6.543411226604595e-07, "loss": 0.0081, "step": 10660 }, { "epoch": 2.425711035267349, "grad_norm": 0.6699328233922631, "learning_rate": 6.542518695493823e-07, "loss": 0.0081, "step": 10661 }, { "epoch": 2.425938566552901, "grad_norm": 1.0789640222036518, "learning_rate": 6.541626158404506e-07, "loss": 0.0273, "step": 10662 }, { "epoch": 2.4261660978384527, "grad_norm": 0.24706948033550072, "learning_rate": 6.540733615354882e-07, "loss": 0.0016, "step": 10663 }, { "epoch": 2.4263936291240045, "grad_norm": 2.9451687842001464, "learning_rate": 6.539841066363198e-07, "loss": 0.0243, "step": 10664 }, { "epoch": 2.426621160409556, "grad_norm": 0.835878758057803, "learning_rate": 6.538948511447692e-07, "loss": 0.0052, "step": 10665 }, { "epoch": 2.426848691695108, "grad_norm": 0.736964230792622, "learning_rate": 6.538055950626608e-07, "loss": 0.0131, "step": 10666 }, { "epoch": 2.4270762229806597, "grad_norm": 0.7222670505715578, "learning_rate": 6.537163383918188e-07, "loss": 0.0083, "step": 10667 }, { "epoch": 2.4273037542662115, "grad_norm": 0.45179461491762, "learning_rate": 6.536270811340674e-07, "loss": 0.0049, "step": 10668 }, { "epoch": 2.4275312855517632, "grad_norm": 0.5060407054519989, "learning_rate": 6.53537823291231e-07, "loss": 0.0062, "step": 10669 }, { "epoch": 2.427758816837315, "grad_norm": 1.2334981252758885, "learning_rate": 6.534485648651337e-07, "loss": 0.0234, "step": 10670 }, { "epoch": 2.4279863481228667, "grad_norm": 1.2698350573078077, "learning_rate": 6.533593058575997e-07, "loss": 0.0084, "step": 10671 }, { "epoch": 2.4282138794084185, "grad_norm": 1.3023317944178974, "learning_rate": 6.532700462704534e-07, "loss": 0.0216, "step": 10672 }, { "epoch": 2.4284414106939702, "grad_norm": 0.5112144878902187, "learning_rate": 6.531807861055194e-07, "loss": 0.0071, "step": 10673 }, { "epoch": 2.428668941979522, "grad_norm": 0.6899324888644217, "learning_rate": 6.530915253646219e-07, "loss": 0.0065, "step": 10674 }, { "epoch": 2.428896473265074, "grad_norm": 0.5427526535245528, "learning_rate": 6.530022640495845e-07, "loss": 0.005, "step": 10675 }, { "epoch": 2.4291240045506255, "grad_norm": 2.9548470038250425, "learning_rate": 6.529130021622324e-07, "loss": 0.0077, "step": 10676 }, { "epoch": 2.4293515358361777, "grad_norm": 1.6380267129945179, "learning_rate": 6.528237397043896e-07, "loss": 0.0109, "step": 10677 }, { "epoch": 2.429579067121729, "grad_norm": 0.8822715634774386, "learning_rate": 6.527344766778806e-07, "loss": 0.0111, "step": 10678 }, { "epoch": 2.429806598407281, "grad_norm": 0.23065171833349413, "learning_rate": 6.526452130845296e-07, "loss": 0.0014, "step": 10679 }, { "epoch": 2.430034129692833, "grad_norm": 0.5546919289170708, "learning_rate": 6.525559489261612e-07, "loss": 0.0047, "step": 10680 }, { "epoch": 2.4302616609783847, "grad_norm": 0.7532739172282941, "learning_rate": 6.524666842045997e-07, "loss": 0.0071, "step": 10681 }, { "epoch": 2.4304891922639364, "grad_norm": 0.4647822518911876, "learning_rate": 6.523774189216692e-07, "loss": 0.0057, "step": 10682 }, { "epoch": 2.430716723549488, "grad_norm": 0.8034579383316617, "learning_rate": 6.522881530791945e-07, "loss": 0.0067, "step": 10683 }, { "epoch": 2.43094425483504, "grad_norm": 0.9030060507436293, "learning_rate": 6.521988866790001e-07, "loss": 0.0128, "step": 10684 }, { "epoch": 2.4311717861205917, "grad_norm": 0.5350878967992817, "learning_rate": 6.5210961972291e-07, "loss": 0.0051, "step": 10685 }, { "epoch": 2.4313993174061435, "grad_norm": 0.5897735445683355, "learning_rate": 6.520203522127492e-07, "loss": 0.0104, "step": 10686 }, { "epoch": 2.431626848691695, "grad_norm": 0.62007302100845, "learning_rate": 6.519310841503419e-07, "loss": 0.0144, "step": 10687 }, { "epoch": 2.431854379977247, "grad_norm": 0.662796243445202, "learning_rate": 6.518418155375123e-07, "loss": 0.0065, "step": 10688 }, { "epoch": 2.4320819112627987, "grad_norm": 0.6245362355440753, "learning_rate": 6.517525463760852e-07, "loss": 0.0092, "step": 10689 }, { "epoch": 2.4323094425483505, "grad_norm": 0.48059287261255085, "learning_rate": 6.516632766678853e-07, "loss": 0.0035, "step": 10690 }, { "epoch": 2.432536973833902, "grad_norm": 0.2756292174591797, "learning_rate": 6.515740064147366e-07, "loss": 0.002, "step": 10691 }, { "epoch": 2.432764505119454, "grad_norm": 1.8271618297942231, "learning_rate": 6.514847356184639e-07, "loss": 0.0174, "step": 10692 }, { "epoch": 2.4329920364050057, "grad_norm": 1.1685626943478253, "learning_rate": 6.513954642808919e-07, "loss": 0.0155, "step": 10693 }, { "epoch": 2.4332195676905575, "grad_norm": 1.493861038260507, "learning_rate": 6.513061924038448e-07, "loss": 0.018, "step": 10694 }, { "epoch": 2.4334470989761092, "grad_norm": 1.2382003009104203, "learning_rate": 6.512169199891473e-07, "loss": 0.0197, "step": 10695 }, { "epoch": 2.433674630261661, "grad_norm": 0.4161874742643499, "learning_rate": 6.51127647038624e-07, "loss": 0.0024, "step": 10696 }, { "epoch": 2.4339021615472127, "grad_norm": 0.45330420668501215, "learning_rate": 6.510383735540994e-07, "loss": 0.0048, "step": 10697 }, { "epoch": 2.4341296928327645, "grad_norm": 0.25894402190541443, "learning_rate": 6.509490995373983e-07, "loss": 0.0016, "step": 10698 }, { "epoch": 2.4343572241183162, "grad_norm": 1.0416923518863188, "learning_rate": 6.50859824990345e-07, "loss": 0.0203, "step": 10699 }, { "epoch": 2.434584755403868, "grad_norm": 0.6632104528197479, "learning_rate": 6.507705499147641e-07, "loss": 0.0093, "step": 10700 }, { "epoch": 2.4348122866894197, "grad_norm": 0.2517472794047998, "learning_rate": 6.506812743124806e-07, "loss": 0.002, "step": 10701 }, { "epoch": 2.4350398179749715, "grad_norm": 1.0216206732708468, "learning_rate": 6.505919981853187e-07, "loss": 0.0174, "step": 10702 }, { "epoch": 2.4352673492605232, "grad_norm": 0.6471005394965484, "learning_rate": 6.505027215351033e-07, "loss": 0.0068, "step": 10703 }, { "epoch": 2.435494880546075, "grad_norm": 1.6868541151169982, "learning_rate": 6.504134443636591e-07, "loss": 0.0322, "step": 10704 }, { "epoch": 2.4357224118316267, "grad_norm": 0.3312253829089387, "learning_rate": 6.503241666728105e-07, "loss": 0.003, "step": 10705 }, { "epoch": 2.4359499431171785, "grad_norm": 0.8913549347053876, "learning_rate": 6.502348884643824e-07, "loss": 0.0087, "step": 10706 }, { "epoch": 2.4361774744027302, "grad_norm": 0.9502403046339689, "learning_rate": 6.501456097401992e-07, "loss": 0.0057, "step": 10707 }, { "epoch": 2.436405005688282, "grad_norm": 1.0729935234996166, "learning_rate": 6.50056330502086e-07, "loss": 0.0226, "step": 10708 }, { "epoch": 2.4366325369738338, "grad_norm": 0.9331442531977829, "learning_rate": 6.499670507518671e-07, "loss": 0.0067, "step": 10709 }, { "epoch": 2.4368600682593855, "grad_norm": 0.546291740224753, "learning_rate": 6.498777704913675e-07, "loss": 0.0035, "step": 10710 }, { "epoch": 2.4370875995449373, "grad_norm": 0.3596382033708216, "learning_rate": 6.497884897224119e-07, "loss": 0.003, "step": 10711 }, { "epoch": 2.437315130830489, "grad_norm": 0.641171068825635, "learning_rate": 6.49699208446825e-07, "loss": 0.0026, "step": 10712 }, { "epoch": 2.4375426621160408, "grad_norm": 1.5837856729889586, "learning_rate": 6.496099266664314e-07, "loss": 0.0154, "step": 10713 }, { "epoch": 2.437770193401593, "grad_norm": 0.634496456914405, "learning_rate": 6.495206443830561e-07, "loss": 0.0039, "step": 10714 }, { "epoch": 2.4379977246871443, "grad_norm": 0.6897540105515563, "learning_rate": 6.494313615985235e-07, "loss": 0.0037, "step": 10715 }, { "epoch": 2.4382252559726965, "grad_norm": 1.6512177421254104, "learning_rate": 6.493420783146587e-07, "loss": 0.0346, "step": 10716 }, { "epoch": 2.4384527872582478, "grad_norm": 0.851086839917141, "learning_rate": 6.492527945332865e-07, "loss": 0.0083, "step": 10717 }, { "epoch": 2.4386803185438, "grad_norm": 0.3728250543637369, "learning_rate": 6.491635102562315e-07, "loss": 0.0036, "step": 10718 }, { "epoch": 2.4389078498293517, "grad_norm": 1.0720393158047274, "learning_rate": 6.490742254853187e-07, "loss": 0.0091, "step": 10719 }, { "epoch": 2.4391353811149035, "grad_norm": 1.060205151831748, "learning_rate": 6.489849402223729e-07, "loss": 0.0145, "step": 10720 }, { "epoch": 2.439362912400455, "grad_norm": 0.4800701110073416, "learning_rate": 6.488956544692187e-07, "loss": 0.0055, "step": 10721 }, { "epoch": 2.439590443686007, "grad_norm": 1.6847356457981062, "learning_rate": 6.48806368227681e-07, "loss": 0.0289, "step": 10722 }, { "epoch": 2.4398179749715587, "grad_norm": 1.161024625856234, "learning_rate": 6.487170814995849e-07, "loss": 0.0045, "step": 10723 }, { "epoch": 2.4400455062571105, "grad_norm": 0.7954033487121006, "learning_rate": 6.48627794286755e-07, "loss": 0.0072, "step": 10724 }, { "epoch": 2.4402730375426622, "grad_norm": 1.0973105918362323, "learning_rate": 6.485385065910163e-07, "loss": 0.0098, "step": 10725 }, { "epoch": 2.440500568828214, "grad_norm": 0.8178153747976028, "learning_rate": 6.484492184141937e-07, "loss": 0.0157, "step": 10726 }, { "epoch": 2.4407281001137657, "grad_norm": 0.31468343428665674, "learning_rate": 6.48359929758112e-07, "loss": 0.0025, "step": 10727 }, { "epoch": 2.4409556313993175, "grad_norm": 1.0005639797138288, "learning_rate": 6.482706406245961e-07, "loss": 0.0081, "step": 10728 }, { "epoch": 2.4411831626848692, "grad_norm": 0.6176552026012525, "learning_rate": 6.481813510154706e-07, "loss": 0.0047, "step": 10729 }, { "epoch": 2.441410693970421, "grad_norm": 0.3079163514785509, "learning_rate": 6.480920609325611e-07, "loss": 0.0026, "step": 10730 }, { "epoch": 2.4416382252559727, "grad_norm": 0.9956088444542914, "learning_rate": 6.480027703776923e-07, "loss": 0.0168, "step": 10731 }, { "epoch": 2.4418657565415245, "grad_norm": 1.180398935329982, "learning_rate": 6.479134793526887e-07, "loss": 0.0123, "step": 10732 }, { "epoch": 2.4420932878270762, "grad_norm": 0.29175480119413477, "learning_rate": 6.478241878593755e-07, "loss": 0.0018, "step": 10733 }, { "epoch": 2.442320819112628, "grad_norm": 0.4592243287238551, "learning_rate": 6.47734895899578e-07, "loss": 0.0053, "step": 10734 }, { "epoch": 2.4425483503981797, "grad_norm": 0.682528893640734, "learning_rate": 6.476456034751207e-07, "loss": 0.0047, "step": 10735 }, { "epoch": 2.4427758816837315, "grad_norm": 0.2111087305241007, "learning_rate": 6.475563105878285e-07, "loss": 0.0013, "step": 10736 }, { "epoch": 2.4430034129692833, "grad_norm": 1.2407371913187901, "learning_rate": 6.474670172395271e-07, "loss": 0.0111, "step": 10737 }, { "epoch": 2.443230944254835, "grad_norm": 0.5913140704418944, "learning_rate": 6.473777234320408e-07, "loss": 0.0097, "step": 10738 }, { "epoch": 2.4434584755403868, "grad_norm": 0.8084397498005721, "learning_rate": 6.472884291671947e-07, "loss": 0.006, "step": 10739 }, { "epoch": 2.4436860068259385, "grad_norm": 0.9119341943409708, "learning_rate": 6.47199134446814e-07, "loss": 0.0056, "step": 10740 }, { "epoch": 2.4439135381114903, "grad_norm": 0.6365726191341614, "learning_rate": 6.471098392727238e-07, "loss": 0.0135, "step": 10741 }, { "epoch": 2.444141069397042, "grad_norm": 0.5213489896186915, "learning_rate": 6.470205436467487e-07, "loss": 0.0061, "step": 10742 }, { "epoch": 2.4443686006825938, "grad_norm": 0.27696356832603547, "learning_rate": 6.469312475707141e-07, "loss": 0.0019, "step": 10743 }, { "epoch": 2.4445961319681455, "grad_norm": 0.8463052662250616, "learning_rate": 6.468419510464452e-07, "loss": 0.006, "step": 10744 }, { "epoch": 2.4448236632536973, "grad_norm": 0.7074385395321419, "learning_rate": 6.467526540757666e-07, "loss": 0.0062, "step": 10745 }, { "epoch": 2.445051194539249, "grad_norm": 1.6750779762066335, "learning_rate": 6.466633566605036e-07, "loss": 0.0112, "step": 10746 }, { "epoch": 2.4452787258248008, "grad_norm": 1.341368197813935, "learning_rate": 6.465740588024813e-07, "loss": 0.0191, "step": 10747 }, { "epoch": 2.4455062571103525, "grad_norm": 1.1649287815370934, "learning_rate": 6.464847605035247e-07, "loss": 0.0314, "step": 10748 }, { "epoch": 2.4457337883959043, "grad_norm": 0.5188371926135934, "learning_rate": 6.46395461765459e-07, "loss": 0.0041, "step": 10749 }, { "epoch": 2.445961319681456, "grad_norm": 1.5346575862291558, "learning_rate": 6.463061625901093e-07, "loss": 0.0396, "step": 10750 }, { "epoch": 2.4461888509670078, "grad_norm": 0.582273067395056, "learning_rate": 6.462168629793008e-07, "loss": 0.0066, "step": 10751 }, { "epoch": 2.4464163822525595, "grad_norm": 0.5588206457733336, "learning_rate": 6.461275629348581e-07, "loss": 0.0058, "step": 10752 }, { "epoch": 2.4466439135381117, "grad_norm": 0.2008518020216815, "learning_rate": 6.460382624586069e-07, "loss": 0.0021, "step": 10753 }, { "epoch": 2.446871444823663, "grad_norm": 0.872184327599714, "learning_rate": 6.45948961552372e-07, "loss": 0.0096, "step": 10754 }, { "epoch": 2.4470989761092152, "grad_norm": 0.5488005281535102, "learning_rate": 6.45859660217979e-07, "loss": 0.0078, "step": 10755 }, { "epoch": 2.4473265073947665, "grad_norm": 1.0992374575227417, "learning_rate": 6.457703584572525e-07, "loss": 0.0141, "step": 10756 }, { "epoch": 2.4475540386803187, "grad_norm": 0.2900576577576335, "learning_rate": 6.45681056272018e-07, "loss": 0.0021, "step": 10757 }, { "epoch": 2.4477815699658705, "grad_norm": 0.8810389522218369, "learning_rate": 6.455917536641006e-07, "loss": 0.0111, "step": 10758 }, { "epoch": 2.4480091012514222, "grad_norm": 1.0922460019439115, "learning_rate": 6.455024506353252e-07, "loss": 0.0224, "step": 10759 }, { "epoch": 2.448236632536974, "grad_norm": 0.5543838679043933, "learning_rate": 6.454131471875176e-07, "loss": 0.0037, "step": 10760 }, { "epoch": 2.4484641638225257, "grad_norm": 1.060326748391644, "learning_rate": 6.453238433225026e-07, "loss": 0.0251, "step": 10761 }, { "epoch": 2.4486916951080775, "grad_norm": 0.6619383651857649, "learning_rate": 6.452345390421054e-07, "loss": 0.0045, "step": 10762 }, { "epoch": 2.4489192263936292, "grad_norm": 0.8221761562772401, "learning_rate": 6.451452343481512e-07, "loss": 0.0119, "step": 10763 }, { "epoch": 2.449146757679181, "grad_norm": 1.3329407566267895, "learning_rate": 6.450559292424655e-07, "loss": 0.0176, "step": 10764 }, { "epoch": 2.4493742889647327, "grad_norm": 0.8877572137258396, "learning_rate": 6.449666237268733e-07, "loss": 0.0076, "step": 10765 }, { "epoch": 2.4496018202502845, "grad_norm": 0.48591664612902585, "learning_rate": 6.448773178031996e-07, "loss": 0.0029, "step": 10766 }, { "epoch": 2.4498293515358363, "grad_norm": 1.0543884673898778, "learning_rate": 6.447880114732702e-07, "loss": 0.0161, "step": 10767 }, { "epoch": 2.450056882821388, "grad_norm": 0.5619957369592506, "learning_rate": 6.4469870473891e-07, "loss": 0.0072, "step": 10768 }, { "epoch": 2.4502844141069398, "grad_norm": 0.8960349818049183, "learning_rate": 6.446093976019443e-07, "loss": 0.0154, "step": 10769 }, { "epoch": 2.4505119453924915, "grad_norm": 0.647075519890759, "learning_rate": 6.445200900641986e-07, "loss": 0.0094, "step": 10770 }, { "epoch": 2.4507394766780433, "grad_norm": 0.3114932345028148, "learning_rate": 6.444307821274979e-07, "loss": 0.0024, "step": 10771 }, { "epoch": 2.450967007963595, "grad_norm": 0.562012370087131, "learning_rate": 6.443414737936677e-07, "loss": 0.0059, "step": 10772 }, { "epoch": 2.4511945392491468, "grad_norm": 1.2034340354498272, "learning_rate": 6.442521650645329e-07, "loss": 0.0115, "step": 10773 }, { "epoch": 2.4514220705346985, "grad_norm": 1.6893213088640318, "learning_rate": 6.441628559419194e-07, "loss": 0.0109, "step": 10774 }, { "epoch": 2.4516496018202503, "grad_norm": 0.2093606198639335, "learning_rate": 6.440735464276524e-07, "loss": 0.001, "step": 10775 }, { "epoch": 2.451877133105802, "grad_norm": 0.6097081141555052, "learning_rate": 6.439842365235566e-07, "loss": 0.0049, "step": 10776 }, { "epoch": 2.4521046643913538, "grad_norm": 1.4639717771927387, "learning_rate": 6.43894926231458e-07, "loss": 0.019, "step": 10777 }, { "epoch": 2.4523321956769055, "grad_norm": 0.7415477733701809, "learning_rate": 6.438056155531816e-07, "loss": 0.0108, "step": 10778 }, { "epoch": 2.4525597269624573, "grad_norm": 1.4068549055514086, "learning_rate": 6.437163044905528e-07, "loss": 0.0154, "step": 10779 }, { "epoch": 2.452787258248009, "grad_norm": 0.5303448465263142, "learning_rate": 6.436269930453971e-07, "loss": 0.0059, "step": 10780 }, { "epoch": 2.453014789533561, "grad_norm": 0.7285901779390012, "learning_rate": 6.4353768121954e-07, "loss": 0.0097, "step": 10781 }, { "epoch": 2.4532423208191125, "grad_norm": 0.7456043460507109, "learning_rate": 6.434483690148063e-07, "loss": 0.0065, "step": 10782 }, { "epoch": 2.4534698521046643, "grad_norm": 0.7622575957886862, "learning_rate": 6.43359056433022e-07, "loss": 0.0087, "step": 10783 }, { "epoch": 2.453697383390216, "grad_norm": 0.8039835355489348, "learning_rate": 6.43269743476012e-07, "loss": 0.0079, "step": 10784 }, { "epoch": 2.453924914675768, "grad_norm": 1.7464943590978308, "learning_rate": 6.43180430145602e-07, "loss": 0.0221, "step": 10785 }, { "epoch": 2.4541524459613195, "grad_norm": 0.24458549267906413, "learning_rate": 6.430911164436172e-07, "loss": 0.0015, "step": 10786 }, { "epoch": 2.4543799772468713, "grad_norm": 0.9633185726802544, "learning_rate": 6.430018023718833e-07, "loss": 0.0178, "step": 10787 }, { "epoch": 2.454607508532423, "grad_norm": 0.5092659941181177, "learning_rate": 6.429124879322256e-07, "loss": 0.0048, "step": 10788 }, { "epoch": 2.454835039817975, "grad_norm": 0.8915182657776931, "learning_rate": 6.42823173126469e-07, "loss": 0.0127, "step": 10789 }, { "epoch": 2.4550625711035265, "grad_norm": 0.8251161113918759, "learning_rate": 6.427338579564397e-07, "loss": 0.0122, "step": 10790 }, { "epoch": 2.4552901023890783, "grad_norm": 1.0120882811121983, "learning_rate": 6.426445424239629e-07, "loss": 0.0095, "step": 10791 }, { "epoch": 2.4555176336746305, "grad_norm": 0.6355514828688767, "learning_rate": 6.425552265308639e-07, "loss": 0.0081, "step": 10792 }, { "epoch": 2.455745164960182, "grad_norm": 0.779374996823828, "learning_rate": 6.424659102789681e-07, "loss": 0.0091, "step": 10793 }, { "epoch": 2.455972696245734, "grad_norm": 0.8800194633893718, "learning_rate": 6.423765936701012e-07, "loss": 0.0251, "step": 10794 }, { "epoch": 2.4562002275312853, "grad_norm": 0.16300890339911003, "learning_rate": 6.422872767060886e-07, "loss": 0.0013, "step": 10795 }, { "epoch": 2.4564277588168375, "grad_norm": 0.5564588192567732, "learning_rate": 6.421979593887555e-07, "loss": 0.0039, "step": 10796 }, { "epoch": 2.4566552901023893, "grad_norm": 0.9759614657396343, "learning_rate": 6.421086417199277e-07, "loss": 0.012, "step": 10797 }, { "epoch": 2.456882821387941, "grad_norm": 0.9701263815835248, "learning_rate": 6.420193237014306e-07, "loss": 0.0137, "step": 10798 }, { "epoch": 2.4571103526734928, "grad_norm": 0.6035388231232742, "learning_rate": 6.419300053350898e-07, "loss": 0.0051, "step": 10799 }, { "epoch": 2.4573378839590445, "grad_norm": 0.5474630484628242, "learning_rate": 6.418406866227306e-07, "loss": 0.0071, "step": 10800 }, { "epoch": 2.4575654152445963, "grad_norm": 0.4745944751950476, "learning_rate": 6.417513675661787e-07, "loss": 0.0025, "step": 10801 }, { "epoch": 2.457792946530148, "grad_norm": 0.7525789070394524, "learning_rate": 6.416620481672595e-07, "loss": 0.012, "step": 10802 }, { "epoch": 2.4580204778156998, "grad_norm": 0.39602264833261386, "learning_rate": 6.415727284277984e-07, "loss": 0.0037, "step": 10803 }, { "epoch": 2.4582480091012515, "grad_norm": 1.3202659319690915, "learning_rate": 6.414834083496212e-07, "loss": 0.0066, "step": 10804 }, { "epoch": 2.4584755403868033, "grad_norm": 1.095046406779255, "learning_rate": 6.413940879345533e-07, "loss": 0.0097, "step": 10805 }, { "epoch": 2.458703071672355, "grad_norm": 0.5228220831302375, "learning_rate": 6.413047671844203e-07, "loss": 0.0043, "step": 10806 }, { "epoch": 2.4589306029579068, "grad_norm": 0.591873603853495, "learning_rate": 6.412154461010477e-07, "loss": 0.0061, "step": 10807 }, { "epoch": 2.4591581342434585, "grad_norm": 1.5016407487136476, "learning_rate": 6.411261246862611e-07, "loss": 0.0262, "step": 10808 }, { "epoch": 2.4593856655290103, "grad_norm": 0.6711663084744033, "learning_rate": 6.410368029418859e-07, "loss": 0.0052, "step": 10809 }, { "epoch": 2.459613196814562, "grad_norm": 0.713000253051493, "learning_rate": 6.40947480869748e-07, "loss": 0.0061, "step": 10810 }, { "epoch": 2.459840728100114, "grad_norm": 0.6540193637534298, "learning_rate": 6.408581584716728e-07, "loss": 0.0048, "step": 10811 }, { "epoch": 2.4600682593856655, "grad_norm": 1.0990132336581595, "learning_rate": 6.407688357494858e-07, "loss": 0.011, "step": 10812 }, { "epoch": 2.4602957906712173, "grad_norm": 1.286987969860926, "learning_rate": 6.406795127050126e-07, "loss": 0.0046, "step": 10813 }, { "epoch": 2.460523321956769, "grad_norm": 1.3056495366296126, "learning_rate": 6.405901893400791e-07, "loss": 0.0152, "step": 10814 }, { "epoch": 2.460750853242321, "grad_norm": 3.4824070289229225, "learning_rate": 6.405008656565105e-07, "loss": 0.0941, "step": 10815 }, { "epoch": 2.4609783845278725, "grad_norm": 0.7991635825456143, "learning_rate": 6.404115416561326e-07, "loss": 0.0079, "step": 10816 }, { "epoch": 2.4612059158134243, "grad_norm": 0.4913112114939639, "learning_rate": 6.403222173407711e-07, "loss": 0.0083, "step": 10817 }, { "epoch": 2.461433447098976, "grad_norm": 0.8816099176880907, "learning_rate": 6.402328927122514e-07, "loss": 0.0105, "step": 10818 }, { "epoch": 2.461660978384528, "grad_norm": 1.5105594428347433, "learning_rate": 6.401435677723995e-07, "loss": 0.0312, "step": 10819 }, { "epoch": 2.4618885096700796, "grad_norm": 1.4337228118713243, "learning_rate": 6.400542425230407e-07, "loss": 0.024, "step": 10820 }, { "epoch": 2.4621160409556313, "grad_norm": 0.4055836520149674, "learning_rate": 6.399649169660007e-07, "loss": 0.006, "step": 10821 }, { "epoch": 2.462343572241183, "grad_norm": 0.6675654750446234, "learning_rate": 6.398755911031053e-07, "loss": 0.011, "step": 10822 }, { "epoch": 2.462571103526735, "grad_norm": 1.2771411959644499, "learning_rate": 6.397862649361798e-07, "loss": 0.0253, "step": 10823 }, { "epoch": 2.4627986348122866, "grad_norm": 0.8032087285842606, "learning_rate": 6.396969384670504e-07, "loss": 0.0077, "step": 10824 }, { "epoch": 2.4630261660978383, "grad_norm": 1.0823528706451202, "learning_rate": 6.396076116975426e-07, "loss": 0.0202, "step": 10825 }, { "epoch": 2.46325369738339, "grad_norm": 0.6883769293870244, "learning_rate": 6.395182846294816e-07, "loss": 0.0032, "step": 10826 }, { "epoch": 2.463481228668942, "grad_norm": 1.8016730164109291, "learning_rate": 6.394289572646938e-07, "loss": 0.0133, "step": 10827 }, { "epoch": 2.4637087599544936, "grad_norm": 0.5709902782490789, "learning_rate": 6.393396296050043e-07, "loss": 0.0042, "step": 10828 }, { "epoch": 2.4639362912400453, "grad_norm": 0.5856864045016847, "learning_rate": 6.392503016522392e-07, "loss": 0.0098, "step": 10829 }, { "epoch": 2.464163822525597, "grad_norm": 1.0538468740096998, "learning_rate": 6.391609734082238e-07, "loss": 0.0216, "step": 10830 }, { "epoch": 2.4643913538111493, "grad_norm": 0.5711291621425975, "learning_rate": 6.390716448747841e-07, "loss": 0.0065, "step": 10831 }, { "epoch": 2.4646188850967006, "grad_norm": 0.816987295562233, "learning_rate": 6.38982316053746e-07, "loss": 0.0107, "step": 10832 }, { "epoch": 2.4648464163822528, "grad_norm": 1.6439331527301233, "learning_rate": 6.388929869469348e-07, "loss": 0.0197, "step": 10833 }, { "epoch": 2.465073947667804, "grad_norm": 0.6157493017714799, "learning_rate": 6.388036575561764e-07, "loss": 0.0069, "step": 10834 }, { "epoch": 2.4653014789533563, "grad_norm": 0.7165705787174699, "learning_rate": 6.387143278832964e-07, "loss": 0.0029, "step": 10835 }, { "epoch": 2.465529010238908, "grad_norm": 0.637362167504476, "learning_rate": 6.386249979301207e-07, "loss": 0.0057, "step": 10836 }, { "epoch": 2.4657565415244598, "grad_norm": 0.5362446458816534, "learning_rate": 6.385356676984751e-07, "loss": 0.0027, "step": 10837 }, { "epoch": 2.4659840728100115, "grad_norm": 0.4035016190015854, "learning_rate": 6.384463371901853e-07, "loss": 0.0028, "step": 10838 }, { "epoch": 2.4662116040955633, "grad_norm": 0.6442679767813987, "learning_rate": 6.383570064070768e-07, "loss": 0.0073, "step": 10839 }, { "epoch": 2.466439135381115, "grad_norm": 0.7060433177917154, "learning_rate": 6.382676753509756e-07, "loss": 0.0119, "step": 10840 }, { "epoch": 2.466666666666667, "grad_norm": 0.5867209999812123, "learning_rate": 6.381783440237076e-07, "loss": 0.0055, "step": 10841 }, { "epoch": 2.4668941979522185, "grad_norm": 1.0151571746359345, "learning_rate": 6.380890124270982e-07, "loss": 0.0108, "step": 10842 }, { "epoch": 2.4671217292377703, "grad_norm": 1.1051623624192553, "learning_rate": 6.379996805629733e-07, "loss": 0.022, "step": 10843 }, { "epoch": 2.467349260523322, "grad_norm": 1.1110016929242574, "learning_rate": 6.37910348433159e-07, "loss": 0.0264, "step": 10844 }, { "epoch": 2.467576791808874, "grad_norm": 0.6585008171435193, "learning_rate": 6.378210160394807e-07, "loss": 0.007, "step": 10845 }, { "epoch": 2.4678043230944255, "grad_norm": 1.1640032564533347, "learning_rate": 6.37731683383764e-07, "loss": 0.012, "step": 10846 }, { "epoch": 2.4680318543799773, "grad_norm": 2.0694281659420675, "learning_rate": 6.376423504678354e-07, "loss": 0.0242, "step": 10847 }, { "epoch": 2.468259385665529, "grad_norm": 0.7593295469500364, "learning_rate": 6.375530172935203e-07, "loss": 0.0086, "step": 10848 }, { "epoch": 2.468486916951081, "grad_norm": 0.8857008476819853, "learning_rate": 6.374636838626444e-07, "loss": 0.0119, "step": 10849 }, { "epoch": 2.4687144482366326, "grad_norm": 0.7618731988684969, "learning_rate": 6.373743501770335e-07, "loss": 0.0133, "step": 10850 }, { "epoch": 2.4689419795221843, "grad_norm": 0.6404986154951744, "learning_rate": 6.372850162385139e-07, "loss": 0.0027, "step": 10851 }, { "epoch": 2.469169510807736, "grad_norm": 0.9170176145766121, "learning_rate": 6.371956820489107e-07, "loss": 0.0097, "step": 10852 }, { "epoch": 2.469397042093288, "grad_norm": 1.2474985486062216, "learning_rate": 6.371063476100501e-07, "loss": 0.0152, "step": 10853 }, { "epoch": 2.4696245733788396, "grad_norm": 0.8940963311494483, "learning_rate": 6.370170129237582e-07, "loss": 0.0071, "step": 10854 }, { "epoch": 2.4698521046643913, "grad_norm": 0.8826542439344808, "learning_rate": 6.369276779918604e-07, "loss": 0.0085, "step": 10855 }, { "epoch": 2.470079635949943, "grad_norm": 0.3708683746895738, "learning_rate": 6.368383428161829e-07, "loss": 0.0044, "step": 10856 }, { "epoch": 2.470307167235495, "grad_norm": 0.5519297394942894, "learning_rate": 6.36749007398551e-07, "loss": 0.0044, "step": 10857 }, { "epoch": 2.4705346985210466, "grad_norm": 0.5423843821050582, "learning_rate": 6.366596717407912e-07, "loss": 0.0044, "step": 10858 }, { "epoch": 2.4707622298065983, "grad_norm": 0.7260060818375248, "learning_rate": 6.36570335844729e-07, "loss": 0.0052, "step": 10859 }, { "epoch": 2.47098976109215, "grad_norm": 0.6723072797408425, "learning_rate": 6.364809997121901e-07, "loss": 0.0118, "step": 10860 }, { "epoch": 2.471217292377702, "grad_norm": 0.5929836015028919, "learning_rate": 6.363916633450009e-07, "loss": 0.0045, "step": 10861 }, { "epoch": 2.4714448236632536, "grad_norm": 1.3530152589796973, "learning_rate": 6.363023267449868e-07, "loss": 0.0097, "step": 10862 }, { "epoch": 2.4716723549488053, "grad_norm": 32.81798792667052, "learning_rate": 6.362129899139739e-07, "loss": 0.1434, "step": 10863 }, { "epoch": 2.471899886234357, "grad_norm": 0.2697920322129157, "learning_rate": 6.36123652853788e-07, "loss": 0.0012, "step": 10864 }, { "epoch": 2.472127417519909, "grad_norm": 0.9974607002943556, "learning_rate": 6.360343155662551e-07, "loss": 0.0143, "step": 10865 }, { "epoch": 2.4723549488054606, "grad_norm": 0.6514453124138803, "learning_rate": 6.359449780532008e-07, "loss": 0.0042, "step": 10866 }, { "epoch": 2.4725824800910123, "grad_norm": 0.738367835837304, "learning_rate": 6.358556403164513e-07, "loss": 0.0101, "step": 10867 }, { "epoch": 2.472810011376564, "grad_norm": 1.0058650179130761, "learning_rate": 6.357663023578324e-07, "loss": 0.0184, "step": 10868 }, { "epoch": 2.473037542662116, "grad_norm": 0.9521105786092439, "learning_rate": 6.3567696417917e-07, "loss": 0.0166, "step": 10869 }, { "epoch": 2.473265073947668, "grad_norm": 1.4897097305526938, "learning_rate": 6.3558762578229e-07, "loss": 0.0113, "step": 10870 }, { "epoch": 2.4734926052332193, "grad_norm": 0.40889165525643906, "learning_rate": 6.354982871690184e-07, "loss": 0.003, "step": 10871 }, { "epoch": 2.4737201365187715, "grad_norm": 0.5628810391847923, "learning_rate": 6.35408948341181e-07, "loss": 0.0067, "step": 10872 }, { "epoch": 2.473947667804323, "grad_norm": 0.4104954262835541, "learning_rate": 6.353196093006035e-07, "loss": 0.005, "step": 10873 }, { "epoch": 2.474175199089875, "grad_norm": 0.3467978098683243, "learning_rate": 6.352302700491124e-07, "loss": 0.0027, "step": 10874 }, { "epoch": 2.474402730375427, "grad_norm": 1.4082313071222587, "learning_rate": 6.351409305885332e-07, "loss": 0.0132, "step": 10875 }, { "epoch": 2.4746302616609785, "grad_norm": 0.8236026940730048, "learning_rate": 6.35051590920692e-07, "loss": 0.005, "step": 10876 }, { "epoch": 2.4748577929465303, "grad_norm": 1.2002376001202546, "learning_rate": 6.349622510474146e-07, "loss": 0.0148, "step": 10877 }, { "epoch": 2.475085324232082, "grad_norm": 0.8324075495833092, "learning_rate": 6.348729109705272e-07, "loss": 0.0163, "step": 10878 }, { "epoch": 2.475312855517634, "grad_norm": 1.323552073309551, "learning_rate": 6.347835706918555e-07, "loss": 0.0153, "step": 10879 }, { "epoch": 2.4755403868031856, "grad_norm": 0.4422339221754777, "learning_rate": 6.346942302132253e-07, "loss": 0.003, "step": 10880 }, { "epoch": 2.4757679180887373, "grad_norm": 1.576370944701077, "learning_rate": 6.34604889536463e-07, "loss": 0.0243, "step": 10881 }, { "epoch": 2.475995449374289, "grad_norm": 1.0980383478247708, "learning_rate": 6.345155486633946e-07, "loss": 0.0103, "step": 10882 }, { "epoch": 2.476222980659841, "grad_norm": 0.5445578178483796, "learning_rate": 6.344262075958454e-07, "loss": 0.0047, "step": 10883 }, { "epoch": 2.4764505119453926, "grad_norm": 0.7782185211259499, "learning_rate": 6.343368663356419e-07, "loss": 0.0086, "step": 10884 }, { "epoch": 2.4766780432309443, "grad_norm": 0.943649321791744, "learning_rate": 6.3424752488461e-07, "loss": 0.0146, "step": 10885 }, { "epoch": 2.476905574516496, "grad_norm": 0.822634463608398, "learning_rate": 6.341581832445757e-07, "loss": 0.015, "step": 10886 }, { "epoch": 2.477133105802048, "grad_norm": 1.4436686124066975, "learning_rate": 6.340688414173647e-07, "loss": 0.0179, "step": 10887 }, { "epoch": 2.4773606370875996, "grad_norm": 0.9647875141978832, "learning_rate": 6.339794994048035e-07, "loss": 0.0111, "step": 10888 }, { "epoch": 2.4775881683731513, "grad_norm": 0.7273575754919915, "learning_rate": 6.338901572087177e-07, "loss": 0.007, "step": 10889 }, { "epoch": 2.477815699658703, "grad_norm": 1.1802393133427496, "learning_rate": 6.338008148309329e-07, "loss": 0.0157, "step": 10890 }, { "epoch": 2.478043230944255, "grad_norm": 1.0382805092607268, "learning_rate": 6.337114722732761e-07, "loss": 0.0158, "step": 10891 }, { "epoch": 2.4782707622298066, "grad_norm": 0.462463776400826, "learning_rate": 6.336221295375726e-07, "loss": 0.0032, "step": 10892 }, { "epoch": 2.4784982935153583, "grad_norm": 0.4551482973925467, "learning_rate": 6.335327866256486e-07, "loss": 0.0034, "step": 10893 }, { "epoch": 2.47872582480091, "grad_norm": 0.8253028251516872, "learning_rate": 6.334434435393298e-07, "loss": 0.0137, "step": 10894 }, { "epoch": 2.478953356086462, "grad_norm": 0.6334109898539092, "learning_rate": 6.333541002804429e-07, "loss": 0.0075, "step": 10895 }, { "epoch": 2.4791808873720136, "grad_norm": 1.3065846753670656, "learning_rate": 6.332647568508132e-07, "loss": 0.0124, "step": 10896 }, { "epoch": 2.4794084186575653, "grad_norm": 1.3631276156539223, "learning_rate": 6.33175413252267e-07, "loss": 0.0046, "step": 10897 }, { "epoch": 2.479635949943117, "grad_norm": 0.820668499543079, "learning_rate": 6.330860694866305e-07, "loss": 0.0067, "step": 10898 }, { "epoch": 2.479863481228669, "grad_norm": 0.3859809156110226, "learning_rate": 6.329967255557294e-07, "loss": 0.0025, "step": 10899 }, { "epoch": 2.4800910125142206, "grad_norm": 1.1694507675100612, "learning_rate": 6.329073814613899e-07, "loss": 0.0092, "step": 10900 }, { "epoch": 2.4803185437997723, "grad_norm": 0.8267139332123228, "learning_rate": 6.328180372054382e-07, "loss": 0.0103, "step": 10901 }, { "epoch": 2.480546075085324, "grad_norm": 0.3298002998100713, "learning_rate": 6.327286927897e-07, "loss": 0.0025, "step": 10902 }, { "epoch": 2.480773606370876, "grad_norm": 0.9458976507800748, "learning_rate": 6.326393482160013e-07, "loss": 0.0073, "step": 10903 }, { "epoch": 2.4810011376564276, "grad_norm": 0.3526865781442195, "learning_rate": 6.325500034861684e-07, "loss": 0.0039, "step": 10904 }, { "epoch": 2.4812286689419794, "grad_norm": 0.47139297279824577, "learning_rate": 6.324606586020274e-07, "loss": 0.0081, "step": 10905 }, { "epoch": 2.481456200227531, "grad_norm": 0.8199905613245286, "learning_rate": 6.323713135654041e-07, "loss": 0.0143, "step": 10906 }, { "epoch": 2.481683731513083, "grad_norm": 0.6670256822209086, "learning_rate": 6.322819683781248e-07, "loss": 0.011, "step": 10907 }, { "epoch": 2.4819112627986346, "grad_norm": 0.8931054489924118, "learning_rate": 6.321926230420153e-07, "loss": 0.0124, "step": 10908 }, { "epoch": 2.482138794084187, "grad_norm": 0.7722436490615392, "learning_rate": 6.321032775589018e-07, "loss": 0.0127, "step": 10909 }, { "epoch": 2.482366325369738, "grad_norm": 0.3397248819144064, "learning_rate": 6.3201393193061e-07, "loss": 0.0014, "step": 10910 }, { "epoch": 2.4825938566552903, "grad_norm": 0.7585664947709385, "learning_rate": 6.319245861589666e-07, "loss": 0.0112, "step": 10911 }, { "epoch": 2.4828213879408416, "grad_norm": 0.44011213221597917, "learning_rate": 6.318352402457973e-07, "loss": 0.0025, "step": 10912 }, { "epoch": 2.483048919226394, "grad_norm": 0.4302264220334163, "learning_rate": 6.317458941929281e-07, "loss": 0.0042, "step": 10913 }, { "epoch": 2.4832764505119456, "grad_norm": 0.5761367663172403, "learning_rate": 6.316565480021854e-07, "loss": 0.0081, "step": 10914 }, { "epoch": 2.4835039817974973, "grad_norm": 0.8113402596240088, "learning_rate": 6.315672016753949e-07, "loss": 0.0077, "step": 10915 }, { "epoch": 2.483731513083049, "grad_norm": 1.7555229639433978, "learning_rate": 6.314778552143827e-07, "loss": 0.0233, "step": 10916 }, { "epoch": 2.483959044368601, "grad_norm": 0.35966134649081744, "learning_rate": 6.31388508620975e-07, "loss": 0.002, "step": 10917 }, { "epoch": 2.4841865756541526, "grad_norm": 1.721642153025871, "learning_rate": 6.312991618969981e-07, "loss": 0.0245, "step": 10918 }, { "epoch": 2.4844141069397043, "grad_norm": 1.1122241135565563, "learning_rate": 6.312098150442777e-07, "loss": 0.0231, "step": 10919 }, { "epoch": 2.484641638225256, "grad_norm": 0.3170436077873871, "learning_rate": 6.311204680646403e-07, "loss": 0.0031, "step": 10920 }, { "epoch": 2.484869169510808, "grad_norm": 0.913928088660634, "learning_rate": 6.310311209599115e-07, "loss": 0.0114, "step": 10921 }, { "epoch": 2.4850967007963596, "grad_norm": 0.7391259214238254, "learning_rate": 6.309417737319178e-07, "loss": 0.0054, "step": 10922 }, { "epoch": 2.4853242320819113, "grad_norm": 1.6053565105124532, "learning_rate": 6.30852426382485e-07, "loss": 0.0151, "step": 10923 }, { "epoch": 2.485551763367463, "grad_norm": 0.8949142093260704, "learning_rate": 6.307630789134393e-07, "loss": 0.0097, "step": 10924 }, { "epoch": 2.485779294653015, "grad_norm": 0.44086688854343953, "learning_rate": 6.306737313266069e-07, "loss": 0.0057, "step": 10925 }, { "epoch": 2.4860068259385666, "grad_norm": 0.610802080045069, "learning_rate": 6.305843836238139e-07, "loss": 0.0104, "step": 10926 }, { "epoch": 2.4862343572241183, "grad_norm": 0.4014890849509062, "learning_rate": 6.304950358068862e-07, "loss": 0.0024, "step": 10927 }, { "epoch": 2.48646188850967, "grad_norm": 0.7820562124801371, "learning_rate": 6.3040568787765e-07, "loss": 0.0086, "step": 10928 }, { "epoch": 2.486689419795222, "grad_norm": 1.1363373674055175, "learning_rate": 6.303163398379316e-07, "loss": 0.0104, "step": 10929 }, { "epoch": 2.4869169510807736, "grad_norm": 1.0101345350977367, "learning_rate": 6.302269916895566e-07, "loss": 0.0096, "step": 10930 }, { "epoch": 2.4871444823663253, "grad_norm": 2.010807393524196, "learning_rate": 6.301376434343517e-07, "loss": 0.0074, "step": 10931 }, { "epoch": 2.487372013651877, "grad_norm": 0.9370373702819613, "learning_rate": 6.300482950741431e-07, "loss": 0.012, "step": 10932 }, { "epoch": 2.487599544937429, "grad_norm": 0.7410549028164761, "learning_rate": 6.299589466107561e-07, "loss": 0.0093, "step": 10933 }, { "epoch": 2.4878270762229806, "grad_norm": 0.834509272909021, "learning_rate": 6.298695980460174e-07, "loss": 0.0118, "step": 10934 }, { "epoch": 2.4880546075085324, "grad_norm": 1.1240330396411953, "learning_rate": 6.297802493817533e-07, "loss": 0.0071, "step": 10935 }, { "epoch": 2.488282138794084, "grad_norm": 0.8432386270833748, "learning_rate": 6.296909006197895e-07, "loss": 0.0101, "step": 10936 }, { "epoch": 2.488509670079636, "grad_norm": 0.5501005838986964, "learning_rate": 6.296015517619522e-07, "loss": 0.0037, "step": 10937 }, { "epoch": 2.4887372013651876, "grad_norm": 1.0358414488080459, "learning_rate": 6.295122028100677e-07, "loss": 0.0107, "step": 10938 }, { "epoch": 2.4889647326507394, "grad_norm": 0.9413706803526852, "learning_rate": 6.294228537659622e-07, "loss": 0.0126, "step": 10939 }, { "epoch": 2.489192263936291, "grad_norm": 1.869481132448024, "learning_rate": 6.293335046314612e-07, "loss": 0.0413, "step": 10940 }, { "epoch": 2.489419795221843, "grad_norm": 0.6231709501916093, "learning_rate": 6.292441554083917e-07, "loss": 0.0076, "step": 10941 }, { "epoch": 2.4896473265073946, "grad_norm": 1.0029582997906934, "learning_rate": 6.291548060985793e-07, "loss": 0.0116, "step": 10942 }, { "epoch": 2.4898748577929464, "grad_norm": 1.3355568913219178, "learning_rate": 6.290654567038504e-07, "loss": 0.0296, "step": 10943 }, { "epoch": 2.490102389078498, "grad_norm": 1.7791637190313352, "learning_rate": 6.289761072260307e-07, "loss": 0.0232, "step": 10944 }, { "epoch": 2.49032992036405, "grad_norm": 1.9456819604250821, "learning_rate": 6.28886757666947e-07, "loss": 0.0252, "step": 10945 }, { "epoch": 2.4905574516496016, "grad_norm": 0.8845233847729177, "learning_rate": 6.287974080284251e-07, "loss": 0.0138, "step": 10946 }, { "epoch": 2.4907849829351534, "grad_norm": 1.0755789469572519, "learning_rate": 6.287080583122908e-07, "loss": 0.0193, "step": 10947 }, { "epoch": 2.4910125142207056, "grad_norm": 27.693478960548948, "learning_rate": 6.286187085203707e-07, "loss": 0.1582, "step": 10948 }, { "epoch": 2.491240045506257, "grad_norm": 0.31924686746381814, "learning_rate": 6.28529358654491e-07, "loss": 0.0026, "step": 10949 }, { "epoch": 2.491467576791809, "grad_norm": 0.7726409763236523, "learning_rate": 6.284400087164776e-07, "loss": 0.008, "step": 10950 }, { "epoch": 2.4916951080773604, "grad_norm": 0.35083471437844993, "learning_rate": 6.283506587081568e-07, "loss": 0.0024, "step": 10951 }, { "epoch": 2.4919226393629126, "grad_norm": 0.8501246480772174, "learning_rate": 6.282613086313546e-07, "loss": 0.0152, "step": 10952 }, { "epoch": 2.4921501706484643, "grad_norm": 0.8951450787142191, "learning_rate": 6.28171958487897e-07, "loss": 0.0042, "step": 10953 }, { "epoch": 2.492377701934016, "grad_norm": 0.5415148450537789, "learning_rate": 6.280826082796104e-07, "loss": 0.0049, "step": 10954 }, { "epoch": 2.492605233219568, "grad_norm": 0.39189943764183033, "learning_rate": 6.279932580083212e-07, "loss": 0.0045, "step": 10955 }, { "epoch": 2.4928327645051196, "grad_norm": 0.6935350984340154, "learning_rate": 6.279039076758551e-07, "loss": 0.007, "step": 10956 }, { "epoch": 2.4930602957906713, "grad_norm": 0.7836738834245097, "learning_rate": 6.278145572840385e-07, "loss": 0.006, "step": 10957 }, { "epoch": 2.493287827076223, "grad_norm": 0.47443101362467044, "learning_rate": 6.277252068346977e-07, "loss": 0.0057, "step": 10958 }, { "epoch": 2.493515358361775, "grad_norm": 0.9132759642369774, "learning_rate": 6.276358563296585e-07, "loss": 0.0076, "step": 10959 }, { "epoch": 2.4937428896473266, "grad_norm": 0.9504528853947923, "learning_rate": 6.275465057707471e-07, "loss": 0.0166, "step": 10960 }, { "epoch": 2.4939704209328784, "grad_norm": 1.5490874740574343, "learning_rate": 6.274571551597899e-07, "loss": 0.0468, "step": 10961 }, { "epoch": 2.49419795221843, "grad_norm": 1.0129684680650592, "learning_rate": 6.273678044986129e-07, "loss": 0.0128, "step": 10962 }, { "epoch": 2.494425483503982, "grad_norm": 0.7130024937109426, "learning_rate": 6.272784537890425e-07, "loss": 0.0072, "step": 10963 }, { "epoch": 2.4946530147895336, "grad_norm": 0.5266009890459185, "learning_rate": 6.271891030329046e-07, "loss": 0.0064, "step": 10964 }, { "epoch": 2.4948805460750854, "grad_norm": 0.9455775991478202, "learning_rate": 6.270997522320254e-07, "loss": 0.0104, "step": 10965 }, { "epoch": 2.495108077360637, "grad_norm": 1.1591001245109893, "learning_rate": 6.270104013882311e-07, "loss": 0.0237, "step": 10966 }, { "epoch": 2.495335608646189, "grad_norm": 0.6873925573124675, "learning_rate": 6.269210505033476e-07, "loss": 0.0113, "step": 10967 }, { "epoch": 2.4955631399317406, "grad_norm": 0.41610922983914705, "learning_rate": 6.268316995792017e-07, "loss": 0.0027, "step": 10968 }, { "epoch": 2.4957906712172924, "grad_norm": 1.8172190147817484, "learning_rate": 6.267423486176191e-07, "loss": 0.002, "step": 10969 }, { "epoch": 2.496018202502844, "grad_norm": 0.8252036771256384, "learning_rate": 6.266529976204263e-07, "loss": 0.0123, "step": 10970 }, { "epoch": 2.496245733788396, "grad_norm": 0.9936330880641074, "learning_rate": 6.26563646589449e-07, "loss": 0.0126, "step": 10971 }, { "epoch": 2.4964732650739476, "grad_norm": 1.3187357120619616, "learning_rate": 6.264742955265138e-07, "loss": 0.0154, "step": 10972 }, { "epoch": 2.4967007963594994, "grad_norm": 0.7856519065649468, "learning_rate": 6.263849444334464e-07, "loss": 0.012, "step": 10973 }, { "epoch": 2.496928327645051, "grad_norm": 0.9575549681093884, "learning_rate": 6.262955933120735e-07, "loss": 0.0139, "step": 10974 }, { "epoch": 2.497155858930603, "grad_norm": 1.093448057199692, "learning_rate": 6.262062421642211e-07, "loss": 0.0134, "step": 10975 }, { "epoch": 2.4973833902161546, "grad_norm": 1.1377408677569962, "learning_rate": 6.261168909917154e-07, "loss": 0.0174, "step": 10976 }, { "epoch": 2.4976109215017064, "grad_norm": 0.6386910211731229, "learning_rate": 6.26027539796382e-07, "loss": 0.0046, "step": 10977 }, { "epoch": 2.497838452787258, "grad_norm": 0.37069503768241036, "learning_rate": 6.259381885800481e-07, "loss": 0.0032, "step": 10978 }, { "epoch": 2.49806598407281, "grad_norm": 1.3068971040396329, "learning_rate": 6.258488373445391e-07, "loss": 0.0139, "step": 10979 }, { "epoch": 2.4982935153583616, "grad_norm": 1.1096694273545726, "learning_rate": 6.257594860916815e-07, "loss": 0.0207, "step": 10980 }, { "epoch": 2.4985210466439134, "grad_norm": 0.539111470828706, "learning_rate": 6.256701348233012e-07, "loss": 0.0039, "step": 10981 }, { "epoch": 2.498748577929465, "grad_norm": 1.2230916015077256, "learning_rate": 6.255807835412248e-07, "loss": 0.0133, "step": 10982 }, { "epoch": 2.498976109215017, "grad_norm": 0.8116471509549497, "learning_rate": 6.254914322472783e-07, "loss": 0.0126, "step": 10983 }, { "epoch": 2.4992036405005686, "grad_norm": 1.153237726477132, "learning_rate": 6.254020809432876e-07, "loss": 0.0156, "step": 10984 }, { "epoch": 2.4994311717861204, "grad_norm": 1.0488081198042492, "learning_rate": 6.253127296310791e-07, "loss": 0.0084, "step": 10985 }, { "epoch": 2.499658703071672, "grad_norm": 0.4782332318521891, "learning_rate": 6.252233783124792e-07, "loss": 0.0038, "step": 10986 }, { "epoch": 2.4998862343572243, "grad_norm": 1.1007494401544546, "learning_rate": 6.251340269893138e-07, "loss": 0.0101, "step": 10987 }, { "epoch": 2.5001137656427757, "grad_norm": 0.7601589218517143, "learning_rate": 6.250446756634089e-07, "loss": 0.0137, "step": 10988 }, { "epoch": 2.500341296928328, "grad_norm": 0.3451852777959493, "learning_rate": 6.249553243365913e-07, "loss": 0.0023, "step": 10989 }, { "epoch": 2.500568828213879, "grad_norm": 0.6518271464292533, "learning_rate": 6.248659730106863e-07, "loss": 0.0044, "step": 10990 }, { "epoch": 2.5007963594994314, "grad_norm": 0.5041681412747858, "learning_rate": 6.24776621687521e-07, "loss": 0.0022, "step": 10991 }, { "epoch": 2.5010238907849827, "grad_norm": 0.5056306017280847, "learning_rate": 6.246872703689212e-07, "loss": 0.0116, "step": 10992 }, { "epoch": 2.501251422070535, "grad_norm": 0.6831186765841294, "learning_rate": 6.245979190567126e-07, "loss": 0.0071, "step": 10993 }, { "epoch": 2.5014789533560866, "grad_norm": 0.8814326121549686, "learning_rate": 6.245085677527219e-07, "loss": 0.0123, "step": 10994 }, { "epoch": 2.5017064846416384, "grad_norm": 0.9549028004891217, "learning_rate": 6.244192164587753e-07, "loss": 0.0101, "step": 10995 }, { "epoch": 2.50193401592719, "grad_norm": 0.48310734831731644, "learning_rate": 6.243298651766989e-07, "loss": 0.0047, "step": 10996 }, { "epoch": 2.502161547212742, "grad_norm": 1.3933170342363592, "learning_rate": 6.242405139083186e-07, "loss": 0.0144, "step": 10997 }, { "epoch": 2.5023890784982936, "grad_norm": 0.5201909766727337, "learning_rate": 6.241511626554611e-07, "loss": 0.0025, "step": 10998 }, { "epoch": 2.5026166097838454, "grad_norm": 1.9012820637602332, "learning_rate": 6.240618114199522e-07, "loss": 0.0391, "step": 10999 }, { "epoch": 2.502844141069397, "grad_norm": 0.7591951107526219, "learning_rate": 6.239724602036181e-07, "loss": 0.0067, "step": 11000 }, { "epoch": 2.503071672354949, "grad_norm": 0.5319246092202723, "learning_rate": 6.23883109008285e-07, "loss": 0.0065, "step": 11001 }, { "epoch": 2.5032992036405006, "grad_norm": 0.5296175997671938, "learning_rate": 6.23793757835779e-07, "loss": 0.0039, "step": 11002 }, { "epoch": 2.5035267349260524, "grad_norm": 0.9317391217783529, "learning_rate": 6.237044066879268e-07, "loss": 0.0123, "step": 11003 }, { "epoch": 2.503754266211604, "grad_norm": 0.6768563937423534, "learning_rate": 6.236150555665537e-07, "loss": 0.0029, "step": 11004 }, { "epoch": 2.503981797497156, "grad_norm": 0.5759981630850026, "learning_rate": 6.235257044734864e-07, "loss": 0.0043, "step": 11005 }, { "epoch": 2.5042093287827076, "grad_norm": 0.6646452820887204, "learning_rate": 6.234363534105513e-07, "loss": 0.0104, "step": 11006 }, { "epoch": 2.5044368600682594, "grad_norm": 0.708558022094836, "learning_rate": 6.23347002379574e-07, "loss": 0.004, "step": 11007 }, { "epoch": 2.504664391353811, "grad_norm": 0.7175180086703401, "learning_rate": 6.23257651382381e-07, "loss": 0.0049, "step": 11008 }, { "epoch": 2.504891922639363, "grad_norm": 0.7478243562014251, "learning_rate": 6.231683004207984e-07, "loss": 0.0055, "step": 11009 }, { "epoch": 2.5051194539249146, "grad_norm": 1.3397554903741586, "learning_rate": 6.230789494966525e-07, "loss": 0.0202, "step": 11010 }, { "epoch": 2.5053469852104664, "grad_norm": 0.522621378994573, "learning_rate": 6.229895986117693e-07, "loss": 0.006, "step": 11011 }, { "epoch": 2.505574516496018, "grad_norm": 0.5769308856473828, "learning_rate": 6.229002477679749e-07, "loss": 0.0052, "step": 11012 }, { "epoch": 2.50580204778157, "grad_norm": 0.9656314770316115, "learning_rate": 6.228108969670959e-07, "loss": 0.0086, "step": 11013 }, { "epoch": 2.5060295790671216, "grad_norm": 0.3209051390376647, "learning_rate": 6.227215462109577e-07, "loss": 0.0016, "step": 11014 }, { "epoch": 2.5062571103526734, "grad_norm": 0.9199717057927734, "learning_rate": 6.226321955013872e-07, "loss": 0.0035, "step": 11015 }, { "epoch": 2.506484641638225, "grad_norm": 3.4854585582858215, "learning_rate": 6.225428448402102e-07, "loss": 0.0338, "step": 11016 }, { "epoch": 2.506712172923777, "grad_norm": 1.0883039999273356, "learning_rate": 6.224534942292531e-07, "loss": 0.0131, "step": 11017 }, { "epoch": 2.5069397042093287, "grad_norm": 1.140210998392635, "learning_rate": 6.223641436703418e-07, "loss": 0.0108, "step": 11018 }, { "epoch": 2.5071672354948804, "grad_norm": 0.8033461112780089, "learning_rate": 6.222747931653025e-07, "loss": 0.0067, "step": 11019 }, { "epoch": 2.507394766780432, "grad_norm": 1.0554779994440733, "learning_rate": 6.221854427159617e-07, "loss": 0.0208, "step": 11020 }, { "epoch": 2.507622298065984, "grad_norm": 0.41170726230908505, "learning_rate": 6.220960923241448e-07, "loss": 0.0036, "step": 11021 }, { "epoch": 2.507849829351536, "grad_norm": 1.5266396128172244, "learning_rate": 6.22006741991679e-07, "loss": 0.0178, "step": 11022 }, { "epoch": 2.5080773606370874, "grad_norm": 0.2747647108001602, "learning_rate": 6.219173917203896e-07, "loss": 0.0023, "step": 11023 }, { "epoch": 2.5083048919226396, "grad_norm": 0.3605568148007234, "learning_rate": 6.218280415121032e-07, "loss": 0.0012, "step": 11024 }, { "epoch": 2.508532423208191, "grad_norm": 0.6491922585873395, "learning_rate": 6.217386913686459e-07, "loss": 0.0045, "step": 11025 }, { "epoch": 2.508759954493743, "grad_norm": 1.111070900371462, "learning_rate": 6.216493412918436e-07, "loss": 0.0057, "step": 11026 }, { "epoch": 2.5089874857792944, "grad_norm": 0.6284309093296693, "learning_rate": 6.215599912835226e-07, "loss": 0.0095, "step": 11027 }, { "epoch": 2.5092150170648466, "grad_norm": 1.2326905473251335, "learning_rate": 6.214706413455091e-07, "loss": 0.0184, "step": 11028 }, { "epoch": 2.509442548350398, "grad_norm": 0.8015300130673784, "learning_rate": 6.213812914796294e-07, "loss": 0.0069, "step": 11029 }, { "epoch": 2.50967007963595, "grad_norm": 0.8433253397709174, "learning_rate": 6.212919416877094e-07, "loss": 0.0089, "step": 11030 }, { "epoch": 2.5098976109215014, "grad_norm": 1.2805260920711292, "learning_rate": 6.212025919715751e-07, "loss": 0.0146, "step": 11031 }, { "epoch": 2.5101251422070536, "grad_norm": 0.5361676663144075, "learning_rate": 6.211132423330533e-07, "loss": 0.0028, "step": 11032 }, { "epoch": 2.5103526734926054, "grad_norm": 1.2267658005986728, "learning_rate": 6.210238927739693e-07, "loss": 0.0271, "step": 11033 }, { "epoch": 2.510580204778157, "grad_norm": 0.3747031767306393, "learning_rate": 6.209345432961498e-07, "loss": 0.0035, "step": 11034 }, { "epoch": 2.510807736063709, "grad_norm": 0.4758226125405678, "learning_rate": 6.208451939014207e-07, "loss": 0.0054, "step": 11035 }, { "epoch": 2.5110352673492606, "grad_norm": 1.0355702709812236, "learning_rate": 6.207558445916085e-07, "loss": 0.0108, "step": 11036 }, { "epoch": 2.5112627986348124, "grad_norm": 0.6539401715169905, "learning_rate": 6.206664953685389e-07, "loss": 0.0117, "step": 11037 }, { "epoch": 2.511490329920364, "grad_norm": 0.45421425949471517, "learning_rate": 6.205771462340381e-07, "loss": 0.003, "step": 11038 }, { "epoch": 2.511717861205916, "grad_norm": 0.48038841266388477, "learning_rate": 6.204877971899325e-07, "loss": 0.0086, "step": 11039 }, { "epoch": 2.5119453924914676, "grad_norm": 0.8659321270468432, "learning_rate": 6.203984482380479e-07, "loss": 0.0168, "step": 11040 }, { "epoch": 2.5121729237770194, "grad_norm": 2.1988333409154652, "learning_rate": 6.203090993802107e-07, "loss": 0.0121, "step": 11041 }, { "epoch": 2.512400455062571, "grad_norm": 0.8945177776767862, "learning_rate": 6.202197506182468e-07, "loss": 0.0067, "step": 11042 }, { "epoch": 2.512627986348123, "grad_norm": 1.0882444544570002, "learning_rate": 6.201304019539827e-07, "loss": 0.0102, "step": 11043 }, { "epoch": 2.5128555176336747, "grad_norm": 0.5746588272933494, "learning_rate": 6.200410533892441e-07, "loss": 0.0049, "step": 11044 }, { "epoch": 2.5130830489192264, "grad_norm": 0.6230746730546091, "learning_rate": 6.199517049258571e-07, "loss": 0.0047, "step": 11045 }, { "epoch": 2.513310580204778, "grad_norm": 0.5135946555225067, "learning_rate": 6.198623565656484e-07, "loss": 0.0045, "step": 11046 }, { "epoch": 2.51353811149033, "grad_norm": 0.7139612348743826, "learning_rate": 6.197730083104433e-07, "loss": 0.0069, "step": 11047 }, { "epoch": 2.5137656427758817, "grad_norm": 1.1866503896327176, "learning_rate": 6.196836601620686e-07, "loss": 0.0109, "step": 11048 }, { "epoch": 2.5139931740614334, "grad_norm": 0.49298384308592297, "learning_rate": 6.195943121223503e-07, "loss": 0.0048, "step": 11049 }, { "epoch": 2.514220705346985, "grad_norm": 0.8833465598974445, "learning_rate": 6.19504964193114e-07, "loss": 0.0186, "step": 11050 }, { "epoch": 2.514448236632537, "grad_norm": 1.6099992823198406, "learning_rate": 6.194156163761863e-07, "loss": 0.0196, "step": 11051 }, { "epoch": 2.5146757679180887, "grad_norm": 0.9625891623738381, "learning_rate": 6.193262686733931e-07, "loss": 0.0147, "step": 11052 }, { "epoch": 2.5149032992036404, "grad_norm": 0.7419203192153007, "learning_rate": 6.192369210865609e-07, "loss": 0.004, "step": 11053 }, { "epoch": 2.515130830489192, "grad_norm": 0.45458896771165086, "learning_rate": 6.191475736175152e-07, "loss": 0.0045, "step": 11054 }, { "epoch": 2.515358361774744, "grad_norm": 0.8571306887688278, "learning_rate": 6.190582262680824e-07, "loss": 0.0126, "step": 11055 }, { "epoch": 2.5155858930602957, "grad_norm": 2.2550868190767863, "learning_rate": 6.189688790400888e-07, "loss": 0.0251, "step": 11056 }, { "epoch": 2.5158134243458474, "grad_norm": 4.092122440914341, "learning_rate": 6.188795319353599e-07, "loss": 0.0286, "step": 11057 }, { "epoch": 2.516040955631399, "grad_norm": 1.2952269885176273, "learning_rate": 6.187901849557224e-07, "loss": 0.0206, "step": 11058 }, { "epoch": 2.516268486916951, "grad_norm": 0.3668652750068098, "learning_rate": 6.187008381030019e-07, "loss": 0.0029, "step": 11059 }, { "epoch": 2.5164960182025027, "grad_norm": 0.8216086858747413, "learning_rate": 6.186114913790251e-07, "loss": 0.016, "step": 11060 }, { "epoch": 2.516723549488055, "grad_norm": 0.7480819176507385, "learning_rate": 6.185221447856175e-07, "loss": 0.0053, "step": 11061 }, { "epoch": 2.516951080773606, "grad_norm": 1.8998850459990404, "learning_rate": 6.184327983246054e-07, "loss": 0.0248, "step": 11062 }, { "epoch": 2.5171786120591584, "grad_norm": 1.029038118785473, "learning_rate": 6.18343451997815e-07, "loss": 0.0128, "step": 11063 }, { "epoch": 2.5174061433447097, "grad_norm": 1.0796912773008642, "learning_rate": 6.182541058070721e-07, "loss": 0.0158, "step": 11064 }, { "epoch": 2.517633674630262, "grad_norm": 0.38231991474500676, "learning_rate": 6.181647597542029e-07, "loss": 0.0026, "step": 11065 }, { "epoch": 2.517861205915813, "grad_norm": 0.7563425344849958, "learning_rate": 6.180754138410334e-07, "loss": 0.0062, "step": 11066 }, { "epoch": 2.5180887372013654, "grad_norm": 0.8049016555758454, "learning_rate": 6.179860680693902e-07, "loss": 0.0058, "step": 11067 }, { "epoch": 2.5183162684869167, "grad_norm": 0.33069378452453935, "learning_rate": 6.178967224410987e-07, "loss": 0.003, "step": 11068 }, { "epoch": 2.518543799772469, "grad_norm": 0.9821946303335527, "learning_rate": 6.17807376957985e-07, "loss": 0.0128, "step": 11069 }, { "epoch": 2.51877133105802, "grad_norm": 0.8331258193125192, "learning_rate": 6.177180316218756e-07, "loss": 0.0079, "step": 11070 }, { "epoch": 2.5189988623435724, "grad_norm": 1.2394207761226181, "learning_rate": 6.17628686434596e-07, "loss": 0.0102, "step": 11071 }, { "epoch": 2.519226393629124, "grad_norm": 0.7845721220867985, "learning_rate": 6.175393413979728e-07, "loss": 0.0092, "step": 11072 }, { "epoch": 2.519453924914676, "grad_norm": 0.5570497928309812, "learning_rate": 6.174499965138316e-07, "loss": 0.0093, "step": 11073 }, { "epoch": 2.5196814562002277, "grad_norm": 0.49689974131270753, "learning_rate": 6.173606517839989e-07, "loss": 0.007, "step": 11074 }, { "epoch": 2.5199089874857794, "grad_norm": 1.6239209133696957, "learning_rate": 6.172713072103004e-07, "loss": 0.0161, "step": 11075 }, { "epoch": 2.520136518771331, "grad_norm": 1.1047931501692938, "learning_rate": 6.17181962794562e-07, "loss": 0.0144, "step": 11076 }, { "epoch": 2.520364050056883, "grad_norm": 2.5058921584778706, "learning_rate": 6.170926185386102e-07, "loss": 0.0133, "step": 11077 }, { "epoch": 2.5205915813424347, "grad_norm": 1.4561202468915706, "learning_rate": 6.170032744442706e-07, "loss": 0.0184, "step": 11078 }, { "epoch": 2.5208191126279864, "grad_norm": 0.6356173882322954, "learning_rate": 6.169139305133697e-07, "loss": 0.0048, "step": 11079 }, { "epoch": 2.521046643913538, "grad_norm": 0.7917116708377551, "learning_rate": 6.168245867477333e-07, "loss": 0.0093, "step": 11080 }, { "epoch": 2.52127417519909, "grad_norm": 0.47404828128743454, "learning_rate": 6.16735243149187e-07, "loss": 0.0039, "step": 11081 }, { "epoch": 2.5215017064846417, "grad_norm": 0.7842342972715951, "learning_rate": 6.166458997195575e-07, "loss": 0.0158, "step": 11082 }, { "epoch": 2.5217292377701934, "grad_norm": 0.9713268992974443, "learning_rate": 6.165565564606702e-07, "loss": 0.0088, "step": 11083 }, { "epoch": 2.521956769055745, "grad_norm": 0.8356985364453546, "learning_rate": 6.164672133743516e-07, "loss": 0.0095, "step": 11084 }, { "epoch": 2.522184300341297, "grad_norm": 0.6014872151116541, "learning_rate": 6.163778704624275e-07, "loss": 0.0072, "step": 11085 }, { "epoch": 2.5224118316268487, "grad_norm": 0.9470575694020449, "learning_rate": 6.162885277267241e-07, "loss": 0.0106, "step": 11086 }, { "epoch": 2.5226393629124004, "grad_norm": 0.5815222538981084, "learning_rate": 6.161991851690672e-07, "loss": 0.0072, "step": 11087 }, { "epoch": 2.522866894197952, "grad_norm": 49.9496841275997, "learning_rate": 6.161098427912827e-07, "loss": 0.6804, "step": 11088 }, { "epoch": 2.523094425483504, "grad_norm": 0.792281581495676, "learning_rate": 6.160205005951969e-07, "loss": 0.0098, "step": 11089 }, { "epoch": 2.5233219567690557, "grad_norm": 0.9078671967588235, "learning_rate": 6.159311585826353e-07, "loss": 0.0122, "step": 11090 }, { "epoch": 2.5235494880546074, "grad_norm": 0.49293821319040865, "learning_rate": 6.158418167554245e-07, "loss": 0.0041, "step": 11091 }, { "epoch": 2.523777019340159, "grad_norm": 1.7820418637846558, "learning_rate": 6.1575247511539e-07, "loss": 0.0224, "step": 11092 }, { "epoch": 2.524004550625711, "grad_norm": 0.66388545799721, "learning_rate": 6.156631336643582e-07, "loss": 0.0064, "step": 11093 }, { "epoch": 2.5242320819112627, "grad_norm": 0.8561468889756351, "learning_rate": 6.155737924041548e-07, "loss": 0.0189, "step": 11094 }, { "epoch": 2.5244596131968144, "grad_norm": 0.8037860584664871, "learning_rate": 6.154844513366056e-07, "loss": 0.0074, "step": 11095 }, { "epoch": 2.524687144482366, "grad_norm": 0.6034611678015117, "learning_rate": 6.153951104635371e-07, "loss": 0.0111, "step": 11096 }, { "epoch": 2.524914675767918, "grad_norm": 0.3149705376971132, "learning_rate": 6.153057697867746e-07, "loss": 0.0024, "step": 11097 }, { "epoch": 2.5251422070534697, "grad_norm": 0.6268806993880165, "learning_rate": 6.152164293081447e-07, "loss": 0.0129, "step": 11098 }, { "epoch": 2.5253697383390215, "grad_norm": 0.4481346092296306, "learning_rate": 6.151270890294731e-07, "loss": 0.0026, "step": 11099 }, { "epoch": 2.5255972696245736, "grad_norm": 0.8788328999338542, "learning_rate": 6.150377489525855e-07, "loss": 0.0191, "step": 11100 }, { "epoch": 2.525824800910125, "grad_norm": 1.116238887649132, "learning_rate": 6.149484090793082e-07, "loss": 0.0246, "step": 11101 }, { "epoch": 2.526052332195677, "grad_norm": 1.0552088473896062, "learning_rate": 6.148590694114669e-07, "loss": 0.0162, "step": 11102 }, { "epoch": 2.5262798634812285, "grad_norm": 0.5997638489259535, "learning_rate": 6.147697299508878e-07, "loss": 0.0034, "step": 11103 }, { "epoch": 2.5265073947667807, "grad_norm": 0.685760191293105, "learning_rate": 6.146803906993965e-07, "loss": 0.0149, "step": 11104 }, { "epoch": 2.526734926052332, "grad_norm": 0.8458748145321378, "learning_rate": 6.145910516588192e-07, "loss": 0.0111, "step": 11105 }, { "epoch": 2.526962457337884, "grad_norm": 0.8961851242340496, "learning_rate": 6.14501712830982e-07, "loss": 0.0038, "step": 11106 }, { "epoch": 2.5271899886234355, "grad_norm": 1.6364209905761182, "learning_rate": 6.144123742177101e-07, "loss": 0.0168, "step": 11107 }, { "epoch": 2.5274175199089877, "grad_norm": 1.164170480414528, "learning_rate": 6.143230358208302e-07, "loss": 0.0081, "step": 11108 }, { "epoch": 2.527645051194539, "grad_norm": 0.4020896523180197, "learning_rate": 6.142336976421676e-07, "loss": 0.0041, "step": 11109 }, { "epoch": 2.527872582480091, "grad_norm": 0.7233705406392515, "learning_rate": 6.141443596835489e-07, "loss": 0.005, "step": 11110 }, { "epoch": 2.528100113765643, "grad_norm": 0.9428726036328915, "learning_rate": 6.140550219467993e-07, "loss": 0.0069, "step": 11111 }, { "epoch": 2.5283276450511947, "grad_norm": 0.8951165388541701, "learning_rate": 6.139656844337451e-07, "loss": 0.0148, "step": 11112 }, { "epoch": 2.5285551763367464, "grad_norm": 0.8851348841540119, "learning_rate": 6.138763471462122e-07, "loss": 0.0093, "step": 11113 }, { "epoch": 2.528782707622298, "grad_norm": 0.8650529107500345, "learning_rate": 6.137870100860262e-07, "loss": 0.0052, "step": 11114 }, { "epoch": 2.52901023890785, "grad_norm": 0.9565417968749346, "learning_rate": 6.136976732550134e-07, "loss": 0.0175, "step": 11115 }, { "epoch": 2.5292377701934017, "grad_norm": 0.6211989714364408, "learning_rate": 6.136083366549992e-07, "loss": 0.0123, "step": 11116 }, { "epoch": 2.5294653014789534, "grad_norm": 0.7228637817562508, "learning_rate": 6.135190002878101e-07, "loss": 0.0078, "step": 11117 }, { "epoch": 2.529692832764505, "grad_norm": 0.6787548000496948, "learning_rate": 6.134296641552713e-07, "loss": 0.0063, "step": 11118 }, { "epoch": 2.529920364050057, "grad_norm": 0.3133912216839711, "learning_rate": 6.13340328259209e-07, "loss": 0.003, "step": 11119 }, { "epoch": 2.5301478953356087, "grad_norm": 0.6623429061644779, "learning_rate": 6.132509926014492e-07, "loss": 0.0084, "step": 11120 }, { "epoch": 2.5303754266211604, "grad_norm": 1.8333170493690958, "learning_rate": 6.131616571838174e-07, "loss": 0.0306, "step": 11121 }, { "epoch": 2.530602957906712, "grad_norm": 0.8477479730950706, "learning_rate": 6.130723220081398e-07, "loss": 0.0152, "step": 11122 }, { "epoch": 2.530830489192264, "grad_norm": 1.1093944802180402, "learning_rate": 6.129829870762419e-07, "loss": 0.0148, "step": 11123 }, { "epoch": 2.5310580204778157, "grad_norm": 0.4472195620563988, "learning_rate": 6.1289365238995e-07, "loss": 0.0037, "step": 11124 }, { "epoch": 2.5312855517633674, "grad_norm": 0.45609528417572953, "learning_rate": 6.128043179510895e-07, "loss": 0.0059, "step": 11125 }, { "epoch": 2.531513083048919, "grad_norm": 1.4996258693573907, "learning_rate": 6.127149837614864e-07, "loss": 0.0103, "step": 11126 }, { "epoch": 2.531740614334471, "grad_norm": 0.44353278386626094, "learning_rate": 6.126256498229667e-07, "loss": 0.0032, "step": 11127 }, { "epoch": 2.5319681456200227, "grad_norm": 0.3971942509406645, "learning_rate": 6.125363161373558e-07, "loss": 0.0022, "step": 11128 }, { "epoch": 2.5321956769055745, "grad_norm": 1.1541215833046454, "learning_rate": 6.124469827064799e-07, "loss": 0.0247, "step": 11129 }, { "epoch": 2.532423208191126, "grad_norm": 1.146495439560197, "learning_rate": 6.123576495321646e-07, "loss": 0.0154, "step": 11130 }, { "epoch": 2.532650739476678, "grad_norm": 0.7591913825958772, "learning_rate": 6.12268316616236e-07, "loss": 0.0086, "step": 11131 }, { "epoch": 2.5328782707622297, "grad_norm": 1.072635991095043, "learning_rate": 6.121789839605196e-07, "loss": 0.012, "step": 11132 }, { "epoch": 2.5331058020477815, "grad_norm": 0.5901905546264875, "learning_rate": 6.120896515668412e-07, "loss": 0.0076, "step": 11133 }, { "epoch": 2.533333333333333, "grad_norm": 0.6117868684450274, "learning_rate": 6.120003194370269e-07, "loss": 0.0042, "step": 11134 }, { "epoch": 2.533560864618885, "grad_norm": 0.8210141773291113, "learning_rate": 6.11910987572902e-07, "loss": 0.0044, "step": 11135 }, { "epoch": 2.5337883959044367, "grad_norm": 0.5494785476923978, "learning_rate": 6.118216559762926e-07, "loss": 0.0068, "step": 11136 }, { "epoch": 2.5340159271899885, "grad_norm": 1.0818813765901596, "learning_rate": 6.117323246490246e-07, "loss": 0.0219, "step": 11137 }, { "epoch": 2.5342434584755402, "grad_norm": 1.2350745660870779, "learning_rate": 6.116429935929234e-07, "loss": 0.0108, "step": 11138 }, { "epoch": 2.5344709897610924, "grad_norm": 0.8659769360193896, "learning_rate": 6.11553662809815e-07, "loss": 0.0067, "step": 11139 }, { "epoch": 2.5346985210466437, "grad_norm": 1.9924802029657398, "learning_rate": 6.11464332301525e-07, "loss": 0.0059, "step": 11140 }, { "epoch": 2.534926052332196, "grad_norm": 0.8112182016480113, "learning_rate": 6.113750020698795e-07, "loss": 0.0069, "step": 11141 }, { "epoch": 2.5351535836177472, "grad_norm": 0.7102662478602477, "learning_rate": 6.112856721167037e-07, "loss": 0.0038, "step": 11142 }, { "epoch": 2.5353811149032994, "grad_norm": 0.8347265509370249, "learning_rate": 6.111963424438239e-07, "loss": 0.011, "step": 11143 }, { "epoch": 2.5356086461888507, "grad_norm": 1.240547447339102, "learning_rate": 6.111070130530655e-07, "loss": 0.0067, "step": 11144 }, { "epoch": 2.535836177474403, "grad_norm": 0.9575550410646441, "learning_rate": 6.110176839462541e-07, "loss": 0.0245, "step": 11145 }, { "epoch": 2.5360637087599542, "grad_norm": 1.0967807562177287, "learning_rate": 6.10928355125216e-07, "loss": 0.0111, "step": 11146 }, { "epoch": 2.5362912400455064, "grad_norm": 0.3634429191656314, "learning_rate": 6.108390265917763e-07, "loss": 0.0024, "step": 11147 }, { "epoch": 2.536518771331058, "grad_norm": 1.5915589666470076, "learning_rate": 6.10749698347761e-07, "loss": 0.0169, "step": 11148 }, { "epoch": 2.53674630261661, "grad_norm": 0.9572786210929439, "learning_rate": 6.106603703949958e-07, "loss": 0.0165, "step": 11149 }, { "epoch": 2.5369738339021617, "grad_norm": 1.6644441107201375, "learning_rate": 6.105710427353065e-07, "loss": 0.0117, "step": 11150 }, { "epoch": 2.5372013651877134, "grad_norm": 0.8600431852262455, "learning_rate": 6.104817153705185e-07, "loss": 0.0108, "step": 11151 }, { "epoch": 2.537428896473265, "grad_norm": 0.5575860782102788, "learning_rate": 6.103923883024576e-07, "loss": 0.0047, "step": 11152 }, { "epoch": 2.537656427758817, "grad_norm": 1.1394961572439393, "learning_rate": 6.103030615329497e-07, "loss": 0.0136, "step": 11153 }, { "epoch": 2.5378839590443687, "grad_norm": 0.30864508610795555, "learning_rate": 6.102137350638202e-07, "loss": 0.0024, "step": 11154 }, { "epoch": 2.5381114903299204, "grad_norm": 0.6453743093237803, "learning_rate": 6.101244088968948e-07, "loss": 0.0073, "step": 11155 }, { "epoch": 2.538339021615472, "grad_norm": 0.999725995837898, "learning_rate": 6.100350830339996e-07, "loss": 0.0106, "step": 11156 }, { "epoch": 2.538566552901024, "grad_norm": 0.6738650702600254, "learning_rate": 6.099457574769595e-07, "loss": 0.0041, "step": 11157 }, { "epoch": 2.5387940841865757, "grad_norm": 1.1173797404284813, "learning_rate": 6.098564322276007e-07, "loss": 0.0126, "step": 11158 }, { "epoch": 2.5390216154721275, "grad_norm": 2.016862468583062, "learning_rate": 6.097671072877485e-07, "loss": 0.0369, "step": 11159 }, { "epoch": 2.539249146757679, "grad_norm": 1.0009244130502184, "learning_rate": 6.096777826592292e-07, "loss": 0.0111, "step": 11160 }, { "epoch": 2.539476678043231, "grad_norm": 0.6420321563900034, "learning_rate": 6.095884583438675e-07, "loss": 0.0115, "step": 11161 }, { "epoch": 2.5397042093287827, "grad_norm": 0.7901207671455388, "learning_rate": 6.094991343434896e-07, "loss": 0.0107, "step": 11162 }, { "epoch": 2.5399317406143345, "grad_norm": 0.9002216267916138, "learning_rate": 6.094098106599212e-07, "loss": 0.0123, "step": 11163 }, { "epoch": 2.540159271899886, "grad_norm": 1.0750735979010848, "learning_rate": 6.093204872949875e-07, "loss": 0.0181, "step": 11164 }, { "epoch": 2.540386803185438, "grad_norm": 0.9493590896549994, "learning_rate": 6.092311642505144e-07, "loss": 0.0125, "step": 11165 }, { "epoch": 2.5406143344709897, "grad_norm": 1.07361647771442, "learning_rate": 6.091418415283273e-07, "loss": 0.009, "step": 11166 }, { "epoch": 2.5408418657565415, "grad_norm": 0.7763079773191258, "learning_rate": 6.090525191302522e-07, "loss": 0.0057, "step": 11167 }, { "epoch": 2.5410693970420932, "grad_norm": 1.1698981545113225, "learning_rate": 6.089631970581142e-07, "loss": 0.0207, "step": 11168 }, { "epoch": 2.541296928327645, "grad_norm": 0.8125125314830746, "learning_rate": 6.088738753137391e-07, "loss": 0.0037, "step": 11169 }, { "epoch": 2.5415244596131967, "grad_norm": 0.7914904137338447, "learning_rate": 6.087845538989525e-07, "loss": 0.0053, "step": 11170 }, { "epoch": 2.5417519908987485, "grad_norm": 0.6047806837764429, "learning_rate": 6.086952328155798e-07, "loss": 0.0075, "step": 11171 }, { "epoch": 2.5419795221843002, "grad_norm": 0.6874019766799371, "learning_rate": 6.086059120654469e-07, "loss": 0.0052, "step": 11172 }, { "epoch": 2.542207053469852, "grad_norm": 0.48785658820683225, "learning_rate": 6.085165916503789e-07, "loss": 0.0063, "step": 11173 }, { "epoch": 2.5424345847554037, "grad_norm": 0.7739008237805562, "learning_rate": 6.084272715722018e-07, "loss": 0.0168, "step": 11174 }, { "epoch": 2.5426621160409555, "grad_norm": 0.8624545217840557, "learning_rate": 6.083379518327408e-07, "loss": 0.016, "step": 11175 }, { "epoch": 2.5428896473265072, "grad_norm": 0.533971061214122, "learning_rate": 6.082486324338214e-07, "loss": 0.0085, "step": 11176 }, { "epoch": 2.543117178612059, "grad_norm": 2.1339388567438893, "learning_rate": 6.081593133772697e-07, "loss": 0.0102, "step": 11177 }, { "epoch": 2.543344709897611, "grad_norm": 0.9137217157143516, "learning_rate": 6.080699946649104e-07, "loss": 0.0086, "step": 11178 }, { "epoch": 2.5435722411831625, "grad_norm": 0.519302633092328, "learning_rate": 6.079806762985695e-07, "loss": 0.0026, "step": 11179 }, { "epoch": 2.5437997724687147, "grad_norm": 0.9198326056224464, "learning_rate": 6.078913582800723e-07, "loss": 0.0144, "step": 11180 }, { "epoch": 2.544027303754266, "grad_norm": 0.4580931012688901, "learning_rate": 6.078020406112447e-07, "loss": 0.0039, "step": 11181 }, { "epoch": 2.544254835039818, "grad_norm": 0.6686228952185265, "learning_rate": 6.077127232939118e-07, "loss": 0.0106, "step": 11182 }, { "epoch": 2.5444823663253695, "grad_norm": 0.39057481525098053, "learning_rate": 6.07623406329899e-07, "loss": 0.0048, "step": 11183 }, { "epoch": 2.5447098976109217, "grad_norm": 1.0539467865456074, "learning_rate": 6.075340897210321e-07, "loss": 0.0142, "step": 11184 }, { "epoch": 2.544937428896473, "grad_norm": 1.3140653018575124, "learning_rate": 6.074447734691363e-07, "loss": 0.0057, "step": 11185 }, { "epoch": 2.545164960182025, "grad_norm": 0.5419888199210957, "learning_rate": 6.073554575760373e-07, "loss": 0.0044, "step": 11186 }, { "epoch": 2.545392491467577, "grad_norm": 0.49185241756054865, "learning_rate": 6.072661420435606e-07, "loss": 0.0042, "step": 11187 }, { "epoch": 2.5456200227531287, "grad_norm": 1.2898119867703366, "learning_rate": 6.071768268735311e-07, "loss": 0.0271, "step": 11188 }, { "epoch": 2.5458475540386805, "grad_norm": 0.5514563752547883, "learning_rate": 6.070875120677748e-07, "loss": 0.0064, "step": 11189 }, { "epoch": 2.546075085324232, "grad_norm": 1.0107972008644523, "learning_rate": 6.069981976281169e-07, "loss": 0.0128, "step": 11190 }, { "epoch": 2.546302616609784, "grad_norm": 0.4025224335027235, "learning_rate": 6.06908883556383e-07, "loss": 0.0048, "step": 11191 }, { "epoch": 2.5465301478953357, "grad_norm": 0.9390908380743286, "learning_rate": 6.068195698543981e-07, "loss": 0.0132, "step": 11192 }, { "epoch": 2.5467576791808875, "grad_norm": 1.5487904130792896, "learning_rate": 6.067302565239881e-07, "loss": 0.035, "step": 11193 }, { "epoch": 2.546985210466439, "grad_norm": 2.0685807037797597, "learning_rate": 6.066409435669784e-07, "loss": 0.0189, "step": 11194 }, { "epoch": 2.547212741751991, "grad_norm": 0.8701718698649671, "learning_rate": 6.065516309851938e-07, "loss": 0.0078, "step": 11195 }, { "epoch": 2.5474402730375427, "grad_norm": 0.3632663359341828, "learning_rate": 6.064623187804603e-07, "loss": 0.0028, "step": 11196 }, { "epoch": 2.5476678043230945, "grad_norm": 1.1130557498870808, "learning_rate": 6.063730069546029e-07, "loss": 0.0287, "step": 11197 }, { "epoch": 2.5478953356086462, "grad_norm": 1.001522485129694, "learning_rate": 6.062836955094474e-07, "loss": 0.0121, "step": 11198 }, { "epoch": 2.548122866894198, "grad_norm": 0.4877817827870358, "learning_rate": 6.061943844468184e-07, "loss": 0.0055, "step": 11199 }, { "epoch": 2.5483503981797497, "grad_norm": 0.5595090608817304, "learning_rate": 6.061050737685423e-07, "loss": 0.0028, "step": 11200 }, { "epoch": 2.5485779294653015, "grad_norm": 0.8471603724006631, "learning_rate": 6.060157634764438e-07, "loss": 0.0159, "step": 11201 }, { "epoch": 2.5488054607508532, "grad_norm": 0.8856045805727134, "learning_rate": 6.059264535723479e-07, "loss": 0.0079, "step": 11202 }, { "epoch": 2.549032992036405, "grad_norm": 0.9244952196822779, "learning_rate": 6.058371440580808e-07, "loss": 0.0057, "step": 11203 }, { "epoch": 2.5492605233219567, "grad_norm": 0.8064330733033157, "learning_rate": 6.057478349354671e-07, "loss": 0.0098, "step": 11204 }, { "epoch": 2.5494880546075085, "grad_norm": 1.188740984401866, "learning_rate": 6.056585262063324e-07, "loss": 0.0258, "step": 11205 }, { "epoch": 2.5497155858930602, "grad_norm": 0.34042786672998, "learning_rate": 6.055692178725024e-07, "loss": 0.0026, "step": 11206 }, { "epoch": 2.549943117178612, "grad_norm": 0.7799776579739107, "learning_rate": 6.054799099358016e-07, "loss": 0.0101, "step": 11207 }, { "epoch": 2.5501706484641637, "grad_norm": 0.7361184743292412, "learning_rate": 6.053906023980558e-07, "loss": 0.0103, "step": 11208 }, { "epoch": 2.5503981797497155, "grad_norm": 0.5859856589263621, "learning_rate": 6.053012952610901e-07, "loss": 0.0083, "step": 11209 }, { "epoch": 2.5506257110352673, "grad_norm": 0.4161399948859948, "learning_rate": 6.0521198852673e-07, "loss": 0.0035, "step": 11210 }, { "epoch": 2.550853242320819, "grad_norm": 0.6690051180207981, "learning_rate": 6.051226821968003e-07, "loss": 0.005, "step": 11211 }, { "epoch": 2.5510807736063708, "grad_norm": 0.8733624112537617, "learning_rate": 6.050333762731269e-07, "loss": 0.0112, "step": 11212 }, { "epoch": 2.5513083048919225, "grad_norm": 2.3616381360900864, "learning_rate": 6.049440707575348e-07, "loss": 0.0404, "step": 11213 }, { "epoch": 2.5515358361774743, "grad_norm": 0.6115976868606634, "learning_rate": 6.048547656518489e-07, "loss": 0.0052, "step": 11214 }, { "epoch": 2.551763367463026, "grad_norm": 1.6594840850112884, "learning_rate": 6.047654609578948e-07, "loss": 0.0156, "step": 11215 }, { "epoch": 2.5519908987485778, "grad_norm": 0.7672180141285514, "learning_rate": 6.046761566774975e-07, "loss": 0.011, "step": 11216 }, { "epoch": 2.55221843003413, "grad_norm": 0.8649711240108763, "learning_rate": 6.045868528124825e-07, "loss": 0.0069, "step": 11217 }, { "epoch": 2.5524459613196813, "grad_norm": 1.1394419072273747, "learning_rate": 6.044975493646748e-07, "loss": 0.0151, "step": 11218 }, { "epoch": 2.5526734926052335, "grad_norm": 1.1548138261762146, "learning_rate": 6.044082463358996e-07, "loss": 0.0143, "step": 11219 }, { "epoch": 2.5529010238907848, "grad_norm": 0.5482047142572376, "learning_rate": 6.043189437279823e-07, "loss": 0.0039, "step": 11220 }, { "epoch": 2.553128555176337, "grad_norm": 1.4763086152582012, "learning_rate": 6.042296415427477e-07, "loss": 0.0195, "step": 11221 }, { "epoch": 2.5533560864618883, "grad_norm": 0.9587668403401677, "learning_rate": 6.041403397820212e-07, "loss": 0.0122, "step": 11222 }, { "epoch": 2.5535836177474405, "grad_norm": 0.5607915565673384, "learning_rate": 6.040510384476279e-07, "loss": 0.0072, "step": 11223 }, { "epoch": 2.553811149032992, "grad_norm": 1.0735557776733466, "learning_rate": 6.039617375413934e-07, "loss": 0.0175, "step": 11224 }, { "epoch": 2.554038680318544, "grad_norm": 0.631980093306518, "learning_rate": 6.038724370651421e-07, "loss": 0.0113, "step": 11225 }, { "epoch": 2.5542662116040957, "grad_norm": 0.7195781686368343, "learning_rate": 6.037831370206995e-07, "loss": 0.0089, "step": 11226 }, { "epoch": 2.5544937428896475, "grad_norm": 0.5118704426015912, "learning_rate": 6.03693837409891e-07, "loss": 0.0053, "step": 11227 }, { "epoch": 2.5547212741751992, "grad_norm": 1.214523766168284, "learning_rate": 6.036045382345411e-07, "loss": 0.0179, "step": 11228 }, { "epoch": 2.554948805460751, "grad_norm": 0.7150924891531036, "learning_rate": 6.035152394964755e-07, "loss": 0.0089, "step": 11229 }, { "epoch": 2.5551763367463027, "grad_norm": 0.7313912719209208, "learning_rate": 6.034259411975188e-07, "loss": 0.0063, "step": 11230 }, { "epoch": 2.5554038680318545, "grad_norm": 0.6109653502771913, "learning_rate": 6.033366433394966e-07, "loss": 0.0048, "step": 11231 }, { "epoch": 2.5556313993174062, "grad_norm": 0.8545764543067789, "learning_rate": 6.032473459242337e-07, "loss": 0.0101, "step": 11232 }, { "epoch": 2.555858930602958, "grad_norm": 1.3721068757693482, "learning_rate": 6.031580489535549e-07, "loss": 0.012, "step": 11233 }, { "epoch": 2.5560864618885097, "grad_norm": 0.7934746322842061, "learning_rate": 6.030687524292861e-07, "loss": 0.0111, "step": 11234 }, { "epoch": 2.5563139931740615, "grad_norm": 0.7373418446458947, "learning_rate": 6.029794563532514e-07, "loss": 0.0044, "step": 11235 }, { "epoch": 2.5565415244596132, "grad_norm": 0.9103870048290277, "learning_rate": 6.028901607272765e-07, "loss": 0.0046, "step": 11236 }, { "epoch": 2.556769055745165, "grad_norm": 1.1482440353468213, "learning_rate": 6.02800865553186e-07, "loss": 0.0166, "step": 11237 }, { "epoch": 2.5569965870307167, "grad_norm": 0.9792715005501836, "learning_rate": 6.027115708328056e-07, "loss": 0.0074, "step": 11238 }, { "epoch": 2.5572241183162685, "grad_norm": 0.9384401203731455, "learning_rate": 6.026222765679595e-07, "loss": 0.0141, "step": 11239 }, { "epoch": 2.5574516496018203, "grad_norm": 0.36846643198681395, "learning_rate": 6.02532982760473e-07, "loss": 0.0021, "step": 11240 }, { "epoch": 2.557679180887372, "grad_norm": 1.0506666902117796, "learning_rate": 6.024436894121716e-07, "loss": 0.014, "step": 11241 }, { "epoch": 2.5579067121729238, "grad_norm": 0.5224833364528875, "learning_rate": 6.023543965248795e-07, "loss": 0.0041, "step": 11242 }, { "epoch": 2.5581342434584755, "grad_norm": 1.0147757148655203, "learning_rate": 6.022651041004222e-07, "loss": 0.0112, "step": 11243 }, { "epoch": 2.5583617747440273, "grad_norm": 0.9917799611357146, "learning_rate": 6.021758121406247e-07, "loss": 0.0101, "step": 11244 }, { "epoch": 2.558589306029579, "grad_norm": 0.5478294504547503, "learning_rate": 6.020865206473116e-07, "loss": 0.0076, "step": 11245 }, { "epoch": 2.5588168373151308, "grad_norm": 0.49756372623690437, "learning_rate": 6.01997229622308e-07, "loss": 0.0044, "step": 11246 }, { "epoch": 2.5590443686006825, "grad_norm": 0.5031936816673118, "learning_rate": 6.01907939067439e-07, "loss": 0.0056, "step": 11247 }, { "epoch": 2.5592718998862343, "grad_norm": 0.7205417928114884, "learning_rate": 6.018186489845295e-07, "loss": 0.0048, "step": 11248 }, { "epoch": 2.559499431171786, "grad_norm": 1.2699389291581373, "learning_rate": 6.017293593754042e-07, "loss": 0.0218, "step": 11249 }, { "epoch": 2.5597269624573378, "grad_norm": 0.767629074838235, "learning_rate": 6.016400702418883e-07, "loss": 0.007, "step": 11250 }, { "epoch": 2.5599544937428895, "grad_norm": 0.5029291533523943, "learning_rate": 6.015507815858067e-07, "loss": 0.0067, "step": 11251 }, { "epoch": 2.5601820250284413, "grad_norm": 0.8393351202958717, "learning_rate": 6.014614934089839e-07, "loss": 0.0102, "step": 11252 }, { "epoch": 2.560409556313993, "grad_norm": 0.7777153751746002, "learning_rate": 6.013722057132452e-07, "loss": 0.0113, "step": 11253 }, { "epoch": 2.560637087599545, "grad_norm": 1.1023559501528557, "learning_rate": 6.012829185004153e-07, "loss": 0.0074, "step": 11254 }, { "epoch": 2.5608646188850965, "grad_norm": 1.1185660645222122, "learning_rate": 6.011936317723192e-07, "loss": 0.023, "step": 11255 }, { "epoch": 2.5610921501706487, "grad_norm": 1.9027338679977202, "learning_rate": 6.011043455307815e-07, "loss": 0.0146, "step": 11256 }, { "epoch": 2.5613196814562, "grad_norm": 1.3138110591063024, "learning_rate": 6.010150597776273e-07, "loss": 0.0193, "step": 11257 }, { "epoch": 2.5615472127417522, "grad_norm": 0.6972064698300652, "learning_rate": 6.009257745146815e-07, "loss": 0.0074, "step": 11258 }, { "epoch": 2.5617747440273035, "grad_norm": 1.4477341522383897, "learning_rate": 6.008364897437686e-07, "loss": 0.0113, "step": 11259 }, { "epoch": 2.5620022753128557, "grad_norm": 1.1031224185159596, "learning_rate": 6.007472054667136e-07, "loss": 0.0098, "step": 11260 }, { "epoch": 2.562229806598407, "grad_norm": 1.2193262946843628, "learning_rate": 6.006579216853413e-07, "loss": 0.0196, "step": 11261 }, { "epoch": 2.5624573378839592, "grad_norm": 0.45567759862186596, "learning_rate": 6.005686384014767e-07, "loss": 0.0037, "step": 11262 }, { "epoch": 2.5626848691695105, "grad_norm": 0.7463050328420427, "learning_rate": 6.004793556169443e-07, "loss": 0.0112, "step": 11263 }, { "epoch": 2.5629124004550627, "grad_norm": 1.0966589148867218, "learning_rate": 6.003900733335688e-07, "loss": 0.012, "step": 11264 }, { "epoch": 2.5631399317406145, "grad_norm": 1.1321675786935537, "learning_rate": 6.003007915531753e-07, "loss": 0.021, "step": 11265 }, { "epoch": 2.5633674630261662, "grad_norm": 0.8937027229552645, "learning_rate": 6.002115102775881e-07, "loss": 0.0174, "step": 11266 }, { "epoch": 2.563594994311718, "grad_norm": 0.418158599290515, "learning_rate": 6.001222295086326e-07, "loss": 0.0029, "step": 11267 }, { "epoch": 2.5638225255972698, "grad_norm": 1.1626781409286726, "learning_rate": 6.00032949248133e-07, "loss": 0.0041, "step": 11268 }, { "epoch": 2.5640500568828215, "grad_norm": 1.1582685486989257, "learning_rate": 5.999436694979142e-07, "loss": 0.0165, "step": 11269 }, { "epoch": 2.5642775881683733, "grad_norm": 1.7934772975170323, "learning_rate": 5.998543902598011e-07, "loss": 0.0195, "step": 11270 }, { "epoch": 2.564505119453925, "grad_norm": 0.6116441114586695, "learning_rate": 5.997651115356179e-07, "loss": 0.0058, "step": 11271 }, { "epoch": 2.5647326507394768, "grad_norm": 1.01940295176585, "learning_rate": 5.996758333271897e-07, "loss": 0.0116, "step": 11272 }, { "epoch": 2.5649601820250285, "grad_norm": 0.8086047539747259, "learning_rate": 5.99586555636341e-07, "loss": 0.0065, "step": 11273 }, { "epoch": 2.5651877133105803, "grad_norm": 0.7251064046686883, "learning_rate": 5.994972784648968e-07, "loss": 0.0102, "step": 11274 }, { "epoch": 2.565415244596132, "grad_norm": 1.2206589733049762, "learning_rate": 5.994080018146814e-07, "loss": 0.0091, "step": 11275 }, { "epoch": 2.5656427758816838, "grad_norm": 0.521793802876033, "learning_rate": 5.993187256875196e-07, "loss": 0.0043, "step": 11276 }, { "epoch": 2.5658703071672355, "grad_norm": 0.5209519604215227, "learning_rate": 5.992294500852361e-07, "loss": 0.0037, "step": 11277 }, { "epoch": 2.5660978384527873, "grad_norm": 0.8432041756609311, "learning_rate": 5.991401750096553e-07, "loss": 0.0139, "step": 11278 }, { "epoch": 2.566325369738339, "grad_norm": 0.8260417386597494, "learning_rate": 5.99050900462602e-07, "loss": 0.0062, "step": 11279 }, { "epoch": 2.5665529010238908, "grad_norm": 0.5601447025060057, "learning_rate": 5.989616264459005e-07, "loss": 0.0128, "step": 11280 }, { "epoch": 2.5667804323094425, "grad_norm": 0.4828720519422529, "learning_rate": 5.988723529613761e-07, "loss": 0.0085, "step": 11281 }, { "epoch": 2.5670079635949943, "grad_norm": 0.8826624431900596, "learning_rate": 5.987830800108528e-07, "loss": 0.0178, "step": 11282 }, { "epoch": 2.567235494880546, "grad_norm": 1.0031717397909494, "learning_rate": 5.986938075961553e-07, "loss": 0.013, "step": 11283 }, { "epoch": 2.567463026166098, "grad_norm": 0.790940699168938, "learning_rate": 5.986045357191083e-07, "loss": 0.0113, "step": 11284 }, { "epoch": 2.5676905574516495, "grad_norm": 1.0814656368000197, "learning_rate": 5.985152643815361e-07, "loss": 0.0065, "step": 11285 }, { "epoch": 2.5679180887372013, "grad_norm": 0.29875160325984407, "learning_rate": 5.984259935852635e-07, "loss": 0.0022, "step": 11286 }, { "epoch": 2.568145620022753, "grad_norm": 0.6402111118063144, "learning_rate": 5.983367233321148e-07, "loss": 0.0075, "step": 11287 }, { "epoch": 2.568373151308305, "grad_norm": 1.140595810955115, "learning_rate": 5.982474536239149e-07, "loss": 0.0213, "step": 11288 }, { "epoch": 2.5686006825938565, "grad_norm": 0.4471159433912634, "learning_rate": 5.981581844624878e-07, "loss": 0.0029, "step": 11289 }, { "epoch": 2.5688282138794083, "grad_norm": 0.4629049417369728, "learning_rate": 5.980689158496584e-07, "loss": 0.0032, "step": 11290 }, { "epoch": 2.56905574516496, "grad_norm": 0.7459328395895309, "learning_rate": 5.97979647787251e-07, "loss": 0.0114, "step": 11291 }, { "epoch": 2.569283276450512, "grad_norm": 1.4390827830257134, "learning_rate": 5.9789038027709e-07, "loss": 0.0287, "step": 11292 }, { "epoch": 2.5695108077360636, "grad_norm": 2.0598388683071946, "learning_rate": 5.97801113321e-07, "loss": 0.0189, "step": 11293 }, { "epoch": 2.5697383390216153, "grad_norm": 0.649479804895687, "learning_rate": 5.977118469208057e-07, "loss": 0.0067, "step": 11294 }, { "epoch": 2.5699658703071675, "grad_norm": 1.566998067312298, "learning_rate": 5.976225810783309e-07, "loss": 0.033, "step": 11295 }, { "epoch": 2.570193401592719, "grad_norm": 0.5766670802430298, "learning_rate": 5.975333157954007e-07, "loss": 0.0048, "step": 11296 }, { "epoch": 2.570420932878271, "grad_norm": 2.4875815751909185, "learning_rate": 5.974440510738389e-07, "loss": 0.0209, "step": 11297 }, { "epoch": 2.5706484641638223, "grad_norm": 1.378062094099465, "learning_rate": 5.973547869154707e-07, "loss": 0.0166, "step": 11298 }, { "epoch": 2.5708759954493745, "grad_norm": 1.0028914149398709, "learning_rate": 5.972655233221195e-07, "loss": 0.0129, "step": 11299 }, { "epoch": 2.571103526734926, "grad_norm": 0.8206982593908878, "learning_rate": 5.971762602956105e-07, "loss": 0.0123, "step": 11300 }, { "epoch": 2.571331058020478, "grad_norm": 0.6497499179266333, "learning_rate": 5.970869978377679e-07, "loss": 0.0094, "step": 11301 }, { "epoch": 2.5715585893060293, "grad_norm": 2.136837798439974, "learning_rate": 5.969977359504156e-07, "loss": 0.0142, "step": 11302 }, { "epoch": 2.5717861205915815, "grad_norm": 1.4548234983354196, "learning_rate": 5.969084746353786e-07, "loss": 0.0195, "step": 11303 }, { "epoch": 2.5720136518771333, "grad_norm": 0.934791398584495, "learning_rate": 5.968192138944806e-07, "loss": 0.0117, "step": 11304 }, { "epoch": 2.572241183162685, "grad_norm": 0.9347848211519807, "learning_rate": 5.967299537295467e-07, "loss": 0.0151, "step": 11305 }, { "epoch": 2.5724687144482368, "grad_norm": 0.7678250638810762, "learning_rate": 5.966406941424004e-07, "loss": 0.0078, "step": 11306 }, { "epoch": 2.5726962457337885, "grad_norm": 1.0824839561545887, "learning_rate": 5.965514351348665e-07, "loss": 0.0103, "step": 11307 }, { "epoch": 2.5729237770193403, "grad_norm": 1.2521953482601953, "learning_rate": 5.964621767087694e-07, "loss": 0.0189, "step": 11308 }, { "epoch": 2.573151308304892, "grad_norm": 1.077097875568067, "learning_rate": 5.963729188659328e-07, "loss": 0.0098, "step": 11309 }, { "epoch": 2.573378839590444, "grad_norm": 0.8943357837357347, "learning_rate": 5.962836616081814e-07, "loss": 0.0187, "step": 11310 }, { "epoch": 2.5736063708759955, "grad_norm": 0.8761572983941627, "learning_rate": 5.961944049373393e-07, "loss": 0.015, "step": 11311 }, { "epoch": 2.5738339021615473, "grad_norm": 0.5694277954830308, "learning_rate": 5.961051488552311e-07, "loss": 0.013, "step": 11312 }, { "epoch": 2.574061433447099, "grad_norm": 1.2634387168214887, "learning_rate": 5.960158933636805e-07, "loss": 0.0131, "step": 11313 }, { "epoch": 2.574288964732651, "grad_norm": 0.9940865938664409, "learning_rate": 5.959266384645119e-07, "loss": 0.0043, "step": 11314 }, { "epoch": 2.5745164960182025, "grad_norm": 0.6076448483903257, "learning_rate": 5.958373841595498e-07, "loss": 0.0084, "step": 11315 }, { "epoch": 2.5747440273037543, "grad_norm": 1.236515444485821, "learning_rate": 5.957481304506179e-07, "loss": 0.0193, "step": 11316 }, { "epoch": 2.574971558589306, "grad_norm": 0.46351618533554195, "learning_rate": 5.956588773395408e-07, "loss": 0.0066, "step": 11317 }, { "epoch": 2.575199089874858, "grad_norm": 0.7639960803928041, "learning_rate": 5.955696248281424e-07, "loss": 0.0088, "step": 11318 }, { "epoch": 2.5754266211604095, "grad_norm": 0.9035851045497888, "learning_rate": 5.954803729182471e-07, "loss": 0.0118, "step": 11319 }, { "epoch": 2.5756541524459613, "grad_norm": 0.4746996538431534, "learning_rate": 5.953911216116789e-07, "loss": 0.0053, "step": 11320 }, { "epoch": 2.575881683731513, "grad_norm": 0.21596100361022072, "learning_rate": 5.953018709102618e-07, "loss": 0.0013, "step": 11321 }, { "epoch": 2.576109215017065, "grad_norm": 1.1027736143914455, "learning_rate": 5.952126208158204e-07, "loss": 0.0144, "step": 11322 }, { "epoch": 2.5763367463026166, "grad_norm": 0.7184307331320215, "learning_rate": 5.95123371330178e-07, "loss": 0.0119, "step": 11323 }, { "epoch": 2.5765642775881683, "grad_norm": 0.9177014062408603, "learning_rate": 5.950341224551595e-07, "loss": 0.0184, "step": 11324 }, { "epoch": 2.57679180887372, "grad_norm": 1.3401137131857193, "learning_rate": 5.949448741925886e-07, "loss": 0.0225, "step": 11325 }, { "epoch": 2.577019340159272, "grad_norm": 0.6561371292515696, "learning_rate": 5.948556265442893e-07, "loss": 0.007, "step": 11326 }, { "epoch": 2.5772468714448236, "grad_norm": 0.5305286903408271, "learning_rate": 5.947663795120861e-07, "loss": 0.0026, "step": 11327 }, { "epoch": 2.5774744027303753, "grad_norm": 0.826203767948072, "learning_rate": 5.946771330978024e-07, "loss": 0.0137, "step": 11328 }, { "epoch": 2.577701934015927, "grad_norm": 0.5181406400583546, "learning_rate": 5.945878873032628e-07, "loss": 0.0073, "step": 11329 }, { "epoch": 2.577929465301479, "grad_norm": 0.5691248103575477, "learning_rate": 5.944986421302909e-07, "loss": 0.01, "step": 11330 }, { "epoch": 2.5781569965870306, "grad_norm": 0.918266142771813, "learning_rate": 5.94409397580711e-07, "loss": 0.006, "step": 11331 }, { "epoch": 2.5783845278725823, "grad_norm": 1.908694355861258, "learning_rate": 5.943201536563471e-07, "loss": 0.0294, "step": 11332 }, { "epoch": 2.578612059158134, "grad_norm": 0.5930307847457225, "learning_rate": 5.942309103590228e-07, "loss": 0.0063, "step": 11333 }, { "epoch": 2.5788395904436863, "grad_norm": 0.9011605390148291, "learning_rate": 5.941416676905626e-07, "loss": 0.0095, "step": 11334 }, { "epoch": 2.5790671217292376, "grad_norm": 0.7403334483371002, "learning_rate": 5.940524256527899e-07, "loss": 0.0099, "step": 11335 }, { "epoch": 2.5792946530147898, "grad_norm": 0.4970671329274726, "learning_rate": 5.939631842475292e-07, "loss": 0.0082, "step": 11336 }, { "epoch": 2.579522184300341, "grad_norm": 1.6909720660916392, "learning_rate": 5.938739434766039e-07, "loss": 0.0243, "step": 11337 }, { "epoch": 2.5797497155858933, "grad_norm": 0.8343549616970224, "learning_rate": 5.937847033418386e-07, "loss": 0.0132, "step": 11338 }, { "epoch": 2.5799772468714446, "grad_norm": 0.5768895501484509, "learning_rate": 5.936954638450566e-07, "loss": 0.0084, "step": 11339 }, { "epoch": 2.580204778156997, "grad_norm": 0.759391923803757, "learning_rate": 5.936062249880819e-07, "loss": 0.0072, "step": 11340 }, { "epoch": 2.580432309442548, "grad_norm": 0.6606141426882816, "learning_rate": 5.935169867727386e-07, "loss": 0.0069, "step": 11341 }, { "epoch": 2.5806598407281003, "grad_norm": 0.4766810274623855, "learning_rate": 5.934277492008502e-07, "loss": 0.0052, "step": 11342 }, { "epoch": 2.580887372013652, "grad_norm": 0.8631419376932308, "learning_rate": 5.93338512274241e-07, "loss": 0.0121, "step": 11343 }, { "epoch": 2.581114903299204, "grad_norm": 1.1089529181239444, "learning_rate": 5.932492759947345e-07, "loss": 0.0132, "step": 11344 }, { "epoch": 2.5813424345847555, "grad_norm": 1.8924380685808302, "learning_rate": 5.931600403641549e-07, "loss": 0.0153, "step": 11345 }, { "epoch": 2.5815699658703073, "grad_norm": 0.11167857173752622, "learning_rate": 5.930708053843257e-07, "loss": 0.0008, "step": 11346 }, { "epoch": 2.581797497155859, "grad_norm": 0.5196287044594063, "learning_rate": 5.929815710570705e-07, "loss": 0.0059, "step": 11347 }, { "epoch": 2.582025028441411, "grad_norm": 0.7450330278983887, "learning_rate": 5.928923373842136e-07, "loss": 0.0128, "step": 11348 }, { "epoch": 2.5822525597269625, "grad_norm": 0.7855052455163158, "learning_rate": 5.928031043675783e-07, "loss": 0.0112, "step": 11349 }, { "epoch": 2.5824800910125143, "grad_norm": 0.4620703166906869, "learning_rate": 5.927138720089887e-07, "loss": 0.0045, "step": 11350 }, { "epoch": 2.582707622298066, "grad_norm": 0.32999075382404447, "learning_rate": 5.926246403102686e-07, "loss": 0.0039, "step": 11351 }, { "epoch": 2.582935153583618, "grad_norm": 1.3133062003554143, "learning_rate": 5.925354092732412e-07, "loss": 0.0148, "step": 11352 }, { "epoch": 2.5831626848691696, "grad_norm": 0.7999571382429617, "learning_rate": 5.924461788997308e-07, "loss": 0.0074, "step": 11353 }, { "epoch": 2.5833902161547213, "grad_norm": 1.0551292350655888, "learning_rate": 5.923569491915608e-07, "loss": 0.016, "step": 11354 }, { "epoch": 2.583617747440273, "grad_norm": 0.957022547981308, "learning_rate": 5.922677201505552e-07, "loss": 0.0202, "step": 11355 }, { "epoch": 2.583845278725825, "grad_norm": 1.2179737049319173, "learning_rate": 5.921784917785371e-07, "loss": 0.0192, "step": 11356 }, { "epoch": 2.5840728100113766, "grad_norm": 0.8530907127678642, "learning_rate": 5.920892640773308e-07, "loss": 0.0126, "step": 11357 }, { "epoch": 2.5843003412969283, "grad_norm": 0.8798826244485719, "learning_rate": 5.920000370487597e-07, "loss": 0.0112, "step": 11358 }, { "epoch": 2.58452787258248, "grad_norm": 0.7822360975770843, "learning_rate": 5.919108106946472e-07, "loss": 0.0086, "step": 11359 }, { "epoch": 2.584755403868032, "grad_norm": 0.48715625958999825, "learning_rate": 5.918215850168171e-07, "loss": 0.0018, "step": 11360 }, { "epoch": 2.5849829351535836, "grad_norm": 1.7298668937835986, "learning_rate": 5.917323600170931e-07, "loss": 0.0266, "step": 11361 }, { "epoch": 2.5852104664391353, "grad_norm": 0.8799209687594939, "learning_rate": 5.916431356972989e-07, "loss": 0.01, "step": 11362 }, { "epoch": 2.585437997724687, "grad_norm": 0.3453804599102657, "learning_rate": 5.915539120592577e-07, "loss": 0.0023, "step": 11363 }, { "epoch": 2.585665529010239, "grad_norm": 0.7028524906652335, "learning_rate": 5.914646891047933e-07, "loss": 0.0089, "step": 11364 }, { "epoch": 2.5858930602957906, "grad_norm": 1.4949171061992035, "learning_rate": 5.913754668357295e-07, "loss": 0.0257, "step": 11365 }, { "epoch": 2.5861205915813423, "grad_norm": 8.123324945937771, "learning_rate": 5.912862452538894e-07, "loss": 0.1116, "step": 11366 }, { "epoch": 2.586348122866894, "grad_norm": 1.038147279717141, "learning_rate": 5.911970243610967e-07, "loss": 0.0187, "step": 11367 }, { "epoch": 2.586575654152446, "grad_norm": 1.156071306064788, "learning_rate": 5.911078041591747e-07, "loss": 0.0216, "step": 11368 }, { "epoch": 2.5868031854379976, "grad_norm": 0.6845275992296727, "learning_rate": 5.910185846499474e-07, "loss": 0.0094, "step": 11369 }, { "epoch": 2.5870307167235493, "grad_norm": 0.4491993154557728, "learning_rate": 5.90929365835238e-07, "loss": 0.0038, "step": 11370 }, { "epoch": 2.587258248009101, "grad_norm": 1.0118356022960187, "learning_rate": 5.908401477168698e-07, "loss": 0.0131, "step": 11371 }, { "epoch": 2.587485779294653, "grad_norm": 1.490327103574777, "learning_rate": 5.907509302966666e-07, "loss": 0.0206, "step": 11372 }, { "epoch": 2.587713310580205, "grad_norm": 0.6845600258443947, "learning_rate": 5.906617135764515e-07, "loss": 0.015, "step": 11373 }, { "epoch": 2.5879408418657563, "grad_norm": 1.4000938589322849, "learning_rate": 5.905724975580482e-07, "loss": 0.0149, "step": 11374 }, { "epoch": 2.5881683731513085, "grad_norm": 0.4061919123492756, "learning_rate": 5.904832822432799e-07, "loss": 0.003, "step": 11375 }, { "epoch": 2.58839590443686, "grad_norm": 0.7959285652288838, "learning_rate": 5.903940676339702e-07, "loss": 0.0077, "step": 11376 }, { "epoch": 2.588623435722412, "grad_norm": 0.9127674044966255, "learning_rate": 5.903048537319424e-07, "loss": 0.0058, "step": 11377 }, { "epoch": 2.5888509670079634, "grad_norm": 0.6435576637583936, "learning_rate": 5.902156405390196e-07, "loss": 0.0066, "step": 11378 }, { "epoch": 2.5890784982935156, "grad_norm": 0.4360404641833643, "learning_rate": 5.901264280570258e-07, "loss": 0.0038, "step": 11379 }, { "epoch": 2.589306029579067, "grad_norm": 1.8995733368988084, "learning_rate": 5.900372162877835e-07, "loss": 0.0134, "step": 11380 }, { "epoch": 2.589533560864619, "grad_norm": 1.293878313392434, "learning_rate": 5.899480052331167e-07, "loss": 0.0192, "step": 11381 }, { "epoch": 2.589761092150171, "grad_norm": 0.7076199956188518, "learning_rate": 5.898587948948487e-07, "loss": 0.0138, "step": 11382 }, { "epoch": 2.5899886234357226, "grad_norm": 0.6607840428900508, "learning_rate": 5.897695852748022e-07, "loss": 0.0106, "step": 11383 }, { "epoch": 2.5902161547212743, "grad_norm": 1.5580514140039599, "learning_rate": 5.89680376374801e-07, "loss": 0.045, "step": 11384 }, { "epoch": 2.590443686006826, "grad_norm": 1.4059025591180585, "learning_rate": 5.895911681966681e-07, "loss": 0.02, "step": 11385 }, { "epoch": 2.590671217292378, "grad_norm": 1.0881742092745654, "learning_rate": 5.895019607422272e-07, "loss": 0.0118, "step": 11386 }, { "epoch": 2.5908987485779296, "grad_norm": 0.5810289375874217, "learning_rate": 5.894127540133007e-07, "loss": 0.0055, "step": 11387 }, { "epoch": 2.5911262798634813, "grad_norm": 0.7150432100860611, "learning_rate": 5.893235480117128e-07, "loss": 0.0071, "step": 11388 }, { "epoch": 2.591353811149033, "grad_norm": 0.43594260078699526, "learning_rate": 5.892343427392862e-07, "loss": 0.0024, "step": 11389 }, { "epoch": 2.591581342434585, "grad_norm": 0.32859757068145495, "learning_rate": 5.891451381978438e-07, "loss": 0.003, "step": 11390 }, { "epoch": 2.5918088737201366, "grad_norm": 0.6296185912220242, "learning_rate": 5.890559343892094e-07, "loss": 0.013, "step": 11391 }, { "epoch": 2.5920364050056883, "grad_norm": 0.4204911245529162, "learning_rate": 5.889667313152057e-07, "loss": 0.0027, "step": 11392 }, { "epoch": 2.59226393629124, "grad_norm": 1.1126148944219885, "learning_rate": 5.888775289776561e-07, "loss": 0.0128, "step": 11393 }, { "epoch": 2.592491467576792, "grad_norm": 1.055886802987816, "learning_rate": 5.887883273783836e-07, "loss": 0.0106, "step": 11394 }, { "epoch": 2.5927189988623436, "grad_norm": 0.8027106615158228, "learning_rate": 5.886991265192116e-07, "loss": 0.0089, "step": 11395 }, { "epoch": 2.5929465301478953, "grad_norm": 1.241251845482249, "learning_rate": 5.886099264019627e-07, "loss": 0.0105, "step": 11396 }, { "epoch": 2.593174061433447, "grad_norm": 0.6699890912830153, "learning_rate": 5.885207270284604e-07, "loss": 0.0052, "step": 11397 }, { "epoch": 2.593401592718999, "grad_norm": 1.0156010608610404, "learning_rate": 5.884315284005278e-07, "loss": 0.0176, "step": 11398 }, { "epoch": 2.5936291240045506, "grad_norm": 0.7572234857510292, "learning_rate": 5.883423305199874e-07, "loss": 0.011, "step": 11399 }, { "epoch": 2.5938566552901023, "grad_norm": 0.5816319862220617, "learning_rate": 5.882531333886627e-07, "loss": 0.0067, "step": 11400 }, { "epoch": 2.594084186575654, "grad_norm": 0.4277209519813939, "learning_rate": 5.88163937008377e-07, "loss": 0.0029, "step": 11401 }, { "epoch": 2.594311717861206, "grad_norm": 1.0404957254201326, "learning_rate": 5.880747413809526e-07, "loss": 0.0084, "step": 11402 }, { "epoch": 2.5945392491467576, "grad_norm": 0.9938760168291778, "learning_rate": 5.87985546508213e-07, "loss": 0.0154, "step": 11403 }, { "epoch": 2.5947667804323093, "grad_norm": 0.5561795955506493, "learning_rate": 5.878963523919809e-07, "loss": 0.008, "step": 11404 }, { "epoch": 2.594994311717861, "grad_norm": 1.0569186031478959, "learning_rate": 5.878071590340798e-07, "loss": 0.024, "step": 11405 }, { "epoch": 2.595221843003413, "grad_norm": 0.5889894821621313, "learning_rate": 5.877179664363318e-07, "loss": 0.005, "step": 11406 }, { "epoch": 2.5954493742889646, "grad_norm": 1.2418219071574386, "learning_rate": 5.876287746005605e-07, "loss": 0.0242, "step": 11407 }, { "epoch": 2.5956769055745164, "grad_norm": 0.9675530906495017, "learning_rate": 5.875395835285887e-07, "loss": 0.0108, "step": 11408 }, { "epoch": 2.595904436860068, "grad_norm": 0.6247691164961721, "learning_rate": 5.87450393222239e-07, "loss": 0.0087, "step": 11409 }, { "epoch": 2.59613196814562, "grad_norm": 0.6417887050085711, "learning_rate": 5.873612036833346e-07, "loss": 0.0044, "step": 11410 }, { "epoch": 2.5963594994311716, "grad_norm": 1.4246177860903018, "learning_rate": 5.872720149136981e-07, "loss": 0.025, "step": 11411 }, { "epoch": 2.596587030716724, "grad_norm": 0.8137429133942512, "learning_rate": 5.871828269151528e-07, "loss": 0.0086, "step": 11412 }, { "epoch": 2.596814562002275, "grad_norm": 2.4417662711307164, "learning_rate": 5.87093639689521e-07, "loss": 0.0353, "step": 11413 }, { "epoch": 2.5970420932878273, "grad_norm": 0.5121738420789244, "learning_rate": 5.87004453238626e-07, "loss": 0.0056, "step": 11414 }, { "epoch": 2.5972696245733786, "grad_norm": 0.6433092175452104, "learning_rate": 5.869152675642904e-07, "loss": 0.0059, "step": 11415 }, { "epoch": 2.597497155858931, "grad_norm": 2.4948732796471687, "learning_rate": 5.868260826683368e-07, "loss": 0.0159, "step": 11416 }, { "epoch": 2.597724687144482, "grad_norm": 0.6604084612282336, "learning_rate": 5.867368985525882e-07, "loss": 0.0153, "step": 11417 }, { "epoch": 2.5979522184300343, "grad_norm": 0.7172022807658246, "learning_rate": 5.866477152188673e-07, "loss": 0.0052, "step": 11418 }, { "epoch": 2.5981797497155856, "grad_norm": 1.3030589030011863, "learning_rate": 5.86558532668997e-07, "loss": 0.0175, "step": 11419 }, { "epoch": 2.598407281001138, "grad_norm": 0.31084983754819784, "learning_rate": 5.864693509048e-07, "loss": 0.0016, "step": 11420 }, { "epoch": 2.5986348122866896, "grad_norm": 0.8788173697515955, "learning_rate": 5.863801699280985e-07, "loss": 0.0152, "step": 11421 }, { "epoch": 2.5988623435722413, "grad_norm": 1.126998167496842, "learning_rate": 5.862909897407159e-07, "loss": 0.0112, "step": 11422 }, { "epoch": 2.599089874857793, "grad_norm": 0.7981389960228343, "learning_rate": 5.862018103444744e-07, "loss": 0.0197, "step": 11423 }, { "epoch": 2.599317406143345, "grad_norm": 0.8172855913526103, "learning_rate": 5.86112631741197e-07, "loss": 0.008, "step": 11424 }, { "epoch": 2.5995449374288966, "grad_norm": 0.6310047265333928, "learning_rate": 5.86023453932706e-07, "loss": 0.0077, "step": 11425 }, { "epoch": 2.5997724687144483, "grad_norm": 0.7160137372218588, "learning_rate": 5.859342769208245e-07, "loss": 0.0055, "step": 11426 }, { "epoch": 2.6, "grad_norm": 0.8973179560382585, "learning_rate": 5.858451007073747e-07, "loss": 0.0065, "step": 11427 }, { "epoch": 2.600227531285552, "grad_norm": 0.25217082857608586, "learning_rate": 5.857559252941792e-07, "loss": 0.0019, "step": 11428 }, { "epoch": 2.6004550625711036, "grad_norm": 1.1097906971872549, "learning_rate": 5.85666750683061e-07, "loss": 0.0176, "step": 11429 }, { "epoch": 2.6006825938566553, "grad_norm": 0.5908283856217559, "learning_rate": 5.85577576875842e-07, "loss": 0.007, "step": 11430 }, { "epoch": 2.600910125142207, "grad_norm": 0.42345107125783454, "learning_rate": 5.854884038743454e-07, "loss": 0.0052, "step": 11431 }, { "epoch": 2.601137656427759, "grad_norm": 1.414068575849952, "learning_rate": 5.853992316803932e-07, "loss": 0.0194, "step": 11432 }, { "epoch": 2.6013651877133106, "grad_norm": 0.948907286465661, "learning_rate": 5.853100602958086e-07, "loss": 0.0085, "step": 11433 }, { "epoch": 2.6015927189988624, "grad_norm": 0.25484424042401993, "learning_rate": 5.852208897224134e-07, "loss": 0.0011, "step": 11434 }, { "epoch": 2.601820250284414, "grad_norm": 0.7005705798577243, "learning_rate": 5.851317199620303e-07, "loss": 0.0098, "step": 11435 }, { "epoch": 2.602047781569966, "grad_norm": 1.072370686863242, "learning_rate": 5.850425510164821e-07, "loss": 0.0136, "step": 11436 }, { "epoch": 2.6022753128555176, "grad_norm": 0.3068142320386619, "learning_rate": 5.849533828875907e-07, "loss": 0.0022, "step": 11437 }, { "epoch": 2.6025028441410694, "grad_norm": 0.7884861763753701, "learning_rate": 5.84864215577179e-07, "loss": 0.0098, "step": 11438 }, { "epoch": 2.602730375426621, "grad_norm": 0.6395172471450085, "learning_rate": 5.847750490870694e-07, "loss": 0.0081, "step": 11439 }, { "epoch": 2.602957906712173, "grad_norm": 0.9882409567891629, "learning_rate": 5.846858834190837e-07, "loss": 0.0034, "step": 11440 }, { "epoch": 2.6031854379977246, "grad_norm": 2.072864026666834, "learning_rate": 5.845967185750449e-07, "loss": 0.0309, "step": 11441 }, { "epoch": 2.6034129692832764, "grad_norm": 0.9499264087923991, "learning_rate": 5.84507554556775e-07, "loss": 0.0097, "step": 11442 }, { "epoch": 2.603640500568828, "grad_norm": 0.9563157295064401, "learning_rate": 5.844183913660969e-07, "loss": 0.0152, "step": 11443 }, { "epoch": 2.60386803185438, "grad_norm": 1.2374167787675707, "learning_rate": 5.843292290048321e-07, "loss": 0.0129, "step": 11444 }, { "epoch": 2.6040955631399316, "grad_norm": 0.7033318413456748, "learning_rate": 5.842400674748038e-07, "loss": 0.01, "step": 11445 }, { "epoch": 2.6043230944254834, "grad_norm": 1.0062921514039158, "learning_rate": 5.841509067778339e-07, "loss": 0.0077, "step": 11446 }, { "epoch": 2.604550625711035, "grad_norm": 0.27199523510086887, "learning_rate": 5.840617469157441e-07, "loss": 0.002, "step": 11447 }, { "epoch": 2.604778156996587, "grad_norm": 0.6881414737462886, "learning_rate": 5.839725878903578e-07, "loss": 0.0069, "step": 11448 }, { "epoch": 2.6050056882821386, "grad_norm": 0.8994844816697072, "learning_rate": 5.838834297034964e-07, "loss": 0.0101, "step": 11449 }, { "epoch": 2.6052332195676904, "grad_norm": 1.2944431171289654, "learning_rate": 5.837942723569825e-07, "loss": 0.0132, "step": 11450 }, { "epoch": 2.6054607508532426, "grad_norm": 0.8097722903757755, "learning_rate": 5.837051158526379e-07, "loss": 0.0161, "step": 11451 }, { "epoch": 2.605688282138794, "grad_norm": 2.15948722723196, "learning_rate": 5.836159601922856e-07, "loss": 0.0258, "step": 11452 }, { "epoch": 2.605915813424346, "grad_norm": 1.0289324148199106, "learning_rate": 5.83526805377747e-07, "loss": 0.005, "step": 11453 }, { "epoch": 2.6061433447098974, "grad_norm": 2.367606416196267, "learning_rate": 5.834376514108444e-07, "loss": 0.0438, "step": 11454 }, { "epoch": 2.6063708759954496, "grad_norm": 0.4930190500897435, "learning_rate": 5.833484982934005e-07, "loss": 0.0038, "step": 11455 }, { "epoch": 2.606598407281001, "grad_norm": 0.6129089691846411, "learning_rate": 5.832593460272367e-07, "loss": 0.0065, "step": 11456 }, { "epoch": 2.606825938566553, "grad_norm": 0.9746259490073146, "learning_rate": 5.831701946141755e-07, "loss": 0.0165, "step": 11457 }, { "epoch": 2.6070534698521044, "grad_norm": 0.821741841472342, "learning_rate": 5.83081044056039e-07, "loss": 0.0062, "step": 11458 }, { "epoch": 2.6072810011376566, "grad_norm": 1.3357048351178977, "learning_rate": 5.82991894354649e-07, "loss": 0.0083, "step": 11459 }, { "epoch": 2.6075085324232083, "grad_norm": 1.5090408010452365, "learning_rate": 5.829027455118279e-07, "loss": 0.034, "step": 11460 }, { "epoch": 2.60773606370876, "grad_norm": 0.8788759977121002, "learning_rate": 5.828135975293974e-07, "loss": 0.0104, "step": 11461 }, { "epoch": 2.607963594994312, "grad_norm": 1.5444644252376933, "learning_rate": 5.8272445040918e-07, "loss": 0.0214, "step": 11462 }, { "epoch": 2.6081911262798636, "grad_norm": 1.0243087841672067, "learning_rate": 5.826353041529971e-07, "loss": 0.0283, "step": 11463 }, { "epoch": 2.6084186575654154, "grad_norm": 0.8407175396517, "learning_rate": 5.825461587626712e-07, "loss": 0.011, "step": 11464 }, { "epoch": 2.608646188850967, "grad_norm": 0.9107969590356138, "learning_rate": 5.824570142400242e-07, "loss": 0.0111, "step": 11465 }, { "epoch": 2.608873720136519, "grad_norm": 1.2042508045358014, "learning_rate": 5.823678705868775e-07, "loss": 0.0092, "step": 11466 }, { "epoch": 2.6091012514220706, "grad_norm": 1.0848249984608633, "learning_rate": 5.822787278050537e-07, "loss": 0.0146, "step": 11467 }, { "epoch": 2.6093287827076224, "grad_norm": 0.9116110084582509, "learning_rate": 5.821895858963743e-07, "loss": 0.0183, "step": 11468 }, { "epoch": 2.609556313993174, "grad_norm": 0.7029753676934629, "learning_rate": 5.821004448626617e-07, "loss": 0.0086, "step": 11469 }, { "epoch": 2.609783845278726, "grad_norm": 0.5590024364871417, "learning_rate": 5.820113047057372e-07, "loss": 0.0051, "step": 11470 }, { "epoch": 2.6100113765642776, "grad_norm": 0.49702561219464947, "learning_rate": 5.819221654274229e-07, "loss": 0.0056, "step": 11471 }, { "epoch": 2.6102389078498294, "grad_norm": 0.723825041809538, "learning_rate": 5.818330270295408e-07, "loss": 0.0071, "step": 11472 }, { "epoch": 2.610466439135381, "grad_norm": 1.2511749222566337, "learning_rate": 5.817438895139123e-07, "loss": 0.0168, "step": 11473 }, { "epoch": 2.610693970420933, "grad_norm": 0.2985504257168626, "learning_rate": 5.816547528823598e-07, "loss": 0.0024, "step": 11474 }, { "epoch": 2.6109215017064846, "grad_norm": 0.41458248505303325, "learning_rate": 5.815656171367044e-07, "loss": 0.0058, "step": 11475 }, { "epoch": 2.6111490329920364, "grad_norm": 0.5540976493660386, "learning_rate": 5.814764822787686e-07, "loss": 0.0073, "step": 11476 }, { "epoch": 2.611376564277588, "grad_norm": 0.8786260337397159, "learning_rate": 5.813873483103736e-07, "loss": 0.0204, "step": 11477 }, { "epoch": 2.61160409556314, "grad_norm": 0.8558854202850116, "learning_rate": 5.812982152333413e-07, "loss": 0.0091, "step": 11478 }, { "epoch": 2.6118316268486916, "grad_norm": 0.7909016920122335, "learning_rate": 5.812090830494937e-07, "loss": 0.0094, "step": 11479 }, { "epoch": 2.6120591581342434, "grad_norm": 1.1009122213059364, "learning_rate": 5.811199517606519e-07, "loss": 0.0154, "step": 11480 }, { "epoch": 2.612286689419795, "grad_norm": 0.8777567402562577, "learning_rate": 5.810308213686381e-07, "loss": 0.0083, "step": 11481 }, { "epoch": 2.612514220705347, "grad_norm": 0.6346439696255319, "learning_rate": 5.809416918752736e-07, "loss": 0.007, "step": 11482 }, { "epoch": 2.6127417519908986, "grad_norm": 1.0254451531126525, "learning_rate": 5.808525632823806e-07, "loss": 0.0124, "step": 11483 }, { "epoch": 2.6129692832764504, "grad_norm": 1.0748505355844356, "learning_rate": 5.807634355917801e-07, "loss": 0.0068, "step": 11484 }, { "epoch": 2.613196814562002, "grad_norm": 0.40381973632159346, "learning_rate": 5.806743088052939e-07, "loss": 0.0024, "step": 11485 }, { "epoch": 2.613424345847554, "grad_norm": 3.458321537312123, "learning_rate": 5.805851829247439e-07, "loss": 0.0984, "step": 11486 }, { "epoch": 2.6136518771331056, "grad_norm": 0.7973375474962491, "learning_rate": 5.804960579519512e-07, "loss": 0.0045, "step": 11487 }, { "epoch": 2.6138794084186574, "grad_norm": 1.3946181332749725, "learning_rate": 5.804069338887376e-07, "loss": 0.025, "step": 11488 }, { "epoch": 2.614106939704209, "grad_norm": 0.3244377681361108, "learning_rate": 5.803178107369248e-07, "loss": 0.0029, "step": 11489 }, { "epoch": 2.6143344709897613, "grad_norm": 2.1171698106660695, "learning_rate": 5.802286884983339e-07, "loss": 0.0345, "step": 11490 }, { "epoch": 2.6145620022753127, "grad_norm": 0.8330701245936201, "learning_rate": 5.801395671747866e-07, "loss": 0.0073, "step": 11491 }, { "epoch": 2.614789533560865, "grad_norm": 0.7220036370757617, "learning_rate": 5.800504467681044e-07, "loss": 0.0046, "step": 11492 }, { "epoch": 2.615017064846416, "grad_norm": 0.8897722202563255, "learning_rate": 5.79961327280109e-07, "loss": 0.0142, "step": 11493 }, { "epoch": 2.6152445961319684, "grad_norm": 0.7826511621214296, "learning_rate": 5.798722087126212e-07, "loss": 0.0144, "step": 11494 }, { "epoch": 2.6154721274175197, "grad_norm": 0.6998556239928975, "learning_rate": 5.797830910674631e-07, "loss": 0.0074, "step": 11495 }, { "epoch": 2.615699658703072, "grad_norm": 0.7867464297713307, "learning_rate": 5.796939743464558e-07, "loss": 0.0099, "step": 11496 }, { "epoch": 2.615927189988623, "grad_norm": 0.6377237201246098, "learning_rate": 5.796048585514205e-07, "loss": 0.0059, "step": 11497 }, { "epoch": 2.6161547212741754, "grad_norm": 1.280056065477516, "learning_rate": 5.795157436841789e-07, "loss": 0.0153, "step": 11498 }, { "epoch": 2.616382252559727, "grad_norm": 0.663920181015657, "learning_rate": 5.794266297465521e-07, "loss": 0.0086, "step": 11499 }, { "epoch": 2.616609783845279, "grad_norm": 0.8237227100809703, "learning_rate": 5.793375167403617e-07, "loss": 0.0066, "step": 11500 }, { "epoch": 2.6168373151308306, "grad_norm": 2.8132013242987113, "learning_rate": 5.792484046674285e-07, "loss": 0.0266, "step": 11501 }, { "epoch": 2.6170648464163824, "grad_norm": 0.9044746174852385, "learning_rate": 5.791592935295745e-07, "loss": 0.0068, "step": 11502 }, { "epoch": 2.617292377701934, "grad_norm": 0.41749810155391487, "learning_rate": 5.790701833286206e-07, "loss": 0.0027, "step": 11503 }, { "epoch": 2.617519908987486, "grad_norm": 0.5645216828150709, "learning_rate": 5.789810740663876e-07, "loss": 0.0067, "step": 11504 }, { "epoch": 2.6177474402730376, "grad_norm": 0.8063952741859556, "learning_rate": 5.788919657446974e-07, "loss": 0.0114, "step": 11505 }, { "epoch": 2.6179749715585894, "grad_norm": 0.6017853004029763, "learning_rate": 5.788028583653709e-07, "loss": 0.0058, "step": 11506 }, { "epoch": 2.618202502844141, "grad_norm": 0.9515817947755597, "learning_rate": 5.787137519302297e-07, "loss": 0.0084, "step": 11507 }, { "epoch": 2.618430034129693, "grad_norm": 0.9879028727536086, "learning_rate": 5.786246464410944e-07, "loss": 0.0203, "step": 11508 }, { "epoch": 2.6186575654152446, "grad_norm": 0.7921259128875163, "learning_rate": 5.785355418997862e-07, "loss": 0.0114, "step": 11509 }, { "epoch": 2.6188850967007964, "grad_norm": 0.6397145566774447, "learning_rate": 5.784464383081268e-07, "loss": 0.0073, "step": 11510 }, { "epoch": 2.619112627986348, "grad_norm": 0.8421149007173949, "learning_rate": 5.783573356679365e-07, "loss": 0.0105, "step": 11511 }, { "epoch": 2.6193401592719, "grad_norm": 0.31447267143100605, "learning_rate": 5.782682339810374e-07, "loss": 0.0012, "step": 11512 }, { "epoch": 2.6195676905574516, "grad_norm": 1.333333191761062, "learning_rate": 5.781791332492495e-07, "loss": 0.0101, "step": 11513 }, { "epoch": 2.6197952218430034, "grad_norm": 0.5761833433210127, "learning_rate": 5.780900334743946e-07, "loss": 0.0042, "step": 11514 }, { "epoch": 2.620022753128555, "grad_norm": 0.9724343358785722, "learning_rate": 5.780009346582936e-07, "loss": 0.0198, "step": 11515 }, { "epoch": 2.620250284414107, "grad_norm": 1.4773628021911929, "learning_rate": 5.779118368027673e-07, "loss": 0.011, "step": 11516 }, { "epoch": 2.6204778156996587, "grad_norm": 1.0240461492823363, "learning_rate": 5.778227399096368e-07, "loss": 0.005, "step": 11517 }, { "epoch": 2.6207053469852104, "grad_norm": 0.5326684936294505, "learning_rate": 5.777336439807231e-07, "loss": 0.0031, "step": 11518 }, { "epoch": 2.620932878270762, "grad_norm": 0.9105737084066423, "learning_rate": 5.776445490178472e-07, "loss": 0.0084, "step": 11519 }, { "epoch": 2.621160409556314, "grad_norm": 0.9748479423253597, "learning_rate": 5.775554550228299e-07, "loss": 0.0088, "step": 11520 }, { "epoch": 2.6213879408418657, "grad_norm": 0.5651713680911095, "learning_rate": 5.774663619974923e-07, "loss": 0.0105, "step": 11521 }, { "epoch": 2.6216154721274174, "grad_norm": 1.1781116016719513, "learning_rate": 5.773772699436553e-07, "loss": 0.0142, "step": 11522 }, { "epoch": 2.621843003412969, "grad_norm": 0.9530068216888067, "learning_rate": 5.772881788631394e-07, "loss": 0.0072, "step": 11523 }, { "epoch": 2.622070534698521, "grad_norm": 0.84022521169783, "learning_rate": 5.77199088757766e-07, "loss": 0.0088, "step": 11524 }, { "epoch": 2.6222980659840727, "grad_norm": 0.958567540932105, "learning_rate": 5.771099996293554e-07, "loss": 0.0064, "step": 11525 }, { "epoch": 2.6225255972696244, "grad_norm": 1.2505711335146743, "learning_rate": 5.770209114797292e-07, "loss": 0.0218, "step": 11526 }, { "epoch": 2.622753128555176, "grad_norm": 0.7518681261445477, "learning_rate": 5.769318243107073e-07, "loss": 0.007, "step": 11527 }, { "epoch": 2.622980659840728, "grad_norm": 1.126689259709487, "learning_rate": 5.768427381241109e-07, "loss": 0.0075, "step": 11528 }, { "epoch": 2.62320819112628, "grad_norm": 1.0091440334245902, "learning_rate": 5.76753652921761e-07, "loss": 0.0165, "step": 11529 }, { "epoch": 2.6234357224118314, "grad_norm": 0.48092144578334084, "learning_rate": 5.766645687054778e-07, "loss": 0.01, "step": 11530 }, { "epoch": 2.6236632536973836, "grad_norm": 0.8622866171283813, "learning_rate": 5.765754854770823e-07, "loss": 0.0131, "step": 11531 }, { "epoch": 2.623890784982935, "grad_norm": 1.0462865190880926, "learning_rate": 5.764864032383951e-07, "loss": 0.0217, "step": 11532 }, { "epoch": 2.624118316268487, "grad_norm": 0.4818394072439531, "learning_rate": 5.763973219912374e-07, "loss": 0.0041, "step": 11533 }, { "epoch": 2.6243458475540384, "grad_norm": 0.29989580586227715, "learning_rate": 5.763082417374291e-07, "loss": 0.002, "step": 11534 }, { "epoch": 2.6245733788395906, "grad_norm": 0.3186527227410267, "learning_rate": 5.762191624787912e-07, "loss": 0.0022, "step": 11535 }, { "epoch": 2.624800910125142, "grad_norm": 0.854562030388683, "learning_rate": 5.761300842171445e-07, "loss": 0.0153, "step": 11536 }, { "epoch": 2.625028441410694, "grad_norm": 0.5767412462403492, "learning_rate": 5.76041006954309e-07, "loss": 0.0078, "step": 11537 }, { "epoch": 2.625255972696246, "grad_norm": 0.49131848073404094, "learning_rate": 5.759519306921059e-07, "loss": 0.0037, "step": 11538 }, { "epoch": 2.6254835039817976, "grad_norm": 1.4885808605101745, "learning_rate": 5.758628554323553e-07, "loss": 0.0136, "step": 11539 }, { "epoch": 2.6257110352673494, "grad_norm": 0.2745801538295468, "learning_rate": 5.757737811768783e-07, "loss": 0.0022, "step": 11540 }, { "epoch": 2.625938566552901, "grad_norm": 0.8998550027321822, "learning_rate": 5.756847079274949e-07, "loss": 0.0135, "step": 11541 }, { "epoch": 2.626166097838453, "grad_norm": 1.0522907194994418, "learning_rate": 5.755956356860255e-07, "loss": 0.0118, "step": 11542 }, { "epoch": 2.6263936291240046, "grad_norm": 1.2750197609008853, "learning_rate": 5.755065644542912e-07, "loss": 0.0139, "step": 11543 }, { "epoch": 2.6266211604095564, "grad_norm": 0.6191845222035965, "learning_rate": 5.754174942341118e-07, "loss": 0.0116, "step": 11544 }, { "epoch": 2.626848691695108, "grad_norm": 0.420533557844463, "learning_rate": 5.753284250273082e-07, "loss": 0.005, "step": 11545 }, { "epoch": 2.62707622298066, "grad_norm": 0.4328221812753991, "learning_rate": 5.752393568357008e-07, "loss": 0.0039, "step": 11546 }, { "epoch": 2.6273037542662117, "grad_norm": 0.4333205677855306, "learning_rate": 5.751502896611093e-07, "loss": 0.0036, "step": 11547 }, { "epoch": 2.6275312855517634, "grad_norm": 0.7556448811221074, "learning_rate": 5.750612235053548e-07, "loss": 0.0159, "step": 11548 }, { "epoch": 2.627758816837315, "grad_norm": 0.5875419009506624, "learning_rate": 5.749721583702575e-07, "loss": 0.0081, "step": 11549 }, { "epoch": 2.627986348122867, "grad_norm": 0.8387582306721185, "learning_rate": 5.748830942576377e-07, "loss": 0.0162, "step": 11550 }, { "epoch": 2.6282138794084187, "grad_norm": 0.8214550202593314, "learning_rate": 5.747940311693156e-07, "loss": 0.0109, "step": 11551 }, { "epoch": 2.6284414106939704, "grad_norm": 0.47091017571647104, "learning_rate": 5.747049691071116e-07, "loss": 0.0029, "step": 11552 }, { "epoch": 2.628668941979522, "grad_norm": 1.045863462014319, "learning_rate": 5.74615908072846e-07, "loss": 0.0107, "step": 11553 }, { "epoch": 2.628896473265074, "grad_norm": 0.510678452562129, "learning_rate": 5.745268480683387e-07, "loss": 0.0066, "step": 11554 }, { "epoch": 2.6291240045506257, "grad_norm": 0.6562033826163642, "learning_rate": 5.744377890954103e-07, "loss": 0.0059, "step": 11555 }, { "epoch": 2.6293515358361774, "grad_norm": 1.0430722349036978, "learning_rate": 5.743487311558809e-07, "loss": 0.0133, "step": 11556 }, { "epoch": 2.629579067121729, "grad_norm": 0.6624837043217175, "learning_rate": 5.742596742515709e-07, "loss": 0.0065, "step": 11557 }, { "epoch": 2.629806598407281, "grad_norm": 1.1533996155285537, "learning_rate": 5.741706183842999e-07, "loss": 0.0133, "step": 11558 }, { "epoch": 2.6300341296928327, "grad_norm": 0.3854838511711772, "learning_rate": 5.740815635558885e-07, "loss": 0.0035, "step": 11559 }, { "epoch": 2.6302616609783844, "grad_norm": 0.5045220819171813, "learning_rate": 5.739925097681569e-07, "loss": 0.0054, "step": 11560 }, { "epoch": 2.630489192263936, "grad_norm": 0.6605729884543552, "learning_rate": 5.739034570229246e-07, "loss": 0.0048, "step": 11561 }, { "epoch": 2.630716723549488, "grad_norm": 0.7160053110799744, "learning_rate": 5.738144053220122e-07, "loss": 0.0101, "step": 11562 }, { "epoch": 2.6309442548350397, "grad_norm": 0.6812454726752339, "learning_rate": 5.737253546672396e-07, "loss": 0.0061, "step": 11563 }, { "epoch": 2.6311717861205914, "grad_norm": 1.3058130085032276, "learning_rate": 5.73636305060427e-07, "loss": 0.0072, "step": 11564 }, { "epoch": 2.631399317406143, "grad_norm": 1.3034388590243693, "learning_rate": 5.735472565033942e-07, "loss": 0.0154, "step": 11565 }, { "epoch": 2.631626848691695, "grad_norm": 0.7627371326588952, "learning_rate": 5.734582089979611e-07, "loss": 0.0143, "step": 11566 }, { "epoch": 2.6318543799772467, "grad_norm": 0.9358173139781957, "learning_rate": 5.733691625459481e-07, "loss": 0.0048, "step": 11567 }, { "epoch": 2.632081911262799, "grad_norm": 1.3368475137957954, "learning_rate": 5.732801171491744e-07, "loss": 0.0137, "step": 11568 }, { "epoch": 2.63230944254835, "grad_norm": 0.9300525237378799, "learning_rate": 5.731910728094609e-07, "loss": 0.0119, "step": 11569 }, { "epoch": 2.6325369738339024, "grad_norm": 1.143983306468455, "learning_rate": 5.731020295286265e-07, "loss": 0.0159, "step": 11570 }, { "epoch": 2.6327645051194537, "grad_norm": 1.7434701958949752, "learning_rate": 5.730129873084919e-07, "loss": 0.0187, "step": 11571 }, { "epoch": 2.632992036405006, "grad_norm": 0.5367203835993297, "learning_rate": 5.729239461508767e-07, "loss": 0.0058, "step": 11572 }, { "epoch": 2.633219567690557, "grad_norm": 0.8559994947405825, "learning_rate": 5.728349060576004e-07, "loss": 0.0177, "step": 11573 }, { "epoch": 2.6334470989761094, "grad_norm": 0.5746655921071138, "learning_rate": 5.727458670304832e-07, "loss": 0.0059, "step": 11574 }, { "epoch": 2.6336746302616607, "grad_norm": 0.8220634869460332, "learning_rate": 5.726568290713447e-07, "loss": 0.01, "step": 11575 }, { "epoch": 2.633902161547213, "grad_norm": 1.1319235146980031, "learning_rate": 5.725677921820049e-07, "loss": 0.0104, "step": 11576 }, { "epoch": 2.6341296928327647, "grad_norm": 1.269048521371317, "learning_rate": 5.724787563642832e-07, "loss": 0.0117, "step": 11577 }, { "epoch": 2.6343572241183164, "grad_norm": 0.4214195001502554, "learning_rate": 5.723897216199997e-07, "loss": 0.0022, "step": 11578 }, { "epoch": 2.634584755403868, "grad_norm": 0.6383120810283366, "learning_rate": 5.723006879509741e-07, "loss": 0.0032, "step": 11579 }, { "epoch": 2.63481228668942, "grad_norm": 1.0333724470716263, "learning_rate": 5.722116553590256e-07, "loss": 0.0065, "step": 11580 }, { "epoch": 2.6350398179749717, "grad_norm": 0.7139531747518628, "learning_rate": 5.721226238459744e-07, "loss": 0.0082, "step": 11581 }, { "epoch": 2.6352673492605234, "grad_norm": 0.40266309285711843, "learning_rate": 5.720335934136396e-07, "loss": 0.0025, "step": 11582 }, { "epoch": 2.635494880546075, "grad_norm": 0.9324971008226665, "learning_rate": 5.719445640638416e-07, "loss": 0.0098, "step": 11583 }, { "epoch": 2.635722411831627, "grad_norm": 1.0229450932958464, "learning_rate": 5.718555357983993e-07, "loss": 0.0134, "step": 11584 }, { "epoch": 2.6359499431171787, "grad_norm": 0.7417158971765794, "learning_rate": 5.717665086191325e-07, "loss": 0.0053, "step": 11585 }, { "epoch": 2.6361774744027304, "grad_norm": 1.247119130588846, "learning_rate": 5.716774825278609e-07, "loss": 0.0146, "step": 11586 }, { "epoch": 2.636405005688282, "grad_norm": 1.6116800361712482, "learning_rate": 5.715884575264038e-07, "loss": 0.0157, "step": 11587 }, { "epoch": 2.636632536973834, "grad_norm": 0.6655356766886699, "learning_rate": 5.714994336165808e-07, "loss": 0.0054, "step": 11588 }, { "epoch": 2.6368600682593857, "grad_norm": 0.48347217890977595, "learning_rate": 5.714104108002113e-07, "loss": 0.0072, "step": 11589 }, { "epoch": 2.6370875995449374, "grad_norm": 0.8812852669609412, "learning_rate": 5.713213890791151e-07, "loss": 0.0126, "step": 11590 }, { "epoch": 2.637315130830489, "grad_norm": 0.9592237953788204, "learning_rate": 5.712323684551114e-07, "loss": 0.0124, "step": 11591 }, { "epoch": 2.637542662116041, "grad_norm": 1.3723693124148557, "learning_rate": 5.711433489300193e-07, "loss": 0.0128, "step": 11592 }, { "epoch": 2.6377701934015927, "grad_norm": 0.6064226932831299, "learning_rate": 5.710543305056589e-07, "loss": 0.0071, "step": 11593 }, { "epoch": 2.6379977246871444, "grad_norm": 0.8365984002951941, "learning_rate": 5.709653131838489e-07, "loss": 0.0141, "step": 11594 }, { "epoch": 2.638225255972696, "grad_norm": 0.7643156699829069, "learning_rate": 5.708762969664091e-07, "loss": 0.0091, "step": 11595 }, { "epoch": 2.638452787258248, "grad_norm": 1.421222063440038, "learning_rate": 5.707872818551588e-07, "loss": 0.0061, "step": 11596 }, { "epoch": 2.6386803185437997, "grad_norm": 2.366148082928216, "learning_rate": 5.706982678519169e-07, "loss": 0.0397, "step": 11597 }, { "epoch": 2.6389078498293514, "grad_norm": 0.5860605168709145, "learning_rate": 5.70609254958503e-07, "loss": 0.0079, "step": 11598 }, { "epoch": 2.639135381114903, "grad_norm": 1.1433137574841, "learning_rate": 5.705202431767362e-07, "loss": 0.0152, "step": 11599 }, { "epoch": 2.639362912400455, "grad_norm": 0.6089245644801665, "learning_rate": 5.704312325084363e-07, "loss": 0.0079, "step": 11600 }, { "epoch": 2.6395904436860067, "grad_norm": 0.9090746208319579, "learning_rate": 5.703422229554215e-07, "loss": 0.0119, "step": 11601 }, { "epoch": 2.6398179749715585, "grad_norm": 0.25716267504865303, "learning_rate": 5.70253214519512e-07, "loss": 0.0008, "step": 11602 }, { "epoch": 2.64004550625711, "grad_norm": 1.5278412958897662, "learning_rate": 5.701642072025265e-07, "loss": 0.0384, "step": 11603 }, { "epoch": 2.640273037542662, "grad_norm": 0.7909904822761551, "learning_rate": 5.700752010062839e-07, "loss": 0.0095, "step": 11604 }, { "epoch": 2.640500568828214, "grad_norm": 0.9211796826055662, "learning_rate": 5.699861959326038e-07, "loss": 0.0173, "step": 11605 }, { "epoch": 2.6407281001137655, "grad_norm": 1.7135095623585794, "learning_rate": 5.698971919833049e-07, "loss": 0.0223, "step": 11606 }, { "epoch": 2.6409556313993177, "grad_norm": 1.3648737417491332, "learning_rate": 5.698081891602068e-07, "loss": 0.0166, "step": 11607 }, { "epoch": 2.641183162684869, "grad_norm": 0.9176665379491397, "learning_rate": 5.69719187465128e-07, "loss": 0.0085, "step": 11608 }, { "epoch": 2.641410693970421, "grad_norm": 0.19731156418373272, "learning_rate": 5.696301868998878e-07, "loss": 0.0012, "step": 11609 }, { "epoch": 2.6416382252559725, "grad_norm": 1.8232711520677871, "learning_rate": 5.695411874663054e-07, "loss": 0.0127, "step": 11610 }, { "epoch": 2.6418657565415247, "grad_norm": 0.579053205177639, "learning_rate": 5.694521891661992e-07, "loss": 0.0036, "step": 11611 }, { "epoch": 2.642093287827076, "grad_norm": 0.4849058510556083, "learning_rate": 5.693631920013887e-07, "loss": 0.0023, "step": 11612 }, { "epoch": 2.642320819112628, "grad_norm": 0.7086773308272727, "learning_rate": 5.692741959736925e-07, "loss": 0.005, "step": 11613 }, { "epoch": 2.6425483503981795, "grad_norm": 0.5188353655865037, "learning_rate": 5.691852010849301e-07, "loss": 0.0049, "step": 11614 }, { "epoch": 2.6427758816837317, "grad_norm": 0.3502427957219832, "learning_rate": 5.690962073369196e-07, "loss": 0.0025, "step": 11615 }, { "epoch": 2.6430034129692834, "grad_norm": 0.6494470089899493, "learning_rate": 5.690072147314804e-07, "loss": 0.0047, "step": 11616 }, { "epoch": 2.643230944254835, "grad_norm": 1.0625094264843387, "learning_rate": 5.689182232704313e-07, "loss": 0.0168, "step": 11617 }, { "epoch": 2.643458475540387, "grad_norm": 0.7320144640228273, "learning_rate": 5.688292329555906e-07, "loss": 0.007, "step": 11618 }, { "epoch": 2.6436860068259387, "grad_norm": 0.6105102721693291, "learning_rate": 5.687402437887778e-07, "loss": 0.0083, "step": 11619 }, { "epoch": 2.6439135381114904, "grad_norm": 0.8168930775623814, "learning_rate": 5.686512557718112e-07, "loss": 0.0154, "step": 11620 }, { "epoch": 2.644141069397042, "grad_norm": 2.210251564377376, "learning_rate": 5.6856226890651e-07, "loss": 0.0158, "step": 11621 }, { "epoch": 2.644368600682594, "grad_norm": 1.2273907145235101, "learning_rate": 5.684732831946925e-07, "loss": 0.0114, "step": 11622 }, { "epoch": 2.6445961319681457, "grad_norm": 1.2695866198602503, "learning_rate": 5.683842986381775e-07, "loss": 0.0066, "step": 11623 }, { "epoch": 2.6448236632536974, "grad_norm": 0.7186760310268226, "learning_rate": 5.68295315238784e-07, "loss": 0.0096, "step": 11624 }, { "epoch": 2.645051194539249, "grad_norm": 1.1110525851341062, "learning_rate": 5.682063329983301e-07, "loss": 0.0165, "step": 11625 }, { "epoch": 2.645278725824801, "grad_norm": 1.5079698659648562, "learning_rate": 5.681173519186349e-07, "loss": 0.0255, "step": 11626 }, { "epoch": 2.6455062571103527, "grad_norm": 1.3515130273035518, "learning_rate": 5.680283720015167e-07, "loss": 0.0179, "step": 11627 }, { "epoch": 2.6457337883959045, "grad_norm": 0.8651123762507712, "learning_rate": 5.679393932487946e-07, "loss": 0.0045, "step": 11628 }, { "epoch": 2.645961319681456, "grad_norm": 1.3293960497221111, "learning_rate": 5.678504156622866e-07, "loss": 0.034, "step": 11629 }, { "epoch": 2.646188850967008, "grad_norm": 0.80167552702329, "learning_rate": 5.677614392438112e-07, "loss": 0.0091, "step": 11630 }, { "epoch": 2.6464163822525597, "grad_norm": 0.8435915957657178, "learning_rate": 5.676724639951876e-07, "loss": 0.0144, "step": 11631 }, { "epoch": 2.6466439135381115, "grad_norm": 0.4532666628758451, "learning_rate": 5.675834899182334e-07, "loss": 0.0035, "step": 11632 }, { "epoch": 2.646871444823663, "grad_norm": 1.1122735043050678, "learning_rate": 5.674945170147678e-07, "loss": 0.0152, "step": 11633 }, { "epoch": 2.647098976109215, "grad_norm": 0.99249089824753, "learning_rate": 5.674055452866091e-07, "loss": 0.0037, "step": 11634 }, { "epoch": 2.6473265073947667, "grad_norm": 1.1345848188368113, "learning_rate": 5.673165747355751e-07, "loss": 0.0107, "step": 11635 }, { "epoch": 2.6475540386803185, "grad_norm": 1.512746752780425, "learning_rate": 5.67227605363485e-07, "loss": 0.0283, "step": 11636 }, { "epoch": 2.64778156996587, "grad_norm": 0.643252640595306, "learning_rate": 5.671386371721567e-07, "loss": 0.0036, "step": 11637 }, { "epoch": 2.648009101251422, "grad_norm": 0.3135772700711853, "learning_rate": 5.670496701634088e-07, "loss": 0.004, "step": 11638 }, { "epoch": 2.6482366325369737, "grad_norm": 1.2441653288825796, "learning_rate": 5.669607043390593e-07, "loss": 0.0053, "step": 11639 }, { "epoch": 2.6484641638225255, "grad_norm": 1.3148449991720932, "learning_rate": 5.668717397009271e-07, "loss": 0.0097, "step": 11640 }, { "epoch": 2.6486916951080772, "grad_norm": 0.7274294105148519, "learning_rate": 5.667827762508299e-07, "loss": 0.0103, "step": 11641 }, { "epoch": 2.648919226393629, "grad_norm": 0.4507220762670272, "learning_rate": 5.66693813990586e-07, "loss": 0.0064, "step": 11642 }, { "epoch": 2.6491467576791807, "grad_norm": 1.0846875259945585, "learning_rate": 5.666048529220139e-07, "loss": 0.0061, "step": 11643 }, { "epoch": 2.649374288964733, "grad_norm": 1.2611562971667483, "learning_rate": 5.665158930469315e-07, "loss": 0.0184, "step": 11644 }, { "epoch": 2.6496018202502842, "grad_norm": 0.5226417202599313, "learning_rate": 5.664269343671573e-07, "loss": 0.0023, "step": 11645 }, { "epoch": 2.6498293515358364, "grad_norm": 0.8666814001603053, "learning_rate": 5.663379768845091e-07, "loss": 0.0106, "step": 11646 }, { "epoch": 2.6500568828213877, "grad_norm": 0.47461849996360345, "learning_rate": 5.662490206008056e-07, "loss": 0.0059, "step": 11647 }, { "epoch": 2.65028441410694, "grad_norm": 1.3187069052761948, "learning_rate": 5.661600655178643e-07, "loss": 0.0246, "step": 11648 }, { "epoch": 2.6505119453924912, "grad_norm": 0.7457750200830515, "learning_rate": 5.660711116375034e-07, "loss": 0.0083, "step": 11649 }, { "epoch": 2.6507394766780434, "grad_norm": 1.3610698876695029, "learning_rate": 5.659821589615412e-07, "loss": 0.0111, "step": 11650 }, { "epoch": 2.6509670079635947, "grad_norm": 1.0707349118973282, "learning_rate": 5.658932074917955e-07, "loss": 0.0192, "step": 11651 }, { "epoch": 2.651194539249147, "grad_norm": 0.8780201630278823, "learning_rate": 5.658042572300844e-07, "loss": 0.0174, "step": 11652 }, { "epoch": 2.6514220705346982, "grad_norm": 1.3527958649483003, "learning_rate": 5.65715308178226e-07, "loss": 0.0189, "step": 11653 }, { "epoch": 2.6516496018202504, "grad_norm": 0.9670193944007419, "learning_rate": 5.656263603380379e-07, "loss": 0.0102, "step": 11654 }, { "epoch": 2.651877133105802, "grad_norm": 0.7977369574043603, "learning_rate": 5.655374137113384e-07, "loss": 0.0088, "step": 11655 }, { "epoch": 2.652104664391354, "grad_norm": 0.719148650018699, "learning_rate": 5.65448468299945e-07, "loss": 0.0036, "step": 11656 }, { "epoch": 2.6523321956769057, "grad_norm": 1.2015322912411543, "learning_rate": 5.653595241056763e-07, "loss": 0.0217, "step": 11657 }, { "epoch": 2.6525597269624575, "grad_norm": 1.07243743251018, "learning_rate": 5.652705811303493e-07, "loss": 0.0195, "step": 11658 }, { "epoch": 2.652787258248009, "grad_norm": 1.2459503027627674, "learning_rate": 5.651816393757825e-07, "loss": 0.0128, "step": 11659 }, { "epoch": 2.653014789533561, "grad_norm": 0.5065131243858156, "learning_rate": 5.650926988437934e-07, "loss": 0.0071, "step": 11660 }, { "epoch": 2.6532423208191127, "grad_norm": 0.5602192044068773, "learning_rate": 5.650037595361997e-07, "loss": 0.0051, "step": 11661 }, { "epoch": 2.6534698521046645, "grad_norm": 0.6726730660013938, "learning_rate": 5.649148214548194e-07, "loss": 0.0058, "step": 11662 }, { "epoch": 2.653697383390216, "grad_norm": 1.0252429941585517, "learning_rate": 5.648258846014699e-07, "loss": 0.0151, "step": 11663 }, { "epoch": 2.653924914675768, "grad_norm": 0.5391399107832615, "learning_rate": 5.647369489779695e-07, "loss": 0.0043, "step": 11664 }, { "epoch": 2.6541524459613197, "grad_norm": 0.8575912662115016, "learning_rate": 5.646480145861351e-07, "loss": 0.0128, "step": 11665 }, { "epoch": 2.6543799772468715, "grad_norm": 0.6682726583647777, "learning_rate": 5.645590814277849e-07, "loss": 0.0073, "step": 11666 }, { "epoch": 2.654607508532423, "grad_norm": 0.583051122501158, "learning_rate": 5.644701495047365e-07, "loss": 0.0081, "step": 11667 }, { "epoch": 2.654835039817975, "grad_norm": 0.6851604542121051, "learning_rate": 5.643812188188072e-07, "loss": 0.009, "step": 11668 }, { "epoch": 2.6550625711035267, "grad_norm": 0.9355816932009464, "learning_rate": 5.642922893718149e-07, "loss": 0.0184, "step": 11669 }, { "epoch": 2.6552901023890785, "grad_norm": 0.96086121814601, "learning_rate": 5.642033611655769e-07, "loss": 0.0148, "step": 11670 }, { "epoch": 2.6555176336746302, "grad_norm": 1.3512625486285657, "learning_rate": 5.64114434201911e-07, "loss": 0.0175, "step": 11671 }, { "epoch": 2.655745164960182, "grad_norm": 0.7230687677193945, "learning_rate": 5.640255084826346e-07, "loss": 0.0042, "step": 11672 }, { "epoch": 2.6559726962457337, "grad_norm": 0.9544858861767093, "learning_rate": 5.639365840095649e-07, "loss": 0.0121, "step": 11673 }, { "epoch": 2.6562002275312855, "grad_norm": 0.33756375165823055, "learning_rate": 5.638476607845199e-07, "loss": 0.0024, "step": 11674 }, { "epoch": 2.6564277588168372, "grad_norm": 0.5665073910133729, "learning_rate": 5.637587388093164e-07, "loss": 0.0143, "step": 11675 }, { "epoch": 2.656655290102389, "grad_norm": 1.4060692534715278, "learning_rate": 5.636698180857722e-07, "loss": 0.023, "step": 11676 }, { "epoch": 2.6568828213879407, "grad_norm": 0.7749160397064253, "learning_rate": 5.635808986157046e-07, "loss": 0.006, "step": 11677 }, { "epoch": 2.6571103526734925, "grad_norm": 0.7137860509108364, "learning_rate": 5.634919804009312e-07, "loss": 0.015, "step": 11678 }, { "epoch": 2.6573378839590442, "grad_norm": 0.8670394355534217, "learning_rate": 5.634030634432688e-07, "loss": 0.0079, "step": 11679 }, { "epoch": 2.657565415244596, "grad_norm": 1.3760124588418299, "learning_rate": 5.63314147744535e-07, "loss": 0.0228, "step": 11680 }, { "epoch": 2.6577929465301477, "grad_norm": 0.9337899925895597, "learning_rate": 5.632252333065473e-07, "loss": 0.0092, "step": 11681 }, { "epoch": 2.6580204778156995, "grad_norm": 0.5957961482569748, "learning_rate": 5.631363201311224e-07, "loss": 0.0066, "step": 11682 }, { "epoch": 2.6582480091012517, "grad_norm": 0.48154381356341525, "learning_rate": 5.630474082200779e-07, "loss": 0.0053, "step": 11683 }, { "epoch": 2.658475540386803, "grad_norm": 1.0999793406726717, "learning_rate": 5.629584975752308e-07, "loss": 0.0139, "step": 11684 }, { "epoch": 2.658703071672355, "grad_norm": 0.9942652673389853, "learning_rate": 5.628695881983987e-07, "loss": 0.012, "step": 11685 }, { "epoch": 2.6589306029579065, "grad_norm": 0.5646852313343537, "learning_rate": 5.627806800913982e-07, "loss": 0.0067, "step": 11686 }, { "epoch": 2.6591581342434587, "grad_norm": 0.5591347057313585, "learning_rate": 5.626917732560467e-07, "loss": 0.004, "step": 11687 }, { "epoch": 2.65938566552901, "grad_norm": 0.8588597199236512, "learning_rate": 5.626028676941612e-07, "loss": 0.0187, "step": 11688 }, { "epoch": 2.659613196814562, "grad_norm": 0.7714821342903759, "learning_rate": 5.625139634075589e-07, "loss": 0.0074, "step": 11689 }, { "epoch": 2.6598407281001135, "grad_norm": 0.9592290592228792, "learning_rate": 5.624250603980566e-07, "loss": 0.0112, "step": 11690 }, { "epoch": 2.6600682593856657, "grad_norm": 0.5443419670784782, "learning_rate": 5.623361586674718e-07, "loss": 0.0085, "step": 11691 }, { "epoch": 2.660295790671217, "grad_norm": 0.5222758245124203, "learning_rate": 5.622472582176207e-07, "loss": 0.0045, "step": 11692 }, { "epoch": 2.660523321956769, "grad_norm": 1.1934634949881147, "learning_rate": 5.62158359050321e-07, "loss": 0.0148, "step": 11693 }, { "epoch": 2.660750853242321, "grad_norm": 0.5920325797110719, "learning_rate": 5.620694611673891e-07, "loss": 0.0044, "step": 11694 }, { "epoch": 2.6609783845278727, "grad_norm": 0.5571555071456246, "learning_rate": 5.619805645706424e-07, "loss": 0.008, "step": 11695 }, { "epoch": 2.6612059158134245, "grad_norm": 0.5784216925179713, "learning_rate": 5.618916692618974e-07, "loss": 0.0069, "step": 11696 }, { "epoch": 2.6614334470989762, "grad_norm": 0.7376133689790371, "learning_rate": 5.618027752429714e-07, "loss": 0.0059, "step": 11697 }, { "epoch": 2.661660978384528, "grad_norm": 1.5789674519688544, "learning_rate": 5.617138825156808e-07, "loss": 0.0186, "step": 11698 }, { "epoch": 2.6618885096700797, "grad_norm": 0.47721387888584393, "learning_rate": 5.616249910818423e-07, "loss": 0.0034, "step": 11699 }, { "epoch": 2.6621160409556315, "grad_norm": 0.8577144567052576, "learning_rate": 5.615361009432732e-07, "loss": 0.0212, "step": 11700 }, { "epoch": 2.6623435722411832, "grad_norm": 0.7822636223710794, "learning_rate": 5.614472121017897e-07, "loss": 0.0091, "step": 11701 }, { "epoch": 2.662571103526735, "grad_norm": 1.151456707386974, "learning_rate": 5.61358324559209e-07, "loss": 0.0198, "step": 11702 }, { "epoch": 2.6627986348122867, "grad_norm": 1.2004447821854463, "learning_rate": 5.612694383173477e-07, "loss": 0.0187, "step": 11703 }, { "epoch": 2.6630261660978385, "grad_norm": 0.6626774285222945, "learning_rate": 5.611805533780221e-07, "loss": 0.0063, "step": 11704 }, { "epoch": 2.6632536973833902, "grad_norm": 0.2756349946710385, "learning_rate": 5.610916697430492e-07, "loss": 0.0031, "step": 11705 }, { "epoch": 2.663481228668942, "grad_norm": 1.3991072309822972, "learning_rate": 5.610027874142454e-07, "loss": 0.017, "step": 11706 }, { "epoch": 2.6637087599544937, "grad_norm": 0.2532875172321739, "learning_rate": 5.609139063934278e-07, "loss": 0.002, "step": 11707 }, { "epoch": 2.6639362912400455, "grad_norm": 0.7042197626872148, "learning_rate": 5.608250266824121e-07, "loss": 0.0117, "step": 11708 }, { "epoch": 2.6641638225255972, "grad_norm": 1.1085156326895695, "learning_rate": 5.607361482830157e-07, "loss": 0.0191, "step": 11709 }, { "epoch": 2.664391353811149, "grad_norm": 0.7757610310376687, "learning_rate": 5.606472711970547e-07, "loss": 0.0055, "step": 11710 }, { "epoch": 2.6646188850967008, "grad_norm": 0.7001252669637558, "learning_rate": 5.605583954263454e-07, "loss": 0.0126, "step": 11711 }, { "epoch": 2.6648464163822525, "grad_norm": 1.1366191450867325, "learning_rate": 5.604695209727046e-07, "loss": 0.0124, "step": 11712 }, { "epoch": 2.6650739476678043, "grad_norm": 0.7438294936465332, "learning_rate": 5.603806478379485e-07, "loss": 0.0072, "step": 11713 }, { "epoch": 2.665301478953356, "grad_norm": 0.4037558745658714, "learning_rate": 5.602917760238939e-07, "loss": 0.0049, "step": 11714 }, { "epoch": 2.6655290102389078, "grad_norm": 1.4345170096926703, "learning_rate": 5.602029055323566e-07, "loss": 0.0196, "step": 11715 }, { "epoch": 2.6657565415244595, "grad_norm": 0.8514276088728794, "learning_rate": 5.601140363651534e-07, "loss": 0.0055, "step": 11716 }, { "epoch": 2.6659840728100113, "grad_norm": 1.7113630879222572, "learning_rate": 5.600251685241005e-07, "loss": 0.0214, "step": 11717 }, { "epoch": 2.666211604095563, "grad_norm": 0.7330022615932076, "learning_rate": 5.59936302011014e-07, "loss": 0.019, "step": 11718 }, { "epoch": 2.6664391353811148, "grad_norm": 0.5443023064673834, "learning_rate": 5.598474368277105e-07, "loss": 0.0057, "step": 11719 }, { "epoch": 2.6666666666666665, "grad_norm": 0.8774218702921097, "learning_rate": 5.597585729760058e-07, "loss": 0.0083, "step": 11720 }, { "epoch": 2.6668941979522183, "grad_norm": 0.8258755947525533, "learning_rate": 5.596697104577167e-07, "loss": 0.0095, "step": 11721 }, { "epoch": 2.6671217292377705, "grad_norm": 0.4769559529589376, "learning_rate": 5.59580849274659e-07, "loss": 0.0046, "step": 11722 }, { "epoch": 2.6673492605233218, "grad_norm": 1.1646569062854788, "learning_rate": 5.594919894286487e-07, "loss": 0.0197, "step": 11723 }, { "epoch": 2.667576791808874, "grad_norm": 1.0413977037303122, "learning_rate": 5.594031309215025e-07, "loss": 0.0173, "step": 11724 }, { "epoch": 2.6678043230944253, "grad_norm": 2.3658236650028504, "learning_rate": 5.593142737550359e-07, "loss": 0.0221, "step": 11725 }, { "epoch": 2.6680318543799775, "grad_norm": 0.9951588039858636, "learning_rate": 5.592254179310653e-07, "loss": 0.0091, "step": 11726 }, { "epoch": 2.668259385665529, "grad_norm": 0.8935270108709377, "learning_rate": 5.591365634514067e-07, "loss": 0.005, "step": 11727 }, { "epoch": 2.668486916951081, "grad_norm": 0.9630324885048532, "learning_rate": 5.590477103178762e-07, "loss": 0.0228, "step": 11728 }, { "epoch": 2.6687144482366323, "grad_norm": 1.1021099600881135, "learning_rate": 5.589588585322898e-07, "loss": 0.017, "step": 11729 }, { "epoch": 2.6689419795221845, "grad_norm": 1.0857527870189179, "learning_rate": 5.588700080964631e-07, "loss": 0.0122, "step": 11730 }, { "epoch": 2.669169510807736, "grad_norm": 1.3568089527260412, "learning_rate": 5.587811590122126e-07, "loss": 0.0256, "step": 11731 }, { "epoch": 2.669397042093288, "grad_norm": 0.23174170143237094, "learning_rate": 5.586923112813537e-07, "loss": 0.0022, "step": 11732 }, { "epoch": 2.6696245733788397, "grad_norm": 0.7196281485418528, "learning_rate": 5.586034649057027e-07, "loss": 0.0144, "step": 11733 }, { "epoch": 2.6698521046643915, "grad_norm": 1.4130050034035155, "learning_rate": 5.585146198870751e-07, "loss": 0.0218, "step": 11734 }, { "epoch": 2.6700796359499432, "grad_norm": 1.407027012623296, "learning_rate": 5.584257762272871e-07, "loss": 0.0237, "step": 11735 }, { "epoch": 2.670307167235495, "grad_norm": 1.1058823883762243, "learning_rate": 5.583369339281543e-07, "loss": 0.0104, "step": 11736 }, { "epoch": 2.6705346985210467, "grad_norm": 0.351048625308732, "learning_rate": 5.582480929914924e-07, "loss": 0.0022, "step": 11737 }, { "epoch": 2.6707622298065985, "grad_norm": 1.0762057448669344, "learning_rate": 5.581592534191173e-07, "loss": 0.0101, "step": 11738 }, { "epoch": 2.6709897610921502, "grad_norm": 0.9563346851277681, "learning_rate": 5.580704152128445e-07, "loss": 0.0063, "step": 11739 }, { "epoch": 2.671217292377702, "grad_norm": 1.2651610136828186, "learning_rate": 5.579815783744899e-07, "loss": 0.0225, "step": 11740 }, { "epoch": 2.6714448236632538, "grad_norm": 1.3679238287331836, "learning_rate": 5.578927429058694e-07, "loss": 0.0113, "step": 11741 }, { "epoch": 2.6716723549488055, "grad_norm": 1.6211647444996788, "learning_rate": 5.578039088087978e-07, "loss": 0.0116, "step": 11742 }, { "epoch": 2.6718998862343573, "grad_norm": 0.41266579322205704, "learning_rate": 5.577150760850916e-07, "loss": 0.003, "step": 11743 }, { "epoch": 2.672127417519909, "grad_norm": 0.5029822131361193, "learning_rate": 5.576262447365659e-07, "loss": 0.0076, "step": 11744 }, { "epoch": 2.6723549488054608, "grad_norm": 1.8187952596064412, "learning_rate": 5.575374147650364e-07, "loss": 0.037, "step": 11745 }, { "epoch": 2.6725824800910125, "grad_norm": 0.48874570032268844, "learning_rate": 5.574485861723185e-07, "loss": 0.0092, "step": 11746 }, { "epoch": 2.6728100113765643, "grad_norm": 0.5079511687757594, "learning_rate": 5.573597589602279e-07, "loss": 0.0046, "step": 11747 }, { "epoch": 2.673037542662116, "grad_norm": 1.2229815241157098, "learning_rate": 5.5727093313058e-07, "loss": 0.0166, "step": 11748 }, { "epoch": 2.6732650739476678, "grad_norm": 0.8094597191087707, "learning_rate": 5.5718210868519e-07, "loss": 0.0077, "step": 11749 }, { "epoch": 2.6734926052332195, "grad_norm": 1.6037173556779352, "learning_rate": 5.570932856258736e-07, "loss": 0.0148, "step": 11750 }, { "epoch": 2.6737201365187713, "grad_norm": 0.9233974845164695, "learning_rate": 5.57004463954446e-07, "loss": 0.0098, "step": 11751 }, { "epoch": 2.673947667804323, "grad_norm": 0.6029693628555272, "learning_rate": 5.569156436727229e-07, "loss": 0.008, "step": 11752 }, { "epoch": 2.6741751990898748, "grad_norm": 0.6205215088173052, "learning_rate": 5.568268247825188e-07, "loss": 0.0057, "step": 11753 }, { "epoch": 2.6744027303754265, "grad_norm": 1.1553733147664542, "learning_rate": 5.5673800728565e-07, "loss": 0.0213, "step": 11754 }, { "epoch": 2.6746302616609783, "grad_norm": 0.7774639245277706, "learning_rate": 5.566491911839314e-07, "loss": 0.0122, "step": 11755 }, { "epoch": 2.67485779294653, "grad_norm": 0.47942456500683706, "learning_rate": 5.565603764791778e-07, "loss": 0.0079, "step": 11756 }, { "epoch": 2.675085324232082, "grad_norm": 0.5902590961215188, "learning_rate": 5.564715631732051e-07, "loss": 0.0109, "step": 11757 }, { "epoch": 2.6753128555176335, "grad_norm": 0.406409887752118, "learning_rate": 5.563827512678279e-07, "loss": 0.0049, "step": 11758 }, { "epoch": 2.6755403868031853, "grad_norm": 0.5706807446272323, "learning_rate": 5.562939407648617e-07, "loss": 0.0071, "step": 11759 }, { "epoch": 2.675767918088737, "grad_norm": 0.9893869795830219, "learning_rate": 5.562051316661216e-07, "loss": 0.0056, "step": 11760 }, { "epoch": 2.6759954493742892, "grad_norm": 0.6403688723800689, "learning_rate": 5.561163239734224e-07, "loss": 0.0086, "step": 11761 }, { "epoch": 2.6762229806598405, "grad_norm": 0.9911507376744415, "learning_rate": 5.560275176885795e-07, "loss": 0.0199, "step": 11762 }, { "epoch": 2.6764505119453927, "grad_norm": 0.8497301233220171, "learning_rate": 5.559387128134077e-07, "loss": 0.0137, "step": 11763 }, { "epoch": 2.676678043230944, "grad_norm": 0.6547130908467277, "learning_rate": 5.558499093497225e-07, "loss": 0.0074, "step": 11764 }, { "epoch": 2.6769055745164962, "grad_norm": 1.1432554268334767, "learning_rate": 5.55761107299338e-07, "loss": 0.014, "step": 11765 }, { "epoch": 2.6771331058020476, "grad_norm": 1.0384620359906593, "learning_rate": 5.5567230666407e-07, "loss": 0.0127, "step": 11766 }, { "epoch": 2.6773606370875997, "grad_norm": 1.0865383885088888, "learning_rate": 5.555835074457332e-07, "loss": 0.0197, "step": 11767 }, { "epoch": 2.677588168373151, "grad_norm": 0.40534394732689993, "learning_rate": 5.55494709646142e-07, "loss": 0.0048, "step": 11768 }, { "epoch": 2.6778156996587033, "grad_norm": 0.13884351740958215, "learning_rate": 5.554059132671118e-07, "loss": 0.0009, "step": 11769 }, { "epoch": 2.6780432309442546, "grad_norm": 0.6356966566953929, "learning_rate": 5.553171183104572e-07, "loss": 0.0067, "step": 11770 }, { "epoch": 2.6782707622298068, "grad_norm": 1.5614326303752748, "learning_rate": 5.552283247779934e-07, "loss": 0.0168, "step": 11771 }, { "epoch": 2.6784982935153585, "grad_norm": 0.6513715794160334, "learning_rate": 5.551395326715345e-07, "loss": 0.0159, "step": 11772 }, { "epoch": 2.6787258248009103, "grad_norm": 0.6623573394932485, "learning_rate": 5.550507419928958e-07, "loss": 0.0058, "step": 11773 }, { "epoch": 2.678953356086462, "grad_norm": 0.5480707211067553, "learning_rate": 5.54961952743892e-07, "loss": 0.0056, "step": 11774 }, { "epoch": 2.6791808873720138, "grad_norm": 0.6192242346620278, "learning_rate": 5.548731649263372e-07, "loss": 0.0111, "step": 11775 }, { "epoch": 2.6794084186575655, "grad_norm": 0.5692942597940908, "learning_rate": 5.547843785420467e-07, "loss": 0.0052, "step": 11776 }, { "epoch": 2.6796359499431173, "grad_norm": 2.0932248816610786, "learning_rate": 5.546955935928347e-07, "loss": 0.0283, "step": 11777 }, { "epoch": 2.679863481228669, "grad_norm": 1.0241643751499274, "learning_rate": 5.546068100805165e-07, "loss": 0.0098, "step": 11778 }, { "epoch": 2.6800910125142208, "grad_norm": 0.7485498570110618, "learning_rate": 5.545180280069059e-07, "loss": 0.0045, "step": 11779 }, { "epoch": 2.6803185437997725, "grad_norm": 0.7221546660514129, "learning_rate": 5.544292473738175e-07, "loss": 0.0101, "step": 11780 }, { "epoch": 2.6805460750853243, "grad_norm": 0.4863286644860177, "learning_rate": 5.543404681830665e-07, "loss": 0.0046, "step": 11781 }, { "epoch": 2.680773606370876, "grad_norm": 2.166993489676748, "learning_rate": 5.542516904364665e-07, "loss": 0.033, "step": 11782 }, { "epoch": 2.681001137656428, "grad_norm": 0.9958840684877107, "learning_rate": 5.541629141358326e-07, "loss": 0.0066, "step": 11783 }, { "epoch": 2.6812286689419795, "grad_norm": 0.7177432750731882, "learning_rate": 5.540741392829788e-07, "loss": 0.0081, "step": 11784 }, { "epoch": 2.6814562002275313, "grad_norm": 2.2489779409367823, "learning_rate": 5.539853658797199e-07, "loss": 0.054, "step": 11785 }, { "epoch": 2.681683731513083, "grad_norm": 1.1698912067999705, "learning_rate": 5.538965939278701e-07, "loss": 0.0124, "step": 11786 }, { "epoch": 2.681911262798635, "grad_norm": 0.5090748509626041, "learning_rate": 5.538078234292435e-07, "loss": 0.0049, "step": 11787 }, { "epoch": 2.6821387940841865, "grad_norm": 0.84013795133811, "learning_rate": 5.537190543856548e-07, "loss": 0.0127, "step": 11788 }, { "epoch": 2.6823663253697383, "grad_norm": 0.972191404133992, "learning_rate": 5.536302867989179e-07, "loss": 0.0231, "step": 11789 }, { "epoch": 2.68259385665529, "grad_norm": 0.642325793787157, "learning_rate": 5.535415206708474e-07, "loss": 0.0064, "step": 11790 }, { "epoch": 2.682821387940842, "grad_norm": 0.28656699069002983, "learning_rate": 5.534527560032572e-07, "loss": 0.002, "step": 11791 }, { "epoch": 2.6830489192263935, "grad_norm": 0.24305863742321443, "learning_rate": 5.533639927979619e-07, "loss": 0.0017, "step": 11792 }, { "epoch": 2.6832764505119453, "grad_norm": 1.0970341761331304, "learning_rate": 5.532752310567751e-07, "loss": 0.0132, "step": 11793 }, { "epoch": 2.683503981797497, "grad_norm": 0.877397490658028, "learning_rate": 5.531864707815112e-07, "loss": 0.014, "step": 11794 }, { "epoch": 2.683731513083049, "grad_norm": 1.6678510835582903, "learning_rate": 5.530977119739847e-07, "loss": 0.0195, "step": 11795 }, { "epoch": 2.6839590443686006, "grad_norm": 0.8694088451195225, "learning_rate": 5.530089546360089e-07, "loss": 0.0114, "step": 11796 }, { "epoch": 2.6841865756541523, "grad_norm": 0.5248821446495795, "learning_rate": 5.529201987693984e-07, "loss": 0.0071, "step": 11797 }, { "epoch": 2.684414106939704, "grad_norm": 0.7514692214336103, "learning_rate": 5.528314443759672e-07, "loss": 0.0059, "step": 11798 }, { "epoch": 2.684641638225256, "grad_norm": 0.9268942874248323, "learning_rate": 5.527426914575286e-07, "loss": 0.0091, "step": 11799 }, { "epoch": 2.684869169510808, "grad_norm": 0.5748166094295564, "learning_rate": 5.526539400158974e-07, "loss": 0.0078, "step": 11800 }, { "epoch": 2.6850967007963593, "grad_norm": 0.4910323379527007, "learning_rate": 5.52565190052887e-07, "loss": 0.0063, "step": 11801 }, { "epoch": 2.6853242320819115, "grad_norm": 0.7001027830498375, "learning_rate": 5.524764415703117e-07, "loss": 0.0104, "step": 11802 }, { "epoch": 2.685551763367463, "grad_norm": 0.51338326327779, "learning_rate": 5.523876945699849e-07, "loss": 0.0046, "step": 11803 }, { "epoch": 2.685779294653015, "grad_norm": 0.7528698108902763, "learning_rate": 5.522989490537207e-07, "loss": 0.0023, "step": 11804 }, { "epoch": 2.6860068259385663, "grad_norm": 0.7944780317584451, "learning_rate": 5.522102050233331e-07, "loss": 0.0095, "step": 11805 }, { "epoch": 2.6862343572241185, "grad_norm": 0.7665883277018515, "learning_rate": 5.521214624806352e-07, "loss": 0.0066, "step": 11806 }, { "epoch": 2.68646188850967, "grad_norm": 0.6389350234551819, "learning_rate": 5.520327214274413e-07, "loss": 0.0058, "step": 11807 }, { "epoch": 2.686689419795222, "grad_norm": 0.574049416943994, "learning_rate": 5.519439818655648e-07, "loss": 0.0086, "step": 11808 }, { "epoch": 2.6869169510807733, "grad_norm": 1.3048493993233596, "learning_rate": 5.518552437968198e-07, "loss": 0.0117, "step": 11809 }, { "epoch": 2.6871444823663255, "grad_norm": 0.8497245226350814, "learning_rate": 5.517665072230195e-07, "loss": 0.0072, "step": 11810 }, { "epoch": 2.6873720136518773, "grad_norm": 0.9987894105360857, "learning_rate": 5.516777721459777e-07, "loss": 0.0133, "step": 11811 }, { "epoch": 2.687599544937429, "grad_norm": 0.908840322924415, "learning_rate": 5.51589038567508e-07, "loss": 0.006, "step": 11812 }, { "epoch": 2.687827076222981, "grad_norm": 0.9875184570982183, "learning_rate": 5.515003064894236e-07, "loss": 0.0243, "step": 11813 }, { "epoch": 2.6880546075085325, "grad_norm": 0.3589743280784015, "learning_rate": 5.514115759135387e-07, "loss": 0.0034, "step": 11814 }, { "epoch": 2.6882821387940843, "grad_norm": 1.2542100856711813, "learning_rate": 5.513228468416662e-07, "loss": 0.0142, "step": 11815 }, { "epoch": 2.688509670079636, "grad_norm": 0.5968803717345474, "learning_rate": 5.512341192756199e-07, "loss": 0.0052, "step": 11816 }, { "epoch": 2.688737201365188, "grad_norm": 0.7488610507499963, "learning_rate": 5.511453932172132e-07, "loss": 0.004, "step": 11817 }, { "epoch": 2.6889647326507395, "grad_norm": 0.7837588167785465, "learning_rate": 5.510566686682592e-07, "loss": 0.0091, "step": 11818 }, { "epoch": 2.6891922639362913, "grad_norm": 0.6685472126532944, "learning_rate": 5.509679456305715e-07, "loss": 0.0131, "step": 11819 }, { "epoch": 2.689419795221843, "grad_norm": 1.4784594638200654, "learning_rate": 5.508792241059634e-07, "loss": 0.0262, "step": 11820 }, { "epoch": 2.689647326507395, "grad_norm": 0.40393506972965165, "learning_rate": 5.507905040962484e-07, "loss": 0.0029, "step": 11821 }, { "epoch": 2.6898748577929465, "grad_norm": 0.619644585146703, "learning_rate": 5.507017856032393e-07, "loss": 0.007, "step": 11822 }, { "epoch": 2.6901023890784983, "grad_norm": 1.9711876665344372, "learning_rate": 5.506130686287498e-07, "loss": 0.0321, "step": 11823 }, { "epoch": 2.69032992036405, "grad_norm": 0.7779182538088062, "learning_rate": 5.505243531745931e-07, "loss": 0.0108, "step": 11824 }, { "epoch": 2.690557451649602, "grad_norm": 0.6170937097695632, "learning_rate": 5.504356392425819e-07, "loss": 0.0039, "step": 11825 }, { "epoch": 2.6907849829351536, "grad_norm": 1.9038334839882527, "learning_rate": 5.503469268345299e-07, "loss": 0.0364, "step": 11826 }, { "epoch": 2.6910125142207053, "grad_norm": 1.7117592402725346, "learning_rate": 5.502582159522498e-07, "loss": 0.0137, "step": 11827 }, { "epoch": 2.691240045506257, "grad_norm": 1.1357216820731026, "learning_rate": 5.501695065975551e-07, "loss": 0.0077, "step": 11828 }, { "epoch": 2.691467576791809, "grad_norm": 1.179946892225116, "learning_rate": 5.500807987722586e-07, "loss": 0.0179, "step": 11829 }, { "epoch": 2.6916951080773606, "grad_norm": 0.8321112724736789, "learning_rate": 5.499920924781732e-07, "loss": 0.016, "step": 11830 }, { "epoch": 2.6919226393629123, "grad_norm": 1.444526926452012, "learning_rate": 5.499033877171123e-07, "loss": 0.0206, "step": 11831 }, { "epoch": 2.692150170648464, "grad_norm": 0.4145908934171216, "learning_rate": 5.498146844908884e-07, "loss": 0.0068, "step": 11832 }, { "epoch": 2.692377701934016, "grad_norm": 1.0238106373635232, "learning_rate": 5.497259828013148e-07, "loss": 0.0147, "step": 11833 }, { "epoch": 2.6926052332195676, "grad_norm": 1.1932402600499594, "learning_rate": 5.49637282650204e-07, "loss": 0.029, "step": 11834 }, { "epoch": 2.6928327645051193, "grad_norm": 0.921518271939559, "learning_rate": 5.495485840393695e-07, "loss": 0.0165, "step": 11835 }, { "epoch": 2.693060295790671, "grad_norm": 0.7189773714251781, "learning_rate": 5.494598869706237e-07, "loss": 0.0057, "step": 11836 }, { "epoch": 2.693287827076223, "grad_norm": 1.6252023912298, "learning_rate": 5.493711914457791e-07, "loss": 0.0332, "step": 11837 }, { "epoch": 2.6935153583617746, "grad_norm": 0.4627435891941314, "learning_rate": 5.492824974666493e-07, "loss": 0.0052, "step": 11838 }, { "epoch": 2.6937428896473268, "grad_norm": 0.5559389697057808, "learning_rate": 5.491938050350462e-07, "loss": 0.0033, "step": 11839 }, { "epoch": 2.693970420932878, "grad_norm": 1.7062317395209106, "learning_rate": 5.491051141527831e-07, "loss": 0.0228, "step": 11840 }, { "epoch": 2.6941979522184303, "grad_norm": 0.5357521430748353, "learning_rate": 5.490164248216724e-07, "loss": 0.0055, "step": 11841 }, { "epoch": 2.6944254835039816, "grad_norm": 1.6309134436863066, "learning_rate": 5.48927737043527e-07, "loss": 0.0331, "step": 11842 }, { "epoch": 2.694653014789534, "grad_norm": 0.8246240406094866, "learning_rate": 5.488390508201592e-07, "loss": 0.0067, "step": 11843 }, { "epoch": 2.694880546075085, "grad_norm": 0.8357725728117544, "learning_rate": 5.487503661533816e-07, "loss": 0.0047, "step": 11844 }, { "epoch": 2.6951080773606373, "grad_norm": 0.254787983350425, "learning_rate": 5.486616830450072e-07, "loss": 0.0024, "step": 11845 }, { "epoch": 2.6953356086461886, "grad_norm": 0.6653398478596568, "learning_rate": 5.485730014968477e-07, "loss": 0.0067, "step": 11846 }, { "epoch": 2.695563139931741, "grad_norm": 0.9960752060898969, "learning_rate": 5.484843215107164e-07, "loss": 0.02, "step": 11847 }, { "epoch": 2.695790671217292, "grad_norm": 1.0902910611397578, "learning_rate": 5.483956430884256e-07, "loss": 0.0157, "step": 11848 }, { "epoch": 2.6960182025028443, "grad_norm": 0.9350906730455228, "learning_rate": 5.483069662317871e-07, "loss": 0.0108, "step": 11849 }, { "epoch": 2.696245733788396, "grad_norm": 0.8552236143539419, "learning_rate": 5.482182909426141e-07, "loss": 0.0095, "step": 11850 }, { "epoch": 2.696473265073948, "grad_norm": 0.722050015525497, "learning_rate": 5.481296172227184e-07, "loss": 0.0045, "step": 11851 }, { "epoch": 2.6967007963594996, "grad_norm": 0.8293267490030252, "learning_rate": 5.480409450739128e-07, "loss": 0.0131, "step": 11852 }, { "epoch": 2.6969283276450513, "grad_norm": 0.7974587954491494, "learning_rate": 5.479522744980091e-07, "loss": 0.0113, "step": 11853 }, { "epoch": 2.697155858930603, "grad_norm": 0.2218425498055874, "learning_rate": 5.4786360549682e-07, "loss": 0.0014, "step": 11854 }, { "epoch": 2.697383390216155, "grad_norm": 0.7808834075678057, "learning_rate": 5.477749380721576e-07, "loss": 0.0172, "step": 11855 }, { "epoch": 2.6976109215017066, "grad_norm": 0.5500504120780505, "learning_rate": 5.476862722258336e-07, "loss": 0.0048, "step": 11856 }, { "epoch": 2.6978384527872583, "grad_norm": 0.15814341747135363, "learning_rate": 5.47597607959661e-07, "loss": 0.0012, "step": 11857 }, { "epoch": 2.69806598407281, "grad_norm": 1.393535006732649, "learning_rate": 5.475089452754513e-07, "loss": 0.0297, "step": 11858 }, { "epoch": 2.698293515358362, "grad_norm": 1.2245379289726859, "learning_rate": 5.474202841750171e-07, "loss": 0.0127, "step": 11859 }, { "epoch": 2.6985210466439136, "grad_norm": 0.9612482941189999, "learning_rate": 5.473316246601698e-07, "loss": 0.0191, "step": 11860 }, { "epoch": 2.6987485779294653, "grad_norm": 1.236148989674566, "learning_rate": 5.472429667327222e-07, "loss": 0.0165, "step": 11861 }, { "epoch": 2.698976109215017, "grad_norm": 1.1194433409305584, "learning_rate": 5.47154310394486e-07, "loss": 0.015, "step": 11862 }, { "epoch": 2.699203640500569, "grad_norm": 1.1675065735331476, "learning_rate": 5.470656556472729e-07, "loss": 0.0183, "step": 11863 }, { "epoch": 2.6994311717861206, "grad_norm": 1.014448478438783, "learning_rate": 5.469770024928952e-07, "loss": 0.0146, "step": 11864 }, { "epoch": 2.6996587030716723, "grad_norm": 0.7279388665538972, "learning_rate": 5.468883509331644e-07, "loss": 0.0096, "step": 11865 }, { "epoch": 2.699886234357224, "grad_norm": 1.771415588997198, "learning_rate": 5.467997009698931e-07, "loss": 0.0145, "step": 11866 }, { "epoch": 2.700113765642776, "grad_norm": 0.7580168591987099, "learning_rate": 5.467110526048925e-07, "loss": 0.0069, "step": 11867 }, { "epoch": 2.7003412969283276, "grad_norm": 0.8861740740533481, "learning_rate": 5.466224058399746e-07, "loss": 0.0131, "step": 11868 }, { "epoch": 2.7005688282138793, "grad_norm": 0.2802311103200003, "learning_rate": 5.465337606769512e-07, "loss": 0.0017, "step": 11869 }, { "epoch": 2.700796359499431, "grad_norm": 0.7364908649452866, "learning_rate": 5.46445117117634e-07, "loss": 0.0152, "step": 11870 }, { "epoch": 2.701023890784983, "grad_norm": 1.140623745523031, "learning_rate": 5.463564751638348e-07, "loss": 0.0158, "step": 11871 }, { "epoch": 2.7012514220705346, "grad_norm": 0.7873923530527468, "learning_rate": 5.462678348173649e-07, "loss": 0.0182, "step": 11872 }, { "epoch": 2.7014789533560863, "grad_norm": 1.9351141608452775, "learning_rate": 5.461791960800367e-07, "loss": 0.0104, "step": 11873 }, { "epoch": 2.701706484641638, "grad_norm": 0.44898068311426687, "learning_rate": 5.460905589536613e-07, "loss": 0.0026, "step": 11874 }, { "epoch": 2.70193401592719, "grad_norm": 0.3231132109746276, "learning_rate": 5.460019234400501e-07, "loss": 0.0018, "step": 11875 }, { "epoch": 2.7021615472127416, "grad_norm": 1.1428101396604324, "learning_rate": 5.459132895410152e-07, "loss": 0.0072, "step": 11876 }, { "epoch": 2.7023890784982934, "grad_norm": 1.1883571694616584, "learning_rate": 5.458246572583674e-07, "loss": 0.0088, "step": 11877 }, { "epoch": 2.7026166097838455, "grad_norm": 0.7852898134190726, "learning_rate": 5.45736026593919e-07, "loss": 0.0095, "step": 11878 }, { "epoch": 2.702844141069397, "grad_norm": 0.6611196014160001, "learning_rate": 5.456473975494809e-07, "loss": 0.0086, "step": 11879 }, { "epoch": 2.703071672354949, "grad_norm": 0.8081360016619591, "learning_rate": 5.455587701268647e-07, "loss": 0.0063, "step": 11880 }, { "epoch": 2.7032992036405004, "grad_norm": 0.7975450896540592, "learning_rate": 5.454701443278819e-07, "loss": 0.0036, "step": 11881 }, { "epoch": 2.7035267349260526, "grad_norm": 0.53469728488137, "learning_rate": 5.453815201543435e-07, "loss": 0.0088, "step": 11882 }, { "epoch": 2.703754266211604, "grad_norm": 0.4024573937471117, "learning_rate": 5.452928976080611e-07, "loss": 0.0052, "step": 11883 }, { "epoch": 2.703981797497156, "grad_norm": 0.926629822523081, "learning_rate": 5.452042766908457e-07, "loss": 0.0082, "step": 11884 }, { "epoch": 2.7042093287827074, "grad_norm": 0.4115220077776518, "learning_rate": 5.451156574045091e-07, "loss": 0.0056, "step": 11885 }, { "epoch": 2.7044368600682596, "grad_norm": 0.8792173545064345, "learning_rate": 5.45027039750862e-07, "loss": 0.0122, "step": 11886 }, { "epoch": 2.7046643913538113, "grad_norm": 0.7843126767670746, "learning_rate": 5.449384237317156e-07, "loss": 0.0131, "step": 11887 }, { "epoch": 2.704891922639363, "grad_norm": 0.16094072293916012, "learning_rate": 5.448498093488814e-07, "loss": 0.001, "step": 11888 }, { "epoch": 2.705119453924915, "grad_norm": 0.8140949128151521, "learning_rate": 5.447611966041701e-07, "loss": 0.0092, "step": 11889 }, { "epoch": 2.7053469852104666, "grad_norm": 0.7763149168454129, "learning_rate": 5.446725854993932e-07, "loss": 0.0124, "step": 11890 }, { "epoch": 2.7055745164960183, "grad_norm": 0.5351884422449552, "learning_rate": 5.445839760363613e-07, "loss": 0.0023, "step": 11891 }, { "epoch": 2.70580204778157, "grad_norm": 0.7530487784919839, "learning_rate": 5.444953682168859e-07, "loss": 0.0095, "step": 11892 }, { "epoch": 2.706029579067122, "grad_norm": 2.061974311298186, "learning_rate": 5.444067620427777e-07, "loss": 0.0208, "step": 11893 }, { "epoch": 2.7062571103526736, "grad_norm": 1.0220912383164273, "learning_rate": 5.443181575158475e-07, "loss": 0.0157, "step": 11894 }, { "epoch": 2.7064846416382253, "grad_norm": 1.0031258865976527, "learning_rate": 5.442295546379067e-07, "loss": 0.0161, "step": 11895 }, { "epoch": 2.706712172923777, "grad_norm": 0.7941848919902509, "learning_rate": 5.441409534107657e-07, "loss": 0.0099, "step": 11896 }, { "epoch": 2.706939704209329, "grad_norm": 0.3677414733164823, "learning_rate": 5.440523538362355e-07, "loss": 0.0038, "step": 11897 }, { "epoch": 2.7071672354948806, "grad_norm": 2.1387117495943255, "learning_rate": 5.439637559161267e-07, "loss": 0.0469, "step": 11898 }, { "epoch": 2.7073947667804323, "grad_norm": 1.0597752311718385, "learning_rate": 5.438751596522508e-07, "loss": 0.0077, "step": 11899 }, { "epoch": 2.707622298065984, "grad_norm": 0.9488826593693157, "learning_rate": 5.437865650464179e-07, "loss": 0.005, "step": 11900 }, { "epoch": 2.707849829351536, "grad_norm": 0.7762895415544564, "learning_rate": 5.436979721004388e-07, "loss": 0.0092, "step": 11901 }, { "epoch": 2.7080773606370876, "grad_norm": 1.054662232878711, "learning_rate": 5.436093808161243e-07, "loss": 0.0157, "step": 11902 }, { "epoch": 2.7083048919226393, "grad_norm": 0.8144261967053653, "learning_rate": 5.435207911952849e-07, "loss": 0.0076, "step": 11903 }, { "epoch": 2.708532423208191, "grad_norm": 0.5341452909941972, "learning_rate": 5.434322032397314e-07, "loss": 0.0042, "step": 11904 }, { "epoch": 2.708759954493743, "grad_norm": 0.5885434198423115, "learning_rate": 5.433436169512744e-07, "loss": 0.0049, "step": 11905 }, { "epoch": 2.7089874857792946, "grad_norm": 0.4161817001295814, "learning_rate": 5.432550323317241e-07, "loss": 0.0031, "step": 11906 }, { "epoch": 2.7092150170648464, "grad_norm": 0.4781607275540322, "learning_rate": 5.431664493828914e-07, "loss": 0.0054, "step": 11907 }, { "epoch": 2.709442548350398, "grad_norm": 1.2147945166064893, "learning_rate": 5.430778681065863e-07, "loss": 0.0146, "step": 11908 }, { "epoch": 2.70967007963595, "grad_norm": 1.4444486576004318, "learning_rate": 5.429892885046199e-07, "loss": 0.0186, "step": 11909 }, { "epoch": 2.7098976109215016, "grad_norm": 0.7217840703968338, "learning_rate": 5.42900710578802e-07, "loss": 0.0194, "step": 11910 }, { "epoch": 2.7101251422070534, "grad_norm": 0.7033479230547811, "learning_rate": 5.428121343309434e-07, "loss": 0.0042, "step": 11911 }, { "epoch": 2.710352673492605, "grad_norm": 1.067960897256648, "learning_rate": 5.427235597628543e-07, "loss": 0.0313, "step": 11912 }, { "epoch": 2.710580204778157, "grad_norm": 0.636736446222804, "learning_rate": 5.426349868763447e-07, "loss": 0.0093, "step": 11913 }, { "epoch": 2.7108077360637086, "grad_norm": 2.3743507757307976, "learning_rate": 5.425464156732253e-07, "loss": 0.0198, "step": 11914 }, { "epoch": 2.7110352673492604, "grad_norm": 1.1723659983957235, "learning_rate": 5.42457846155306e-07, "loss": 0.0177, "step": 11915 }, { "epoch": 2.711262798634812, "grad_norm": 0.6136596470155284, "learning_rate": 5.423692783243975e-07, "loss": 0.0087, "step": 11916 }, { "epoch": 2.7114903299203643, "grad_norm": 0.4415015746325517, "learning_rate": 5.422807121823093e-07, "loss": 0.0062, "step": 11917 }, { "epoch": 2.7117178612059156, "grad_norm": 0.3547223924454857, "learning_rate": 5.421921477308519e-07, "loss": 0.0021, "step": 11918 }, { "epoch": 2.711945392491468, "grad_norm": 0.9189470995334015, "learning_rate": 5.421035849718355e-07, "loss": 0.0114, "step": 11919 }, { "epoch": 2.712172923777019, "grad_norm": 1.626254770775174, "learning_rate": 5.420150239070698e-07, "loss": 0.0199, "step": 11920 }, { "epoch": 2.7124004550625713, "grad_norm": 1.341938036674727, "learning_rate": 5.419264645383652e-07, "loss": 0.0237, "step": 11921 }, { "epoch": 2.7126279863481226, "grad_norm": 1.24013983474554, "learning_rate": 5.418379068675313e-07, "loss": 0.0223, "step": 11922 }, { "epoch": 2.712855517633675, "grad_norm": 0.9282545879959919, "learning_rate": 5.417493508963786e-07, "loss": 0.0101, "step": 11923 }, { "epoch": 2.713083048919226, "grad_norm": 1.1227358674443657, "learning_rate": 5.416607966267165e-07, "loss": 0.0068, "step": 11924 }, { "epoch": 2.7133105802047783, "grad_norm": 0.8878153850304248, "learning_rate": 5.415722440603551e-07, "loss": 0.0088, "step": 11925 }, { "epoch": 2.71353811149033, "grad_norm": 0.7116490864109268, "learning_rate": 5.414836931991043e-07, "loss": 0.0223, "step": 11926 }, { "epoch": 2.713765642775882, "grad_norm": 1.2907955584084454, "learning_rate": 5.413951440447737e-07, "loss": 0.0182, "step": 11927 }, { "epoch": 2.7139931740614336, "grad_norm": 0.6246070810468543, "learning_rate": 5.413065965991734e-07, "loss": 0.0129, "step": 11928 }, { "epoch": 2.7142207053469853, "grad_norm": 0.9110201540092246, "learning_rate": 5.412180508641128e-07, "loss": 0.0161, "step": 11929 }, { "epoch": 2.714448236632537, "grad_norm": 1.7832473404616662, "learning_rate": 5.411295068414022e-07, "loss": 0.0197, "step": 11930 }, { "epoch": 2.714675767918089, "grad_norm": 0.6909352580099262, "learning_rate": 5.410409645328506e-07, "loss": 0.0088, "step": 11931 }, { "epoch": 2.7149032992036406, "grad_norm": 1.440587116039775, "learning_rate": 5.409524239402678e-07, "loss": 0.0208, "step": 11932 }, { "epoch": 2.7151308304891923, "grad_norm": 0.7802716685485855, "learning_rate": 5.408638850654638e-07, "loss": 0.0056, "step": 11933 }, { "epoch": 2.715358361774744, "grad_norm": 0.330416197701637, "learning_rate": 5.407753479102477e-07, "loss": 0.0022, "step": 11934 }, { "epoch": 2.715585893060296, "grad_norm": 0.8324755172903011, "learning_rate": 5.406868124764293e-07, "loss": 0.0146, "step": 11935 }, { "epoch": 2.7158134243458476, "grad_norm": 0.7058170709507305, "learning_rate": 5.405982787658182e-07, "loss": 0.0085, "step": 11936 }, { "epoch": 2.7160409556313994, "grad_norm": 0.5206932780106741, "learning_rate": 5.405097467802233e-07, "loss": 0.0033, "step": 11937 }, { "epoch": 2.716268486916951, "grad_norm": 1.7950044696633456, "learning_rate": 5.404212165214549e-07, "loss": 0.0118, "step": 11938 }, { "epoch": 2.716496018202503, "grad_norm": 1.1150726655424734, "learning_rate": 5.403326879913216e-07, "loss": 0.0213, "step": 11939 }, { "epoch": 2.7167235494880546, "grad_norm": 1.0024400667471087, "learning_rate": 5.402441611916333e-07, "loss": 0.018, "step": 11940 }, { "epoch": 2.7169510807736064, "grad_norm": 0.7535266003638291, "learning_rate": 5.401556361241989e-07, "loss": 0.0134, "step": 11941 }, { "epoch": 2.717178612059158, "grad_norm": 0.9477479188865915, "learning_rate": 5.400671127908282e-07, "loss": 0.0083, "step": 11942 }, { "epoch": 2.71740614334471, "grad_norm": 1.0782534299894933, "learning_rate": 5.3997859119333e-07, "loss": 0.0129, "step": 11943 }, { "epoch": 2.7176336746302616, "grad_norm": 2.1060101514258833, "learning_rate": 5.398900713335137e-07, "loss": 0.0219, "step": 11944 }, { "epoch": 2.7178612059158134, "grad_norm": 0.8170018717065882, "learning_rate": 5.398015532131887e-07, "loss": 0.0121, "step": 11945 }, { "epoch": 2.718088737201365, "grad_norm": 0.7660633770723897, "learning_rate": 5.397130368341635e-07, "loss": 0.0035, "step": 11946 }, { "epoch": 2.718316268486917, "grad_norm": 0.6109707228867461, "learning_rate": 5.396245221982479e-07, "loss": 0.0041, "step": 11947 }, { "epoch": 2.7185437997724686, "grad_norm": 0.9540000599038602, "learning_rate": 5.395360093072506e-07, "loss": 0.0146, "step": 11948 }, { "epoch": 2.7187713310580204, "grad_norm": 0.847534915144919, "learning_rate": 5.394474981629809e-07, "loss": 0.0111, "step": 11949 }, { "epoch": 2.718998862343572, "grad_norm": 0.751967031024854, "learning_rate": 5.393589887672476e-07, "loss": 0.0122, "step": 11950 }, { "epoch": 2.719226393629124, "grad_norm": 1.0645240736912234, "learning_rate": 5.392704811218595e-07, "loss": 0.0184, "step": 11951 }, { "epoch": 2.7194539249146756, "grad_norm": 3.2294854418888237, "learning_rate": 5.391819752286262e-07, "loss": 0.116, "step": 11952 }, { "epoch": 2.7196814562002274, "grad_norm": 1.0115182981994242, "learning_rate": 5.390934710893557e-07, "loss": 0.0077, "step": 11953 }, { "epoch": 2.719908987485779, "grad_norm": 0.9876313923507377, "learning_rate": 5.390049687058575e-07, "loss": 0.0105, "step": 11954 }, { "epoch": 2.720136518771331, "grad_norm": 0.5174473165834417, "learning_rate": 5.389164680799405e-07, "loss": 0.0047, "step": 11955 }, { "epoch": 2.720364050056883, "grad_norm": 0.3107391335071714, "learning_rate": 5.388279692134129e-07, "loss": 0.0028, "step": 11956 }, { "epoch": 2.7205915813424344, "grad_norm": 1.2213620301710744, "learning_rate": 5.387394721080839e-07, "loss": 0.0277, "step": 11957 }, { "epoch": 2.7208191126279866, "grad_norm": 0.5887134466166104, "learning_rate": 5.38650976765762e-07, "loss": 0.0041, "step": 11958 }, { "epoch": 2.721046643913538, "grad_norm": 0.6064256608306823, "learning_rate": 5.385624831882562e-07, "loss": 0.0071, "step": 11959 }, { "epoch": 2.72127417519909, "grad_norm": 1.487963535822759, "learning_rate": 5.384739913773748e-07, "loss": 0.0254, "step": 11960 }, { "epoch": 2.7215017064846414, "grad_norm": 1.088958609797225, "learning_rate": 5.383855013349266e-07, "loss": 0.0074, "step": 11961 }, { "epoch": 2.7217292377701936, "grad_norm": 1.520784100909351, "learning_rate": 5.382970130627203e-07, "loss": 0.0221, "step": 11962 }, { "epoch": 2.721956769055745, "grad_norm": 0.4641409148544967, "learning_rate": 5.382085265625639e-07, "loss": 0.0051, "step": 11963 }, { "epoch": 2.722184300341297, "grad_norm": 1.177212218741871, "learning_rate": 5.381200418362665e-07, "loss": 0.0219, "step": 11964 }, { "epoch": 2.722411831626849, "grad_norm": 1.0902745185805536, "learning_rate": 5.380315588856362e-07, "loss": 0.0081, "step": 11965 }, { "epoch": 2.7226393629124006, "grad_norm": 0.6785374321508625, "learning_rate": 5.379430777124817e-07, "loss": 0.0064, "step": 11966 }, { "epoch": 2.7228668941979524, "grad_norm": 1.0406321694376237, "learning_rate": 5.378545983186111e-07, "loss": 0.0176, "step": 11967 }, { "epoch": 2.723094425483504, "grad_norm": 0.5122953061617832, "learning_rate": 5.377661207058331e-07, "loss": 0.0049, "step": 11968 }, { "epoch": 2.723321956769056, "grad_norm": 0.5107741839047342, "learning_rate": 5.376776448759559e-07, "loss": 0.0057, "step": 11969 }, { "epoch": 2.7235494880546076, "grad_norm": 0.3558682284021996, "learning_rate": 5.375891708307874e-07, "loss": 0.0042, "step": 11970 }, { "epoch": 2.7237770193401594, "grad_norm": 0.8523800473000891, "learning_rate": 5.375006985721364e-07, "loss": 0.0127, "step": 11971 }, { "epoch": 2.724004550625711, "grad_norm": 1.0102140986242438, "learning_rate": 5.374122281018106e-07, "loss": 0.0132, "step": 11972 }, { "epoch": 2.724232081911263, "grad_norm": 0.6719775231926927, "learning_rate": 5.373237594216188e-07, "loss": 0.0072, "step": 11973 }, { "epoch": 2.7244596131968146, "grad_norm": 0.653904524448434, "learning_rate": 5.372352925333687e-07, "loss": 0.0133, "step": 11974 }, { "epoch": 2.7246871444823664, "grad_norm": 0.47918624072064975, "learning_rate": 5.371468274388683e-07, "loss": 0.0053, "step": 11975 }, { "epoch": 2.724914675767918, "grad_norm": 0.8602834674549673, "learning_rate": 5.370583641399261e-07, "loss": 0.0117, "step": 11976 }, { "epoch": 2.72514220705347, "grad_norm": 0.5212450556491235, "learning_rate": 5.369699026383495e-07, "loss": 0.0064, "step": 11977 }, { "epoch": 2.7253697383390216, "grad_norm": 0.9577509197303766, "learning_rate": 5.368814429359472e-07, "loss": 0.0162, "step": 11978 }, { "epoch": 2.7255972696245734, "grad_norm": 0.9040541786834738, "learning_rate": 5.367929850345267e-07, "loss": 0.0137, "step": 11979 }, { "epoch": 2.725824800910125, "grad_norm": 1.2720435361931715, "learning_rate": 5.367045289358962e-07, "loss": 0.0199, "step": 11980 }, { "epoch": 2.726052332195677, "grad_norm": 0.5128127262700752, "learning_rate": 5.366160746418633e-07, "loss": 0.0037, "step": 11981 }, { "epoch": 2.7262798634812286, "grad_norm": 1.503409063711201, "learning_rate": 5.365276221542359e-07, "loss": 0.0105, "step": 11982 }, { "epoch": 2.7265073947667804, "grad_norm": 0.6261098047626347, "learning_rate": 5.364391714748221e-07, "loss": 0.0087, "step": 11983 }, { "epoch": 2.726734926052332, "grad_norm": 0.4926247171371658, "learning_rate": 5.363507226054292e-07, "loss": 0.0039, "step": 11984 }, { "epoch": 2.726962457337884, "grad_norm": 1.2705091654833147, "learning_rate": 5.362622755478653e-07, "loss": 0.0296, "step": 11985 }, { "epoch": 2.7271899886234356, "grad_norm": 1.7953331834525466, "learning_rate": 5.361738303039377e-07, "loss": 0.0263, "step": 11986 }, { "epoch": 2.7274175199089874, "grad_norm": 1.8777130603736873, "learning_rate": 5.360853868754548e-07, "loss": 0.0169, "step": 11987 }, { "epoch": 2.727645051194539, "grad_norm": 1.042740059307303, "learning_rate": 5.359969452642235e-07, "loss": 0.0184, "step": 11988 }, { "epoch": 2.727872582480091, "grad_norm": 1.0207714108865253, "learning_rate": 5.359085054720515e-07, "loss": 0.0173, "step": 11989 }, { "epoch": 2.7281001137656427, "grad_norm": 0.976689281713901, "learning_rate": 5.358200675007469e-07, "loss": 0.0227, "step": 11990 }, { "epoch": 2.7283276450511944, "grad_norm": 0.9155092000939417, "learning_rate": 5.357316313521164e-07, "loss": 0.0167, "step": 11991 }, { "epoch": 2.728555176336746, "grad_norm": 0.8044122847595246, "learning_rate": 5.356431970279681e-07, "loss": 0.0082, "step": 11992 }, { "epoch": 2.728782707622298, "grad_norm": 0.5854549165650298, "learning_rate": 5.355547645301092e-07, "loss": 0.0061, "step": 11993 }, { "epoch": 2.7290102389078497, "grad_norm": 0.9042819381752832, "learning_rate": 5.354663338603469e-07, "loss": 0.0076, "step": 11994 }, { "epoch": 2.729237770193402, "grad_norm": 1.231687526036608, "learning_rate": 5.35377905020489e-07, "loss": 0.0084, "step": 11995 }, { "epoch": 2.729465301478953, "grad_norm": 0.7794605857614766, "learning_rate": 5.352894780123423e-07, "loss": 0.0048, "step": 11996 }, { "epoch": 2.7296928327645054, "grad_norm": 0.9252973166940076, "learning_rate": 5.352010528377147e-07, "loss": 0.014, "step": 11997 }, { "epoch": 2.7299203640500567, "grad_norm": 1.3423458619743, "learning_rate": 5.351126294984126e-07, "loss": 0.0053, "step": 11998 }, { "epoch": 2.730147895335609, "grad_norm": 1.1438833036386062, "learning_rate": 5.350242079962443e-07, "loss": 0.0155, "step": 11999 }, { "epoch": 2.73037542662116, "grad_norm": 0.9765013779122593, "learning_rate": 5.349357883330164e-07, "loss": 0.0072, "step": 12000 }, { "epoch": 2.7306029579067124, "grad_norm": 1.6737252391036452, "learning_rate": 5.348473705105355e-07, "loss": 0.0254, "step": 12001 }, { "epoch": 2.7308304891922637, "grad_norm": 0.6255614153911672, "learning_rate": 5.347589545306097e-07, "loss": 0.0032, "step": 12002 }, { "epoch": 2.731058020477816, "grad_norm": 0.38385973147368574, "learning_rate": 5.346705403950454e-07, "loss": 0.0038, "step": 12003 }, { "epoch": 2.7312855517633676, "grad_norm": 0.5612790113856384, "learning_rate": 5.345821281056499e-07, "loss": 0.0048, "step": 12004 }, { "epoch": 2.7315130830489194, "grad_norm": 0.6413057028345108, "learning_rate": 5.3449371766423e-07, "loss": 0.0055, "step": 12005 }, { "epoch": 2.731740614334471, "grad_norm": 1.0238055638845198, "learning_rate": 5.344053090725931e-07, "loss": 0.0198, "step": 12006 }, { "epoch": 2.731968145620023, "grad_norm": 0.4350112440579598, "learning_rate": 5.343169023325455e-07, "loss": 0.0056, "step": 12007 }, { "epoch": 2.7321956769055746, "grad_norm": 1.0166165489212549, "learning_rate": 5.342284974458943e-07, "loss": 0.0165, "step": 12008 }, { "epoch": 2.7324232081911264, "grad_norm": 0.6692151587033197, "learning_rate": 5.341400944144465e-07, "loss": 0.0107, "step": 12009 }, { "epoch": 2.732650739476678, "grad_norm": 1.1279836251527762, "learning_rate": 5.340516932400086e-07, "loss": 0.0137, "step": 12010 }, { "epoch": 2.73287827076223, "grad_norm": 0.8099858960519103, "learning_rate": 5.339632939243877e-07, "loss": 0.0047, "step": 12011 }, { "epoch": 2.7331058020477816, "grad_norm": 0.6679168714872123, "learning_rate": 5.338748964693905e-07, "loss": 0.004, "step": 12012 }, { "epoch": 2.7333333333333334, "grad_norm": 0.9116224148978845, "learning_rate": 5.337865008768231e-07, "loss": 0.011, "step": 12013 }, { "epoch": 2.733560864618885, "grad_norm": 0.5606894170182276, "learning_rate": 5.336981071484928e-07, "loss": 0.0041, "step": 12014 }, { "epoch": 2.733788395904437, "grad_norm": 1.0316483485738166, "learning_rate": 5.336097152862059e-07, "loss": 0.015, "step": 12015 }, { "epoch": 2.7340159271899886, "grad_norm": 0.6822142726149316, "learning_rate": 5.335213252917693e-07, "loss": 0.0049, "step": 12016 }, { "epoch": 2.7342434584755404, "grad_norm": 1.72291297055628, "learning_rate": 5.334329371669889e-07, "loss": 0.0228, "step": 12017 }, { "epoch": 2.734470989761092, "grad_norm": 0.6854219219160173, "learning_rate": 5.333445509136718e-07, "loss": 0.0102, "step": 12018 }, { "epoch": 2.734698521046644, "grad_norm": 0.5602498549063797, "learning_rate": 5.332561665336243e-07, "loss": 0.0075, "step": 12019 }, { "epoch": 2.7349260523321957, "grad_norm": 1.1532339902635893, "learning_rate": 5.331677840286524e-07, "loss": 0.0143, "step": 12020 }, { "epoch": 2.7351535836177474, "grad_norm": 0.2386756348964981, "learning_rate": 5.330794034005631e-07, "loss": 0.0024, "step": 12021 }, { "epoch": 2.735381114903299, "grad_norm": 1.667697134670315, "learning_rate": 5.329910246511623e-07, "loss": 0.0203, "step": 12022 }, { "epoch": 2.735608646188851, "grad_norm": 1.4702106900136604, "learning_rate": 5.329026477822566e-07, "loss": 0.0174, "step": 12023 }, { "epoch": 2.7358361774744027, "grad_norm": 0.6340555157911011, "learning_rate": 5.328142727956521e-07, "loss": 0.0085, "step": 12024 }, { "epoch": 2.7360637087599544, "grad_norm": 0.17485286197992916, "learning_rate": 5.327258996931548e-07, "loss": 0.0012, "step": 12025 }, { "epoch": 2.736291240045506, "grad_norm": 0.691147841673016, "learning_rate": 5.326375284765715e-07, "loss": 0.0103, "step": 12026 }, { "epoch": 2.736518771331058, "grad_norm": 1.3382470760347913, "learning_rate": 5.325491591477076e-07, "loss": 0.0079, "step": 12027 }, { "epoch": 2.7367463026166097, "grad_norm": 0.8071169901394345, "learning_rate": 5.324607917083698e-07, "loss": 0.0125, "step": 12028 }, { "epoch": 2.7369738339021614, "grad_norm": 1.0406890216759455, "learning_rate": 5.323724261603637e-07, "loss": 0.0148, "step": 12029 }, { "epoch": 2.737201365187713, "grad_norm": 0.32369974008457003, "learning_rate": 5.322840625054959e-07, "loss": 0.0034, "step": 12030 }, { "epoch": 2.737428896473265, "grad_norm": 0.6590565717726029, "learning_rate": 5.321957007455719e-07, "loss": 0.0056, "step": 12031 }, { "epoch": 2.7376564277588167, "grad_norm": 1.2624883599487968, "learning_rate": 5.321073408823976e-07, "loss": 0.0155, "step": 12032 }, { "epoch": 2.7378839590443684, "grad_norm": 0.8657709275145515, "learning_rate": 5.320189829177796e-07, "loss": 0.0089, "step": 12033 }, { "epoch": 2.7381114903299206, "grad_norm": 0.7580080941272516, "learning_rate": 5.319306268535229e-07, "loss": 0.0059, "step": 12034 }, { "epoch": 2.738339021615472, "grad_norm": 1.7512406452990632, "learning_rate": 5.318422726914339e-07, "loss": 0.0202, "step": 12035 }, { "epoch": 2.738566552901024, "grad_norm": 1.4701398336484697, "learning_rate": 5.31753920433318e-07, "loss": 0.0135, "step": 12036 }, { "epoch": 2.7387940841865754, "grad_norm": 1.0421058418404208, "learning_rate": 5.316655700809816e-07, "loss": 0.0097, "step": 12037 }, { "epoch": 2.7390216154721276, "grad_norm": 0.7933204170043289, "learning_rate": 5.315772216362298e-07, "loss": 0.0121, "step": 12038 }, { "epoch": 2.739249146757679, "grad_norm": 0.6709288947034324, "learning_rate": 5.314888751008684e-07, "loss": 0.0098, "step": 12039 }, { "epoch": 2.739476678043231, "grad_norm": 1.0051633604994337, "learning_rate": 5.314005304767034e-07, "loss": 0.0168, "step": 12040 }, { "epoch": 2.7397042093287824, "grad_norm": 1.163463936162744, "learning_rate": 5.313121877655399e-07, "loss": 0.0149, "step": 12041 }, { "epoch": 2.7399317406143346, "grad_norm": 1.32062924229439, "learning_rate": 5.312238469691838e-07, "loss": 0.0101, "step": 12042 }, { "epoch": 2.7401592718998864, "grad_norm": 0.9516761947191837, "learning_rate": 5.311355080894407e-07, "loss": 0.0084, "step": 12043 }, { "epoch": 2.740386803185438, "grad_norm": 0.4609844064813129, "learning_rate": 5.310471711281155e-07, "loss": 0.0047, "step": 12044 }, { "epoch": 2.74061433447099, "grad_norm": 1.7557637273658377, "learning_rate": 5.309588360870144e-07, "loss": 0.032, "step": 12045 }, { "epoch": 2.7408418657565417, "grad_norm": 1.3326381497688198, "learning_rate": 5.308705029679421e-07, "loss": 0.0195, "step": 12046 }, { "epoch": 2.7410693970420934, "grad_norm": 0.5131171861763627, "learning_rate": 5.307821717727047e-07, "loss": 0.005, "step": 12047 }, { "epoch": 2.741296928327645, "grad_norm": 0.7901322096637989, "learning_rate": 5.306938425031069e-07, "loss": 0.0095, "step": 12048 }, { "epoch": 2.741524459613197, "grad_norm": 3.1785087704336585, "learning_rate": 5.306055151609544e-07, "loss": 0.0817, "step": 12049 }, { "epoch": 2.7417519908987487, "grad_norm": 0.7032471494031763, "learning_rate": 5.305171897480524e-07, "loss": 0.0073, "step": 12050 }, { "epoch": 2.7419795221843004, "grad_norm": 1.6932256790207334, "learning_rate": 5.304288662662059e-07, "loss": 0.0238, "step": 12051 }, { "epoch": 2.742207053469852, "grad_norm": 0.5983499662039524, "learning_rate": 5.303405447172201e-07, "loss": 0.0046, "step": 12052 }, { "epoch": 2.742434584755404, "grad_norm": 1.1560594016699173, "learning_rate": 5.302522251029002e-07, "loss": 0.0118, "step": 12053 }, { "epoch": 2.7426621160409557, "grad_norm": 0.806369536231005, "learning_rate": 5.301639074250514e-07, "loss": 0.0095, "step": 12054 }, { "epoch": 2.7428896473265074, "grad_norm": 0.7496697655777281, "learning_rate": 5.300755916854784e-07, "loss": 0.0084, "step": 12055 }, { "epoch": 2.743117178612059, "grad_norm": 0.9464673539506436, "learning_rate": 5.299872778859867e-07, "loss": 0.0074, "step": 12056 }, { "epoch": 2.743344709897611, "grad_norm": 2.1130545341626132, "learning_rate": 5.298989660283812e-07, "loss": 0.0162, "step": 12057 }, { "epoch": 2.7435722411831627, "grad_norm": 0.4889876535838294, "learning_rate": 5.298106561144662e-07, "loss": 0.0036, "step": 12058 }, { "epoch": 2.7437997724687144, "grad_norm": 1.0733863700721429, "learning_rate": 5.297223481460474e-07, "loss": 0.0088, "step": 12059 }, { "epoch": 2.744027303754266, "grad_norm": 0.7182745513154595, "learning_rate": 5.296340421249291e-07, "loss": 0.0053, "step": 12060 }, { "epoch": 2.744254835039818, "grad_norm": 1.2873604812356538, "learning_rate": 5.295457380529164e-07, "loss": 0.022, "step": 12061 }, { "epoch": 2.7444823663253697, "grad_norm": 0.6256351231723917, "learning_rate": 5.294574359318143e-07, "loss": 0.004, "step": 12062 }, { "epoch": 2.7447098976109214, "grad_norm": 2.0494102908507625, "learning_rate": 5.293691357634269e-07, "loss": 0.041, "step": 12063 }, { "epoch": 2.744937428896473, "grad_norm": 0.9905787469088841, "learning_rate": 5.292808375495593e-07, "loss": 0.013, "step": 12064 }, { "epoch": 2.745164960182025, "grad_norm": 0.8860942583851056, "learning_rate": 5.291925412920159e-07, "loss": 0.0111, "step": 12065 }, { "epoch": 2.7453924914675767, "grad_norm": 0.40860943841096964, "learning_rate": 5.291042469926019e-07, "loss": 0.0034, "step": 12066 }, { "epoch": 2.7456200227531284, "grad_norm": 0.3429730560954107, "learning_rate": 5.290159546531211e-07, "loss": 0.0033, "step": 12067 }, { "epoch": 2.74584755403868, "grad_norm": 0.4496528014106175, "learning_rate": 5.289276642753785e-07, "loss": 0.0052, "step": 12068 }, { "epoch": 2.746075085324232, "grad_norm": 1.0160245049877488, "learning_rate": 5.288393758611787e-07, "loss": 0.0181, "step": 12069 }, { "epoch": 2.7463026166097837, "grad_norm": 1.2739282961886154, "learning_rate": 5.287510894123256e-07, "loss": 0.0225, "step": 12070 }, { "epoch": 2.7465301478953354, "grad_norm": 2.5929467164078535, "learning_rate": 5.286628049306243e-07, "loss": 0.0324, "step": 12071 }, { "epoch": 2.746757679180887, "grad_norm": 0.6053855532286034, "learning_rate": 5.285745224178785e-07, "loss": 0.0085, "step": 12072 }, { "epoch": 2.7469852104664394, "grad_norm": 1.4477681905870832, "learning_rate": 5.284862418758932e-07, "loss": 0.0117, "step": 12073 }, { "epoch": 2.7472127417519907, "grad_norm": 0.4820568001432872, "learning_rate": 5.28397963306472e-07, "loss": 0.0033, "step": 12074 }, { "epoch": 2.747440273037543, "grad_norm": 1.3042781458978863, "learning_rate": 5.283096867114198e-07, "loss": 0.0346, "step": 12075 }, { "epoch": 2.747667804323094, "grad_norm": 1.234566151730282, "learning_rate": 5.282214120925406e-07, "loss": 0.0235, "step": 12076 }, { "epoch": 2.7478953356086464, "grad_norm": 0.6473669142722525, "learning_rate": 5.281331394516382e-07, "loss": 0.0085, "step": 12077 }, { "epoch": 2.7481228668941977, "grad_norm": 1.2839994294301948, "learning_rate": 5.280448687905172e-07, "loss": 0.0209, "step": 12078 }, { "epoch": 2.74835039817975, "grad_norm": 0.859833006335185, "learning_rate": 5.279566001109813e-07, "loss": 0.0113, "step": 12079 }, { "epoch": 2.748577929465301, "grad_norm": 1.1263142381030564, "learning_rate": 5.278683334148351e-07, "loss": 0.0181, "step": 12080 }, { "epoch": 2.7488054607508534, "grad_norm": 0.5027799291686573, "learning_rate": 5.277800687038821e-07, "loss": 0.0054, "step": 12081 }, { "epoch": 2.749032992036405, "grad_norm": 0.4477592823107829, "learning_rate": 5.276918059799263e-07, "loss": 0.0071, "step": 12082 }, { "epoch": 2.749260523321957, "grad_norm": 0.9116203509050415, "learning_rate": 5.276035452447722e-07, "loss": 0.0116, "step": 12083 }, { "epoch": 2.7494880546075087, "grad_norm": 0.5451232039272934, "learning_rate": 5.275152865002228e-07, "loss": 0.0069, "step": 12084 }, { "epoch": 2.7497155858930604, "grad_norm": 0.7297184168440937, "learning_rate": 5.274270297480827e-07, "loss": 0.0118, "step": 12085 }, { "epoch": 2.749943117178612, "grad_norm": 0.9144257959309362, "learning_rate": 5.273387749901552e-07, "loss": 0.0183, "step": 12086 }, { "epoch": 2.750170648464164, "grad_norm": 0.9386679994187789, "learning_rate": 5.272505222282446e-07, "loss": 0.0077, "step": 12087 }, { "epoch": 2.7503981797497157, "grad_norm": 1.3259364696347, "learning_rate": 5.271622714641541e-07, "loss": 0.0137, "step": 12088 }, { "epoch": 2.7506257110352674, "grad_norm": 0.6284768340958554, "learning_rate": 5.270740226996874e-07, "loss": 0.0079, "step": 12089 }, { "epoch": 2.750853242320819, "grad_norm": 1.7146862904045752, "learning_rate": 5.269857759366488e-07, "loss": 0.0138, "step": 12090 }, { "epoch": 2.751080773606371, "grad_norm": 0.5766569957270437, "learning_rate": 5.26897531176841e-07, "loss": 0.0042, "step": 12091 }, { "epoch": 2.7513083048919227, "grad_norm": 1.1567068153447684, "learning_rate": 5.268092884220682e-07, "loss": 0.017, "step": 12092 }, { "epoch": 2.7515358361774744, "grad_norm": 0.8510754074922492, "learning_rate": 5.267210476741336e-07, "loss": 0.0076, "step": 12093 }, { "epoch": 2.751763367463026, "grad_norm": 0.8063924263357316, "learning_rate": 5.26632808934841e-07, "loss": 0.0117, "step": 12094 }, { "epoch": 2.751990898748578, "grad_norm": 1.0750777363577295, "learning_rate": 5.265445722059935e-07, "loss": 0.011, "step": 12095 }, { "epoch": 2.7522184300341297, "grad_norm": 1.090573624703611, "learning_rate": 5.264563374893945e-07, "loss": 0.0105, "step": 12096 }, { "epoch": 2.7524459613196814, "grad_norm": 0.9483808384728863, "learning_rate": 5.263681047868478e-07, "loss": 0.0223, "step": 12097 }, { "epoch": 2.752673492605233, "grad_norm": 0.1972971147490277, "learning_rate": 5.262798741001561e-07, "loss": 0.0008, "step": 12098 }, { "epoch": 2.752901023890785, "grad_norm": 1.0449941385711827, "learning_rate": 5.261916454311232e-07, "loss": 0.0108, "step": 12099 }, { "epoch": 2.7531285551763367, "grad_norm": 1.7538574041767978, "learning_rate": 5.261034187815522e-07, "loss": 0.022, "step": 12100 }, { "epoch": 2.7533560864618885, "grad_norm": 2.3785127534593005, "learning_rate": 5.260151941532458e-07, "loss": 0.0139, "step": 12101 }, { "epoch": 2.75358361774744, "grad_norm": 0.44032656039977824, "learning_rate": 5.259269715480078e-07, "loss": 0.0027, "step": 12102 }, { "epoch": 2.753811149032992, "grad_norm": 0.5833351244741664, "learning_rate": 5.25838750967641e-07, "loss": 0.0072, "step": 12103 }, { "epoch": 2.7540386803185437, "grad_norm": 1.5561006074201673, "learning_rate": 5.257505324139486e-07, "loss": 0.0142, "step": 12104 }, { "epoch": 2.7542662116040955, "grad_norm": 0.21023642095426429, "learning_rate": 5.256623158887334e-07, "loss": 0.0018, "step": 12105 }, { "epoch": 2.754493742889647, "grad_norm": 0.4512645176540536, "learning_rate": 5.255741013937987e-07, "loss": 0.0068, "step": 12106 }, { "epoch": 2.754721274175199, "grad_norm": 0.3058659847454547, "learning_rate": 5.254858889309474e-07, "loss": 0.002, "step": 12107 }, { "epoch": 2.7549488054607507, "grad_norm": 0.43421665559043743, "learning_rate": 5.253976785019819e-07, "loss": 0.0038, "step": 12108 }, { "epoch": 2.7551763367463025, "grad_norm": 1.4275799214948557, "learning_rate": 5.253094701087057e-07, "loss": 0.0406, "step": 12109 }, { "epoch": 2.755403868031854, "grad_norm": 0.6229485801150975, "learning_rate": 5.252212637529211e-07, "loss": 0.0123, "step": 12110 }, { "epoch": 2.755631399317406, "grad_norm": 0.6699882810129917, "learning_rate": 5.251330594364315e-07, "loss": 0.0057, "step": 12111 }, { "epoch": 2.755858930602958, "grad_norm": 1.0884787182553346, "learning_rate": 5.25044857161039e-07, "loss": 0.0107, "step": 12112 }, { "epoch": 2.7560864618885095, "grad_norm": 0.317282779022866, "learning_rate": 5.249566569285467e-07, "loss": 0.0009, "step": 12113 }, { "epoch": 2.7563139931740617, "grad_norm": 0.9629150867257472, "learning_rate": 5.248684587407572e-07, "loss": 0.0228, "step": 12114 }, { "epoch": 2.756541524459613, "grad_norm": 0.4406569431432372, "learning_rate": 5.247802625994728e-07, "loss": 0.0027, "step": 12115 }, { "epoch": 2.756769055745165, "grad_norm": 0.8987012328973808, "learning_rate": 5.246920685064963e-07, "loss": 0.0154, "step": 12116 }, { "epoch": 2.7569965870307165, "grad_norm": 0.6879515786654452, "learning_rate": 5.246038764636302e-07, "loss": 0.0047, "step": 12117 }, { "epoch": 2.7572241183162687, "grad_norm": 0.4409929234959327, "learning_rate": 5.245156864726772e-07, "loss": 0.0044, "step": 12118 }, { "epoch": 2.75745164960182, "grad_norm": 1.1851090869190084, "learning_rate": 5.244274985354394e-07, "loss": 0.011, "step": 12119 }, { "epoch": 2.757679180887372, "grad_norm": 0.9016180280298914, "learning_rate": 5.243393126537194e-07, "loss": 0.0175, "step": 12120 }, { "epoch": 2.757906712172924, "grad_norm": 1.3622604915078584, "learning_rate": 5.242511288293195e-07, "loss": 0.0115, "step": 12121 }, { "epoch": 2.7581342434584757, "grad_norm": 0.9198998530835089, "learning_rate": 5.241629470640418e-07, "loss": 0.012, "step": 12122 }, { "epoch": 2.7583617747440274, "grad_norm": 6.184298155638549, "learning_rate": 5.240747673596891e-07, "loss": 0.1096, "step": 12123 }, { "epoch": 2.758589306029579, "grad_norm": 0.7128717493727317, "learning_rate": 5.23986589718063e-07, "loss": 0.0064, "step": 12124 }, { "epoch": 2.758816837315131, "grad_norm": 0.9743880482229041, "learning_rate": 5.238984141409662e-07, "loss": 0.0099, "step": 12125 }, { "epoch": 2.7590443686006827, "grad_norm": 0.5858199056409921, "learning_rate": 5.238102406302008e-07, "loss": 0.0068, "step": 12126 }, { "epoch": 2.7592718998862344, "grad_norm": 0.7530191807179692, "learning_rate": 5.237220691875685e-07, "loss": 0.0101, "step": 12127 }, { "epoch": 2.759499431171786, "grad_norm": 0.4748900580143386, "learning_rate": 5.236338998148717e-07, "loss": 0.0047, "step": 12128 }, { "epoch": 2.759726962457338, "grad_norm": 1.0312017831402738, "learning_rate": 5.235457325139123e-07, "loss": 0.0211, "step": 12129 }, { "epoch": 2.7599544937428897, "grad_norm": 0.47050078872881324, "learning_rate": 5.234575672864926e-07, "loss": 0.0043, "step": 12130 }, { "epoch": 2.7601820250284415, "grad_norm": 1.0392651314536632, "learning_rate": 5.233694041344141e-07, "loss": 0.0102, "step": 12131 }, { "epoch": 2.760409556313993, "grad_norm": 1.1333999943169384, "learning_rate": 5.232812430594786e-07, "loss": 0.0221, "step": 12132 }, { "epoch": 2.760637087599545, "grad_norm": 0.5464708561552196, "learning_rate": 5.231930840634886e-07, "loss": 0.0042, "step": 12133 }, { "epoch": 2.7608646188850967, "grad_norm": 0.7394678897626542, "learning_rate": 5.231049271482453e-07, "loss": 0.0106, "step": 12134 }, { "epoch": 2.7610921501706485, "grad_norm": 1.0033283991871556, "learning_rate": 5.230167723155507e-07, "loss": 0.0141, "step": 12135 }, { "epoch": 2.7613196814562, "grad_norm": 0.7243656278525215, "learning_rate": 5.229286195672064e-07, "loss": 0.0113, "step": 12136 }, { "epoch": 2.761547212741752, "grad_norm": 1.0116829750199121, "learning_rate": 5.228404689050143e-07, "loss": 0.0092, "step": 12137 }, { "epoch": 2.7617747440273037, "grad_norm": 0.718391246053125, "learning_rate": 5.227523203307759e-07, "loss": 0.0079, "step": 12138 }, { "epoch": 2.7620022753128555, "grad_norm": 0.7385267131721649, "learning_rate": 5.226641738462928e-07, "loss": 0.0062, "step": 12139 }, { "epoch": 2.7622298065984072, "grad_norm": 1.0056960868887934, "learning_rate": 5.225760294533667e-07, "loss": 0.0166, "step": 12140 }, { "epoch": 2.762457337883959, "grad_norm": 0.44810915764628667, "learning_rate": 5.224878871537987e-07, "loss": 0.0038, "step": 12141 }, { "epoch": 2.7626848691695107, "grad_norm": 0.6414591610522793, "learning_rate": 5.223997469493907e-07, "loss": 0.0073, "step": 12142 }, { "epoch": 2.7629124004550625, "grad_norm": 0.5431998071346013, "learning_rate": 5.223116088419439e-07, "loss": 0.0092, "step": 12143 }, { "epoch": 2.7631399317406142, "grad_norm": 0.6823535476174455, "learning_rate": 5.222234728332601e-07, "loss": 0.0052, "step": 12144 }, { "epoch": 2.763367463026166, "grad_norm": 0.8697680013845125, "learning_rate": 5.221353389251399e-07, "loss": 0.0125, "step": 12145 }, { "epoch": 2.7635949943117177, "grad_norm": 0.9378136348307635, "learning_rate": 5.22047207119385e-07, "loss": 0.0062, "step": 12146 }, { "epoch": 2.7638225255972695, "grad_norm": 0.8403891105368738, "learning_rate": 5.219590774177969e-07, "loss": 0.011, "step": 12147 }, { "epoch": 2.7640500568828212, "grad_norm": 0.525140507053815, "learning_rate": 5.218709498221762e-07, "loss": 0.0048, "step": 12148 }, { "epoch": 2.764277588168373, "grad_norm": 0.8868923041631589, "learning_rate": 5.217828243343246e-07, "loss": 0.0078, "step": 12149 }, { "epoch": 2.7645051194539247, "grad_norm": 2.032983456575477, "learning_rate": 5.216947009560433e-07, "loss": 0.0157, "step": 12150 }, { "epoch": 2.764732650739477, "grad_norm": 0.5754343918629776, "learning_rate": 5.216065796891327e-07, "loss": 0.0069, "step": 12151 }, { "epoch": 2.7649601820250282, "grad_norm": 0.8748028805632866, "learning_rate": 5.215184605353944e-07, "loss": 0.0202, "step": 12152 }, { "epoch": 2.7651877133105804, "grad_norm": 0.44596293423813105, "learning_rate": 5.214303434966292e-07, "loss": 0.0036, "step": 12153 }, { "epoch": 2.7654152445961317, "grad_norm": 0.885650161869352, "learning_rate": 5.213422285746382e-07, "loss": 0.012, "step": 12154 }, { "epoch": 2.765642775881684, "grad_norm": 1.190778282261985, "learning_rate": 5.212541157712221e-07, "loss": 0.0085, "step": 12155 }, { "epoch": 2.7658703071672353, "grad_norm": 0.9849061595012073, "learning_rate": 5.21166005088182e-07, "loss": 0.0115, "step": 12156 }, { "epoch": 2.7660978384527874, "grad_norm": 0.4476513295099323, "learning_rate": 5.210778965273187e-07, "loss": 0.0046, "step": 12157 }, { "epoch": 2.7663253697383388, "grad_norm": 0.9864097126011947, "learning_rate": 5.209897900904325e-07, "loss": 0.0111, "step": 12158 }, { "epoch": 2.766552901023891, "grad_norm": 0.9965366034215066, "learning_rate": 5.209016857793248e-07, "loss": 0.0117, "step": 12159 }, { "epoch": 2.7667804323094427, "grad_norm": 1.2561840923756025, "learning_rate": 5.208135835957958e-07, "loss": 0.0039, "step": 12160 }, { "epoch": 2.7670079635949945, "grad_norm": 0.4954126417143121, "learning_rate": 5.207254835416466e-07, "loss": 0.0131, "step": 12161 }, { "epoch": 2.767235494880546, "grad_norm": 0.5001099158421396, "learning_rate": 5.206373856186772e-07, "loss": 0.0032, "step": 12162 }, { "epoch": 2.767463026166098, "grad_norm": 1.2859489774074186, "learning_rate": 5.205492898286888e-07, "loss": 0.0063, "step": 12163 }, { "epoch": 2.7676905574516497, "grad_norm": 0.6512696789411628, "learning_rate": 5.204611961734815e-07, "loss": 0.0106, "step": 12164 }, { "epoch": 2.7679180887372015, "grad_norm": 0.6844914529817726, "learning_rate": 5.203731046548559e-07, "loss": 0.0093, "step": 12165 }, { "epoch": 2.768145620022753, "grad_norm": 0.528189677934689, "learning_rate": 5.202850152746124e-07, "loss": 0.0046, "step": 12166 }, { "epoch": 2.768373151308305, "grad_norm": 1.8659096070374077, "learning_rate": 5.201969280345514e-07, "loss": 0.0226, "step": 12167 }, { "epoch": 2.7686006825938567, "grad_norm": 0.5578808664332775, "learning_rate": 5.201088429364734e-07, "loss": 0.007, "step": 12168 }, { "epoch": 2.7688282138794085, "grad_norm": 0.6820856290161957, "learning_rate": 5.200207599821786e-07, "loss": 0.0052, "step": 12169 }, { "epoch": 2.7690557451649602, "grad_norm": 0.8172146409887626, "learning_rate": 5.199326791734669e-07, "loss": 0.0103, "step": 12170 }, { "epoch": 2.769283276450512, "grad_norm": 0.8384321240719007, "learning_rate": 5.198446005121391e-07, "loss": 0.0238, "step": 12171 }, { "epoch": 2.7695108077360637, "grad_norm": 0.8506917307373494, "learning_rate": 5.197565239999948e-07, "loss": 0.0096, "step": 12172 }, { "epoch": 2.7697383390216155, "grad_norm": 1.7323890308939798, "learning_rate": 5.196684496388347e-07, "loss": 0.0202, "step": 12173 }, { "epoch": 2.7699658703071672, "grad_norm": 0.5557548121704935, "learning_rate": 5.195803774304583e-07, "loss": 0.0056, "step": 12174 }, { "epoch": 2.770193401592719, "grad_norm": 0.49425930284760516, "learning_rate": 5.194923073766661e-07, "loss": 0.0086, "step": 12175 }, { "epoch": 2.7704209328782707, "grad_norm": 0.9048228219152107, "learning_rate": 5.19404239479258e-07, "loss": 0.0081, "step": 12176 }, { "epoch": 2.7706484641638225, "grad_norm": 1.1038625884738404, "learning_rate": 5.193161737400336e-07, "loss": 0.0176, "step": 12177 }, { "epoch": 2.7708759954493742, "grad_norm": 0.8335718835979444, "learning_rate": 5.192281101607934e-07, "loss": 0.0136, "step": 12178 }, { "epoch": 2.771103526734926, "grad_norm": 1.041680190627887, "learning_rate": 5.191400487433365e-07, "loss": 0.0102, "step": 12179 }, { "epoch": 2.7713310580204777, "grad_norm": 0.5617681681343544, "learning_rate": 5.190519894894633e-07, "loss": 0.0038, "step": 12180 }, { "epoch": 2.7715585893060295, "grad_norm": 1.238534994384474, "learning_rate": 5.189639324009734e-07, "loss": 0.0109, "step": 12181 }, { "epoch": 2.7717861205915812, "grad_norm": 1.1003890654243191, "learning_rate": 5.188758774796666e-07, "loss": 0.0177, "step": 12182 }, { "epoch": 2.772013651877133, "grad_norm": 1.3465535623546006, "learning_rate": 5.187878247273425e-07, "loss": 0.0144, "step": 12183 }, { "epoch": 2.7722411831626848, "grad_norm": 0.45994899020709534, "learning_rate": 5.186997741458007e-07, "loss": 0.0041, "step": 12184 }, { "epoch": 2.7724687144482365, "grad_norm": 2.450937223972592, "learning_rate": 5.186117257368409e-07, "loss": 0.0221, "step": 12185 }, { "epoch": 2.7726962457337883, "grad_norm": 0.8425517913126942, "learning_rate": 5.185236795022624e-07, "loss": 0.0056, "step": 12186 }, { "epoch": 2.77292377701934, "grad_norm": 0.6158281453920817, "learning_rate": 5.184356354438651e-07, "loss": 0.0077, "step": 12187 }, { "epoch": 2.7731513083048918, "grad_norm": 1.7181919358124307, "learning_rate": 5.183475935634483e-07, "loss": 0.0205, "step": 12188 }, { "epoch": 2.7733788395904435, "grad_norm": 1.084363643945743, "learning_rate": 5.182595538628111e-07, "loss": 0.0137, "step": 12189 }, { "epoch": 2.7736063708759957, "grad_norm": 1.4960785919565673, "learning_rate": 5.181715163437534e-07, "loss": 0.0129, "step": 12190 }, { "epoch": 2.773833902161547, "grad_norm": 0.6498000880011671, "learning_rate": 5.18083481008074e-07, "loss": 0.0071, "step": 12191 }, { "epoch": 2.774061433447099, "grad_norm": 1.5776608058980994, "learning_rate": 5.179954478575725e-07, "loss": 0.0212, "step": 12192 }, { "epoch": 2.7742889647326505, "grad_norm": 2.258194964826193, "learning_rate": 5.179074168940481e-07, "loss": 0.0089, "step": 12193 }, { "epoch": 2.7745164960182027, "grad_norm": 0.46922809991016334, "learning_rate": 5.178193881193002e-07, "loss": 0.0048, "step": 12194 }, { "epoch": 2.774744027303754, "grad_norm": 1.9396550421578114, "learning_rate": 5.177313615351275e-07, "loss": 0.0248, "step": 12195 }, { "epoch": 2.774971558589306, "grad_norm": 0.8863851852437132, "learning_rate": 5.176433371433293e-07, "loss": 0.0119, "step": 12196 }, { "epoch": 2.7751990898748575, "grad_norm": 0.7837982010012365, "learning_rate": 5.175553149457048e-07, "loss": 0.0111, "step": 12197 }, { "epoch": 2.7754266211604097, "grad_norm": 1.2136728902486444, "learning_rate": 5.174672949440527e-07, "loss": 0.0164, "step": 12198 }, { "epoch": 2.7756541524459615, "grad_norm": 0.4885337510021463, "learning_rate": 5.173792771401723e-07, "loss": 0.0042, "step": 12199 }, { "epoch": 2.7758816837315132, "grad_norm": 0.8982019993958259, "learning_rate": 5.172912615358622e-07, "loss": 0.0093, "step": 12200 }, { "epoch": 2.776109215017065, "grad_norm": 1.4905678201990542, "learning_rate": 5.172032481329217e-07, "loss": 0.007, "step": 12201 }, { "epoch": 2.7763367463026167, "grad_norm": 0.9911047506159931, "learning_rate": 5.171152369331493e-07, "loss": 0.0189, "step": 12202 }, { "epoch": 2.7765642775881685, "grad_norm": 0.6477164111348246, "learning_rate": 5.170272279383438e-07, "loss": 0.0048, "step": 12203 }, { "epoch": 2.7767918088737202, "grad_norm": 1.2427080977977132, "learning_rate": 5.169392211503043e-07, "loss": 0.0192, "step": 12204 }, { "epoch": 2.777019340159272, "grad_norm": 0.9081777935381712, "learning_rate": 5.168512165708288e-07, "loss": 0.0157, "step": 12205 }, { "epoch": 2.7772468714448237, "grad_norm": 0.865785665380509, "learning_rate": 5.167632142017166e-07, "loss": 0.0145, "step": 12206 }, { "epoch": 2.7774744027303755, "grad_norm": 0.8396903281592089, "learning_rate": 5.166752140447662e-07, "loss": 0.0097, "step": 12207 }, { "epoch": 2.7777019340159272, "grad_norm": 1.1257713882810032, "learning_rate": 5.165872161017757e-07, "loss": 0.0319, "step": 12208 }, { "epoch": 2.777929465301479, "grad_norm": 0.7169225825231406, "learning_rate": 5.164992203745441e-07, "loss": 0.01, "step": 12209 }, { "epoch": 2.7781569965870307, "grad_norm": 1.1491696163229772, "learning_rate": 5.164112268648697e-07, "loss": 0.022, "step": 12210 }, { "epoch": 2.7783845278725825, "grad_norm": 0.4658099665641108, "learning_rate": 5.163232355745512e-07, "loss": 0.0069, "step": 12211 }, { "epoch": 2.7786120591581343, "grad_norm": 1.8197748933707163, "learning_rate": 5.162352465053863e-07, "loss": 0.0272, "step": 12212 }, { "epoch": 2.778839590443686, "grad_norm": 0.6565070137318494, "learning_rate": 5.161472596591741e-07, "loss": 0.0086, "step": 12213 }, { "epoch": 2.7790671217292378, "grad_norm": 0.5440888286969645, "learning_rate": 5.160592750377127e-07, "loss": 0.004, "step": 12214 }, { "epoch": 2.7792946530147895, "grad_norm": 0.702496628915317, "learning_rate": 5.159712926427997e-07, "loss": 0.0062, "step": 12215 }, { "epoch": 2.7795221843003413, "grad_norm": 1.301996249985904, "learning_rate": 5.158833124762341e-07, "loss": 0.0137, "step": 12216 }, { "epoch": 2.779749715585893, "grad_norm": 1.7079556304384296, "learning_rate": 5.157953345398136e-07, "loss": 0.0182, "step": 12217 }, { "epoch": 2.7799772468714448, "grad_norm": 0.5155461201444175, "learning_rate": 5.157073588353367e-07, "loss": 0.0014, "step": 12218 }, { "epoch": 2.7802047781569965, "grad_norm": 2.6912327384436123, "learning_rate": 5.15619385364601e-07, "loss": 0.0504, "step": 12219 }, { "epoch": 2.7804323094425483, "grad_norm": 0.5900661911651006, "learning_rate": 5.155314141294049e-07, "loss": 0.0038, "step": 12220 }, { "epoch": 2.7806598407281, "grad_norm": 0.47614368145145136, "learning_rate": 5.154434451315464e-07, "loss": 0.0048, "step": 12221 }, { "epoch": 2.7808873720136518, "grad_norm": 0.8522681472901376, "learning_rate": 5.153554783728229e-07, "loss": 0.0179, "step": 12222 }, { "epoch": 2.7811149032992035, "grad_norm": 0.7826996006026473, "learning_rate": 5.152675138550327e-07, "loss": 0.007, "step": 12223 }, { "epoch": 2.7813424345847553, "grad_norm": 1.1307041516103058, "learning_rate": 5.151795515799734e-07, "loss": 0.0239, "step": 12224 }, { "epoch": 2.781569965870307, "grad_norm": 0.5689605587456935, "learning_rate": 5.150915915494432e-07, "loss": 0.0084, "step": 12225 }, { "epoch": 2.781797497155859, "grad_norm": 1.1793871959331583, "learning_rate": 5.150036337652396e-07, "loss": 0.0092, "step": 12226 }, { "epoch": 2.7820250284414105, "grad_norm": 0.7355729771131685, "learning_rate": 5.1491567822916e-07, "loss": 0.0048, "step": 12227 }, { "epoch": 2.7822525597269623, "grad_norm": 0.8484932114375013, "learning_rate": 5.148277249430026e-07, "loss": 0.0073, "step": 12228 }, { "epoch": 2.7824800910125145, "grad_norm": 1.3358141897475275, "learning_rate": 5.147397739085646e-07, "loss": 0.0118, "step": 12229 }, { "epoch": 2.782707622298066, "grad_norm": 1.0436842131362185, "learning_rate": 5.146518251276437e-07, "loss": 0.0148, "step": 12230 }, { "epoch": 2.782935153583618, "grad_norm": 0.6894635855384859, "learning_rate": 5.145638786020373e-07, "loss": 0.0098, "step": 12231 }, { "epoch": 2.7831626848691693, "grad_norm": 0.21912242068794124, "learning_rate": 5.144759343335433e-07, "loss": 0.0013, "step": 12232 }, { "epoch": 2.7833902161547215, "grad_norm": 0.7437287772347932, "learning_rate": 5.143879923239586e-07, "loss": 0.0099, "step": 12233 }, { "epoch": 2.783617747440273, "grad_norm": 0.6803827322046856, "learning_rate": 5.143000525750805e-07, "loss": 0.0042, "step": 12234 }, { "epoch": 2.783845278725825, "grad_norm": 0.5667706608105139, "learning_rate": 5.142121150887071e-07, "loss": 0.0097, "step": 12235 }, { "epoch": 2.7840728100113763, "grad_norm": 0.5159620357210135, "learning_rate": 5.141241798666347e-07, "loss": 0.0058, "step": 12236 }, { "epoch": 2.7843003412969285, "grad_norm": 0.42295660574255994, "learning_rate": 5.140362469106612e-07, "loss": 0.0058, "step": 12237 }, { "epoch": 2.7845278725824802, "grad_norm": 0.4536124734701248, "learning_rate": 5.139483162225835e-07, "loss": 0.0051, "step": 12238 }, { "epoch": 2.784755403868032, "grad_norm": 0.6442144033580876, "learning_rate": 5.138603878041991e-07, "loss": 0.0055, "step": 12239 }, { "epoch": 2.7849829351535837, "grad_norm": 0.41929414614378135, "learning_rate": 5.137724616573047e-07, "loss": 0.0049, "step": 12240 }, { "epoch": 2.7852104664391355, "grad_norm": 0.8581230138666565, "learning_rate": 5.136845377836973e-07, "loss": 0.0059, "step": 12241 }, { "epoch": 2.7854379977246873, "grad_norm": 0.5225954925240587, "learning_rate": 5.135966161851743e-07, "loss": 0.0053, "step": 12242 }, { "epoch": 2.785665529010239, "grad_norm": 0.6497007014003239, "learning_rate": 5.135086968635321e-07, "loss": 0.0066, "step": 12243 }, { "epoch": 2.7858930602957908, "grad_norm": 0.8511098213828515, "learning_rate": 5.134207798205684e-07, "loss": 0.0112, "step": 12244 }, { "epoch": 2.7861205915813425, "grad_norm": 2.004727824935709, "learning_rate": 5.133328650580796e-07, "loss": 0.0292, "step": 12245 }, { "epoch": 2.7863481228668943, "grad_norm": 0.7902411834857392, "learning_rate": 5.13244952577862e-07, "loss": 0.0137, "step": 12246 }, { "epoch": 2.786575654152446, "grad_norm": 0.9655486741324417, "learning_rate": 5.131570423817134e-07, "loss": 0.0061, "step": 12247 }, { "epoch": 2.7868031854379978, "grad_norm": 0.7386866504594938, "learning_rate": 5.130691344714298e-07, "loss": 0.0059, "step": 12248 }, { "epoch": 2.7870307167235495, "grad_norm": 2.146593731387448, "learning_rate": 5.129812288488081e-07, "loss": 0.0142, "step": 12249 }, { "epoch": 2.7872582480091013, "grad_norm": 0.621574054958032, "learning_rate": 5.12893325515645e-07, "loss": 0.008, "step": 12250 }, { "epoch": 2.787485779294653, "grad_norm": 0.521282217971791, "learning_rate": 5.128054244737371e-07, "loss": 0.0044, "step": 12251 }, { "epoch": 2.7877133105802048, "grad_norm": 1.1952453893674198, "learning_rate": 5.127175257248808e-07, "loss": 0.0126, "step": 12252 }, { "epoch": 2.7879408418657565, "grad_norm": 0.7504282688493823, "learning_rate": 5.126296292708724e-07, "loss": 0.0119, "step": 12253 }, { "epoch": 2.7881683731513083, "grad_norm": 0.6709476270254983, "learning_rate": 5.12541735113509e-07, "loss": 0.0031, "step": 12254 }, { "epoch": 2.78839590443686, "grad_norm": 1.105958771584379, "learning_rate": 5.124538432545863e-07, "loss": 0.0084, "step": 12255 }, { "epoch": 2.788623435722412, "grad_norm": 1.099711062985296, "learning_rate": 5.12365953695901e-07, "loss": 0.0067, "step": 12256 }, { "epoch": 2.7888509670079635, "grad_norm": 0.763479672678375, "learning_rate": 5.122780664392494e-07, "loss": 0.012, "step": 12257 }, { "epoch": 2.7890784982935153, "grad_norm": 0.5228007828531142, "learning_rate": 5.121901814864274e-07, "loss": 0.0049, "step": 12258 }, { "epoch": 2.789306029579067, "grad_norm": 0.7697025394574826, "learning_rate": 5.121022988392318e-07, "loss": 0.0045, "step": 12259 }, { "epoch": 2.789533560864619, "grad_norm": 0.501357423463927, "learning_rate": 5.120144184994582e-07, "loss": 0.0034, "step": 12260 }, { "epoch": 2.7897610921501705, "grad_norm": 0.6372331675975574, "learning_rate": 5.119265404689032e-07, "loss": 0.005, "step": 12261 }, { "epoch": 2.7899886234357223, "grad_norm": 1.1750388461803307, "learning_rate": 5.118386647493624e-07, "loss": 0.0208, "step": 12262 }, { "epoch": 2.790216154721274, "grad_norm": 0.7945180059840947, "learning_rate": 5.117507913426323e-07, "loss": 0.012, "step": 12263 }, { "epoch": 2.790443686006826, "grad_norm": 1.536561470729175, "learning_rate": 5.116629202505086e-07, "loss": 0.0183, "step": 12264 }, { "epoch": 2.7906712172923775, "grad_norm": 1.2821040109399051, "learning_rate": 5.115750514747869e-07, "loss": 0.0125, "step": 12265 }, { "epoch": 2.7908987485779293, "grad_norm": 7.101753417987993, "learning_rate": 5.114871850172637e-07, "loss": 0.1315, "step": 12266 }, { "epoch": 2.791126279863481, "grad_norm": 0.6822968250572052, "learning_rate": 5.113993208797344e-07, "loss": 0.0079, "step": 12267 }, { "epoch": 2.7913538111490332, "grad_norm": 1.491434672786853, "learning_rate": 5.113114590639952e-07, "loss": 0.0183, "step": 12268 }, { "epoch": 2.7915813424345846, "grad_norm": 1.35183271678378, "learning_rate": 5.112235995718413e-07, "loss": 0.0198, "step": 12269 }, { "epoch": 2.7918088737201368, "grad_norm": 0.9797281042064265, "learning_rate": 5.111357424050688e-07, "loss": 0.0101, "step": 12270 }, { "epoch": 2.792036405005688, "grad_norm": 0.8792684376051225, "learning_rate": 5.110478875654733e-07, "loss": 0.0092, "step": 12271 }, { "epoch": 2.7922639362912403, "grad_norm": 1.2285707758695268, "learning_rate": 5.1096003505485e-07, "loss": 0.0158, "step": 12272 }, { "epoch": 2.7924914675767916, "grad_norm": 0.9836563019300768, "learning_rate": 5.108721848749948e-07, "loss": 0.0071, "step": 12273 }, { "epoch": 2.7927189988623438, "grad_norm": 0.7707964000867568, "learning_rate": 5.107843370277031e-07, "loss": 0.0077, "step": 12274 }, { "epoch": 2.792946530147895, "grad_norm": 1.011084287104494, "learning_rate": 5.106964915147706e-07, "loss": 0.0092, "step": 12275 }, { "epoch": 2.7931740614334473, "grad_norm": 1.3602418200404656, "learning_rate": 5.106086483379924e-07, "loss": 0.0211, "step": 12276 }, { "epoch": 2.793401592718999, "grad_norm": 0.8241964085344385, "learning_rate": 5.105208074991637e-07, "loss": 0.0117, "step": 12277 }, { "epoch": 2.7936291240045508, "grad_norm": 0.2796718124503555, "learning_rate": 5.104329690000803e-07, "loss": 0.002, "step": 12278 }, { "epoch": 2.7938566552901025, "grad_norm": 0.8077280178970869, "learning_rate": 5.103451328425369e-07, "loss": 0.0108, "step": 12279 }, { "epoch": 2.7940841865756543, "grad_norm": 0.9359067395936889, "learning_rate": 5.102572990283292e-07, "loss": 0.0044, "step": 12280 }, { "epoch": 2.794311717861206, "grad_norm": 0.6778175688071185, "learning_rate": 5.101694675592521e-07, "loss": 0.0064, "step": 12281 }, { "epoch": 2.7945392491467578, "grad_norm": 0.5936850302204248, "learning_rate": 5.100816384371011e-07, "loss": 0.0071, "step": 12282 }, { "epoch": 2.7947667804323095, "grad_norm": 0.7608992609807901, "learning_rate": 5.099938116636706e-07, "loss": 0.0151, "step": 12283 }, { "epoch": 2.7949943117178613, "grad_norm": 0.7531854553845009, "learning_rate": 5.09905987240756e-07, "loss": 0.0122, "step": 12284 }, { "epoch": 2.795221843003413, "grad_norm": 1.0630735156186042, "learning_rate": 5.098181651701525e-07, "loss": 0.0319, "step": 12285 }, { "epoch": 2.795449374288965, "grad_norm": 0.5576909524816067, "learning_rate": 5.097303454536544e-07, "loss": 0.0045, "step": 12286 }, { "epoch": 2.7956769055745165, "grad_norm": 0.713886857082896, "learning_rate": 5.096425280930571e-07, "loss": 0.0075, "step": 12287 }, { "epoch": 2.7959044368600683, "grad_norm": 0.7053470882126169, "learning_rate": 5.095547130901551e-07, "loss": 0.0056, "step": 12288 }, { "epoch": 2.79613196814562, "grad_norm": 1.1658659762828358, "learning_rate": 5.094669004467437e-07, "loss": 0.0184, "step": 12289 }, { "epoch": 2.796359499431172, "grad_norm": 0.3081631484008799, "learning_rate": 5.09379090164617e-07, "loss": 0.0021, "step": 12290 }, { "epoch": 2.7965870307167235, "grad_norm": 0.9096872046371701, "learning_rate": 5.0929128224557e-07, "loss": 0.0082, "step": 12291 }, { "epoch": 2.7968145620022753, "grad_norm": 0.7501303275823573, "learning_rate": 5.092034766913974e-07, "loss": 0.0115, "step": 12292 }, { "epoch": 2.797042093287827, "grad_norm": 1.3351790079255241, "learning_rate": 5.091156735038934e-07, "loss": 0.0112, "step": 12293 }, { "epoch": 2.797269624573379, "grad_norm": 1.4080315094278837, "learning_rate": 5.090278726848528e-07, "loss": 0.0203, "step": 12294 }, { "epoch": 2.7974971558589306, "grad_norm": 0.33843242942228197, "learning_rate": 5.089400742360705e-07, "loss": 0.0033, "step": 12295 }, { "epoch": 2.7977246871444823, "grad_norm": 1.7036339353018222, "learning_rate": 5.088522781593401e-07, "loss": 0.0147, "step": 12296 }, { "epoch": 2.797952218430034, "grad_norm": 0.5188485078755833, "learning_rate": 5.087644844564567e-07, "loss": 0.005, "step": 12297 }, { "epoch": 2.798179749715586, "grad_norm": 0.3302299934972675, "learning_rate": 5.086766931292141e-07, "loss": 0.0034, "step": 12298 }, { "epoch": 2.7984072810011376, "grad_norm": 0.6389618653152777, "learning_rate": 5.085889041794071e-07, "loss": 0.0037, "step": 12299 }, { "epoch": 2.7986348122866893, "grad_norm": 0.5257916717742785, "learning_rate": 5.085011176088295e-07, "loss": 0.0031, "step": 12300 }, { "epoch": 2.798862343572241, "grad_norm": 0.7853744684281502, "learning_rate": 5.084133334192758e-07, "loss": 0.0122, "step": 12301 }, { "epoch": 2.799089874857793, "grad_norm": 1.460673294224888, "learning_rate": 5.083255516125401e-07, "loss": 0.0287, "step": 12302 }, { "epoch": 2.7993174061433446, "grad_norm": 1.2606440214984023, "learning_rate": 5.082377721904164e-07, "loss": 0.0172, "step": 12303 }, { "epoch": 2.7995449374288963, "grad_norm": 0.3817865658468556, "learning_rate": 5.081499951546988e-07, "loss": 0.0033, "step": 12304 }, { "epoch": 2.799772468714448, "grad_norm": 0.4075728146920454, "learning_rate": 5.080622205071811e-07, "loss": 0.0046, "step": 12305 }, { "epoch": 2.8, "grad_norm": 0.5511299990471693, "learning_rate": 5.079744482496577e-07, "loss": 0.002, "step": 12306 }, { "epoch": 2.800227531285552, "grad_norm": 0.5902331806780535, "learning_rate": 5.07886678383922e-07, "loss": 0.0038, "step": 12307 }, { "epoch": 2.8004550625711033, "grad_norm": 0.6857016324962125, "learning_rate": 5.077989109117685e-07, "loss": 0.0095, "step": 12308 }, { "epoch": 2.8006825938566555, "grad_norm": 0.8443162912633219, "learning_rate": 5.077111458349905e-07, "loss": 0.0107, "step": 12309 }, { "epoch": 2.800910125142207, "grad_norm": 0.6398587991316066, "learning_rate": 5.076233831553816e-07, "loss": 0.0108, "step": 12310 }, { "epoch": 2.801137656427759, "grad_norm": 1.0762906842531936, "learning_rate": 5.075356228747362e-07, "loss": 0.01, "step": 12311 }, { "epoch": 2.8013651877133103, "grad_norm": 0.19394868156805395, "learning_rate": 5.074478649948472e-07, "loss": 0.0012, "step": 12312 }, { "epoch": 2.8015927189988625, "grad_norm": 1.0174644371264217, "learning_rate": 5.073601095175086e-07, "loss": 0.0138, "step": 12313 }, { "epoch": 2.801820250284414, "grad_norm": 0.68227589539657, "learning_rate": 5.072723564445142e-07, "loss": 0.0068, "step": 12314 }, { "epoch": 2.802047781569966, "grad_norm": 0.595959826141777, "learning_rate": 5.071846057776569e-07, "loss": 0.01, "step": 12315 }, { "epoch": 2.802275312855518, "grad_norm": 0.9044027514912767, "learning_rate": 5.070968575187306e-07, "loss": 0.0109, "step": 12316 }, { "epoch": 2.8025028441410695, "grad_norm": 0.7565270295942014, "learning_rate": 5.070091116695285e-07, "loss": 0.0127, "step": 12317 }, { "epoch": 2.8027303754266213, "grad_norm": 0.48264253041035293, "learning_rate": 5.069213682318442e-07, "loss": 0.0029, "step": 12318 }, { "epoch": 2.802957906712173, "grad_norm": 0.5797207248846721, "learning_rate": 5.068336272074708e-07, "loss": 0.0015, "step": 12319 }, { "epoch": 2.803185437997725, "grad_norm": 0.7711646127977183, "learning_rate": 5.067458885982017e-07, "loss": 0.0222, "step": 12320 }, { "epoch": 2.8034129692832765, "grad_norm": 1.3748101985360357, "learning_rate": 5.066581524058303e-07, "loss": 0.0272, "step": 12321 }, { "epoch": 2.8036405005688283, "grad_norm": 0.814765314133962, "learning_rate": 5.065704186321492e-07, "loss": 0.0064, "step": 12322 }, { "epoch": 2.80386803185438, "grad_norm": 0.7191179881431865, "learning_rate": 5.064826872789519e-07, "loss": 0.013, "step": 12323 }, { "epoch": 2.804095563139932, "grad_norm": 0.55497862998972, "learning_rate": 5.063949583480314e-07, "loss": 0.0023, "step": 12324 }, { "epoch": 2.8043230944254836, "grad_norm": 1.1945598212518445, "learning_rate": 5.063072318411809e-07, "loss": 0.0128, "step": 12325 }, { "epoch": 2.8045506257110353, "grad_norm": 0.6482097547811637, "learning_rate": 5.06219507760193e-07, "loss": 0.0092, "step": 12326 }, { "epoch": 2.804778156996587, "grad_norm": 1.2137921054552627, "learning_rate": 5.06131786106861e-07, "loss": 0.0048, "step": 12327 }, { "epoch": 2.805005688282139, "grad_norm": 1.0041981007457514, "learning_rate": 5.060440668829776e-07, "loss": 0.016, "step": 12328 }, { "epoch": 2.8052332195676906, "grad_norm": 0.168566609259696, "learning_rate": 5.059563500903355e-07, "loss": 0.001, "step": 12329 }, { "epoch": 2.8054607508532423, "grad_norm": 0.6663286918641061, "learning_rate": 5.058686357307276e-07, "loss": 0.0066, "step": 12330 }, { "epoch": 2.805688282138794, "grad_norm": 0.7064653985321233, "learning_rate": 5.057809238059466e-07, "loss": 0.0188, "step": 12331 }, { "epoch": 2.805915813424346, "grad_norm": 0.34823254003505427, "learning_rate": 5.056932143177853e-07, "loss": 0.004, "step": 12332 }, { "epoch": 2.8061433447098976, "grad_norm": 1.2989796661290913, "learning_rate": 5.056055072680362e-07, "loss": 0.0207, "step": 12333 }, { "epoch": 2.8063708759954493, "grad_norm": 0.8593142437555377, "learning_rate": 5.055178026584915e-07, "loss": 0.0081, "step": 12334 }, { "epoch": 2.806598407281001, "grad_norm": 1.290494668461789, "learning_rate": 5.054301004909447e-07, "loss": 0.0089, "step": 12335 }, { "epoch": 2.806825938566553, "grad_norm": 1.2354249685596643, "learning_rate": 5.053424007671871e-07, "loss": 0.0061, "step": 12336 }, { "epoch": 2.8070534698521046, "grad_norm": 1.2878800506762595, "learning_rate": 5.05254703489012e-07, "loss": 0.0237, "step": 12337 }, { "epoch": 2.8072810011376563, "grad_norm": 0.9566913651853753, "learning_rate": 5.051670086582112e-07, "loss": 0.0157, "step": 12338 }, { "epoch": 2.807508532423208, "grad_norm": 0.39004038400292107, "learning_rate": 5.050793162765777e-07, "loss": 0.0021, "step": 12339 }, { "epoch": 2.80773606370876, "grad_norm": 0.47309347679297836, "learning_rate": 5.049916263459031e-07, "loss": 0.0051, "step": 12340 }, { "epoch": 2.8079635949943116, "grad_norm": 1.1221136043938131, "learning_rate": 5.049039388679798e-07, "loss": 0.0182, "step": 12341 }, { "epoch": 2.8081911262798633, "grad_norm": 0.3005024222921557, "learning_rate": 5.048162538446003e-07, "loss": 0.0013, "step": 12342 }, { "epoch": 2.808418657565415, "grad_norm": 0.6737175218368349, "learning_rate": 5.047285712775562e-07, "loss": 0.0202, "step": 12343 }, { "epoch": 2.8086461888509673, "grad_norm": 0.8998125449947902, "learning_rate": 5.046408911686399e-07, "loss": 0.0073, "step": 12344 }, { "epoch": 2.8088737201365186, "grad_norm": 0.739012054820928, "learning_rate": 5.045532135196433e-07, "loss": 0.0057, "step": 12345 }, { "epoch": 2.809101251422071, "grad_norm": 0.846321731229178, "learning_rate": 5.044655383323586e-07, "loss": 0.0125, "step": 12346 }, { "epoch": 2.809328782707622, "grad_norm": 0.5892547782977333, "learning_rate": 5.043778656085776e-07, "loss": 0.0082, "step": 12347 }, { "epoch": 2.8095563139931743, "grad_norm": 0.19156357870233232, "learning_rate": 5.042901953500918e-07, "loss": 0.0011, "step": 12348 }, { "epoch": 2.8097838452787256, "grad_norm": 0.8472890627407195, "learning_rate": 5.042025275586937e-07, "loss": 0.012, "step": 12349 }, { "epoch": 2.810011376564278, "grad_norm": 0.7675314300501085, "learning_rate": 5.041148622361744e-07, "loss": 0.0054, "step": 12350 }, { "epoch": 2.810238907849829, "grad_norm": 2.6110106226529926, "learning_rate": 5.040271993843261e-07, "loss": 0.0205, "step": 12351 }, { "epoch": 2.8104664391353813, "grad_norm": 1.486676002669598, "learning_rate": 5.039395390049403e-07, "loss": 0.0246, "step": 12352 }, { "epoch": 2.8106939704209326, "grad_norm": 0.8789790899490879, "learning_rate": 5.038518810998085e-07, "loss": 0.0087, "step": 12353 }, { "epoch": 2.810921501706485, "grad_norm": 0.8155576990960206, "learning_rate": 5.037642256707225e-07, "loss": 0.01, "step": 12354 }, { "epoch": 2.8111490329920366, "grad_norm": 0.7394244449636862, "learning_rate": 5.036765727194735e-07, "loss": 0.0089, "step": 12355 }, { "epoch": 2.8113765642775883, "grad_norm": 0.6240962776768268, "learning_rate": 5.035889222478535e-07, "loss": 0.0067, "step": 12356 }, { "epoch": 2.81160409556314, "grad_norm": 1.1880403697233282, "learning_rate": 5.035012742576532e-07, "loss": 0.0209, "step": 12357 }, { "epoch": 2.811831626848692, "grad_norm": 0.5698906833772497, "learning_rate": 5.034136287506645e-07, "loss": 0.0089, "step": 12358 }, { "epoch": 2.8120591581342436, "grad_norm": 0.6889944552911644, "learning_rate": 5.033259857286788e-07, "loss": 0.0073, "step": 12359 }, { "epoch": 2.8122866894197953, "grad_norm": 0.8211471069838945, "learning_rate": 5.032383451934867e-07, "loss": 0.0109, "step": 12360 }, { "epoch": 2.812514220705347, "grad_norm": 0.653142855640482, "learning_rate": 5.0315070714688e-07, "loss": 0.01, "step": 12361 }, { "epoch": 2.812741751990899, "grad_norm": 0.7744913509280733, "learning_rate": 5.030630715906495e-07, "loss": 0.0113, "step": 12362 }, { "epoch": 2.8129692832764506, "grad_norm": 1.0214026102407592, "learning_rate": 5.029754385265869e-07, "loss": 0.0195, "step": 12363 }, { "epoch": 2.8131968145620023, "grad_norm": 0.4768994649841856, "learning_rate": 5.028878079564827e-07, "loss": 0.0033, "step": 12364 }, { "epoch": 2.813424345847554, "grad_norm": 0.6154996329191611, "learning_rate": 5.02800179882128e-07, "loss": 0.0055, "step": 12365 }, { "epoch": 2.813651877133106, "grad_norm": 1.4559504580812166, "learning_rate": 5.02712554305314e-07, "loss": 0.0295, "step": 12366 }, { "epoch": 2.8138794084186576, "grad_norm": 0.626847704534161, "learning_rate": 5.026249312278309e-07, "loss": 0.006, "step": 12367 }, { "epoch": 2.8141069397042093, "grad_norm": 0.9726345490045167, "learning_rate": 5.025373106514707e-07, "loss": 0.0044, "step": 12368 }, { "epoch": 2.814334470989761, "grad_norm": 0.8538510741235861, "learning_rate": 5.024496925780232e-07, "loss": 0.0072, "step": 12369 }, { "epoch": 2.814562002275313, "grad_norm": 0.9579027195902572, "learning_rate": 5.023620770092797e-07, "loss": 0.0129, "step": 12370 }, { "epoch": 2.8147895335608646, "grad_norm": 1.2024820134030088, "learning_rate": 5.022744639470309e-07, "loss": 0.0207, "step": 12371 }, { "epoch": 2.8150170648464163, "grad_norm": 1.3443168327208543, "learning_rate": 5.02186853393067e-07, "loss": 0.0118, "step": 12372 }, { "epoch": 2.815244596131968, "grad_norm": 0.5715075763621112, "learning_rate": 5.020992453491791e-07, "loss": 0.0081, "step": 12373 }, { "epoch": 2.81547212741752, "grad_norm": 0.712948831561136, "learning_rate": 5.020116398171574e-07, "loss": 0.0059, "step": 12374 }, { "epoch": 2.8156996587030716, "grad_norm": 0.8806348922578436, "learning_rate": 5.019240367987927e-07, "loss": 0.0112, "step": 12375 }, { "epoch": 2.8159271899886233, "grad_norm": 0.5332942456350261, "learning_rate": 5.01836436295875e-07, "loss": 0.0048, "step": 12376 }, { "epoch": 2.816154721274175, "grad_norm": 0.8289456438401365, "learning_rate": 5.017488383101952e-07, "loss": 0.0154, "step": 12377 }, { "epoch": 2.816382252559727, "grad_norm": 0.8010726230503683, "learning_rate": 5.016612428435436e-07, "loss": 0.0089, "step": 12378 }, { "epoch": 2.8166097838452786, "grad_norm": 0.9002284465461421, "learning_rate": 5.0157364989771e-07, "loss": 0.0098, "step": 12379 }, { "epoch": 2.8168373151308304, "grad_norm": 1.0564955838553294, "learning_rate": 5.014860594744851e-07, "loss": 0.0189, "step": 12380 }, { "epoch": 2.817064846416382, "grad_norm": 1.316908489587945, "learning_rate": 5.013984715756588e-07, "loss": 0.0172, "step": 12381 }, { "epoch": 2.817292377701934, "grad_norm": 0.9063826994705297, "learning_rate": 5.013108862030216e-07, "loss": 0.0093, "step": 12382 }, { "epoch": 2.817519908987486, "grad_norm": 1.1373614954816809, "learning_rate": 5.012233033583632e-07, "loss": 0.0133, "step": 12383 }, { "epoch": 2.8177474402730374, "grad_norm": 0.18063295515201297, "learning_rate": 5.011357230434738e-07, "loss": 0.0013, "step": 12384 }, { "epoch": 2.8179749715585896, "grad_norm": 0.6759786686594076, "learning_rate": 5.010481452601435e-07, "loss": 0.0035, "step": 12385 }, { "epoch": 2.818202502844141, "grad_norm": 0.5672386801222908, "learning_rate": 5.009605700101619e-07, "loss": 0.0067, "step": 12386 }, { "epoch": 2.818430034129693, "grad_norm": 0.7188393171363927, "learning_rate": 5.008729972953192e-07, "loss": 0.0085, "step": 12387 }, { "epoch": 2.8186575654152444, "grad_norm": 0.7866581201733361, "learning_rate": 5.00785427117405e-07, "loss": 0.0169, "step": 12388 }, { "epoch": 2.8188850967007966, "grad_norm": 2.558303289319003, "learning_rate": 5.006978594782094e-07, "loss": 0.0325, "step": 12389 }, { "epoch": 2.819112627986348, "grad_norm": 0.792523324326396, "learning_rate": 5.006102943795219e-07, "loss": 0.0043, "step": 12390 }, { "epoch": 2.8193401592719, "grad_norm": 1.2092649665354884, "learning_rate": 5.005227318231319e-07, "loss": 0.0261, "step": 12391 }, { "epoch": 2.8195676905574514, "grad_norm": 0.521599210751114, "learning_rate": 5.004351718108296e-07, "loss": 0.0047, "step": 12392 }, { "epoch": 2.8197952218430036, "grad_norm": 0.9098315896534416, "learning_rate": 5.00347614344404e-07, "loss": 0.0106, "step": 12393 }, { "epoch": 2.8200227531285553, "grad_norm": 0.5934754452197804, "learning_rate": 5.00260059425645e-07, "loss": 0.0084, "step": 12394 }, { "epoch": 2.820250284414107, "grad_norm": 0.7824802581593417, "learning_rate": 5.001725070563418e-07, "loss": 0.0135, "step": 12395 }, { "epoch": 2.820477815699659, "grad_norm": 0.569089183264535, "learning_rate": 5.000849572382842e-07, "loss": 0.0043, "step": 12396 }, { "epoch": 2.8207053469852106, "grad_norm": 1.3283507452588312, "learning_rate": 4.999974099732612e-07, "loss": 0.0246, "step": 12397 }, { "epoch": 2.8209328782707623, "grad_norm": 0.6256947321759803, "learning_rate": 4.999098652630619e-07, "loss": 0.0069, "step": 12398 }, { "epoch": 2.821160409556314, "grad_norm": 0.9416264620297027, "learning_rate": 4.998223231094764e-07, "loss": 0.0124, "step": 12399 }, { "epoch": 2.821387940841866, "grad_norm": 0.7436403041304972, "learning_rate": 4.99734783514293e-07, "loss": 0.0123, "step": 12400 }, { "epoch": 2.8216154721274176, "grad_norm": 0.7028252068109339, "learning_rate": 4.996472464793013e-07, "loss": 0.0085, "step": 12401 }, { "epoch": 2.8218430034129693, "grad_norm": 2.370782730250357, "learning_rate": 4.995597120062905e-07, "loss": 0.0386, "step": 12402 }, { "epoch": 2.822070534698521, "grad_norm": 0.38980066407089164, "learning_rate": 4.994721800970491e-07, "loss": 0.0034, "step": 12403 }, { "epoch": 2.822298065984073, "grad_norm": 0.8358366913134648, "learning_rate": 4.993846507533666e-07, "loss": 0.0075, "step": 12404 }, { "epoch": 2.8225255972696246, "grad_norm": 1.14694045900316, "learning_rate": 4.992971239770318e-07, "loss": 0.0191, "step": 12405 }, { "epoch": 2.8227531285551763, "grad_norm": 1.0739811722551613, "learning_rate": 4.992095997698337e-07, "loss": 0.0138, "step": 12406 }, { "epoch": 2.822980659840728, "grad_norm": 0.3849510345787068, "learning_rate": 4.991220781335607e-07, "loss": 0.0027, "step": 12407 }, { "epoch": 2.82320819112628, "grad_norm": 0.7043606076999218, "learning_rate": 4.990345590700021e-07, "loss": 0.0081, "step": 12408 }, { "epoch": 2.8234357224118316, "grad_norm": 0.8398843440666922, "learning_rate": 4.989470425809466e-07, "loss": 0.0072, "step": 12409 }, { "epoch": 2.8236632536973834, "grad_norm": 1.2249600260801923, "learning_rate": 4.988595286681824e-07, "loss": 0.0211, "step": 12410 }, { "epoch": 2.823890784982935, "grad_norm": 0.713130923050544, "learning_rate": 4.987720173334985e-07, "loss": 0.0099, "step": 12411 }, { "epoch": 2.824118316268487, "grad_norm": 0.3454049913140197, "learning_rate": 4.986845085786833e-07, "loss": 0.0026, "step": 12412 }, { "epoch": 2.8243458475540386, "grad_norm": 2.2633418543506734, "learning_rate": 4.985970024055256e-07, "loss": 0.0447, "step": 12413 }, { "epoch": 2.8245733788395904, "grad_norm": 0.5796324892164759, "learning_rate": 4.985094988158134e-07, "loss": 0.0057, "step": 12414 }, { "epoch": 2.824800910125142, "grad_norm": 0.6721549314710036, "learning_rate": 4.984219978113357e-07, "loss": 0.009, "step": 12415 }, { "epoch": 2.825028441410694, "grad_norm": 0.646128489800546, "learning_rate": 4.983344993938805e-07, "loss": 0.0101, "step": 12416 }, { "epoch": 2.8252559726962456, "grad_norm": 0.34993577483484223, "learning_rate": 4.982470035652359e-07, "loss": 0.0021, "step": 12417 }, { "epoch": 2.8254835039817974, "grad_norm": 1.764951668804052, "learning_rate": 4.981595103271906e-07, "loss": 0.0141, "step": 12418 }, { "epoch": 2.825711035267349, "grad_norm": 1.1755889181798094, "learning_rate": 4.980720196815325e-07, "loss": 0.0245, "step": 12419 }, { "epoch": 2.825938566552901, "grad_norm": 1.336073759657574, "learning_rate": 4.9798453163005e-07, "loss": 0.0118, "step": 12420 }, { "epoch": 2.8261660978384526, "grad_norm": 0.8887701195138535, "learning_rate": 4.978970461745311e-07, "loss": 0.0155, "step": 12421 }, { "epoch": 2.826393629124005, "grad_norm": 0.9696089570397625, "learning_rate": 4.978095633167636e-07, "loss": 0.0096, "step": 12422 }, { "epoch": 2.826621160409556, "grad_norm": 0.34372921678299384, "learning_rate": 4.97722083058536e-07, "loss": 0.0025, "step": 12423 }, { "epoch": 2.8268486916951083, "grad_norm": 0.8345791138286331, "learning_rate": 4.976346054016356e-07, "loss": 0.0122, "step": 12424 }, { "epoch": 2.8270762229806596, "grad_norm": 0.6384237421997279, "learning_rate": 4.975471303478508e-07, "loss": 0.006, "step": 12425 }, { "epoch": 2.827303754266212, "grad_norm": 0.7327601691668029, "learning_rate": 4.974596578989689e-07, "loss": 0.0082, "step": 12426 }, { "epoch": 2.827531285551763, "grad_norm": 0.6962285696523923, "learning_rate": 4.973721880567783e-07, "loss": 0.0055, "step": 12427 }, { "epoch": 2.8277588168373153, "grad_norm": 1.0642362834036303, "learning_rate": 4.972847208230666e-07, "loss": 0.0212, "step": 12428 }, { "epoch": 2.8279863481228666, "grad_norm": 0.19148611302513996, "learning_rate": 4.97197256199621e-07, "loss": 0.0007, "step": 12429 }, { "epoch": 2.828213879408419, "grad_norm": 0.08373518660880948, "learning_rate": 4.971097941882296e-07, "loss": 0.0005, "step": 12430 }, { "epoch": 2.82844141069397, "grad_norm": 0.8936909437494889, "learning_rate": 4.970223347906795e-07, "loss": 0.0116, "step": 12431 }, { "epoch": 2.8286689419795223, "grad_norm": 0.7052626495951314, "learning_rate": 4.969348780087589e-07, "loss": 0.0104, "step": 12432 }, { "epoch": 2.828896473265074, "grad_norm": 0.6098274221421818, "learning_rate": 4.968474238442546e-07, "loss": 0.0092, "step": 12433 }, { "epoch": 2.829124004550626, "grad_norm": 0.5613074264300129, "learning_rate": 4.967599722989544e-07, "loss": 0.008, "step": 12434 }, { "epoch": 2.8293515358361776, "grad_norm": 0.9455622833685394, "learning_rate": 4.966725233746455e-07, "loss": 0.0135, "step": 12435 }, { "epoch": 2.8295790671217294, "grad_norm": 0.5126293792437444, "learning_rate": 4.96585077073115e-07, "loss": 0.0036, "step": 12436 }, { "epoch": 2.829806598407281, "grad_norm": 0.6681263949595772, "learning_rate": 4.964976333961506e-07, "loss": 0.0084, "step": 12437 }, { "epoch": 2.830034129692833, "grad_norm": 0.7673089885451926, "learning_rate": 4.96410192345539e-07, "loss": 0.004, "step": 12438 }, { "epoch": 2.8302616609783846, "grad_norm": 2.123799064901035, "learning_rate": 4.963227539230678e-07, "loss": 0.0353, "step": 12439 }, { "epoch": 2.8304891922639364, "grad_norm": 0.39967995554084523, "learning_rate": 4.962353181305237e-07, "loss": 0.0038, "step": 12440 }, { "epoch": 2.830716723549488, "grad_norm": 0.6796190381237651, "learning_rate": 4.961478849696938e-07, "loss": 0.0074, "step": 12441 }, { "epoch": 2.83094425483504, "grad_norm": 0.3717851267849413, "learning_rate": 4.960604544423654e-07, "loss": 0.0021, "step": 12442 }, { "epoch": 2.8311717861205916, "grad_norm": 0.6600124657780437, "learning_rate": 4.959730265503249e-07, "loss": 0.0075, "step": 12443 }, { "epoch": 2.8313993174061434, "grad_norm": 0.7740950436771507, "learning_rate": 4.958856012953596e-07, "loss": 0.0112, "step": 12444 }, { "epoch": 2.831626848691695, "grad_norm": 0.7350371177802464, "learning_rate": 4.95798178679256e-07, "loss": 0.0129, "step": 12445 }, { "epoch": 2.831854379977247, "grad_norm": 0.9128606535940484, "learning_rate": 4.957107587038013e-07, "loss": 0.0151, "step": 12446 }, { "epoch": 2.8320819112627986, "grad_norm": 1.7402319907115282, "learning_rate": 4.956233413707817e-07, "loss": 0.0047, "step": 12447 }, { "epoch": 2.8323094425483504, "grad_norm": 0.8481134723585907, "learning_rate": 4.95535926681984e-07, "loss": 0.0163, "step": 12448 }, { "epoch": 2.832536973833902, "grad_norm": 0.8447619427849868, "learning_rate": 4.954485146391951e-07, "loss": 0.0129, "step": 12449 }, { "epoch": 2.832764505119454, "grad_norm": 0.8086760479256794, "learning_rate": 4.953611052442011e-07, "loss": 0.0098, "step": 12450 }, { "epoch": 2.8329920364050056, "grad_norm": 1.3998721610162443, "learning_rate": 4.952736984987887e-07, "loss": 0.0078, "step": 12451 }, { "epoch": 2.8332195676905574, "grad_norm": 0.7770218042841287, "learning_rate": 4.951862944047442e-07, "loss": 0.0078, "step": 12452 }, { "epoch": 2.833447098976109, "grad_norm": 0.6028209892537322, "learning_rate": 4.950988929638544e-07, "loss": 0.0046, "step": 12453 }, { "epoch": 2.833674630261661, "grad_norm": 0.6645308103930142, "learning_rate": 4.950114941779052e-07, "loss": 0.0069, "step": 12454 }, { "epoch": 2.8339021615472126, "grad_norm": 0.6193758820422698, "learning_rate": 4.949240980486828e-07, "loss": 0.0093, "step": 12455 }, { "epoch": 2.8341296928327644, "grad_norm": 0.3466208211742783, "learning_rate": 4.94836704577974e-07, "loss": 0.003, "step": 12456 }, { "epoch": 2.834357224118316, "grad_norm": 1.17993351866705, "learning_rate": 4.947493137675642e-07, "loss": 0.0147, "step": 12457 }, { "epoch": 2.834584755403868, "grad_norm": 0.5960403612752823, "learning_rate": 4.9466192561924e-07, "loss": 0.0045, "step": 12458 }, { "epoch": 2.8348122866894196, "grad_norm": 0.25400470512589063, "learning_rate": 4.945745401347876e-07, "loss": 0.0017, "step": 12459 }, { "epoch": 2.8350398179749714, "grad_norm": 0.9820826528141572, "learning_rate": 4.944871573159923e-07, "loss": 0.0127, "step": 12460 }, { "epoch": 2.8352673492605236, "grad_norm": 0.7972745557007116, "learning_rate": 4.943997771646408e-07, "loss": 0.0126, "step": 12461 }, { "epoch": 2.835494880546075, "grad_norm": 0.48597227702236806, "learning_rate": 4.943123996825185e-07, "loss": 0.0053, "step": 12462 }, { "epoch": 2.835722411831627, "grad_norm": 2.6196391853457817, "learning_rate": 4.942250248714116e-07, "loss": 0.0082, "step": 12463 }, { "epoch": 2.8359499431171784, "grad_norm": 0.41120181665888567, "learning_rate": 4.941376527331054e-07, "loss": 0.0038, "step": 12464 }, { "epoch": 2.8361774744027306, "grad_norm": 1.1480711899059923, "learning_rate": 4.94050283269386e-07, "loss": 0.0102, "step": 12465 }, { "epoch": 2.836405005688282, "grad_norm": 0.7235518801862282, "learning_rate": 4.939629164820394e-07, "loss": 0.0113, "step": 12466 }, { "epoch": 2.836632536973834, "grad_norm": 0.8588025696378286, "learning_rate": 4.938755523728503e-07, "loss": 0.0146, "step": 12467 }, { "epoch": 2.8368600682593854, "grad_norm": 0.6585241531190322, "learning_rate": 4.93788190943605e-07, "loss": 0.0035, "step": 12468 }, { "epoch": 2.8370875995449376, "grad_norm": 1.5877266913047599, "learning_rate": 4.937008321960885e-07, "loss": 0.0245, "step": 12469 }, { "epoch": 2.837315130830489, "grad_norm": 0.8458334686637817, "learning_rate": 4.936134761320868e-07, "loss": 0.0105, "step": 12470 }, { "epoch": 2.837542662116041, "grad_norm": 0.2990036198544926, "learning_rate": 4.935261227533851e-07, "loss": 0.0019, "step": 12471 }, { "epoch": 2.837770193401593, "grad_norm": 0.4728216637461291, "learning_rate": 4.934387720617683e-07, "loss": 0.0041, "step": 12472 }, { "epoch": 2.8379977246871446, "grad_norm": 0.6316377921774267, "learning_rate": 4.933514240590225e-07, "loss": 0.0066, "step": 12473 }, { "epoch": 2.8382252559726964, "grad_norm": 0.7807567346201908, "learning_rate": 4.932640787469322e-07, "loss": 0.0175, "step": 12474 }, { "epoch": 2.838452787258248, "grad_norm": 0.5823907648719608, "learning_rate": 4.93176736127283e-07, "loss": 0.0072, "step": 12475 }, { "epoch": 2.8386803185438, "grad_norm": 0.7996070257505361, "learning_rate": 4.930893962018597e-07, "loss": 0.0143, "step": 12476 }, { "epoch": 2.8389078498293516, "grad_norm": 1.3222281614612441, "learning_rate": 4.930020589724479e-07, "loss": 0.0197, "step": 12477 }, { "epoch": 2.8391353811149034, "grad_norm": 0.9448151084995878, "learning_rate": 4.92914724440832e-07, "loss": 0.0207, "step": 12478 }, { "epoch": 2.839362912400455, "grad_norm": 1.027691703649301, "learning_rate": 4.928273926087972e-07, "loss": 0.0113, "step": 12479 }, { "epoch": 2.839590443686007, "grad_norm": 1.1924103137729711, "learning_rate": 4.927400634781286e-07, "loss": 0.0207, "step": 12480 }, { "epoch": 2.8398179749715586, "grad_norm": 1.399075737354254, "learning_rate": 4.926527370506108e-07, "loss": 0.0133, "step": 12481 }, { "epoch": 2.8400455062571104, "grad_norm": 0.34924771674220345, "learning_rate": 4.925654133280286e-07, "loss": 0.003, "step": 12482 }, { "epoch": 2.840273037542662, "grad_norm": 1.3321536028795655, "learning_rate": 4.924780923121668e-07, "loss": 0.0188, "step": 12483 }, { "epoch": 2.840500568828214, "grad_norm": 0.8817893747982484, "learning_rate": 4.923907740048103e-07, "loss": 0.0119, "step": 12484 }, { "epoch": 2.8407281001137656, "grad_norm": 0.4588126913475762, "learning_rate": 4.923034584077434e-07, "loss": 0.005, "step": 12485 }, { "epoch": 2.8409556313993174, "grad_norm": 1.0772393469563946, "learning_rate": 4.922161455227508e-07, "loss": 0.0069, "step": 12486 }, { "epoch": 2.841183162684869, "grad_norm": 0.30583812910655545, "learning_rate": 4.92128835351617e-07, "loss": 0.0033, "step": 12487 }, { "epoch": 2.841410693970421, "grad_norm": 1.1135228617950046, "learning_rate": 4.920415278961262e-07, "loss": 0.0201, "step": 12488 }, { "epoch": 2.8416382252559726, "grad_norm": 0.46925163476074916, "learning_rate": 4.919542231580634e-07, "loss": 0.0023, "step": 12489 }, { "epoch": 2.8418657565415244, "grad_norm": 0.4385561814804959, "learning_rate": 4.918669211392128e-07, "loss": 0.0042, "step": 12490 }, { "epoch": 2.842093287827076, "grad_norm": 1.0614205979017277, "learning_rate": 4.91779621841358e-07, "loss": 0.0213, "step": 12491 }, { "epoch": 2.842320819112628, "grad_norm": 0.5446292596086578, "learning_rate": 4.916923252662841e-07, "loss": 0.0047, "step": 12492 }, { "epoch": 2.8425483503981797, "grad_norm": 0.9759635856337535, "learning_rate": 4.916050314157747e-07, "loss": 0.0085, "step": 12493 }, { "epoch": 2.8427758816837314, "grad_norm": 0.594914588365409, "learning_rate": 4.915177402916143e-07, "loss": 0.0045, "step": 12494 }, { "epoch": 2.843003412969283, "grad_norm": 1.0936259930103134, "learning_rate": 4.914304518955868e-07, "loss": 0.0103, "step": 12495 }, { "epoch": 2.843230944254835, "grad_norm": 0.7703777707538957, "learning_rate": 4.913431662294763e-07, "loss": 0.0087, "step": 12496 }, { "epoch": 2.8434584755403867, "grad_norm": 0.7819343844889579, "learning_rate": 4.912558832950667e-07, "loss": 0.0119, "step": 12497 }, { "epoch": 2.8436860068259384, "grad_norm": 0.9183440952406253, "learning_rate": 4.911686030941417e-07, "loss": 0.0096, "step": 12498 }, { "epoch": 2.84391353811149, "grad_norm": 1.3051354282798422, "learning_rate": 4.910813256284857e-07, "loss": 0.0148, "step": 12499 }, { "epoch": 2.8441410693970424, "grad_norm": 0.5542983703733658, "learning_rate": 4.909940508998818e-07, "loss": 0.0042, "step": 12500 }, { "epoch": 2.8443686006825937, "grad_norm": 0.5859889380554715, "learning_rate": 4.909067789101143e-07, "loss": 0.0066, "step": 12501 }, { "epoch": 2.844596131968146, "grad_norm": 0.9882806586883365, "learning_rate": 4.908195096609665e-07, "loss": 0.0103, "step": 12502 }, { "epoch": 2.844823663253697, "grad_norm": 0.5763941368633245, "learning_rate": 4.907322431542223e-07, "loss": 0.0109, "step": 12503 }, { "epoch": 2.8450511945392494, "grad_norm": 0.9896059493627137, "learning_rate": 4.906449793916652e-07, "loss": 0.0206, "step": 12504 }, { "epoch": 2.8452787258248007, "grad_norm": 1.0827935252975245, "learning_rate": 4.905577183750784e-07, "loss": 0.0162, "step": 12505 }, { "epoch": 2.845506257110353, "grad_norm": 0.7212354993891869, "learning_rate": 4.90470460106246e-07, "loss": 0.0081, "step": 12506 }, { "epoch": 2.845733788395904, "grad_norm": 1.2322085556527, "learning_rate": 4.903832045869507e-07, "loss": 0.0211, "step": 12507 }, { "epoch": 2.8459613196814564, "grad_norm": 1.8280539105997649, "learning_rate": 4.902959518189763e-07, "loss": 0.0212, "step": 12508 }, { "epoch": 2.8461888509670077, "grad_norm": 0.9313396090844671, "learning_rate": 4.90208701804106e-07, "loss": 0.0253, "step": 12509 }, { "epoch": 2.84641638225256, "grad_norm": 0.31298668434906857, "learning_rate": 4.901214545441228e-07, "loss": 0.001, "step": 12510 }, { "epoch": 2.8466439135381116, "grad_norm": 1.5710701825451414, "learning_rate": 4.900342100408102e-07, "loss": 0.0094, "step": 12511 }, { "epoch": 2.8468714448236634, "grad_norm": 0.7971598134335246, "learning_rate": 4.899469682959511e-07, "loss": 0.0108, "step": 12512 }, { "epoch": 2.847098976109215, "grad_norm": 0.41832440657658193, "learning_rate": 4.898597293113287e-07, "loss": 0.0056, "step": 12513 }, { "epoch": 2.847326507394767, "grad_norm": 0.6802489599089581, "learning_rate": 4.897724930887258e-07, "loss": 0.0055, "step": 12514 }, { "epoch": 2.8475540386803186, "grad_norm": 0.2951694268940373, "learning_rate": 4.896852596299255e-07, "loss": 0.0024, "step": 12515 }, { "epoch": 2.8477815699658704, "grad_norm": 1.237450184602967, "learning_rate": 4.89598028936711e-07, "loss": 0.0071, "step": 12516 }, { "epoch": 2.848009101251422, "grad_norm": 0.8613945274347712, "learning_rate": 4.895108010108644e-07, "loss": 0.0083, "step": 12517 }, { "epoch": 2.848236632536974, "grad_norm": 0.968861667097762, "learning_rate": 4.894235758541691e-07, "loss": 0.0153, "step": 12518 }, { "epoch": 2.8484641638225257, "grad_norm": 0.6271558295453823, "learning_rate": 4.893363534684074e-07, "loss": 0.0102, "step": 12519 }, { "epoch": 2.8486916951080774, "grad_norm": 1.3145603484771868, "learning_rate": 4.892491338553625e-07, "loss": 0.0109, "step": 12520 }, { "epoch": 2.848919226393629, "grad_norm": 0.9331957371105323, "learning_rate": 4.891619170168164e-07, "loss": 0.0119, "step": 12521 }, { "epoch": 2.849146757679181, "grad_norm": 0.25858604458829654, "learning_rate": 4.890747029545521e-07, "loss": 0.0024, "step": 12522 }, { "epoch": 2.8493742889647327, "grad_norm": 0.7499375609465574, "learning_rate": 4.88987491670352e-07, "loss": 0.006, "step": 12523 }, { "epoch": 2.8496018202502844, "grad_norm": 0.5747411187948314, "learning_rate": 4.889002831659983e-07, "loss": 0.0043, "step": 12524 }, { "epoch": 2.849829351535836, "grad_norm": 1.6961204901865732, "learning_rate": 4.888130774432737e-07, "loss": 0.075, "step": 12525 }, { "epoch": 2.850056882821388, "grad_norm": 1.4717831269806934, "learning_rate": 4.887258745039601e-07, "loss": 0.0217, "step": 12526 }, { "epoch": 2.8502844141069397, "grad_norm": 0.8518454402209044, "learning_rate": 4.886386743498405e-07, "loss": 0.0136, "step": 12527 }, { "epoch": 2.8505119453924914, "grad_norm": 1.1166396323601107, "learning_rate": 4.885514769826964e-07, "loss": 0.0228, "step": 12528 }, { "epoch": 2.850739476678043, "grad_norm": 0.9835150673867731, "learning_rate": 4.884642824043101e-07, "loss": 0.0053, "step": 12529 }, { "epoch": 2.850967007963595, "grad_norm": 0.9295719030989289, "learning_rate": 4.883770906164642e-07, "loss": 0.0076, "step": 12530 }, { "epoch": 2.8511945392491467, "grad_norm": 1.11431706925271, "learning_rate": 4.882899016209399e-07, "loss": 0.0232, "step": 12531 }, { "epoch": 2.8514220705346984, "grad_norm": 0.17725225172722334, "learning_rate": 4.882027154195199e-07, "loss": 0.0008, "step": 12532 }, { "epoch": 2.85164960182025, "grad_norm": 0.8985206469276106, "learning_rate": 4.881155320139857e-07, "loss": 0.0167, "step": 12533 }, { "epoch": 2.851877133105802, "grad_norm": 0.77098149511068, "learning_rate": 4.880283514061196e-07, "loss": 0.007, "step": 12534 }, { "epoch": 2.8521046643913537, "grad_norm": 0.5585457830021944, "learning_rate": 4.87941173597703e-07, "loss": 0.0059, "step": 12535 }, { "epoch": 2.8523321956769054, "grad_norm": 0.9057224130141569, "learning_rate": 4.878539985905177e-07, "loss": 0.008, "step": 12536 }, { "epoch": 2.852559726962457, "grad_norm": 1.2889609360707477, "learning_rate": 4.877668263863458e-07, "loss": 0.0205, "step": 12537 }, { "epoch": 2.852787258248009, "grad_norm": 1.0464033945499156, "learning_rate": 4.876796569869682e-07, "loss": 0.0098, "step": 12538 }, { "epoch": 2.853014789533561, "grad_norm": 1.1893773780899919, "learning_rate": 4.875924903941672e-07, "loss": 0.0198, "step": 12539 }, { "epoch": 2.8532423208191124, "grad_norm": 0.6881786780272869, "learning_rate": 4.875053266097239e-07, "loss": 0.0113, "step": 12540 }, { "epoch": 2.8534698521046646, "grad_norm": 2.3364959092193733, "learning_rate": 4.874181656354202e-07, "loss": 0.0226, "step": 12541 }, { "epoch": 2.853697383390216, "grad_norm": 1.1427700591492302, "learning_rate": 4.87331007473037e-07, "loss": 0.0173, "step": 12542 }, { "epoch": 2.853924914675768, "grad_norm": 1.092891272192677, "learning_rate": 4.872438521243558e-07, "loss": 0.0142, "step": 12543 }, { "epoch": 2.8541524459613195, "grad_norm": 0.14012162120756486, "learning_rate": 4.871566995911583e-07, "loss": 0.0009, "step": 12544 }, { "epoch": 2.8543799772468716, "grad_norm": 1.4934327104993912, "learning_rate": 4.870695498752251e-07, "loss": 0.0136, "step": 12545 }, { "epoch": 2.854607508532423, "grad_norm": 1.0231098772602514, "learning_rate": 4.869824029783378e-07, "loss": 0.0173, "step": 12546 }, { "epoch": 2.854835039817975, "grad_norm": 0.9672682627591667, "learning_rate": 4.868952589022775e-07, "loss": 0.0135, "step": 12547 }, { "epoch": 2.8550625711035265, "grad_norm": 0.8551174748562872, "learning_rate": 4.86808117648825e-07, "loss": 0.0111, "step": 12548 }, { "epoch": 2.8552901023890787, "grad_norm": 0.7013601471792654, "learning_rate": 4.867209792197617e-07, "loss": 0.0049, "step": 12549 }, { "epoch": 2.8555176336746304, "grad_norm": 0.8721041063183936, "learning_rate": 4.86633843616868e-07, "loss": 0.0142, "step": 12550 }, { "epoch": 2.855745164960182, "grad_norm": 0.7958306480780435, "learning_rate": 4.865467108419254e-07, "loss": 0.0079, "step": 12551 }, { "epoch": 2.855972696245734, "grad_norm": 0.8459313110625494, "learning_rate": 4.864595808967143e-07, "loss": 0.0101, "step": 12552 }, { "epoch": 2.8562002275312857, "grad_norm": 0.7328601623731444, "learning_rate": 4.863724537830159e-07, "loss": 0.013, "step": 12553 }, { "epoch": 2.8564277588168374, "grad_norm": 1.0370500014113093, "learning_rate": 4.862853295026105e-07, "loss": 0.0119, "step": 12554 }, { "epoch": 2.856655290102389, "grad_norm": 1.4378283225645667, "learning_rate": 4.861982080572789e-07, "loss": 0.015, "step": 12555 }, { "epoch": 2.856882821387941, "grad_norm": 0.9829757212544675, "learning_rate": 4.861110894488019e-07, "loss": 0.0045, "step": 12556 }, { "epoch": 2.8571103526734927, "grad_norm": 0.7667003797658285, "learning_rate": 4.860239736789596e-07, "loss": 0.0106, "step": 12557 }, { "epoch": 2.8573378839590444, "grad_norm": 1.083250257288016, "learning_rate": 4.859368607495329e-07, "loss": 0.0221, "step": 12558 }, { "epoch": 2.857565415244596, "grad_norm": 0.7743945328063018, "learning_rate": 4.85849750662302e-07, "loss": 0.0138, "step": 12559 }, { "epoch": 2.857792946530148, "grad_norm": 1.0822492914407098, "learning_rate": 4.857626434190475e-07, "loss": 0.0193, "step": 12560 }, { "epoch": 2.8580204778156997, "grad_norm": 1.6937995704312936, "learning_rate": 4.856755390215495e-07, "loss": 0.0332, "step": 12561 }, { "epoch": 2.8582480091012514, "grad_norm": 0.9661508286323655, "learning_rate": 4.855884374715882e-07, "loss": 0.0196, "step": 12562 }, { "epoch": 2.858475540386803, "grad_norm": 0.31223269477228727, "learning_rate": 4.855013387709442e-07, "loss": 0.0039, "step": 12563 }, { "epoch": 2.858703071672355, "grad_norm": 5.786063980329787, "learning_rate": 4.854142429213971e-07, "loss": 0.0427, "step": 12564 }, { "epoch": 2.8589306029579067, "grad_norm": 0.7784295313590469, "learning_rate": 4.853271499247274e-07, "loss": 0.0086, "step": 12565 }, { "epoch": 2.8591581342434584, "grad_norm": 0.7132921622936873, "learning_rate": 4.85240059782715e-07, "loss": 0.0051, "step": 12566 }, { "epoch": 2.85938566552901, "grad_norm": 1.2827170931529, "learning_rate": 4.851529724971395e-07, "loss": 0.0099, "step": 12567 }, { "epoch": 2.859613196814562, "grad_norm": 1.2175140165427962, "learning_rate": 4.850658880697814e-07, "loss": 0.0147, "step": 12568 }, { "epoch": 2.8598407281001137, "grad_norm": 0.49569246231874536, "learning_rate": 4.849788065024201e-07, "loss": 0.0037, "step": 12569 }, { "epoch": 2.8600682593856654, "grad_norm": 1.9449568717831416, "learning_rate": 4.848917277968358e-07, "loss": 0.0169, "step": 12570 }, { "epoch": 2.860295790671217, "grad_norm": 1.198793983685171, "learning_rate": 4.848046519548078e-07, "loss": 0.0163, "step": 12571 }, { "epoch": 2.860523321956769, "grad_norm": 0.5976147710078367, "learning_rate": 4.84717578978116e-07, "loss": 0.0052, "step": 12572 }, { "epoch": 2.8607508532423207, "grad_norm": 0.37399899636958417, "learning_rate": 4.846305088685401e-07, "loss": 0.0049, "step": 12573 }, { "epoch": 2.8609783845278725, "grad_norm": 1.2190308472791251, "learning_rate": 4.845434416278593e-07, "loss": 0.0202, "step": 12574 }, { "epoch": 2.861205915813424, "grad_norm": 0.2828855227446941, "learning_rate": 4.844563772578534e-07, "loss": 0.0018, "step": 12575 }, { "epoch": 2.861433447098976, "grad_norm": 1.2384815663021334, "learning_rate": 4.843693157603016e-07, "loss": 0.0115, "step": 12576 }, { "epoch": 2.8616609783845277, "grad_norm": 0.3141012870403284, "learning_rate": 4.842822571369837e-07, "loss": 0.0028, "step": 12577 }, { "epoch": 2.86188850967008, "grad_norm": 0.5499355302652401, "learning_rate": 4.841952013896788e-07, "loss": 0.0037, "step": 12578 }, { "epoch": 2.862116040955631, "grad_norm": 1.0277700009819004, "learning_rate": 4.841081485201659e-07, "loss": 0.0177, "step": 12579 }, { "epoch": 2.8623435722411834, "grad_norm": 0.736202215444282, "learning_rate": 4.840210985302245e-07, "loss": 0.0056, "step": 12580 }, { "epoch": 2.8625711035267347, "grad_norm": 0.682565681264615, "learning_rate": 4.839340514216335e-07, "loss": 0.006, "step": 12581 }, { "epoch": 2.862798634812287, "grad_norm": 0.5350708333281974, "learning_rate": 4.838470071961724e-07, "loss": 0.0067, "step": 12582 }, { "epoch": 2.863026166097838, "grad_norm": 0.8278095773702444, "learning_rate": 4.837599658556197e-07, "loss": 0.0112, "step": 12583 }, { "epoch": 2.8632536973833904, "grad_norm": 0.5040929767793727, "learning_rate": 4.836729274017549e-07, "loss": 0.0043, "step": 12584 }, { "epoch": 2.8634812286689417, "grad_norm": 0.560881758888998, "learning_rate": 4.835858918363565e-07, "loss": 0.0049, "step": 12585 }, { "epoch": 2.863708759954494, "grad_norm": 0.4783543242232969, "learning_rate": 4.834988591612034e-07, "loss": 0.0033, "step": 12586 }, { "epoch": 2.8639362912400452, "grad_norm": 0.7625649889943873, "learning_rate": 4.834118293780747e-07, "loss": 0.0138, "step": 12587 }, { "epoch": 2.8641638225255974, "grad_norm": 0.6122361299211595, "learning_rate": 4.833248024887486e-07, "loss": 0.0089, "step": 12588 }, { "epoch": 2.864391353811149, "grad_norm": 0.5953653571673632, "learning_rate": 4.832377784950043e-07, "loss": 0.0088, "step": 12589 }, { "epoch": 2.864618885096701, "grad_norm": 0.3893635180047958, "learning_rate": 4.831507573986199e-07, "loss": 0.0021, "step": 12590 }, { "epoch": 2.8648464163822527, "grad_norm": 1.1098160263419112, "learning_rate": 4.830637392013746e-07, "loss": 0.0112, "step": 12591 }, { "epoch": 2.8650739476678044, "grad_norm": 1.6431800889388948, "learning_rate": 4.829767239050465e-07, "loss": 0.0357, "step": 12592 }, { "epoch": 2.865301478953356, "grad_norm": 1.0703456378946685, "learning_rate": 4.828897115114137e-07, "loss": 0.0184, "step": 12593 }, { "epoch": 2.865529010238908, "grad_norm": 0.9458305852634625, "learning_rate": 4.828027020222554e-07, "loss": 0.0133, "step": 12594 }, { "epoch": 2.8657565415244597, "grad_norm": 0.8260574653130388, "learning_rate": 4.827156954393491e-07, "loss": 0.0068, "step": 12595 }, { "epoch": 2.8659840728100114, "grad_norm": 1.1188147045579262, "learning_rate": 4.826286917644734e-07, "loss": 0.0078, "step": 12596 }, { "epoch": 2.866211604095563, "grad_norm": 1.2159399868826835, "learning_rate": 4.825416909994068e-07, "loss": 0.0125, "step": 12597 }, { "epoch": 2.866439135381115, "grad_norm": 1.1610389684126734, "learning_rate": 4.824546931459268e-07, "loss": 0.0181, "step": 12598 }, { "epoch": 2.8666666666666667, "grad_norm": 0.6340121588378391, "learning_rate": 4.823676982058121e-07, "loss": 0.0093, "step": 12599 }, { "epoch": 2.8668941979522184, "grad_norm": 0.8981516077645059, "learning_rate": 4.822807061808402e-07, "loss": 0.0124, "step": 12600 }, { "epoch": 2.86712172923777, "grad_norm": 1.4611777891853692, "learning_rate": 4.821937170727896e-07, "loss": 0.0107, "step": 12601 }, { "epoch": 2.867349260523322, "grad_norm": 0.7545716292551519, "learning_rate": 4.821067308834374e-07, "loss": 0.0116, "step": 12602 }, { "epoch": 2.8675767918088737, "grad_norm": 0.7563484452087661, "learning_rate": 4.820197476145623e-07, "loss": 0.0137, "step": 12603 }, { "epoch": 2.8678043230944255, "grad_norm": 2.1198711262320007, "learning_rate": 4.819327672679418e-07, "loss": 0.0348, "step": 12604 }, { "epoch": 2.868031854379977, "grad_norm": 0.8787460369793649, "learning_rate": 4.818457898453531e-07, "loss": 0.0128, "step": 12605 }, { "epoch": 2.868259385665529, "grad_norm": 0.24875060438780883, "learning_rate": 4.817588153485746e-07, "loss": 0.0016, "step": 12606 }, { "epoch": 2.8684869169510807, "grad_norm": 1.5070740915154521, "learning_rate": 4.816718437793833e-07, "loss": 0.0126, "step": 12607 }, { "epoch": 2.8687144482366325, "grad_norm": 1.0703193909823674, "learning_rate": 4.815848751395573e-07, "loss": 0.0169, "step": 12608 }, { "epoch": 2.868941979522184, "grad_norm": 0.6345621388134647, "learning_rate": 4.814979094308735e-07, "loss": 0.0052, "step": 12609 }, { "epoch": 2.869169510807736, "grad_norm": 1.3174293527443695, "learning_rate": 4.8141094665511e-07, "loss": 0.0208, "step": 12610 }, { "epoch": 2.8693970420932877, "grad_norm": 0.7572296217170332, "learning_rate": 4.813239868140437e-07, "loss": 0.0117, "step": 12611 }, { "epoch": 2.8696245733788395, "grad_norm": 1.0186673480497954, "learning_rate": 4.812370299094517e-07, "loss": 0.0246, "step": 12612 }, { "epoch": 2.8698521046643912, "grad_norm": 0.6450151509307269, "learning_rate": 4.811500759431118e-07, "loss": 0.0034, "step": 12613 }, { "epoch": 2.870079635949943, "grad_norm": 0.7898228470126177, "learning_rate": 4.810631249168007e-07, "loss": 0.0074, "step": 12614 }, { "epoch": 2.8703071672354947, "grad_norm": 1.2482762771441436, "learning_rate": 4.809761768322959e-07, "loss": 0.0071, "step": 12615 }, { "epoch": 2.8705346985210465, "grad_norm": 0.9512686913027757, "learning_rate": 4.808892316913743e-07, "loss": 0.0148, "step": 12616 }, { "epoch": 2.8707622298065987, "grad_norm": 0.9350553844466212, "learning_rate": 4.808022894958126e-07, "loss": 0.0105, "step": 12617 }, { "epoch": 2.87098976109215, "grad_norm": 1.1579228040610154, "learning_rate": 4.807153502473883e-07, "loss": 0.0142, "step": 12618 }, { "epoch": 2.871217292377702, "grad_norm": 0.6653024145413261, "learning_rate": 4.806284139478777e-07, "loss": 0.0059, "step": 12619 }, { "epoch": 2.8714448236632535, "grad_norm": 1.5762860562169867, "learning_rate": 4.805414805990582e-07, "loss": 0.0325, "step": 12620 }, { "epoch": 2.8716723549488057, "grad_norm": 0.6929227574626761, "learning_rate": 4.804545502027061e-07, "loss": 0.0096, "step": 12621 }, { "epoch": 2.871899886234357, "grad_norm": 2.0509893096168494, "learning_rate": 4.803676227605984e-07, "loss": 0.0262, "step": 12622 }, { "epoch": 2.872127417519909, "grad_norm": 0.9764717369162318, "learning_rate": 4.802806982745117e-07, "loss": 0.0268, "step": 12623 }, { "epoch": 2.8723549488054605, "grad_norm": 0.6858798168606396, "learning_rate": 4.801937767462222e-07, "loss": 0.0064, "step": 12624 }, { "epoch": 2.8725824800910127, "grad_norm": 0.4926823098759393, "learning_rate": 4.801068581775068e-07, "loss": 0.0056, "step": 12625 }, { "epoch": 2.8728100113765644, "grad_norm": 1.7888912662471108, "learning_rate": 4.800199425701419e-07, "loss": 0.0321, "step": 12626 }, { "epoch": 2.873037542662116, "grad_norm": 0.6996074353885197, "learning_rate": 4.79933029925904e-07, "loss": 0.0056, "step": 12627 }, { "epoch": 2.873265073947668, "grad_norm": 0.5059582102818247, "learning_rate": 4.79846120246569e-07, "loss": 0.0034, "step": 12628 }, { "epoch": 2.8734926052332197, "grad_norm": 1.0166491200209835, "learning_rate": 4.797592135339136e-07, "loss": 0.0193, "step": 12629 }, { "epoch": 2.8737201365187715, "grad_norm": 1.2020526865680852, "learning_rate": 4.796723097897141e-07, "loss": 0.0257, "step": 12630 }, { "epoch": 2.873947667804323, "grad_norm": 1.194124108605279, "learning_rate": 4.795854090157461e-07, "loss": 0.0281, "step": 12631 }, { "epoch": 2.874175199089875, "grad_norm": 0.5854549223241102, "learning_rate": 4.794985112137862e-07, "loss": 0.0122, "step": 12632 }, { "epoch": 2.8744027303754267, "grad_norm": 1.526644125945453, "learning_rate": 4.794116163856101e-07, "loss": 0.0279, "step": 12633 }, { "epoch": 2.8746302616609785, "grad_norm": 1.5800844123051163, "learning_rate": 4.793247245329941e-07, "loss": 0.0192, "step": 12634 }, { "epoch": 2.87485779294653, "grad_norm": 1.1890446334662081, "learning_rate": 4.792378356577139e-07, "loss": 0.0176, "step": 12635 }, { "epoch": 2.875085324232082, "grad_norm": 1.9737403299588696, "learning_rate": 4.791509497615452e-07, "loss": 0.0071, "step": 12636 }, { "epoch": 2.8753128555176337, "grad_norm": 1.0980348115439096, "learning_rate": 4.790640668462644e-07, "loss": 0.016, "step": 12637 }, { "epoch": 2.8755403868031855, "grad_norm": 1.2220040112310837, "learning_rate": 4.789771869136464e-07, "loss": 0.0138, "step": 12638 }, { "epoch": 2.875767918088737, "grad_norm": 0.522176764359583, "learning_rate": 4.788903099654674e-07, "loss": 0.0044, "step": 12639 }, { "epoch": 2.875995449374289, "grad_norm": 0.5250983630588707, "learning_rate": 4.788034360035027e-07, "loss": 0.0038, "step": 12640 }, { "epoch": 2.8762229806598407, "grad_norm": 0.7183526651910974, "learning_rate": 4.787165650295284e-07, "loss": 0.0055, "step": 12641 }, { "epoch": 2.8764505119453925, "grad_norm": 1.1616898537373512, "learning_rate": 4.786296970453195e-07, "loss": 0.0149, "step": 12642 }, { "epoch": 2.8766780432309442, "grad_norm": 1.0925745016345307, "learning_rate": 4.785428320526514e-07, "loss": 0.0137, "step": 12643 }, { "epoch": 2.876905574516496, "grad_norm": 0.9367686636657112, "learning_rate": 4.784559700532998e-07, "loss": 0.0047, "step": 12644 }, { "epoch": 2.8771331058020477, "grad_norm": 1.562626621806983, "learning_rate": 4.783691110490394e-07, "loss": 0.0166, "step": 12645 }, { "epoch": 2.8773606370875995, "grad_norm": 0.39117227429467716, "learning_rate": 4.782822550416461e-07, "loss": 0.0036, "step": 12646 }, { "epoch": 2.8775881683731512, "grad_norm": 0.9311417120315446, "learning_rate": 4.781954020328947e-07, "loss": 0.0194, "step": 12647 }, { "epoch": 2.877815699658703, "grad_norm": 0.5035092169776393, "learning_rate": 4.781085520245606e-07, "loss": 0.003, "step": 12648 }, { "epoch": 2.8780432309442547, "grad_norm": 0.64590217497331, "learning_rate": 4.780217050184185e-07, "loss": 0.0045, "step": 12649 }, { "epoch": 2.8782707622298065, "grad_norm": 0.6066045750733168, "learning_rate": 4.779348610162436e-07, "loss": 0.0061, "step": 12650 }, { "epoch": 2.8784982935153582, "grad_norm": 0.850214880716244, "learning_rate": 4.77848020019811e-07, "loss": 0.0082, "step": 12651 }, { "epoch": 2.87872582480091, "grad_norm": 0.6097290979017135, "learning_rate": 4.777611820308951e-07, "loss": 0.0072, "step": 12652 }, { "epoch": 2.8789533560864617, "grad_norm": 0.5006587943682104, "learning_rate": 4.776743470512711e-07, "loss": 0.0038, "step": 12653 }, { "epoch": 2.8791808873720135, "grad_norm": 2.453074474767594, "learning_rate": 4.775875150827137e-07, "loss": 0.0321, "step": 12654 }, { "epoch": 2.8794084186575652, "grad_norm": 1.2617848058178966, "learning_rate": 4.775006861269974e-07, "loss": 0.0142, "step": 12655 }, { "epoch": 2.8796359499431174, "grad_norm": 0.593281025413991, "learning_rate": 4.77413860185897e-07, "loss": 0.0047, "step": 12656 }, { "epoch": 2.8798634812286688, "grad_norm": 0.9000343661984159, "learning_rate": 4.77327037261187e-07, "loss": 0.0108, "step": 12657 }, { "epoch": 2.880091012514221, "grad_norm": 0.7787772725354253, "learning_rate": 4.772402173546419e-07, "loss": 0.0046, "step": 12658 }, { "epoch": 2.8803185437997723, "grad_norm": 1.3288135588384427, "learning_rate": 4.771534004680361e-07, "loss": 0.0096, "step": 12659 }, { "epoch": 2.8805460750853245, "grad_norm": 1.4522145131106423, "learning_rate": 4.770665866031441e-07, "loss": 0.0178, "step": 12660 }, { "epoch": 2.8807736063708758, "grad_norm": 0.9683509944490493, "learning_rate": 4.769797757617403e-07, "loss": 0.0169, "step": 12661 }, { "epoch": 2.881001137656428, "grad_norm": 0.9080788793815752, "learning_rate": 4.768929679455984e-07, "loss": 0.0105, "step": 12662 }, { "epoch": 2.8812286689419793, "grad_norm": 0.8782803665096269, "learning_rate": 4.768061631564933e-07, "loss": 0.0117, "step": 12663 }, { "epoch": 2.8814562002275315, "grad_norm": 0.7694569925226747, "learning_rate": 4.767193613961986e-07, "loss": 0.0111, "step": 12664 }, { "epoch": 2.881683731513083, "grad_norm": 0.48689020236636465, "learning_rate": 4.766325626664889e-07, "loss": 0.0037, "step": 12665 }, { "epoch": 2.881911262798635, "grad_norm": 1.2774626551148272, "learning_rate": 4.7654576696913757e-07, "loss": 0.0182, "step": 12666 }, { "epoch": 2.8821387940841867, "grad_norm": 0.9336826307753154, "learning_rate": 4.764589743059191e-07, "loss": 0.017, "step": 12667 }, { "epoch": 2.8823663253697385, "grad_norm": 0.9622381354393426, "learning_rate": 4.7637218467860723e-07, "loss": 0.0089, "step": 12668 }, { "epoch": 2.88259385665529, "grad_norm": 1.6226552664552056, "learning_rate": 4.7628539808897543e-07, "loss": 0.0266, "step": 12669 }, { "epoch": 2.882821387940842, "grad_norm": 0.7552640128746161, "learning_rate": 4.7619861453879786e-07, "loss": 0.005, "step": 12670 }, { "epoch": 2.8830489192263937, "grad_norm": 0.5431294629736407, "learning_rate": 4.7611183402984804e-07, "loss": 0.0071, "step": 12671 }, { "epoch": 2.8832764505119455, "grad_norm": 1.0265800187614924, "learning_rate": 4.760250565638998e-07, "loss": 0.0159, "step": 12672 }, { "epoch": 2.8835039817974972, "grad_norm": 0.8703777855402733, "learning_rate": 4.759382821427265e-07, "loss": 0.017, "step": 12673 }, { "epoch": 2.883731513083049, "grad_norm": 0.32762517030489496, "learning_rate": 4.758515107681016e-07, "loss": 0.0027, "step": 12674 }, { "epoch": 2.8839590443686007, "grad_norm": 0.8523055805316028, "learning_rate": 4.757647424417988e-07, "loss": 0.0142, "step": 12675 }, { "epoch": 2.8841865756541525, "grad_norm": 1.2446068044429668, "learning_rate": 4.756779771655912e-07, "loss": 0.0211, "step": 12676 }, { "epoch": 2.8844141069397042, "grad_norm": 0.6400564418923828, "learning_rate": 4.7559121494125255e-07, "loss": 0.0081, "step": 12677 }, { "epoch": 2.884641638225256, "grad_norm": 0.8824890802332238, "learning_rate": 4.7550445577055556e-07, "loss": 0.0121, "step": 12678 }, { "epoch": 2.8848691695108077, "grad_norm": 1.0029073845803569, "learning_rate": 4.7541769965527387e-07, "loss": 0.0066, "step": 12679 }, { "epoch": 2.8850967007963595, "grad_norm": 0.851216022303577, "learning_rate": 4.753309465971806e-07, "loss": 0.0098, "step": 12680 }, { "epoch": 2.8853242320819112, "grad_norm": 0.35965630121235115, "learning_rate": 4.752441965980483e-07, "loss": 0.0035, "step": 12681 }, { "epoch": 2.885551763367463, "grad_norm": 0.8224473815781363, "learning_rate": 4.751574496596506e-07, "loss": 0.0036, "step": 12682 }, { "epoch": 2.8857792946530147, "grad_norm": 0.8515400404109131, "learning_rate": 4.7507070578376e-07, "loss": 0.0119, "step": 12683 }, { "epoch": 2.8860068259385665, "grad_norm": 0.3729036779880715, "learning_rate": 4.7498396497214995e-07, "loss": 0.0096, "step": 12684 }, { "epoch": 2.8862343572241183, "grad_norm": 0.8689933201805327, "learning_rate": 4.748972272265927e-07, "loss": 0.0149, "step": 12685 }, { "epoch": 2.88646188850967, "grad_norm": 1.7828814982875416, "learning_rate": 4.7481049254886114e-07, "loss": 0.0173, "step": 12686 }, { "epoch": 2.8866894197952218, "grad_norm": 1.4541206307047618, "learning_rate": 4.747237609407283e-07, "loss": 0.017, "step": 12687 }, { "epoch": 2.8869169510807735, "grad_norm": 1.0108574587424344, "learning_rate": 4.7463703240396627e-07, "loss": 0.0139, "step": 12688 }, { "epoch": 2.8871444823663253, "grad_norm": 0.6967918982137538, "learning_rate": 4.745503069403481e-07, "loss": 0.009, "step": 12689 }, { "epoch": 2.887372013651877, "grad_norm": 0.9903747378642697, "learning_rate": 4.74463584551646e-07, "loss": 0.0134, "step": 12690 }, { "epoch": 2.8875995449374288, "grad_norm": 0.9322970773651463, "learning_rate": 4.743768652396327e-07, "loss": 0.017, "step": 12691 }, { "epoch": 2.8878270762229805, "grad_norm": 0.6636963009574122, "learning_rate": 4.7429014900608043e-07, "loss": 0.0093, "step": 12692 }, { "epoch": 2.8880546075085323, "grad_norm": 0.7951758186153631, "learning_rate": 4.742034358527613e-07, "loss": 0.0164, "step": 12693 }, { "epoch": 2.888282138794084, "grad_norm": 0.8425206619044845, "learning_rate": 4.74116725781448e-07, "loss": 0.0109, "step": 12694 }, { "epoch": 2.888509670079636, "grad_norm": 1.137174308760195, "learning_rate": 4.740300187939123e-07, "loss": 0.0047, "step": 12695 }, { "epoch": 2.8887372013651875, "grad_norm": 0.41748851319666325, "learning_rate": 4.739433148919266e-07, "loss": 0.0043, "step": 12696 }, { "epoch": 2.8889647326507397, "grad_norm": 0.6381480462993437, "learning_rate": 4.7385661407726283e-07, "loss": 0.0069, "step": 12697 }, { "epoch": 2.889192263936291, "grad_norm": 0.8472835734259655, "learning_rate": 4.737699163516932e-07, "loss": 0.0181, "step": 12698 }, { "epoch": 2.8894197952218432, "grad_norm": 0.9581784156256885, "learning_rate": 4.7368322171698954e-07, "loss": 0.0075, "step": 12699 }, { "epoch": 2.8896473265073945, "grad_norm": 1.2984854392698004, "learning_rate": 4.7359653017492344e-07, "loss": 0.024, "step": 12700 }, { "epoch": 2.8898748577929467, "grad_norm": 0.8337139972635338, "learning_rate": 4.735098417272674e-07, "loss": 0.0092, "step": 12701 }, { "epoch": 2.890102389078498, "grad_norm": 0.2638158553540379, "learning_rate": 4.734231563757924e-07, "loss": 0.0019, "step": 12702 }, { "epoch": 2.8903299203640502, "grad_norm": 1.7105983399875997, "learning_rate": 4.733364741222705e-07, "loss": 0.0216, "step": 12703 }, { "epoch": 2.890557451649602, "grad_norm": 0.9495797140826089, "learning_rate": 4.732497949684736e-07, "loss": 0.0078, "step": 12704 }, { "epoch": 2.8907849829351537, "grad_norm": 0.599937953601937, "learning_rate": 4.731631189161727e-07, "loss": 0.0071, "step": 12705 }, { "epoch": 2.8910125142207055, "grad_norm": 0.9793536351583955, "learning_rate": 4.730764459671397e-07, "loss": 0.0089, "step": 12706 }, { "epoch": 2.8912400455062572, "grad_norm": 0.9779859887084057, "learning_rate": 4.729897761231457e-07, "loss": 0.0184, "step": 12707 }, { "epoch": 2.891467576791809, "grad_norm": 1.4688615365070112, "learning_rate": 4.7290310938596264e-07, "loss": 0.0218, "step": 12708 }, { "epoch": 2.8916951080773607, "grad_norm": 0.8877173638663652, "learning_rate": 4.728164457573611e-07, "loss": 0.0057, "step": 12709 }, { "epoch": 2.8919226393629125, "grad_norm": 0.6585149559618908, "learning_rate": 4.7272978523911294e-07, "loss": 0.0138, "step": 12710 }, { "epoch": 2.8921501706484642, "grad_norm": 0.5155144182407596, "learning_rate": 4.726431278329892e-07, "loss": 0.0042, "step": 12711 }, { "epoch": 2.892377701934016, "grad_norm": 0.8937561778697838, "learning_rate": 4.725564735407606e-07, "loss": 0.0171, "step": 12712 }, { "epoch": 2.8926052332195678, "grad_norm": 0.29233117528876174, "learning_rate": 4.724698223641987e-07, "loss": 0.0024, "step": 12713 }, { "epoch": 2.8928327645051195, "grad_norm": 0.41076696394350726, "learning_rate": 4.723831743050741e-07, "loss": 0.0024, "step": 12714 }, { "epoch": 2.8930602957906713, "grad_norm": 0.821690086398246, "learning_rate": 4.722965293651581e-07, "loss": 0.0115, "step": 12715 }, { "epoch": 2.893287827076223, "grad_norm": 0.4052436989677448, "learning_rate": 4.7220988754622124e-07, "loss": 0.0032, "step": 12716 }, { "epoch": 2.8935153583617748, "grad_norm": 1.355516110052873, "learning_rate": 4.721232488500345e-07, "loss": 0.0217, "step": 12717 }, { "epoch": 2.8937428896473265, "grad_norm": 0.8092635591026434, "learning_rate": 4.7203661327836873e-07, "loss": 0.0078, "step": 12718 }, { "epoch": 2.8939704209328783, "grad_norm": 1.321898005221804, "learning_rate": 4.719499808329942e-07, "loss": 0.0083, "step": 12719 }, { "epoch": 2.89419795221843, "grad_norm": 0.8371284599573121, "learning_rate": 4.718633515156819e-07, "loss": 0.009, "step": 12720 }, { "epoch": 2.8944254835039818, "grad_norm": 0.35684236404495273, "learning_rate": 4.717767253282021e-07, "loss": 0.0029, "step": 12721 }, { "epoch": 2.8946530147895335, "grad_norm": 1.0113089479601638, "learning_rate": 4.7169010227232566e-07, "loss": 0.0131, "step": 12722 }, { "epoch": 2.8948805460750853, "grad_norm": 0.8211733222911113, "learning_rate": 4.7160348234982264e-07, "loss": 0.0137, "step": 12723 }, { "epoch": 2.895108077360637, "grad_norm": 0.8495601108652503, "learning_rate": 4.7151686556246343e-07, "loss": 0.0069, "step": 12724 }, { "epoch": 2.8953356086461888, "grad_norm": 0.6358381618385401, "learning_rate": 4.714302519120185e-07, "loss": 0.0055, "step": 12725 }, { "epoch": 2.8955631399317405, "grad_norm": 0.4045675092414611, "learning_rate": 4.7134364140025786e-07, "loss": 0.003, "step": 12726 }, { "epoch": 2.8957906712172923, "grad_norm": 1.539132209005873, "learning_rate": 4.712570340289519e-07, "loss": 0.0193, "step": 12727 }, { "epoch": 2.896018202502844, "grad_norm": 0.5919070731990855, "learning_rate": 4.7117042979987044e-07, "loss": 0.0089, "step": 12728 }, { "epoch": 2.896245733788396, "grad_norm": 0.7144958363051278, "learning_rate": 4.710838287147839e-07, "loss": 0.0074, "step": 12729 }, { "epoch": 2.8964732650739475, "grad_norm": 0.5004337753554073, "learning_rate": 4.709972307754619e-07, "loss": 0.0088, "step": 12730 }, { "epoch": 2.8967007963594993, "grad_norm": 1.4126286129203125, "learning_rate": 4.709106359836744e-07, "loss": 0.0317, "step": 12731 }, { "epoch": 2.896928327645051, "grad_norm": 1.2248057889141402, "learning_rate": 4.7082404434119147e-07, "loss": 0.0097, "step": 12732 }, { "epoch": 2.897155858930603, "grad_norm": 1.042540294075592, "learning_rate": 4.707374558497824e-07, "loss": 0.0091, "step": 12733 }, { "epoch": 2.897383390216155, "grad_norm": 0.19827069957166216, "learning_rate": 4.7065087051121755e-07, "loss": 0.0008, "step": 12734 }, { "epoch": 2.8976109215017063, "grad_norm": 0.8050246563294057, "learning_rate": 4.70564288327266e-07, "loss": 0.0121, "step": 12735 }, { "epoch": 2.8978384527872585, "grad_norm": 0.4315598270006115, "learning_rate": 4.704777092996976e-07, "loss": 0.0029, "step": 12736 }, { "epoch": 2.89806598407281, "grad_norm": 0.3948986318652948, "learning_rate": 4.703911334302821e-07, "loss": 0.0019, "step": 12737 }, { "epoch": 2.898293515358362, "grad_norm": 0.6246627947763, "learning_rate": 4.703045607207883e-07, "loss": 0.0037, "step": 12738 }, { "epoch": 2.8985210466439133, "grad_norm": 0.6234488923798389, "learning_rate": 4.7021799117298615e-07, "loss": 0.011, "step": 12739 }, { "epoch": 2.8987485779294655, "grad_norm": 1.9150135267167303, "learning_rate": 4.7013142478864466e-07, "loss": 0.0206, "step": 12740 }, { "epoch": 2.898976109215017, "grad_norm": 1.0646529198461356, "learning_rate": 4.7004486156953346e-07, "loss": 0.0105, "step": 12741 }, { "epoch": 2.899203640500569, "grad_norm": 1.3272494396351107, "learning_rate": 4.699583015174214e-07, "loss": 0.0171, "step": 12742 }, { "epoch": 2.8994311717861208, "grad_norm": 0.8610121838700086, "learning_rate": 4.698717446340775e-07, "loss": 0.0088, "step": 12743 }, { "epoch": 2.8996587030716725, "grad_norm": 0.874148144485549, "learning_rate": 4.6978519092127146e-07, "loss": 0.0075, "step": 12744 }, { "epoch": 2.8998862343572243, "grad_norm": 0.7471862160132976, "learning_rate": 4.696986403807715e-07, "loss": 0.011, "step": 12745 }, { "epoch": 2.900113765642776, "grad_norm": 1.7103324459710885, "learning_rate": 4.6961209301434705e-07, "loss": 0.0162, "step": 12746 }, { "epoch": 2.9003412969283278, "grad_norm": 0.765590127496955, "learning_rate": 4.695255488237667e-07, "loss": 0.0144, "step": 12747 }, { "epoch": 2.9005688282138795, "grad_norm": 2.4595649755526114, "learning_rate": 4.6943900781079963e-07, "loss": 0.0196, "step": 12748 }, { "epoch": 2.9007963594994313, "grad_norm": 0.5964342079560994, "learning_rate": 4.6935246997721425e-07, "loss": 0.0049, "step": 12749 }, { "epoch": 2.901023890784983, "grad_norm": 0.5763766370027691, "learning_rate": 4.6926593532477916e-07, "loss": 0.0063, "step": 12750 }, { "epoch": 2.9012514220705348, "grad_norm": 0.15065558622859193, "learning_rate": 4.6917940385526344e-07, "loss": 0.001, "step": 12751 }, { "epoch": 2.9014789533560865, "grad_norm": 0.8263759278451528, "learning_rate": 4.6909287557043505e-07, "loss": 0.0086, "step": 12752 }, { "epoch": 2.9017064846416383, "grad_norm": 0.8574569506512155, "learning_rate": 4.690063504720629e-07, "loss": 0.0069, "step": 12753 }, { "epoch": 2.90193401592719, "grad_norm": 0.8507968869223641, "learning_rate": 4.689198285619151e-07, "loss": 0.0121, "step": 12754 }, { "epoch": 2.9021615472127418, "grad_norm": 1.163725636482505, "learning_rate": 4.688333098417604e-07, "loss": 0.0147, "step": 12755 }, { "epoch": 2.9023890784982935, "grad_norm": 1.1424295690656245, "learning_rate": 4.687467943133668e-07, "loss": 0.0142, "step": 12756 }, { "epoch": 2.9026166097838453, "grad_norm": 0.8060189856759536, "learning_rate": 4.686602819785024e-07, "loss": 0.0169, "step": 12757 }, { "epoch": 2.902844141069397, "grad_norm": 0.8974639783072844, "learning_rate": 4.6857377283893573e-07, "loss": 0.0083, "step": 12758 }, { "epoch": 2.903071672354949, "grad_norm": 1.7245899577795307, "learning_rate": 4.6848726689643436e-07, "loss": 0.0079, "step": 12759 }, { "epoch": 2.9032992036405005, "grad_norm": 0.737156468650241, "learning_rate": 4.6840076415276684e-07, "loss": 0.0091, "step": 12760 }, { "epoch": 2.9035267349260523, "grad_norm": 0.48352018951742987, "learning_rate": 4.6831426460970104e-07, "loss": 0.0054, "step": 12761 }, { "epoch": 2.903754266211604, "grad_norm": 2.0918674166408047, "learning_rate": 4.682277682690044e-07, "loss": 0.0136, "step": 12762 }, { "epoch": 2.903981797497156, "grad_norm": 0.5208443745149308, "learning_rate": 4.6814127513244517e-07, "loss": 0.0028, "step": 12763 }, { "epoch": 2.9042093287827075, "grad_norm": 1.343389810003789, "learning_rate": 4.6805478520179094e-07, "loss": 0.0246, "step": 12764 }, { "epoch": 2.9044368600682593, "grad_norm": 0.3880458698553829, "learning_rate": 4.6796829847880967e-07, "loss": 0.0035, "step": 12765 }, { "epoch": 2.904664391353811, "grad_norm": 0.9072464215662951, "learning_rate": 4.678818149652686e-07, "loss": 0.0123, "step": 12766 }, { "epoch": 2.904891922639363, "grad_norm": 0.8577609675489495, "learning_rate": 4.6779533466293553e-07, "loss": 0.0093, "step": 12767 }, { "epoch": 2.9051194539249146, "grad_norm": 1.3402497783670397, "learning_rate": 4.677088575735781e-07, "loss": 0.0141, "step": 12768 }, { "epoch": 2.9053469852104663, "grad_norm": 2.948583291715912, "learning_rate": 4.6762238369896324e-07, "loss": 0.0332, "step": 12769 }, { "epoch": 2.905574516496018, "grad_norm": 0.7665706786464921, "learning_rate": 4.675359130408588e-07, "loss": 0.0122, "step": 12770 }, { "epoch": 2.90580204778157, "grad_norm": 0.40903718708250997, "learning_rate": 4.674494456010319e-07, "loss": 0.0029, "step": 12771 }, { "epoch": 2.9060295790671216, "grad_norm": 0.59447625863035, "learning_rate": 4.6736298138124983e-07, "loss": 0.0099, "step": 12772 }, { "epoch": 2.9062571103526738, "grad_norm": 1.5151897341168519, "learning_rate": 4.672765203832796e-07, "loss": 0.0311, "step": 12773 }, { "epoch": 2.906484641638225, "grad_norm": 0.6619135743473515, "learning_rate": 4.671900626088886e-07, "loss": 0.0083, "step": 12774 }, { "epoch": 2.9067121729237773, "grad_norm": 1.1612595332149154, "learning_rate": 4.6710360805984373e-07, "loss": 0.0137, "step": 12775 }, { "epoch": 2.9069397042093286, "grad_norm": 0.3670028067269292, "learning_rate": 4.670171567379117e-07, "loss": 0.0075, "step": 12776 }, { "epoch": 2.9071672354948808, "grad_norm": 0.47183410863429615, "learning_rate": 4.6693070864485983e-07, "loss": 0.0028, "step": 12777 }, { "epoch": 2.907394766780432, "grad_norm": 0.5833159632879681, "learning_rate": 4.668442637824547e-07, "loss": 0.0052, "step": 12778 }, { "epoch": 2.9076222980659843, "grad_norm": 0.6212920035146879, "learning_rate": 4.667578221524633e-07, "loss": 0.0057, "step": 12779 }, { "epoch": 2.9078498293515356, "grad_norm": 0.7050551978578539, "learning_rate": 4.6667138375665217e-07, "loss": 0.0067, "step": 12780 }, { "epoch": 2.9080773606370878, "grad_norm": 0.7453267182132152, "learning_rate": 4.6658494859678785e-07, "loss": 0.0127, "step": 12781 }, { "epoch": 2.9083048919226395, "grad_norm": 0.7102746645970163, "learning_rate": 4.6649851667463725e-07, "loss": 0.0072, "step": 12782 }, { "epoch": 2.9085324232081913, "grad_norm": 0.42405632653352704, "learning_rate": 4.664120879919665e-07, "loss": 0.0026, "step": 12783 }, { "epoch": 2.908759954493743, "grad_norm": 0.7596223538623741, "learning_rate": 4.663256625505423e-07, "loss": 0.0092, "step": 12784 }, { "epoch": 2.908987485779295, "grad_norm": 0.8407496285184061, "learning_rate": 4.6623924035213083e-07, "loss": 0.0101, "step": 12785 }, { "epoch": 2.9092150170648465, "grad_norm": 1.0313200245882321, "learning_rate": 4.6615282139849887e-07, "loss": 0.0141, "step": 12786 }, { "epoch": 2.9094425483503983, "grad_norm": 0.7929713466164361, "learning_rate": 4.6606640569141216e-07, "loss": 0.0135, "step": 12787 }, { "epoch": 2.90967007963595, "grad_norm": 0.9486966021270764, "learning_rate": 4.6597999323263693e-07, "loss": 0.009, "step": 12788 }, { "epoch": 2.909897610921502, "grad_norm": 0.43834862207420017, "learning_rate": 4.6589358402393967e-07, "loss": 0.0044, "step": 12789 }, { "epoch": 2.9101251422070535, "grad_norm": 0.751347314596474, "learning_rate": 4.6580717806708585e-07, "loss": 0.0198, "step": 12790 }, { "epoch": 2.9103526734926053, "grad_norm": 1.108836648824182, "learning_rate": 4.6572077536384197e-07, "loss": 0.0039, "step": 12791 }, { "epoch": 2.910580204778157, "grad_norm": 1.5501324460020283, "learning_rate": 4.656343759159737e-07, "loss": 0.0323, "step": 12792 }, { "epoch": 2.910807736063709, "grad_norm": 0.5282592532104862, "learning_rate": 4.655479797252468e-07, "loss": 0.004, "step": 12793 }, { "epoch": 2.9110352673492605, "grad_norm": 0.5778529138236115, "learning_rate": 4.6546158679342727e-07, "loss": 0.0046, "step": 12794 }, { "epoch": 2.9112627986348123, "grad_norm": 0.5059901168952242, "learning_rate": 4.653751971222806e-07, "loss": 0.0056, "step": 12795 }, { "epoch": 2.911490329920364, "grad_norm": 0.7116799537900728, "learning_rate": 4.652888107135727e-07, "loss": 0.0041, "step": 12796 }, { "epoch": 2.911717861205916, "grad_norm": 1.1487678763739047, "learning_rate": 4.652024275690689e-07, "loss": 0.0138, "step": 12797 }, { "epoch": 2.9119453924914676, "grad_norm": 0.8448036053590029, "learning_rate": 4.65116047690535e-07, "loss": 0.0096, "step": 12798 }, { "epoch": 2.9121729237770193, "grad_norm": 0.8793151111374307, "learning_rate": 4.6502967107973624e-07, "loss": 0.0151, "step": 12799 }, { "epoch": 2.912400455062571, "grad_norm": 0.8688384637729046, "learning_rate": 4.6494329773843785e-07, "loss": 0.0061, "step": 12800 }, { "epoch": 2.912627986348123, "grad_norm": 1.0969566184603772, "learning_rate": 4.6485692766840563e-07, "loss": 0.0171, "step": 12801 }, { "epoch": 2.9128555176336746, "grad_norm": 0.5276891204444404, "learning_rate": 4.647705608714043e-07, "loss": 0.0037, "step": 12802 }, { "epoch": 2.9130830489192263, "grad_norm": 0.9254774031321886, "learning_rate": 4.6468419734919927e-07, "loss": 0.013, "step": 12803 }, { "epoch": 2.913310580204778, "grad_norm": 0.4472995326316818, "learning_rate": 4.6459783710355555e-07, "loss": 0.0032, "step": 12804 }, { "epoch": 2.91353811149033, "grad_norm": 0.7573507731247006, "learning_rate": 4.645114801362385e-07, "loss": 0.0061, "step": 12805 }, { "epoch": 2.9137656427758816, "grad_norm": 2.25402784495481, "learning_rate": 4.644251264490128e-07, "loss": 0.0365, "step": 12806 }, { "epoch": 2.9139931740614333, "grad_norm": 1.412417669911625, "learning_rate": 4.643387760436433e-07, "loss": 0.0181, "step": 12807 }, { "epoch": 2.914220705346985, "grad_norm": 1.1115789660131465, "learning_rate": 4.6425242892189527e-07, "loss": 0.0199, "step": 12808 }, { "epoch": 2.914448236632537, "grad_norm": 0.911347850047623, "learning_rate": 4.641660850855329e-07, "loss": 0.0102, "step": 12809 }, { "epoch": 2.9146757679180886, "grad_norm": 0.811567023118777, "learning_rate": 4.6407974453632134e-07, "loss": 0.0161, "step": 12810 }, { "epoch": 2.9149032992036403, "grad_norm": 0.9037203928092857, "learning_rate": 4.639934072760252e-07, "loss": 0.0094, "step": 12811 }, { "epoch": 2.9151308304891925, "grad_norm": 1.116735085059527, "learning_rate": 4.639070733064087e-07, "loss": 0.0148, "step": 12812 }, { "epoch": 2.915358361774744, "grad_norm": 0.6024278290561492, "learning_rate": 4.6382074262923677e-07, "loss": 0.0052, "step": 12813 }, { "epoch": 2.915585893060296, "grad_norm": 0.5879763197675522, "learning_rate": 4.6373441524627346e-07, "loss": 0.0065, "step": 12814 }, { "epoch": 2.9158134243458473, "grad_norm": 0.5547460146788754, "learning_rate": 4.6364809115928366e-07, "loss": 0.0047, "step": 12815 }, { "epoch": 2.9160409556313995, "grad_norm": 0.7394545179701385, "learning_rate": 4.635617703700311e-07, "loss": 0.0087, "step": 12816 }, { "epoch": 2.916268486916951, "grad_norm": 1.2526324086017058, "learning_rate": 4.6347545288028046e-07, "loss": 0.017, "step": 12817 }, { "epoch": 2.916496018202503, "grad_norm": 0.46434956955537643, "learning_rate": 4.6338913869179586e-07, "loss": 0.004, "step": 12818 }, { "epoch": 2.9167235494880543, "grad_norm": 0.2588530401892584, "learning_rate": 4.63302827806341e-07, "loss": 0.0014, "step": 12819 }, { "epoch": 2.9169510807736065, "grad_norm": 1.5113190067024456, "learning_rate": 4.632165202256804e-07, "loss": 0.0221, "step": 12820 }, { "epoch": 2.9171786120591583, "grad_norm": 0.5928493980733982, "learning_rate": 4.6313021595157765e-07, "loss": 0.0036, "step": 12821 }, { "epoch": 2.91740614334471, "grad_norm": 0.8705362569493753, "learning_rate": 4.630439149857971e-07, "loss": 0.0108, "step": 12822 }, { "epoch": 2.917633674630262, "grad_norm": 0.9803503345179063, "learning_rate": 4.62957617330102e-07, "loss": 0.0224, "step": 12823 }, { "epoch": 2.9178612059158135, "grad_norm": 0.535398145439957, "learning_rate": 4.628713229862566e-07, "loss": 0.0055, "step": 12824 }, { "epoch": 2.9180887372013653, "grad_norm": 0.4450943921462671, "learning_rate": 4.6278503195602465e-07, "loss": 0.0054, "step": 12825 }, { "epoch": 2.918316268486917, "grad_norm": 0.5515181606338789, "learning_rate": 4.6269874424116926e-07, "loss": 0.0073, "step": 12826 }, { "epoch": 2.918543799772469, "grad_norm": 0.6104663923108186, "learning_rate": 4.626124598434544e-07, "loss": 0.0059, "step": 12827 }, { "epoch": 2.9187713310580206, "grad_norm": 0.4246998284843671, "learning_rate": 4.6252617876464333e-07, "loss": 0.0022, "step": 12828 }, { "epoch": 2.9189988623435723, "grad_norm": 0.21044014722915344, "learning_rate": 4.6243990100649993e-07, "loss": 0.002, "step": 12829 }, { "epoch": 2.919226393629124, "grad_norm": 0.45683685867282314, "learning_rate": 4.6235362657078705e-07, "loss": 0.0022, "step": 12830 }, { "epoch": 2.919453924914676, "grad_norm": 0.7639173845161749, "learning_rate": 4.6226735545926805e-07, "loss": 0.0128, "step": 12831 }, { "epoch": 2.9196814562002276, "grad_norm": 0.7947290008570426, "learning_rate": 4.621810876737065e-07, "loss": 0.0098, "step": 12832 }, { "epoch": 2.9199089874857793, "grad_norm": 0.5708879515873349, "learning_rate": 4.6209482321586513e-07, "loss": 0.0057, "step": 12833 }, { "epoch": 2.920136518771331, "grad_norm": 0.4297787354900985, "learning_rate": 4.6200856208750736e-07, "loss": 0.0039, "step": 12834 }, { "epoch": 2.920364050056883, "grad_norm": 0.7187611910909307, "learning_rate": 4.6192230429039587e-07, "loss": 0.009, "step": 12835 }, { "epoch": 2.9205915813424346, "grad_norm": 0.4601406375859492, "learning_rate": 4.6183604982629417e-07, "loss": 0.0045, "step": 12836 }, { "epoch": 2.9208191126279863, "grad_norm": 0.9021136871514787, "learning_rate": 4.617497986969646e-07, "loss": 0.009, "step": 12837 }, { "epoch": 2.921046643913538, "grad_norm": 0.25627070857408796, "learning_rate": 4.6166355090417e-07, "loss": 0.002, "step": 12838 }, { "epoch": 2.92127417519909, "grad_norm": 1.0323903879525793, "learning_rate": 4.615773064496737e-07, "loss": 0.0088, "step": 12839 }, { "epoch": 2.9215017064846416, "grad_norm": 1.502027316279452, "learning_rate": 4.614910653352375e-07, "loss": 0.0207, "step": 12840 }, { "epoch": 2.9217292377701933, "grad_norm": 0.5945660917033601, "learning_rate": 4.614048275626248e-07, "loss": 0.0072, "step": 12841 }, { "epoch": 2.921956769055745, "grad_norm": 1.4285405090382488, "learning_rate": 4.6131859313359757e-07, "loss": 0.023, "step": 12842 }, { "epoch": 2.922184300341297, "grad_norm": 0.34095067183881755, "learning_rate": 4.612323620499187e-07, "loss": 0.0022, "step": 12843 }, { "epoch": 2.9224118316268486, "grad_norm": 0.5751702471074731, "learning_rate": 4.6114613431335044e-07, "loss": 0.0036, "step": 12844 }, { "epoch": 2.9226393629124003, "grad_norm": 0.6631297780173653, "learning_rate": 4.6105990992565493e-07, "loss": 0.0137, "step": 12845 }, { "epoch": 2.922866894197952, "grad_norm": 0.5778201233907976, "learning_rate": 4.609736888885949e-07, "loss": 0.0055, "step": 12846 }, { "epoch": 2.923094425483504, "grad_norm": 1.7897542452773452, "learning_rate": 4.60887471203932e-07, "loss": 0.0187, "step": 12847 }, { "epoch": 2.9233219567690556, "grad_norm": 0.861812556098705, "learning_rate": 4.608012568734288e-07, "loss": 0.0072, "step": 12848 }, { "epoch": 2.9235494880546073, "grad_norm": 0.7364280010117673, "learning_rate": 4.6071504589884726e-07, "loss": 0.0119, "step": 12849 }, { "epoch": 2.923777019340159, "grad_norm": 0.823980226939319, "learning_rate": 4.6062883828194903e-07, "loss": 0.0058, "step": 12850 }, { "epoch": 2.9240045506257113, "grad_norm": 1.4546295011667902, "learning_rate": 4.605426340244965e-07, "loss": 0.0068, "step": 12851 }, { "epoch": 2.9242320819112626, "grad_norm": 0.7338862338496441, "learning_rate": 4.6045643312825123e-07, "loss": 0.0154, "step": 12852 }, { "epoch": 2.924459613196815, "grad_norm": 0.9197349730156921, "learning_rate": 4.603702355949753e-07, "loss": 0.0143, "step": 12853 }, { "epoch": 2.924687144482366, "grad_norm": 0.6112644846960276, "learning_rate": 4.6028404142642993e-07, "loss": 0.0067, "step": 12854 }, { "epoch": 2.9249146757679183, "grad_norm": 0.3918300011255161, "learning_rate": 4.6019785062437746e-07, "loss": 0.0029, "step": 12855 }, { "epoch": 2.9251422070534696, "grad_norm": 0.7338125557176478, "learning_rate": 4.601116631905791e-07, "loss": 0.0058, "step": 12856 }, { "epoch": 2.925369738339022, "grad_norm": 0.8142691839419626, "learning_rate": 4.6002547912679594e-07, "loss": 0.0069, "step": 12857 }, { "epoch": 2.925597269624573, "grad_norm": 0.832898052546025, "learning_rate": 4.599392984347903e-07, "loss": 0.0069, "step": 12858 }, { "epoch": 2.9258248009101253, "grad_norm": 0.8670898091592589, "learning_rate": 4.598531211163228e-07, "loss": 0.0086, "step": 12859 }, { "epoch": 2.926052332195677, "grad_norm": 0.452425675971998, "learning_rate": 4.5976694717315517e-07, "loss": 0.0037, "step": 12860 }, { "epoch": 2.926279863481229, "grad_norm": 0.5200559875751642, "learning_rate": 4.596807766070484e-07, "loss": 0.0041, "step": 12861 }, { "epoch": 2.9265073947667806, "grad_norm": 0.32181408384569055, "learning_rate": 4.595946094197641e-07, "loss": 0.0029, "step": 12862 }, { "epoch": 2.9267349260523323, "grad_norm": 1.5400575015477866, "learning_rate": 4.595084456130629e-07, "loss": 0.0328, "step": 12863 }, { "epoch": 2.926962457337884, "grad_norm": 0.5307165667263435, "learning_rate": 4.594222851887059e-07, "loss": 0.0069, "step": 12864 }, { "epoch": 2.927189988623436, "grad_norm": 0.5750257696970784, "learning_rate": 4.593361281484543e-07, "loss": 0.0041, "step": 12865 }, { "epoch": 2.9274175199089876, "grad_norm": 0.24400337945441675, "learning_rate": 4.592499744940687e-07, "loss": 0.0019, "step": 12866 }, { "epoch": 2.9276450511945393, "grad_norm": 1.6986041676003671, "learning_rate": 4.591638242273101e-07, "loss": 0.015, "step": 12867 }, { "epoch": 2.927872582480091, "grad_norm": 0.45571752409111754, "learning_rate": 4.590776773499395e-07, "loss": 0.005, "step": 12868 }, { "epoch": 2.928100113765643, "grad_norm": 0.9323877945388233, "learning_rate": 4.5899153386371686e-07, "loss": 0.0094, "step": 12869 }, { "epoch": 2.9283276450511946, "grad_norm": 1.1161241952884478, "learning_rate": 4.589053937704034e-07, "loss": 0.018, "step": 12870 }, { "epoch": 2.9285551763367463, "grad_norm": 0.9775937379853742, "learning_rate": 4.588192570717595e-07, "loss": 0.0139, "step": 12871 }, { "epoch": 2.928782707622298, "grad_norm": 0.5410077217747965, "learning_rate": 4.587331237695458e-07, "loss": 0.0064, "step": 12872 }, { "epoch": 2.92901023890785, "grad_norm": 1.2554492828222943, "learning_rate": 4.5864699386552234e-07, "loss": 0.0059, "step": 12873 }, { "epoch": 2.9292377701934016, "grad_norm": 0.7997475848331245, "learning_rate": 4.5856086736144973e-07, "loss": 0.0136, "step": 12874 }, { "epoch": 2.9294653014789533, "grad_norm": 0.11670628295144289, "learning_rate": 4.584747442590883e-07, "loss": 0.0008, "step": 12875 }, { "epoch": 2.929692832764505, "grad_norm": 1.7594213350244645, "learning_rate": 4.583886245601979e-07, "loss": 0.0211, "step": 12876 }, { "epoch": 2.929920364050057, "grad_norm": 1.0615806229249598, "learning_rate": 4.5830250826653905e-07, "loss": 0.0135, "step": 12877 }, { "epoch": 2.9301478953356086, "grad_norm": 1.3205975205441005, "learning_rate": 4.5821639537987144e-07, "loss": 0.0186, "step": 12878 }, { "epoch": 2.9303754266211604, "grad_norm": 1.7766986107556715, "learning_rate": 4.5813028590195553e-07, "loss": 0.0203, "step": 12879 }, { "epoch": 2.930602957906712, "grad_norm": 1.0522257381337652, "learning_rate": 4.580441798345507e-07, "loss": 0.0162, "step": 12880 }, { "epoch": 2.930830489192264, "grad_norm": 1.1662645010936166, "learning_rate": 4.5795807717941727e-07, "loss": 0.0185, "step": 12881 }, { "epoch": 2.9310580204778156, "grad_norm": 1.2277568979591007, "learning_rate": 4.5787197793831486e-07, "loss": 0.02, "step": 12882 }, { "epoch": 2.9312855517633674, "grad_norm": 1.1956604364030552, "learning_rate": 4.5778588211300295e-07, "loss": 0.01, "step": 12883 }, { "epoch": 2.931513083048919, "grad_norm": 0.9875790976549446, "learning_rate": 4.576997897052414e-07, "loss": 0.0183, "step": 12884 }, { "epoch": 2.931740614334471, "grad_norm": 0.4329636954176859, "learning_rate": 4.576137007167897e-07, "loss": 0.004, "step": 12885 }, { "epoch": 2.9319681456200226, "grad_norm": 1.6764446277450222, "learning_rate": 4.5752761514940764e-07, "loss": 0.0123, "step": 12886 }, { "epoch": 2.9321956769055744, "grad_norm": 0.8049782876398932, "learning_rate": 4.5744153300485435e-07, "loss": 0.0161, "step": 12887 }, { "epoch": 2.932423208191126, "grad_norm": 0.6568717856644241, "learning_rate": 4.5735545428488904e-07, "loss": 0.0128, "step": 12888 }, { "epoch": 2.932650739476678, "grad_norm": 1.330693791578059, "learning_rate": 4.572693789912715e-07, "loss": 0.0188, "step": 12889 }, { "epoch": 2.93287827076223, "grad_norm": 0.7719381244789265, "learning_rate": 4.5718330712576046e-07, "loss": 0.0094, "step": 12890 }, { "epoch": 2.9331058020477814, "grad_norm": 1.4509066804550983, "learning_rate": 4.570972386901154e-07, "loss": 0.0138, "step": 12891 }, { "epoch": 2.9333333333333336, "grad_norm": 1.1469001756076724, "learning_rate": 4.5701117368609505e-07, "loss": 0.0234, "step": 12892 }, { "epoch": 2.933560864618885, "grad_norm": 0.6101958662955994, "learning_rate": 4.56925112115459e-07, "loss": 0.0105, "step": 12893 }, { "epoch": 2.933788395904437, "grad_norm": 2.7014096081077996, "learning_rate": 4.5683905397996573e-07, "loss": 0.0293, "step": 12894 }, { "epoch": 2.9340159271899884, "grad_norm": 0.5873782334046819, "learning_rate": 4.5675299928137406e-07, "loss": 0.0096, "step": 12895 }, { "epoch": 2.9342434584755406, "grad_norm": 0.5723350730598887, "learning_rate": 4.566669480214432e-07, "loss": 0.0049, "step": 12896 }, { "epoch": 2.934470989761092, "grad_norm": 1.0927413721402173, "learning_rate": 4.565809002019314e-07, "loss": 0.0165, "step": 12897 }, { "epoch": 2.934698521046644, "grad_norm": 0.6914432651535706, "learning_rate": 4.564948558245977e-07, "loss": 0.0088, "step": 12898 }, { "epoch": 2.934926052332196, "grad_norm": 1.271744031840958, "learning_rate": 4.5640881489120067e-07, "loss": 0.0093, "step": 12899 }, { "epoch": 2.9351535836177476, "grad_norm": 1.332685645785161, "learning_rate": 4.5632277740349845e-07, "loss": 0.0113, "step": 12900 }, { "epoch": 2.9353811149032993, "grad_norm": 0.8432012468971435, "learning_rate": 4.5623674336324987e-07, "loss": 0.0123, "step": 12901 }, { "epoch": 2.935608646188851, "grad_norm": 1.4931022013026856, "learning_rate": 4.56150712772213e-07, "loss": 0.0139, "step": 12902 }, { "epoch": 2.935836177474403, "grad_norm": 1.1527058704326467, "learning_rate": 4.5606468563214664e-07, "loss": 0.0161, "step": 12903 }, { "epoch": 2.9360637087599546, "grad_norm": 0.7695683101821791, "learning_rate": 4.559786619448084e-07, "loss": 0.0151, "step": 12904 }, { "epoch": 2.9362912400455063, "grad_norm": 1.1233169319946035, "learning_rate": 4.558926417119569e-07, "loss": 0.009, "step": 12905 }, { "epoch": 2.936518771331058, "grad_norm": 0.581683001734931, "learning_rate": 4.558066249353503e-07, "loss": 0.0035, "step": 12906 }, { "epoch": 2.93674630261661, "grad_norm": 2.0010651071868613, "learning_rate": 4.5572061161674613e-07, "loss": 0.0257, "step": 12907 }, { "epoch": 2.9369738339021616, "grad_norm": 0.7543866845065976, "learning_rate": 4.5563460175790277e-07, "loss": 0.0123, "step": 12908 }, { "epoch": 2.9372013651877134, "grad_norm": 0.7864080888072773, "learning_rate": 4.555485953605778e-07, "loss": 0.0109, "step": 12909 }, { "epoch": 2.937428896473265, "grad_norm": 1.5908012927166757, "learning_rate": 4.554625924265295e-07, "loss": 0.0117, "step": 12910 }, { "epoch": 2.937656427758817, "grad_norm": 0.6737006112918561, "learning_rate": 4.5537659295751507e-07, "loss": 0.0051, "step": 12911 }, { "epoch": 2.9378839590443686, "grad_norm": 1.134139021961303, "learning_rate": 4.5529059695529253e-07, "loss": 0.0084, "step": 12912 }, { "epoch": 2.9381114903299204, "grad_norm": 2.0894469800257665, "learning_rate": 4.5520460442161946e-07, "loss": 0.0327, "step": 12913 }, { "epoch": 2.938339021615472, "grad_norm": 1.0157705494862908, "learning_rate": 4.551186153582532e-07, "loss": 0.0142, "step": 12914 }, { "epoch": 2.938566552901024, "grad_norm": 0.9568134519577708, "learning_rate": 4.5503262976695136e-07, "loss": 0.0072, "step": 12915 }, { "epoch": 2.9387940841865756, "grad_norm": 0.5676808023980217, "learning_rate": 4.5494664764947114e-07, "loss": 0.0072, "step": 12916 }, { "epoch": 2.9390216154721274, "grad_norm": 0.5071180332834717, "learning_rate": 4.548606690075703e-07, "loss": 0.0029, "step": 12917 }, { "epoch": 2.939249146757679, "grad_norm": 1.5122429605243006, "learning_rate": 4.547746938430056e-07, "loss": 0.019, "step": 12918 }, { "epoch": 2.939476678043231, "grad_norm": 0.8291276380345534, "learning_rate": 4.5468872215753434e-07, "loss": 0.0081, "step": 12919 }, { "epoch": 2.9397042093287826, "grad_norm": 0.9220415057509984, "learning_rate": 4.546027539529138e-07, "loss": 0.0175, "step": 12920 }, { "epoch": 2.9399317406143344, "grad_norm": 1.4194450581356663, "learning_rate": 4.545167892309008e-07, "loss": 0.0321, "step": 12921 }, { "epoch": 2.940159271899886, "grad_norm": 0.5946526272914447, "learning_rate": 4.544308279932526e-07, "loss": 0.0038, "step": 12922 }, { "epoch": 2.940386803185438, "grad_norm": 0.5644556643043922, "learning_rate": 4.5434487024172564e-07, "loss": 0.009, "step": 12923 }, { "epoch": 2.9406143344709896, "grad_norm": 0.7665052602167679, "learning_rate": 4.542589159780772e-07, "loss": 0.0062, "step": 12924 }, { "epoch": 2.9408418657565414, "grad_norm": 0.7454623583049726, "learning_rate": 4.5417296520406385e-07, "loss": 0.0074, "step": 12925 }, { "epoch": 2.941069397042093, "grad_norm": 0.905173103509957, "learning_rate": 4.5408701792144203e-07, "loss": 0.0097, "step": 12926 }, { "epoch": 2.941296928327645, "grad_norm": 0.8725732734510714, "learning_rate": 4.5400107413196863e-07, "loss": 0.0095, "step": 12927 }, { "epoch": 2.9415244596131966, "grad_norm": 0.8335744105325052, "learning_rate": 4.539151338374e-07, "loss": 0.0078, "step": 12928 }, { "epoch": 2.941751990898749, "grad_norm": 0.9056593971892082, "learning_rate": 4.53829197039493e-07, "loss": 0.0097, "step": 12929 }, { "epoch": 2.9419795221843, "grad_norm": 0.6560692552615115, "learning_rate": 4.5374326374000347e-07, "loss": 0.0041, "step": 12930 }, { "epoch": 2.9422070534698523, "grad_norm": 1.677333839942385, "learning_rate": 4.536573339406881e-07, "loss": 0.0176, "step": 12931 }, { "epoch": 2.9424345847554036, "grad_norm": 0.610323392146614, "learning_rate": 4.5357140764330313e-07, "loss": 0.0034, "step": 12932 }, { "epoch": 2.942662116040956, "grad_norm": 0.7568712460864627, "learning_rate": 4.534854848496044e-07, "loss": 0.0107, "step": 12933 }, { "epoch": 2.942889647326507, "grad_norm": 1.091878653116083, "learning_rate": 4.5339956556134836e-07, "loss": 0.0223, "step": 12934 }, { "epoch": 2.9431171786120593, "grad_norm": 0.4819687186116766, "learning_rate": 4.533136497802909e-07, "loss": 0.0046, "step": 12935 }, { "epoch": 2.9433447098976107, "grad_norm": 0.9186850188896792, "learning_rate": 4.532277375081881e-07, "loss": 0.0083, "step": 12936 }, { "epoch": 2.943572241183163, "grad_norm": 0.2696670696076296, "learning_rate": 4.5314182874679576e-07, "loss": 0.0013, "step": 12937 }, { "epoch": 2.9437997724687146, "grad_norm": 1.4027480344420253, "learning_rate": 4.5305592349786954e-07, "loss": 0.0257, "step": 12938 }, { "epoch": 2.9440273037542664, "grad_norm": 1.0948384384643022, "learning_rate": 4.529700217631655e-07, "loss": 0.0061, "step": 12939 }, { "epoch": 2.944254835039818, "grad_norm": 0.9113110726168094, "learning_rate": 4.52884123544439e-07, "loss": 0.0067, "step": 12940 }, { "epoch": 2.94448236632537, "grad_norm": 0.7444780187711354, "learning_rate": 4.5279822884344593e-07, "loss": 0.0066, "step": 12941 }, { "epoch": 2.9447098976109216, "grad_norm": 1.0035237071245735, "learning_rate": 4.527123376619415e-07, "loss": 0.0162, "step": 12942 }, { "epoch": 2.9449374288964734, "grad_norm": 0.9557276945366704, "learning_rate": 4.5262645000168166e-07, "loss": 0.0183, "step": 12943 }, { "epoch": 2.945164960182025, "grad_norm": 0.809043696818351, "learning_rate": 4.5254056586442135e-07, "loss": 0.0132, "step": 12944 }, { "epoch": 2.945392491467577, "grad_norm": 0.6052773316192123, "learning_rate": 4.5245468525191597e-07, "loss": 0.0056, "step": 12945 }, { "epoch": 2.9456200227531286, "grad_norm": 0.6074372410716321, "learning_rate": 4.52368808165921e-07, "loss": 0.0043, "step": 12946 }, { "epoch": 2.9458475540386804, "grad_norm": 1.2312578503348761, "learning_rate": 4.522829346081912e-07, "loss": 0.023, "step": 12947 }, { "epoch": 2.946075085324232, "grad_norm": 0.7816103689234343, "learning_rate": 4.52197064580482e-07, "loss": 0.0063, "step": 12948 }, { "epoch": 2.946302616609784, "grad_norm": 0.559983220241378, "learning_rate": 4.5211119808454823e-07, "loss": 0.0079, "step": 12949 }, { "epoch": 2.9465301478953356, "grad_norm": 0.5877047819369501, "learning_rate": 4.5202533512214514e-07, "loss": 0.0062, "step": 12950 }, { "epoch": 2.9467576791808874, "grad_norm": 1.3876451304472703, "learning_rate": 4.519394756950274e-07, "loss": 0.0179, "step": 12951 }, { "epoch": 2.946985210466439, "grad_norm": 1.6333315863237565, "learning_rate": 4.518536198049496e-07, "loss": 0.0337, "step": 12952 }, { "epoch": 2.947212741751991, "grad_norm": 0.9839921794984213, "learning_rate": 4.5176776745366706e-07, "loss": 0.0156, "step": 12953 }, { "epoch": 2.9474402730375426, "grad_norm": 0.8474181721142352, "learning_rate": 4.5168191864293374e-07, "loss": 0.0077, "step": 12954 }, { "epoch": 2.9476678043230944, "grad_norm": 1.0584314011465679, "learning_rate": 4.515960733745047e-07, "loss": 0.0263, "step": 12955 }, { "epoch": 2.947895335608646, "grad_norm": 1.031401748694576, "learning_rate": 4.5151023165013457e-07, "loss": 0.0096, "step": 12956 }, { "epoch": 2.948122866894198, "grad_norm": 1.5215573496479067, "learning_rate": 4.514243934715773e-07, "loss": 0.0096, "step": 12957 }, { "epoch": 2.9483503981797496, "grad_norm": 1.0788883714535888, "learning_rate": 4.5133855884058764e-07, "loss": 0.0221, "step": 12958 }, { "epoch": 2.9485779294653014, "grad_norm": 0.6507938771576465, "learning_rate": 4.512527277589196e-07, "loss": 0.0047, "step": 12959 }, { "epoch": 2.948805460750853, "grad_norm": 1.4557385139139163, "learning_rate": 4.5116690022832796e-07, "loss": 0.0209, "step": 12960 }, { "epoch": 2.949032992036405, "grad_norm": 0.6434215544231524, "learning_rate": 4.5108107625056614e-07, "loss": 0.0072, "step": 12961 }, { "epoch": 2.9492605233219567, "grad_norm": 0.182820710175442, "learning_rate": 4.509952558273889e-07, "loss": 0.0016, "step": 12962 }, { "epoch": 2.9494880546075084, "grad_norm": 1.4774706925197103, "learning_rate": 4.509094389605499e-07, "loss": 0.0288, "step": 12963 }, { "epoch": 2.94971558589306, "grad_norm": 1.2128257486236416, "learning_rate": 4.5082362565180305e-07, "loss": 0.016, "step": 12964 }, { "epoch": 2.949943117178612, "grad_norm": 0.7439990540452139, "learning_rate": 4.5073781590290233e-07, "loss": 0.0069, "step": 12965 }, { "epoch": 2.9501706484641637, "grad_norm": 0.46230274555958945, "learning_rate": 4.506520097156014e-07, "loss": 0.0051, "step": 12966 }, { "epoch": 2.9503981797497154, "grad_norm": 0.15749970977442224, "learning_rate": 4.5056620709165436e-07, "loss": 0.0012, "step": 12967 }, { "epoch": 2.9506257110352676, "grad_norm": 0.7456449600861982, "learning_rate": 4.504804080328143e-07, "loss": 0.008, "step": 12968 }, { "epoch": 2.950853242320819, "grad_norm": 0.7637801634110645, "learning_rate": 4.5039461254083525e-07, "loss": 0.0067, "step": 12969 }, { "epoch": 2.951080773606371, "grad_norm": 0.514175523676804, "learning_rate": 4.5030882061747063e-07, "loss": 0.0039, "step": 12970 }, { "epoch": 2.9513083048919224, "grad_norm": 0.13226417077187647, "learning_rate": 4.5022303226447365e-07, "loss": 0.0009, "step": 12971 }, { "epoch": 2.9515358361774746, "grad_norm": 0.6189618306993453, "learning_rate": 4.501372474835978e-07, "loss": 0.0103, "step": 12972 }, { "epoch": 2.951763367463026, "grad_norm": 2.664803724372606, "learning_rate": 4.5005146627659623e-07, "loss": 0.0522, "step": 12973 }, { "epoch": 2.951990898748578, "grad_norm": 0.7997483740596181, "learning_rate": 4.4996568864522266e-07, "loss": 0.0113, "step": 12974 }, { "epoch": 2.9522184300341294, "grad_norm": 0.4452432056516009, "learning_rate": 4.498799145912296e-07, "loss": 0.0024, "step": 12975 }, { "epoch": 2.9524459613196816, "grad_norm": 0.13525275010782112, "learning_rate": 4.497941441163703e-07, "loss": 0.001, "step": 12976 }, { "epoch": 2.9526734926052334, "grad_norm": 0.6051442550356323, "learning_rate": 4.4970837722239804e-07, "loss": 0.0055, "step": 12977 }, { "epoch": 2.952901023890785, "grad_norm": 1.2042856327468978, "learning_rate": 4.4962261391106517e-07, "loss": 0.0168, "step": 12978 }, { "epoch": 2.953128555176337, "grad_norm": 1.0959820643175207, "learning_rate": 4.4953685418412523e-07, "loss": 0.0101, "step": 12979 }, { "epoch": 2.9533560864618886, "grad_norm": 1.0737172860812532, "learning_rate": 4.4945109804333045e-07, "loss": 0.0106, "step": 12980 }, { "epoch": 2.9535836177474404, "grad_norm": 0.7272896491220827, "learning_rate": 4.4936534549043383e-07, "loss": 0.0097, "step": 12981 }, { "epoch": 2.953811149032992, "grad_norm": 0.8894946235187843, "learning_rate": 4.4927959652718804e-07, "loss": 0.0104, "step": 12982 }, { "epoch": 2.954038680318544, "grad_norm": 0.8524045600058668, "learning_rate": 4.491938511553451e-07, "loss": 0.0089, "step": 12983 }, { "epoch": 2.9542662116040956, "grad_norm": 0.45776508349608336, "learning_rate": 4.4910810937665813e-07, "loss": 0.0037, "step": 12984 }, { "epoch": 2.9544937428896474, "grad_norm": 0.5191072782246792, "learning_rate": 4.490223711928791e-07, "loss": 0.005, "step": 12985 }, { "epoch": 2.954721274175199, "grad_norm": 2.135268520751451, "learning_rate": 4.4893663660576077e-07, "loss": 0.0327, "step": 12986 }, { "epoch": 2.954948805460751, "grad_norm": 0.8721011233345001, "learning_rate": 4.4885090561705487e-07, "loss": 0.0147, "step": 12987 }, { "epoch": 2.9551763367463026, "grad_norm": 1.0183412234845357, "learning_rate": 4.4876517822851397e-07, "loss": 0.0095, "step": 12988 }, { "epoch": 2.9554038680318544, "grad_norm": 0.6441783683554027, "learning_rate": 4.486794544418903e-07, "loss": 0.0126, "step": 12989 }, { "epoch": 2.955631399317406, "grad_norm": 0.8146039958476368, "learning_rate": 4.485937342589353e-07, "loss": 0.0097, "step": 12990 }, { "epoch": 2.955858930602958, "grad_norm": 0.7714753794696021, "learning_rate": 4.4850801768140147e-07, "loss": 0.005, "step": 12991 }, { "epoch": 2.9560864618885097, "grad_norm": 1.0266183942038591, "learning_rate": 4.4842230471104036e-07, "loss": 0.0238, "step": 12992 }, { "epoch": 2.9563139931740614, "grad_norm": 1.3949180803966408, "learning_rate": 4.4833659534960426e-07, "loss": 0.0156, "step": 12993 }, { "epoch": 2.956541524459613, "grad_norm": 0.6668609512547711, "learning_rate": 4.482508895988445e-07, "loss": 0.0099, "step": 12994 }, { "epoch": 2.956769055745165, "grad_norm": 0.8386765953775356, "learning_rate": 4.481651874605127e-07, "loss": 0.0116, "step": 12995 }, { "epoch": 2.9569965870307167, "grad_norm": 1.1757502602149894, "learning_rate": 4.4807948893636085e-07, "loss": 0.024, "step": 12996 }, { "epoch": 2.9572241183162684, "grad_norm": 0.4083428577042723, "learning_rate": 4.4799379402813995e-07, "loss": 0.0026, "step": 12997 }, { "epoch": 2.95745164960182, "grad_norm": 0.9861136974180085, "learning_rate": 4.4790810273760194e-07, "loss": 0.0167, "step": 12998 }, { "epoch": 2.957679180887372, "grad_norm": 1.6861826457981608, "learning_rate": 4.478224150664978e-07, "loss": 0.0397, "step": 12999 }, { "epoch": 2.9579067121729237, "grad_norm": 1.2251954196314963, "learning_rate": 4.477367310165793e-07, "loss": 0.0072, "step": 13000 }, { "epoch": 2.9581342434584754, "grad_norm": 0.6735618622178475, "learning_rate": 4.4765105058959715e-07, "loss": 0.004, "step": 13001 }, { "epoch": 2.958361774744027, "grad_norm": 0.6449446594372824, "learning_rate": 4.475653737873027e-07, "loss": 0.0107, "step": 13002 }, { "epoch": 2.958589306029579, "grad_norm": 0.7970318492651226, "learning_rate": 4.474797006114473e-07, "loss": 0.0069, "step": 13003 }, { "epoch": 2.9588168373151307, "grad_norm": 0.740597804047752, "learning_rate": 4.473940310637814e-07, "loss": 0.0078, "step": 13004 }, { "epoch": 2.9590443686006824, "grad_norm": 1.044740804201778, "learning_rate": 4.4730836514605643e-07, "loss": 0.0174, "step": 13005 }, { "epoch": 2.959271899886234, "grad_norm": 0.9112710404188713, "learning_rate": 4.4722270286002316e-07, "loss": 0.0119, "step": 13006 }, { "epoch": 2.9594994311717864, "grad_norm": 0.6868480977878624, "learning_rate": 4.4713704420743195e-07, "loss": 0.0129, "step": 13007 }, { "epoch": 2.9597269624573377, "grad_norm": 1.0886503528694507, "learning_rate": 4.4705138919003394e-07, "loss": 0.0124, "step": 13008 }, { "epoch": 2.95995449374289, "grad_norm": 1.6117630924641129, "learning_rate": 4.4696573780957956e-07, "loss": 0.0088, "step": 13009 }, { "epoch": 2.960182025028441, "grad_norm": 0.7737448415234218, "learning_rate": 4.468800900678197e-07, "loss": 0.0061, "step": 13010 }, { "epoch": 2.9604095563139934, "grad_norm": 1.0259284566997526, "learning_rate": 4.467944459665043e-07, "loss": 0.0154, "step": 13011 }, { "epoch": 2.9606370875995447, "grad_norm": 0.629745865064031, "learning_rate": 4.467088055073843e-07, "loss": 0.0123, "step": 13012 }, { "epoch": 2.960864618885097, "grad_norm": 1.2045272221847976, "learning_rate": 4.466231686922098e-07, "loss": 0.0088, "step": 13013 }, { "epoch": 2.961092150170648, "grad_norm": 0.6623475819240708, "learning_rate": 4.4653753552273085e-07, "loss": 0.0081, "step": 13014 }, { "epoch": 2.9613196814562004, "grad_norm": 1.4938233150012732, "learning_rate": 4.464519060006979e-07, "loss": 0.0177, "step": 13015 }, { "epoch": 2.961547212741752, "grad_norm": 1.242490926110331, "learning_rate": 4.4636628012786084e-07, "loss": 0.0157, "step": 13016 }, { "epoch": 2.961774744027304, "grad_norm": 0.7611879702455836, "learning_rate": 4.462806579059702e-07, "loss": 0.0186, "step": 13017 }, { "epoch": 2.9620022753128556, "grad_norm": 0.3798248299776088, "learning_rate": 4.4619503933677534e-07, "loss": 0.0025, "step": 13018 }, { "epoch": 2.9622298065984074, "grad_norm": 1.6480302317978932, "learning_rate": 4.4610942442202653e-07, "loss": 0.0136, "step": 13019 }, { "epoch": 2.962457337883959, "grad_norm": 1.8125661331529002, "learning_rate": 4.4602381316347353e-07, "loss": 0.0371, "step": 13020 }, { "epoch": 2.962684869169511, "grad_norm": 1.3928169469703995, "learning_rate": 4.459382055628658e-07, "loss": 0.019, "step": 13021 }, { "epoch": 2.9629124004550627, "grad_norm": 1.6591017599365774, "learning_rate": 4.4585260162195337e-07, "loss": 0.0162, "step": 13022 }, { "epoch": 2.9631399317406144, "grad_norm": 1.3749963146079072, "learning_rate": 4.4576700134248557e-07, "loss": 0.015, "step": 13023 }, { "epoch": 2.963367463026166, "grad_norm": 0.7866499744548426, "learning_rate": 4.456814047262122e-07, "loss": 0.0068, "step": 13024 }, { "epoch": 2.963594994311718, "grad_norm": 0.7633822315127341, "learning_rate": 4.455958117748824e-07, "loss": 0.0049, "step": 13025 }, { "epoch": 2.9638225255972697, "grad_norm": 1.0277431927512486, "learning_rate": 4.455102224902455e-07, "loss": 0.01, "step": 13026 }, { "epoch": 2.9640500568828214, "grad_norm": 0.4401754467531657, "learning_rate": 4.454246368740512e-07, "loss": 0.0041, "step": 13027 }, { "epoch": 2.964277588168373, "grad_norm": 0.9723759933118012, "learning_rate": 4.453390549280482e-07, "loss": 0.0242, "step": 13028 }, { "epoch": 2.964505119453925, "grad_norm": 1.0565856768974469, "learning_rate": 4.4525347665398594e-07, "loss": 0.0176, "step": 13029 }, { "epoch": 2.9647326507394767, "grad_norm": 0.4492162173964103, "learning_rate": 4.4516790205361345e-07, "loss": 0.0046, "step": 13030 }, { "epoch": 2.9649601820250284, "grad_norm": 1.3852547217773672, "learning_rate": 4.450823311286798e-07, "loss": 0.0294, "step": 13031 }, { "epoch": 2.96518771331058, "grad_norm": 0.3119129442919913, "learning_rate": 4.4499676388093373e-07, "loss": 0.002, "step": 13032 }, { "epoch": 2.965415244596132, "grad_norm": 0.4660407006427118, "learning_rate": 4.4491120031212406e-07, "loss": 0.0029, "step": 13033 }, { "epoch": 2.9656427758816837, "grad_norm": 0.46552702122048384, "learning_rate": 4.4482564042399987e-07, "loss": 0.0032, "step": 13034 }, { "epoch": 2.9658703071672354, "grad_norm": 1.3103389623623833, "learning_rate": 4.447400842183093e-07, "loss": 0.0174, "step": 13035 }, { "epoch": 2.966097838452787, "grad_norm": 1.183774114083701, "learning_rate": 4.446545316968015e-07, "loss": 0.0077, "step": 13036 }, { "epoch": 2.966325369738339, "grad_norm": 1.0637448571364645, "learning_rate": 4.445689828612246e-07, "loss": 0.0131, "step": 13037 }, { "epoch": 2.9665529010238907, "grad_norm": 2.258306354764054, "learning_rate": 4.444834377133275e-07, "loss": 0.0384, "step": 13038 }, { "epoch": 2.9667804323094424, "grad_norm": 0.7313903576272953, "learning_rate": 4.4439789625485826e-07, "loss": 0.0068, "step": 13039 }, { "epoch": 2.967007963594994, "grad_norm": 0.770968131737806, "learning_rate": 4.4431235848756505e-07, "loss": 0.0087, "step": 13040 }, { "epoch": 2.967235494880546, "grad_norm": 0.5467705560257514, "learning_rate": 4.442268244131965e-07, "loss": 0.0064, "step": 13041 }, { "epoch": 2.9674630261660977, "grad_norm": 0.5321217875403266, "learning_rate": 4.4414129403350046e-07, "loss": 0.0067, "step": 13042 }, { "epoch": 2.9676905574516494, "grad_norm": 0.43672426695194005, "learning_rate": 4.440557673502253e-07, "loss": 0.0027, "step": 13043 }, { "epoch": 2.967918088737201, "grad_norm": 0.6836525467935451, "learning_rate": 4.439702443651189e-07, "loss": 0.0099, "step": 13044 }, { "epoch": 2.968145620022753, "grad_norm": 0.7020317344504288, "learning_rate": 4.43884725079929e-07, "loss": 0.0116, "step": 13045 }, { "epoch": 2.968373151308305, "grad_norm": 0.7715416931576158, "learning_rate": 4.4379920949640385e-07, "loss": 0.0093, "step": 13046 }, { "epoch": 2.9686006825938565, "grad_norm": 0.20252524279436837, "learning_rate": 4.4371369761629075e-07, "loss": 0.0014, "step": 13047 }, { "epoch": 2.9688282138794087, "grad_norm": 0.6392267446196775, "learning_rate": 4.4362818944133773e-07, "loss": 0.0077, "step": 13048 }, { "epoch": 2.96905574516496, "grad_norm": 0.9172206082555143, "learning_rate": 4.435426849732923e-07, "loss": 0.0095, "step": 13049 }, { "epoch": 2.969283276450512, "grad_norm": 0.5065829746136554, "learning_rate": 4.434571842139023e-07, "loss": 0.0054, "step": 13050 }, { "epoch": 2.9695108077360635, "grad_norm": 0.4495789083370564, "learning_rate": 4.433716871649149e-07, "loss": 0.0078, "step": 13051 }, { "epoch": 2.9697383390216157, "grad_norm": 0.8955499568945653, "learning_rate": 4.4328619382807736e-07, "loss": 0.0133, "step": 13052 }, { "epoch": 2.969965870307167, "grad_norm": 1.1476937802453042, "learning_rate": 4.4320070420513757e-07, "loss": 0.0205, "step": 13053 }, { "epoch": 2.970193401592719, "grad_norm": 0.555667335303818, "learning_rate": 4.431152182978421e-07, "loss": 0.0054, "step": 13054 }, { "epoch": 2.970420932878271, "grad_norm": 0.997656217649146, "learning_rate": 4.430297361079386e-07, "loss": 0.0231, "step": 13055 }, { "epoch": 2.9706484641638227, "grad_norm": 0.8167506518625944, "learning_rate": 4.42944257637174e-07, "loss": 0.007, "step": 13056 }, { "epoch": 2.9708759954493744, "grad_norm": 1.3376209246901054, "learning_rate": 4.4285878288729545e-07, "loss": 0.0157, "step": 13057 }, { "epoch": 2.971103526734926, "grad_norm": 1.407484943459465, "learning_rate": 4.4277331186004976e-07, "loss": 0.0135, "step": 13058 }, { "epoch": 2.971331058020478, "grad_norm": 0.5896438287993675, "learning_rate": 4.4268784455718374e-07, "loss": 0.0047, "step": 13059 }, { "epoch": 2.9715585893060297, "grad_norm": 0.3205494052596655, "learning_rate": 4.4260238098044446e-07, "loss": 0.0019, "step": 13060 }, { "epoch": 2.9717861205915814, "grad_norm": 1.6310006500478116, "learning_rate": 4.425169211315783e-07, "loss": 0.0343, "step": 13061 }, { "epoch": 2.972013651877133, "grad_norm": 0.6924411546228854, "learning_rate": 4.4243146501233216e-07, "loss": 0.0154, "step": 13062 }, { "epoch": 2.972241183162685, "grad_norm": 1.0832843192214043, "learning_rate": 4.423460126244526e-07, "loss": 0.0095, "step": 13063 }, { "epoch": 2.9724687144482367, "grad_norm": 0.8188108422907803, "learning_rate": 4.422605639696859e-07, "loss": 0.0114, "step": 13064 }, { "epoch": 2.9726962457337884, "grad_norm": 0.8989518078329068, "learning_rate": 4.421751190497786e-07, "loss": 0.0115, "step": 13065 }, { "epoch": 2.97292377701934, "grad_norm": 0.7160154465991968, "learning_rate": 4.4208967786647696e-07, "loss": 0.0065, "step": 13066 }, { "epoch": 2.973151308304892, "grad_norm": 0.8052395969578269, "learning_rate": 4.420042404215276e-07, "loss": 0.0104, "step": 13067 }, { "epoch": 2.9733788395904437, "grad_norm": 1.5767743585439837, "learning_rate": 4.4191880671667615e-07, "loss": 0.0095, "step": 13068 }, { "epoch": 2.9736063708759954, "grad_norm": 0.7709161238100516, "learning_rate": 4.4183337675366906e-07, "loss": 0.0165, "step": 13069 }, { "epoch": 2.973833902161547, "grad_norm": 0.8254572695262581, "learning_rate": 4.4174795053425256e-07, "loss": 0.0095, "step": 13070 }, { "epoch": 2.974061433447099, "grad_norm": 0.7472452962926969, "learning_rate": 4.4166252806017196e-07, "loss": 0.0081, "step": 13071 }, { "epoch": 2.9742889647326507, "grad_norm": 1.3086836301490434, "learning_rate": 4.4157710933317375e-07, "loss": 0.0113, "step": 13072 }, { "epoch": 2.9745164960182024, "grad_norm": 1.1922375362465303, "learning_rate": 4.4149169435500323e-07, "loss": 0.0207, "step": 13073 }, { "epoch": 2.974744027303754, "grad_norm": 0.5320530122128821, "learning_rate": 4.414062831274068e-07, "loss": 0.004, "step": 13074 }, { "epoch": 2.974971558589306, "grad_norm": 0.7107741763014488, "learning_rate": 4.413208756521294e-07, "loss": 0.0092, "step": 13075 }, { "epoch": 2.9751990898748577, "grad_norm": 1.0729605073022095, "learning_rate": 4.41235471930917e-07, "loss": 0.0282, "step": 13076 }, { "epoch": 2.9754266211604095, "grad_norm": 0.3298063869428333, "learning_rate": 4.411500719655151e-07, "loss": 0.0013, "step": 13077 }, { "epoch": 2.975654152445961, "grad_norm": 0.6467577793164062, "learning_rate": 4.4106467575766893e-07, "loss": 0.006, "step": 13078 }, { "epoch": 2.975881683731513, "grad_norm": 0.8474605061848557, "learning_rate": 4.4097928330912395e-07, "loss": 0.0041, "step": 13079 }, { "epoch": 2.9761092150170647, "grad_norm": 0.6277653716625928, "learning_rate": 4.4089389462162527e-07, "loss": 0.0053, "step": 13080 }, { "epoch": 2.9763367463026165, "grad_norm": 0.7337474005512183, "learning_rate": 4.408085096969184e-07, "loss": 0.0114, "step": 13081 }, { "epoch": 2.976564277588168, "grad_norm": 1.1755575196344, "learning_rate": 4.407231285367482e-07, "loss": 0.0135, "step": 13082 }, { "epoch": 2.9767918088737204, "grad_norm": 1.1515489021833885, "learning_rate": 4.4063775114285973e-07, "loss": 0.0089, "step": 13083 }, { "epoch": 2.9770193401592717, "grad_norm": 0.5481174808367675, "learning_rate": 4.4055237751699816e-07, "loss": 0.0082, "step": 13084 }, { "epoch": 2.977246871444824, "grad_norm": 1.055700117002259, "learning_rate": 4.4046700766090785e-07, "loss": 0.0174, "step": 13085 }, { "epoch": 2.9774744027303752, "grad_norm": 0.8751935186120131, "learning_rate": 4.4038164157633413e-07, "loss": 0.0112, "step": 13086 }, { "epoch": 2.9777019340159274, "grad_norm": 0.7085261376227496, "learning_rate": 4.4029627926502146e-07, "loss": 0.0066, "step": 13087 }, { "epoch": 2.9779294653014787, "grad_norm": 1.3731745493848102, "learning_rate": 4.402109207287149e-07, "loss": 0.0123, "step": 13088 }, { "epoch": 2.978156996587031, "grad_norm": 1.6494916909925768, "learning_rate": 4.401255659691584e-07, "loss": 0.0254, "step": 13089 }, { "epoch": 2.9783845278725822, "grad_norm": 0.7216394390967542, "learning_rate": 4.400402149880967e-07, "loss": 0.0095, "step": 13090 }, { "epoch": 2.9786120591581344, "grad_norm": 0.6235816218049344, "learning_rate": 4.3995486778727455e-07, "loss": 0.0056, "step": 13091 }, { "epoch": 2.9788395904436857, "grad_norm": 0.8868451681309851, "learning_rate": 4.398695243684357e-07, "loss": 0.0146, "step": 13092 }, { "epoch": 2.979067121729238, "grad_norm": 1.909324259137222, "learning_rate": 4.397841847333249e-07, "loss": 0.017, "step": 13093 }, { "epoch": 2.9792946530147897, "grad_norm": 0.6581375093284497, "learning_rate": 4.3969884888368597e-07, "loss": 0.008, "step": 13094 }, { "epoch": 2.9795221843003414, "grad_norm": 1.2213275348750907, "learning_rate": 4.3961351682126356e-07, "loss": 0.0195, "step": 13095 }, { "epoch": 2.979749715585893, "grad_norm": 0.8452415972986291, "learning_rate": 4.395281885478011e-07, "loss": 0.0171, "step": 13096 }, { "epoch": 2.979977246871445, "grad_norm": 0.8620185409601105, "learning_rate": 4.394428640650427e-07, "loss": 0.0136, "step": 13097 }, { "epoch": 2.9802047781569967, "grad_norm": 1.503281549844694, "learning_rate": 4.3935754337473264e-07, "loss": 0.0135, "step": 13098 }, { "epoch": 2.9804323094425484, "grad_norm": 0.6607434672994138, "learning_rate": 4.3927222647861397e-07, "loss": 0.0054, "step": 13099 }, { "epoch": 2.9806598407281, "grad_norm": 1.284810727110611, "learning_rate": 4.391869133784312e-07, "loss": 0.0198, "step": 13100 }, { "epoch": 2.980887372013652, "grad_norm": 1.0729867167370288, "learning_rate": 4.391016040759277e-07, "loss": 0.009, "step": 13101 }, { "epoch": 2.9811149032992037, "grad_norm": 2.6575688343686696, "learning_rate": 4.3901629857284646e-07, "loss": 0.0231, "step": 13102 }, { "epoch": 2.9813424345847555, "grad_norm": 0.61888864915851, "learning_rate": 4.3893099687093197e-07, "loss": 0.0111, "step": 13103 }, { "epoch": 2.981569965870307, "grad_norm": 0.8181433475107248, "learning_rate": 4.3884569897192684e-07, "loss": 0.0064, "step": 13104 }, { "epoch": 2.981797497155859, "grad_norm": 0.5813802891926447, "learning_rate": 4.387604048775748e-07, "loss": 0.0054, "step": 13105 }, { "epoch": 2.9820250284414107, "grad_norm": 1.2682624615726439, "learning_rate": 4.3867511458961886e-07, "loss": 0.0197, "step": 13106 }, { "epoch": 2.9822525597269625, "grad_norm": 0.7042283709265256, "learning_rate": 4.3858982810980265e-07, "loss": 0.0104, "step": 13107 }, { "epoch": 2.982480091012514, "grad_norm": 1.007314705383513, "learning_rate": 4.3850454543986885e-07, "loss": 0.0115, "step": 13108 }, { "epoch": 2.982707622298066, "grad_norm": 0.7830685613843933, "learning_rate": 4.3841926658156034e-07, "loss": 0.0032, "step": 13109 }, { "epoch": 2.9829351535836177, "grad_norm": 0.508860940396669, "learning_rate": 4.383339915366207e-07, "loss": 0.0055, "step": 13110 }, { "epoch": 2.9831626848691695, "grad_norm": 0.6621786965211814, "learning_rate": 4.382487203067921e-07, "loss": 0.0115, "step": 13111 }, { "epoch": 2.983390216154721, "grad_norm": 1.1127342332802488, "learning_rate": 4.381634528938178e-07, "loss": 0.0141, "step": 13112 }, { "epoch": 2.983617747440273, "grad_norm": 1.17150637944537, "learning_rate": 4.380781892994404e-07, "loss": 0.0094, "step": 13113 }, { "epoch": 2.9838452787258247, "grad_norm": 0.4766395342648714, "learning_rate": 4.3799292952540226e-07, "loss": 0.0084, "step": 13114 }, { "epoch": 2.9840728100113765, "grad_norm": 0.9198584211359192, "learning_rate": 4.379076735734463e-07, "loss": 0.0133, "step": 13115 }, { "epoch": 2.9843003412969282, "grad_norm": 0.6461178700803034, "learning_rate": 4.3782242144531474e-07, "loss": 0.0119, "step": 13116 }, { "epoch": 2.98452787258248, "grad_norm": 0.5672719203072825, "learning_rate": 4.377371731427503e-07, "loss": 0.0077, "step": 13117 }, { "epoch": 2.9847554038680317, "grad_norm": 0.7813834677888303, "learning_rate": 4.3765192866749485e-07, "loss": 0.0178, "step": 13118 }, { "epoch": 2.9849829351535835, "grad_norm": 1.2950367305959787, "learning_rate": 4.37566688021291e-07, "loss": 0.0295, "step": 13119 }, { "epoch": 2.9852104664391352, "grad_norm": 1.139880272361906, "learning_rate": 4.374814512058809e-07, "loss": 0.0144, "step": 13120 }, { "epoch": 2.985437997724687, "grad_norm": 0.3103910193166126, "learning_rate": 4.373962182230063e-07, "loss": 0.0014, "step": 13121 }, { "epoch": 2.985665529010239, "grad_norm": 0.6472077151322185, "learning_rate": 4.3731098907440944e-07, "loss": 0.0041, "step": 13122 }, { "epoch": 2.9858930602957905, "grad_norm": 0.6069157713419772, "learning_rate": 4.372257637618321e-07, "loss": 0.0102, "step": 13123 }, { "epoch": 2.9861205915813427, "grad_norm": 0.572917871752222, "learning_rate": 4.371405422870166e-07, "loss": 0.0041, "step": 13124 }, { "epoch": 2.986348122866894, "grad_norm": 0.7961195510441954, "learning_rate": 4.3705532465170413e-07, "loss": 0.007, "step": 13125 }, { "epoch": 2.986575654152446, "grad_norm": 0.2726670798106948, "learning_rate": 4.3697011085763664e-07, "loss": 0.0017, "step": 13126 }, { "epoch": 2.9868031854379975, "grad_norm": 0.5216413279153306, "learning_rate": 4.3688490090655593e-07, "loss": 0.0058, "step": 13127 }, { "epoch": 2.9870307167235497, "grad_norm": 0.9661545231442007, "learning_rate": 4.3679969480020303e-07, "loss": 0.0135, "step": 13128 }, { "epoch": 2.987258248009101, "grad_norm": 0.6911615026619379, "learning_rate": 4.3671449254031987e-07, "loss": 0.0082, "step": 13129 }, { "epoch": 2.987485779294653, "grad_norm": 0.4780444567593444, "learning_rate": 4.366292941286475e-07, "loss": 0.0071, "step": 13130 }, { "epoch": 2.9877133105802045, "grad_norm": 1.175019673402278, "learning_rate": 4.3654409956692763e-07, "loss": 0.0244, "step": 13131 }, { "epoch": 2.9879408418657567, "grad_norm": 2.1697727308168395, "learning_rate": 4.3645890885690113e-07, "loss": 0.0267, "step": 13132 }, { "epoch": 2.9881683731513085, "grad_norm": 0.6421374147987828, "learning_rate": 4.3637372200030905e-07, "loss": 0.0093, "step": 13133 }, { "epoch": 2.98839590443686, "grad_norm": 1.5967140349090247, "learning_rate": 4.362885389988929e-07, "loss": 0.0192, "step": 13134 }, { "epoch": 2.988623435722412, "grad_norm": 0.4153692388696304, "learning_rate": 4.362033598543932e-07, "loss": 0.0036, "step": 13135 }, { "epoch": 2.9888509670079637, "grad_norm": 0.8888048072961804, "learning_rate": 4.3611818456855117e-07, "loss": 0.0087, "step": 13136 }, { "epoch": 2.9890784982935155, "grad_norm": 0.8146130193243275, "learning_rate": 4.3603301314310737e-07, "loss": 0.0087, "step": 13137 }, { "epoch": 2.989306029579067, "grad_norm": 2.021634336997595, "learning_rate": 4.35947845579803e-07, "loss": 0.0113, "step": 13138 }, { "epoch": 2.989533560864619, "grad_norm": 0.46670436385591374, "learning_rate": 4.358626818803782e-07, "loss": 0.0021, "step": 13139 }, { "epoch": 2.9897610921501707, "grad_norm": 0.8929404608234807, "learning_rate": 4.357775220465738e-07, "loss": 0.0157, "step": 13140 }, { "epoch": 2.9899886234357225, "grad_norm": 0.6394876766642676, "learning_rate": 4.356923660801305e-07, "loss": 0.0081, "step": 13141 }, { "epoch": 2.9902161547212742, "grad_norm": 1.3924060966150789, "learning_rate": 4.3560721398278826e-07, "loss": 0.0265, "step": 13142 }, { "epoch": 2.990443686006826, "grad_norm": 0.9122838166435079, "learning_rate": 4.355220657562879e-07, "loss": 0.0182, "step": 13143 }, { "epoch": 2.9906712172923777, "grad_norm": 0.8498320262055103, "learning_rate": 4.3543692140236933e-07, "loss": 0.0099, "step": 13144 }, { "epoch": 2.9908987485779295, "grad_norm": 1.45881906974314, "learning_rate": 4.3535178092277325e-07, "loss": 0.0077, "step": 13145 }, { "epoch": 2.9911262798634812, "grad_norm": 0.6420652773102503, "learning_rate": 4.352666443192392e-07, "loss": 0.0068, "step": 13146 }, { "epoch": 2.991353811149033, "grad_norm": 0.3019692541963142, "learning_rate": 4.3518151159350745e-07, "loss": 0.0014, "step": 13147 }, { "epoch": 2.9915813424345847, "grad_norm": 1.0554747145560959, "learning_rate": 4.3509638274731814e-07, "loss": 0.0266, "step": 13148 }, { "epoch": 2.9918088737201365, "grad_norm": 1.7291076563256729, "learning_rate": 4.350112577824107e-07, "loss": 0.024, "step": 13149 }, { "epoch": 2.9920364050056882, "grad_norm": 0.6019380437109045, "learning_rate": 4.3492613670052537e-07, "loss": 0.0069, "step": 13150 }, { "epoch": 2.99226393629124, "grad_norm": 0.5468197339241909, "learning_rate": 4.3484101950340183e-07, "loss": 0.0036, "step": 13151 }, { "epoch": 2.9924914675767917, "grad_norm": 1.1268211581696552, "learning_rate": 4.3475590619277933e-07, "loss": 0.0116, "step": 13152 }, { "epoch": 2.9927189988623435, "grad_norm": 0.22584590394083512, "learning_rate": 4.346707967703978e-07, "loss": 0.0017, "step": 13153 }, { "epoch": 2.9929465301478952, "grad_norm": 0.9594455499117971, "learning_rate": 4.345856912379966e-07, "loss": 0.0142, "step": 13154 }, { "epoch": 2.993174061433447, "grad_norm": 1.0748244652725725, "learning_rate": 4.3450058959731525e-07, "loss": 0.01, "step": 13155 }, { "epoch": 2.9934015927189987, "grad_norm": 0.4446791839128591, "learning_rate": 4.344154918500928e-07, "loss": 0.0047, "step": 13156 }, { "epoch": 2.9936291240045505, "grad_norm": 0.7075296925763603, "learning_rate": 4.3433039799806867e-07, "loss": 0.0093, "step": 13157 }, { "epoch": 2.9938566552901023, "grad_norm": 1.2999423343687109, "learning_rate": 4.342453080429823e-07, "loss": 0.0156, "step": 13158 }, { "epoch": 2.994084186575654, "grad_norm": 0.7398500232538494, "learning_rate": 4.341602219865722e-07, "loss": 0.0083, "step": 13159 }, { "epoch": 2.9943117178612058, "grad_norm": 0.31478163483743077, "learning_rate": 4.3407513983057767e-07, "loss": 0.0032, "step": 13160 }, { "epoch": 2.994539249146758, "grad_norm": 2.0224508222635205, "learning_rate": 4.339900615767376e-07, "loss": 0.0141, "step": 13161 }, { "epoch": 2.9947667804323093, "grad_norm": 0.35911775634528936, "learning_rate": 4.33904987226791e-07, "loss": 0.0016, "step": 13162 }, { "epoch": 2.9949943117178615, "grad_norm": 0.6326811735449243, "learning_rate": 4.3381991678247614e-07, "loss": 0.0148, "step": 13163 }, { "epoch": 2.9952218430034128, "grad_norm": 23.946626734015787, "learning_rate": 4.337348502455325e-07, "loss": 0.1879, "step": 13164 }, { "epoch": 2.995449374288965, "grad_norm": 0.4200925748624823, "learning_rate": 4.33649787617698e-07, "loss": 0.0037, "step": 13165 }, { "epoch": 2.9956769055745163, "grad_norm": 0.7792581287297266, "learning_rate": 4.335647289007114e-07, "loss": 0.0055, "step": 13166 }, { "epoch": 2.9959044368600685, "grad_norm": 0.6494692515136692, "learning_rate": 4.334796740963114e-07, "loss": 0.0033, "step": 13167 }, { "epoch": 2.9961319681456198, "grad_norm": 0.27397844576381847, "learning_rate": 4.3339462320623567e-07, "loss": 0.0023, "step": 13168 }, { "epoch": 2.996359499431172, "grad_norm": 0.9207287615745284, "learning_rate": 4.333095762322233e-07, "loss": 0.0104, "step": 13169 }, { "epoch": 2.9965870307167233, "grad_norm": 0.31392698640478556, "learning_rate": 4.332245331760121e-07, "loss": 0.0014, "step": 13170 }, { "epoch": 2.9968145620022755, "grad_norm": 0.456454421594984, "learning_rate": 4.3313949403933996e-07, "loss": 0.0049, "step": 13171 }, { "epoch": 2.9970420932878272, "grad_norm": 2.0606101730195596, "learning_rate": 4.330544588239454e-07, "loss": 0.0219, "step": 13172 }, { "epoch": 2.997269624573379, "grad_norm": 0.6906430089218989, "learning_rate": 4.32969427531566e-07, "loss": 0.006, "step": 13173 }, { "epoch": 2.9974971558589307, "grad_norm": 1.4081735965716622, "learning_rate": 4.3288440016394007e-07, "loss": 0.017, "step": 13174 }, { "epoch": 2.9977246871444825, "grad_norm": 0.8387390691422044, "learning_rate": 4.327993767228049e-07, "loss": 0.0094, "step": 13175 }, { "epoch": 2.9979522184300342, "grad_norm": 0.9403419785264918, "learning_rate": 4.3271435720989857e-07, "loss": 0.0143, "step": 13176 }, { "epoch": 2.998179749715586, "grad_norm": 0.4818801905738472, "learning_rate": 4.326293416269588e-07, "loss": 0.0046, "step": 13177 }, { "epoch": 2.9984072810011377, "grad_norm": 1.766648809711717, "learning_rate": 4.3254432997572273e-07, "loss": 0.0425, "step": 13178 }, { "epoch": 2.9986348122866895, "grad_norm": 0.38441890032214177, "learning_rate": 4.324593222579282e-07, "loss": 0.005, "step": 13179 }, { "epoch": 2.9988623435722412, "grad_norm": 0.30652987200682813, "learning_rate": 4.3237431847531237e-07, "loss": 0.002, "step": 13180 }, { "epoch": 2.999089874857793, "grad_norm": 1.5101718667194055, "learning_rate": 4.3228931862961285e-07, "loss": 0.0094, "step": 13181 }, { "epoch": 2.9993174061433447, "grad_norm": 0.42062751019760025, "learning_rate": 4.322043227225666e-07, "loss": 0.0034, "step": 13182 }, { "epoch": 2.9995449374288965, "grad_norm": 0.6605794088083355, "learning_rate": 4.32119330755911e-07, "loss": 0.0077, "step": 13183 }, { "epoch": 2.9997724687144482, "grad_norm": 1.212817208167042, "learning_rate": 4.320343427313832e-07, "loss": 0.0139, "step": 13184 }, { "epoch": 3.0, "grad_norm": 0.7735554755247142, "learning_rate": 4.319493586507197e-07, "loss": 0.0159, "step": 13185 }, { "epoch": 3.0002275312855518, "grad_norm": 0.5386666308418633, "learning_rate": 4.3186437851565794e-07, "loss": 0.0038, "step": 13186 }, { "epoch": 3.0004550625711035, "grad_norm": 0.24542417219057622, "learning_rate": 4.317794023279344e-07, "loss": 0.0014, "step": 13187 }, { "epoch": 3.0006825938566553, "grad_norm": 0.37939108083991646, "learning_rate": 4.3169443008928627e-07, "loss": 0.0032, "step": 13188 }, { "epoch": 3.000910125142207, "grad_norm": 0.3983985914210711, "learning_rate": 4.316094618014499e-07, "loss": 0.0029, "step": 13189 }, { "epoch": 3.0011376564277588, "grad_norm": 0.4247950497057746, "learning_rate": 4.3152449746616177e-07, "loss": 0.0027, "step": 13190 }, { "epoch": 3.0013651877133105, "grad_norm": 0.5735333474144669, "learning_rate": 4.3143953708515886e-07, "loss": 0.0021, "step": 13191 }, { "epoch": 3.0015927189988623, "grad_norm": 0.3162496997453434, "learning_rate": 4.3135458066017705e-07, "loss": 0.0048, "step": 13192 }, { "epoch": 3.001820250284414, "grad_norm": 0.25101473767144755, "learning_rate": 4.3126962819295304e-07, "loss": 0.0026, "step": 13193 }, { "epoch": 3.0020477815699658, "grad_norm": 0.4566973864870464, "learning_rate": 4.3118467968522303e-07, "loss": 0.004, "step": 13194 }, { "epoch": 3.0022753128555175, "grad_norm": 0.40325924834624893, "learning_rate": 4.3109973513872333e-07, "loss": 0.0053, "step": 13195 }, { "epoch": 3.0025028441410693, "grad_norm": 0.19321846813363303, "learning_rate": 4.3101479455518993e-07, "loss": 0.002, "step": 13196 }, { "epoch": 3.002730375426621, "grad_norm": 0.2220881075518713, "learning_rate": 4.309298579363587e-07, "loss": 0.0014, "step": 13197 }, { "epoch": 3.0029579067121728, "grad_norm": 0.610024237096434, "learning_rate": 4.30844925283966e-07, "loss": 0.0049, "step": 13198 }, { "epoch": 3.0031854379977245, "grad_norm": 0.8506109752004815, "learning_rate": 4.307599965997471e-07, "loss": 0.0147, "step": 13199 }, { "epoch": 3.0034129692832763, "grad_norm": 0.30238758951480915, "learning_rate": 4.306750718854383e-07, "loss": 0.0021, "step": 13200 }, { "epoch": 3.003640500568828, "grad_norm": 0.2908588235356442, "learning_rate": 4.3059015114277514e-07, "loss": 0.0039, "step": 13201 }, { "epoch": 3.00386803185438, "grad_norm": 0.23798608594561188, "learning_rate": 4.3050523437349333e-07, "loss": 0.002, "step": 13202 }, { "epoch": 3.0040955631399315, "grad_norm": 0.319922033901026, "learning_rate": 4.304203215793283e-07, "loss": 0.0034, "step": 13203 }, { "epoch": 3.0043230944254833, "grad_norm": 0.33162318932369733, "learning_rate": 4.3033541276201546e-07, "loss": 0.0046, "step": 13204 }, { "epoch": 3.0045506257110355, "grad_norm": 0.3432937695043764, "learning_rate": 4.302505079232905e-07, "loss": 0.0097, "step": 13205 }, { "epoch": 3.0047781569965872, "grad_norm": 0.8763576504712742, "learning_rate": 4.301656070648881e-07, "loss": 0.0123, "step": 13206 }, { "epoch": 3.005005688282139, "grad_norm": 1.5486434260469313, "learning_rate": 4.3008071018854417e-07, "loss": 0.0152, "step": 13207 }, { "epoch": 3.0052332195676907, "grad_norm": 0.4008576159470576, "learning_rate": 4.299958172959935e-07, "loss": 0.0025, "step": 13208 }, { "epoch": 3.0054607508532425, "grad_norm": 0.7479290223372728, "learning_rate": 4.299109283889711e-07, "loss": 0.0157, "step": 13209 }, { "epoch": 3.0056882821387942, "grad_norm": 1.0366209905048536, "learning_rate": 4.298260434692121e-07, "loss": 0.0058, "step": 13210 }, { "epoch": 3.005915813424346, "grad_norm": 0.5876765945435218, "learning_rate": 4.2974116253845124e-07, "loss": 0.0049, "step": 13211 }, { "epoch": 3.0061433447098977, "grad_norm": 0.10901701340108433, "learning_rate": 4.296562855984236e-07, "loss": 0.0006, "step": 13212 }, { "epoch": 3.0063708759954495, "grad_norm": 0.4475494781924965, "learning_rate": 4.295714126508635e-07, "loss": 0.009, "step": 13213 }, { "epoch": 3.0065984072810013, "grad_norm": 0.7804841306340757, "learning_rate": 4.29486543697506e-07, "loss": 0.0036, "step": 13214 }, { "epoch": 3.006825938566553, "grad_norm": 0.41611548937064263, "learning_rate": 4.2940167874008553e-07, "loss": 0.006, "step": 13215 }, { "epoch": 3.0070534698521048, "grad_norm": 0.18329240414034326, "learning_rate": 4.293168177803363e-07, "loss": 0.0009, "step": 13216 }, { "epoch": 3.0072810011376565, "grad_norm": 0.28441562340279203, "learning_rate": 4.29231960819993e-07, "loss": 0.001, "step": 13217 }, { "epoch": 3.0075085324232083, "grad_norm": 0.82802337937374, "learning_rate": 4.2914710786078986e-07, "loss": 0.0069, "step": 13218 }, { "epoch": 3.00773606370876, "grad_norm": 0.4964021771847052, "learning_rate": 4.2906225890446135e-07, "loss": 0.0084, "step": 13219 }, { "epoch": 3.0079635949943118, "grad_norm": 0.2976887768776523, "learning_rate": 4.2897741395274134e-07, "loss": 0.0024, "step": 13220 }, { "epoch": 3.0081911262798635, "grad_norm": 0.6647303725516598, "learning_rate": 4.2889257300736383e-07, "loss": 0.0037, "step": 13221 }, { "epoch": 3.0084186575654153, "grad_norm": 0.6175313228390806, "learning_rate": 4.288077360700632e-07, "loss": 0.004, "step": 13222 }, { "epoch": 3.008646188850967, "grad_norm": 0.25837683677536777, "learning_rate": 4.2872290314257284e-07, "loss": 0.0015, "step": 13223 }, { "epoch": 3.0088737201365188, "grad_norm": 0.6659356633813082, "learning_rate": 4.2863807422662717e-07, "loss": 0.0064, "step": 13224 }, { "epoch": 3.0091012514220705, "grad_norm": 0.5444061617054972, "learning_rate": 4.2855324932395944e-07, "loss": 0.0032, "step": 13225 }, { "epoch": 3.0093287827076223, "grad_norm": 0.32881316872918415, "learning_rate": 4.284684284363036e-07, "loss": 0.0013, "step": 13226 }, { "epoch": 3.009556313993174, "grad_norm": 0.5328861137743416, "learning_rate": 4.283836115653933e-07, "loss": 0.0043, "step": 13227 }, { "epoch": 3.0097838452787258, "grad_norm": 0.22910483673576218, "learning_rate": 4.2829879871296163e-07, "loss": 0.0025, "step": 13228 }, { "epoch": 3.0100113765642775, "grad_norm": 0.11300731179770267, "learning_rate": 4.282139898807425e-07, "loss": 0.0006, "step": 13229 }, { "epoch": 3.0102389078498293, "grad_norm": 0.3105075009312532, "learning_rate": 4.281291850704687e-07, "loss": 0.0025, "step": 13230 }, { "epoch": 3.010466439135381, "grad_norm": 0.32854668665242814, "learning_rate": 4.280443842838743e-07, "loss": 0.0022, "step": 13231 }, { "epoch": 3.010693970420933, "grad_norm": 0.4545310835558744, "learning_rate": 4.279595875226915e-07, "loss": 0.0057, "step": 13232 }, { "epoch": 3.0109215017064845, "grad_norm": 1.3856620189857005, "learning_rate": 4.278747947886541e-07, "loss": 0.0107, "step": 13233 }, { "epoch": 3.0111490329920363, "grad_norm": 0.3392906291496042, "learning_rate": 4.2779000608349497e-07, "loss": 0.0037, "step": 13234 }, { "epoch": 3.011376564277588, "grad_norm": 0.42311118331058356, "learning_rate": 4.2770522140894675e-07, "loss": 0.0037, "step": 13235 }, { "epoch": 3.01160409556314, "grad_norm": 0.29476111052898624, "learning_rate": 4.276204407667425e-07, "loss": 0.003, "step": 13236 }, { "epoch": 3.0118316268486915, "grad_norm": 0.27330718794709163, "learning_rate": 4.2753566415861494e-07, "loss": 0.0023, "step": 13237 }, { "epoch": 3.0120591581342433, "grad_norm": 0.7146986886542046, "learning_rate": 4.2745089158629697e-07, "loss": 0.0073, "step": 13238 }, { "epoch": 3.012286689419795, "grad_norm": 0.38366844751786944, "learning_rate": 4.273661230515209e-07, "loss": 0.0038, "step": 13239 }, { "epoch": 3.012514220705347, "grad_norm": 0.7237817262228297, "learning_rate": 4.272813585560191e-07, "loss": 0.0079, "step": 13240 }, { "epoch": 3.0127417519908986, "grad_norm": 0.6758940172282167, "learning_rate": 4.271965981015246e-07, "loss": 0.0135, "step": 13241 }, { "epoch": 3.0129692832764503, "grad_norm": 0.5311332499669064, "learning_rate": 4.27111841689769e-07, "loss": 0.0033, "step": 13242 }, { "epoch": 3.013196814562002, "grad_norm": 0.28089810453884645, "learning_rate": 4.2702708932248514e-07, "loss": 0.0028, "step": 13243 }, { "epoch": 3.0134243458475543, "grad_norm": 0.5056496046159215, "learning_rate": 4.2694234100140486e-07, "loss": 0.007, "step": 13244 }, { "epoch": 3.013651877133106, "grad_norm": 0.1888272332609651, "learning_rate": 4.268575967282607e-07, "loss": 0.0013, "step": 13245 }, { "epoch": 3.0138794084186578, "grad_norm": 0.37823830647664375, "learning_rate": 4.267728565047842e-07, "loss": 0.0014, "step": 13246 }, { "epoch": 3.0141069397042095, "grad_norm": 0.22099485030107954, "learning_rate": 4.2668812033270737e-07, "loss": 0.0011, "step": 13247 }, { "epoch": 3.0143344709897613, "grad_norm": 0.49666637117177476, "learning_rate": 4.266033882137624e-07, "loss": 0.0026, "step": 13248 }, { "epoch": 3.014562002275313, "grad_norm": 1.3294980043139344, "learning_rate": 4.265186601496806e-07, "loss": 0.0043, "step": 13249 }, { "epoch": 3.0147895335608648, "grad_norm": 0.41310462731894493, "learning_rate": 4.2643393614219404e-07, "loss": 0.0019, "step": 13250 }, { "epoch": 3.0150170648464165, "grad_norm": 0.11703254223260336, "learning_rate": 4.2634921619303396e-07, "loss": 0.0008, "step": 13251 }, { "epoch": 3.0152445961319683, "grad_norm": 0.07455058954685463, "learning_rate": 4.2626450030393244e-07, "loss": 0.0004, "step": 13252 }, { "epoch": 3.01547212741752, "grad_norm": 0.566164894716168, "learning_rate": 4.261797884766204e-07, "loss": 0.0091, "step": 13253 }, { "epoch": 3.0156996587030718, "grad_norm": 0.19932217164810706, "learning_rate": 4.260950807128292e-07, "loss": 0.0013, "step": 13254 }, { "epoch": 3.0159271899886235, "grad_norm": 0.12167145130833555, "learning_rate": 4.260103770142906e-07, "loss": 0.001, "step": 13255 }, { "epoch": 3.0161547212741753, "grad_norm": 0.5490343673708646, "learning_rate": 4.259256773827351e-07, "loss": 0.0054, "step": 13256 }, { "epoch": 3.016382252559727, "grad_norm": 0.3009078353066032, "learning_rate": 4.258409818198944e-07, "loss": 0.0022, "step": 13257 }, { "epoch": 3.016609783845279, "grad_norm": 0.3685904625124778, "learning_rate": 4.2575629032749937e-07, "loss": 0.003, "step": 13258 }, { "epoch": 3.0168373151308305, "grad_norm": 0.5398801312461567, "learning_rate": 4.256716029072806e-07, "loss": 0.0024, "step": 13259 }, { "epoch": 3.0170648464163823, "grad_norm": 0.35019934688888193, "learning_rate": 4.2558691956096936e-07, "loss": 0.0017, "step": 13260 }, { "epoch": 3.017292377701934, "grad_norm": 0.2931509518753092, "learning_rate": 4.2550224029029616e-07, "loss": 0.0042, "step": 13261 }, { "epoch": 3.017519908987486, "grad_norm": 1.602942008751276, "learning_rate": 4.25417565096992e-07, "loss": 0.0073, "step": 13262 }, { "epoch": 3.0177474402730375, "grad_norm": 0.87523627629285, "learning_rate": 4.2533289398278715e-07, "loss": 0.0118, "step": 13263 }, { "epoch": 3.0179749715585893, "grad_norm": 0.3212124082988477, "learning_rate": 4.252482269494123e-07, "loss": 0.0017, "step": 13264 }, { "epoch": 3.018202502844141, "grad_norm": 0.26779180805747765, "learning_rate": 4.2516356399859804e-07, "loss": 0.002, "step": 13265 }, { "epoch": 3.018430034129693, "grad_norm": 0.18542594865203182, "learning_rate": 4.250789051320743e-07, "loss": 0.0015, "step": 13266 }, { "epoch": 3.0186575654152445, "grad_norm": 0.1494656203996284, "learning_rate": 4.2499425035157167e-07, "loss": 0.0011, "step": 13267 }, { "epoch": 3.0188850967007963, "grad_norm": 1.0283114206063841, "learning_rate": 4.249095996588202e-07, "loss": 0.0065, "step": 13268 }, { "epoch": 3.019112627986348, "grad_norm": 0.3088609691677827, "learning_rate": 4.248249530555503e-07, "loss": 0.0016, "step": 13269 }, { "epoch": 3.0193401592719, "grad_norm": 0.16462620095149041, "learning_rate": 4.247403105434915e-07, "loss": 0.001, "step": 13270 }, { "epoch": 3.0195676905574516, "grad_norm": 0.6146051152994135, "learning_rate": 4.2465567212437415e-07, "loss": 0.0051, "step": 13271 }, { "epoch": 3.0197952218430033, "grad_norm": 0.24924010019382456, "learning_rate": 4.2457103779992807e-07, "loss": 0.0013, "step": 13272 }, { "epoch": 3.020022753128555, "grad_norm": 0.670796700667198, "learning_rate": 4.2448640757188255e-07, "loss": 0.0026, "step": 13273 }, { "epoch": 3.020250284414107, "grad_norm": 0.3750197045376314, "learning_rate": 4.2440178144196794e-07, "loss": 0.0031, "step": 13274 }, { "epoch": 3.0204778156996586, "grad_norm": 0.1490163416622236, "learning_rate": 4.2431715941191323e-07, "loss": 0.0009, "step": 13275 }, { "epoch": 3.0207053469852103, "grad_norm": 0.23232361477833174, "learning_rate": 4.242325414834486e-07, "loss": 0.0025, "step": 13276 }, { "epoch": 3.020932878270762, "grad_norm": 0.45761417347177386, "learning_rate": 4.24147927658303e-07, "loss": 0.0024, "step": 13277 }, { "epoch": 3.021160409556314, "grad_norm": 0.46172402171050364, "learning_rate": 4.240633179382058e-07, "loss": 0.0055, "step": 13278 }, { "epoch": 3.0213879408418656, "grad_norm": 0.5908589793497232, "learning_rate": 4.239787123248866e-07, "loss": 0.0042, "step": 13279 }, { "epoch": 3.0216154721274173, "grad_norm": 0.4258799208455602, "learning_rate": 4.238941108200742e-07, "loss": 0.0056, "step": 13280 }, { "epoch": 3.021843003412969, "grad_norm": 0.7823711975448445, "learning_rate": 4.2380951342549786e-07, "loss": 0.0097, "step": 13281 }, { "epoch": 3.022070534698521, "grad_norm": 0.11296698660171897, "learning_rate": 4.2372492014288656e-07, "loss": 0.0007, "step": 13282 }, { "epoch": 3.022298065984073, "grad_norm": 0.15961049667700491, "learning_rate": 4.236403309739695e-07, "loss": 0.0012, "step": 13283 }, { "epoch": 3.0225255972696248, "grad_norm": 0.840001391654169, "learning_rate": 4.235557459204752e-07, "loss": 0.0134, "step": 13284 }, { "epoch": 3.0227531285551765, "grad_norm": 0.6094536509518094, "learning_rate": 4.2347116498413233e-07, "loss": 0.0055, "step": 13285 }, { "epoch": 3.0229806598407283, "grad_norm": 1.1519505419801799, "learning_rate": 4.233865881666702e-07, "loss": 0.0092, "step": 13286 }, { "epoch": 3.02320819112628, "grad_norm": 0.21685732968069352, "learning_rate": 4.233020154698164e-07, "loss": 0.0021, "step": 13287 }, { "epoch": 3.023435722411832, "grad_norm": 0.47534944383911876, "learning_rate": 4.2321744689530043e-07, "loss": 0.0047, "step": 13288 }, { "epoch": 3.0236632536973835, "grad_norm": 0.08527372241497183, "learning_rate": 4.231328824448501e-07, "loss": 0.0005, "step": 13289 }, { "epoch": 3.0238907849829353, "grad_norm": 0.3629463124381008, "learning_rate": 4.2304832212019405e-07, "loss": 0.0046, "step": 13290 }, { "epoch": 3.024118316268487, "grad_norm": 0.4762063265893794, "learning_rate": 4.229637659230606e-07, "loss": 0.0045, "step": 13291 }, { "epoch": 3.024345847554039, "grad_norm": 0.4425454846411092, "learning_rate": 4.228792138551775e-07, "loss": 0.0074, "step": 13292 }, { "epoch": 3.0245733788395905, "grad_norm": 0.14518536567251483, "learning_rate": 4.2279466591827315e-07, "loss": 0.0005, "step": 13293 }, { "epoch": 3.0248009101251423, "grad_norm": 1.2092220408217396, "learning_rate": 4.2271012211407546e-07, "loss": 0.0128, "step": 13294 }, { "epoch": 3.025028441410694, "grad_norm": 0.13282129937011347, "learning_rate": 4.226255824443126e-07, "loss": 0.0005, "step": 13295 }, { "epoch": 3.025255972696246, "grad_norm": 0.3707940557434067, "learning_rate": 4.225410469107121e-07, "loss": 0.0043, "step": 13296 }, { "epoch": 3.0254835039817976, "grad_norm": 2.541635916155825, "learning_rate": 4.224565155150017e-07, "loss": 0.0079, "step": 13297 }, { "epoch": 3.0257110352673493, "grad_norm": 0.4537898726711521, "learning_rate": 4.2237198825890944e-07, "loss": 0.0023, "step": 13298 }, { "epoch": 3.025938566552901, "grad_norm": 0.3167078501556244, "learning_rate": 4.2228746514416233e-07, "loss": 0.0014, "step": 13299 }, { "epoch": 3.026166097838453, "grad_norm": 0.3335731960450541, "learning_rate": 4.2220294617248836e-07, "loss": 0.0026, "step": 13300 }, { "epoch": 3.0263936291240046, "grad_norm": 0.3407482766734956, "learning_rate": 4.2211843134561464e-07, "loss": 0.0031, "step": 13301 }, { "epoch": 3.0266211604095563, "grad_norm": 0.6072461962403493, "learning_rate": 4.220339206652689e-07, "loss": 0.007, "step": 13302 }, { "epoch": 3.026848691695108, "grad_norm": 0.6065807594611108, "learning_rate": 4.219494141331779e-07, "loss": 0.0058, "step": 13303 }, { "epoch": 3.02707622298066, "grad_norm": 0.11005374133147039, "learning_rate": 4.2186491175106895e-07, "loss": 0.0006, "step": 13304 }, { "epoch": 3.0273037542662116, "grad_norm": 0.3642354809922653, "learning_rate": 4.217804135206695e-07, "loss": 0.0025, "step": 13305 }, { "epoch": 3.0275312855517633, "grad_norm": 0.4954602996600238, "learning_rate": 4.2169591944370576e-07, "loss": 0.0073, "step": 13306 }, { "epoch": 3.027758816837315, "grad_norm": 0.4019516641446374, "learning_rate": 4.2161142952190536e-07, "loss": 0.004, "step": 13307 }, { "epoch": 3.027986348122867, "grad_norm": 0.25096068991482584, "learning_rate": 4.2152694375699474e-07, "loss": 0.0016, "step": 13308 }, { "epoch": 3.0282138794084186, "grad_norm": 0.4016945172973851, "learning_rate": 4.2144246215070096e-07, "loss": 0.0021, "step": 13309 }, { "epoch": 3.0284414106939703, "grad_norm": 0.23895730202679197, "learning_rate": 4.213579847047503e-07, "loss": 0.0012, "step": 13310 }, { "epoch": 3.028668941979522, "grad_norm": 0.34648425516900166, "learning_rate": 4.212735114208694e-07, "loss": 0.0039, "step": 13311 }, { "epoch": 3.028896473265074, "grad_norm": 0.5391117349606003, "learning_rate": 4.2118904230078505e-07, "loss": 0.003, "step": 13312 }, { "epoch": 3.0291240045506256, "grad_norm": 0.6485909212268418, "learning_rate": 4.2110457734622314e-07, "loss": 0.0111, "step": 13313 }, { "epoch": 3.0293515358361773, "grad_norm": 0.39360969752410324, "learning_rate": 4.210201165589105e-07, "loss": 0.0035, "step": 13314 }, { "epoch": 3.029579067121729, "grad_norm": 0.9183200039392818, "learning_rate": 4.209356599405731e-07, "loss": 0.0097, "step": 13315 }, { "epoch": 3.029806598407281, "grad_norm": 0.2779246598914387, "learning_rate": 4.208512074929368e-07, "loss": 0.004, "step": 13316 }, { "epoch": 3.0300341296928326, "grad_norm": 0.35903418072647614, "learning_rate": 4.207667592177282e-07, "loss": 0.0042, "step": 13317 }, { "epoch": 3.0302616609783843, "grad_norm": 0.5678311450979274, "learning_rate": 4.2068231511667277e-07, "loss": 0.0059, "step": 13318 }, { "epoch": 3.030489192263936, "grad_norm": 0.5355240504550551, "learning_rate": 4.205978751914969e-07, "loss": 0.0072, "step": 13319 }, { "epoch": 3.030716723549488, "grad_norm": 0.6147906513452238, "learning_rate": 4.205134394439259e-07, "loss": 0.0037, "step": 13320 }, { "epoch": 3.03094425483504, "grad_norm": 0.5752923652803649, "learning_rate": 4.2042900787568586e-07, "loss": 0.0088, "step": 13321 }, { "epoch": 3.031171786120592, "grad_norm": 0.4970191132426975, "learning_rate": 4.203445804885023e-07, "loss": 0.0044, "step": 13322 }, { "epoch": 3.0313993174061435, "grad_norm": 0.35322292791543286, "learning_rate": 4.2026015728410043e-07, "loss": 0.0101, "step": 13323 }, { "epoch": 3.0316268486916953, "grad_norm": 0.2811293618937951, "learning_rate": 4.201757382642062e-07, "loss": 0.0023, "step": 13324 }, { "epoch": 3.031854379977247, "grad_norm": 0.6321939495157084, "learning_rate": 4.200913234305446e-07, "loss": 0.0057, "step": 13325 }, { "epoch": 3.032081911262799, "grad_norm": 0.45288497100763786, "learning_rate": 4.2000691278484134e-07, "loss": 0.004, "step": 13326 }, { "epoch": 3.0323094425483506, "grad_norm": 0.49265082793519355, "learning_rate": 4.1992250632882105e-07, "loss": 0.0045, "step": 13327 }, { "epoch": 3.0325369738339023, "grad_norm": 0.4533799053439082, "learning_rate": 4.198381040642093e-07, "loss": 0.0033, "step": 13328 }, { "epoch": 3.032764505119454, "grad_norm": 0.2021278030146354, "learning_rate": 4.197537059927311e-07, "loss": 0.0011, "step": 13329 }, { "epoch": 3.032992036405006, "grad_norm": 0.25157502414268507, "learning_rate": 4.1966931211611106e-07, "loss": 0.0014, "step": 13330 }, { "epoch": 3.0332195676905576, "grad_norm": 1.0601153072320053, "learning_rate": 4.195849224360743e-07, "loss": 0.011, "step": 13331 }, { "epoch": 3.0334470989761093, "grad_norm": 0.490596003290123, "learning_rate": 4.1950053695434535e-07, "loss": 0.0063, "step": 13332 }, { "epoch": 3.033674630261661, "grad_norm": 0.4875911665363358, "learning_rate": 4.194161556726494e-07, "loss": 0.003, "step": 13333 }, { "epoch": 3.033902161547213, "grad_norm": 0.3319855566743725, "learning_rate": 4.1933177859271064e-07, "loss": 0.0026, "step": 13334 }, { "epoch": 3.0341296928327646, "grad_norm": 0.5756981894967472, "learning_rate": 4.1924740571625345e-07, "loss": 0.0082, "step": 13335 }, { "epoch": 3.0343572241183163, "grad_norm": 0.39445435936475876, "learning_rate": 4.191630370450027e-07, "loss": 0.004, "step": 13336 }, { "epoch": 3.034584755403868, "grad_norm": 0.24823094317467337, "learning_rate": 4.190786725806823e-07, "loss": 0.0014, "step": 13337 }, { "epoch": 3.03481228668942, "grad_norm": 0.48555483021357054, "learning_rate": 4.189943123250168e-07, "loss": 0.0067, "step": 13338 }, { "epoch": 3.0350398179749716, "grad_norm": 1.0334829457614858, "learning_rate": 4.189099562797302e-07, "loss": 0.0051, "step": 13339 }, { "epoch": 3.0352673492605233, "grad_norm": 0.21078074773143182, "learning_rate": 4.188256044465469e-07, "loss": 0.0014, "step": 13340 }, { "epoch": 3.035494880546075, "grad_norm": 1.1287561261042192, "learning_rate": 4.187412568271905e-07, "loss": 0.023, "step": 13341 }, { "epoch": 3.035722411831627, "grad_norm": 0.05127502623202248, "learning_rate": 4.186569134233849e-07, "loss": 0.0001, "step": 13342 }, { "epoch": 3.0359499431171786, "grad_norm": 0.12312621362818257, "learning_rate": 4.185725742368545e-07, "loss": 0.0008, "step": 13343 }, { "epoch": 3.0361774744027303, "grad_norm": 0.674770526227735, "learning_rate": 4.1848823926932207e-07, "loss": 0.0034, "step": 13344 }, { "epoch": 3.036405005688282, "grad_norm": 0.6819752868248127, "learning_rate": 4.184039085225122e-07, "loss": 0.003, "step": 13345 }, { "epoch": 3.036632536973834, "grad_norm": 0.3612903495783116, "learning_rate": 4.1831958199814825e-07, "loss": 0.0021, "step": 13346 }, { "epoch": 3.0368600682593856, "grad_norm": 0.5450599695185131, "learning_rate": 4.1823525969795306e-07, "loss": 0.0046, "step": 13347 }, { "epoch": 3.0370875995449373, "grad_norm": 0.7426801421744023, "learning_rate": 4.181509416236508e-07, "loss": 0.0033, "step": 13348 }, { "epoch": 3.037315130830489, "grad_norm": 0.2943345404203919, "learning_rate": 4.1806662777696424e-07, "loss": 0.0021, "step": 13349 }, { "epoch": 3.037542662116041, "grad_norm": 0.6737147554860153, "learning_rate": 4.1798231815961693e-07, "loss": 0.0063, "step": 13350 }, { "epoch": 3.0377701934015926, "grad_norm": 0.45878452221368654, "learning_rate": 4.178980127733319e-07, "loss": 0.0027, "step": 13351 }, { "epoch": 3.0379977246871444, "grad_norm": 0.8676718262512086, "learning_rate": 4.178137116198323e-07, "loss": 0.0097, "step": 13352 }, { "epoch": 3.038225255972696, "grad_norm": 0.33810955024882017, "learning_rate": 4.17729414700841e-07, "loss": 0.002, "step": 13353 }, { "epoch": 3.038452787258248, "grad_norm": 0.5210266157491229, "learning_rate": 4.1764512201808063e-07, "loss": 0.0049, "step": 13354 }, { "epoch": 3.0386803185437996, "grad_norm": 0.7731132528704647, "learning_rate": 4.1756083357327464e-07, "loss": 0.0033, "step": 13355 }, { "epoch": 3.0389078498293514, "grad_norm": 0.7415469472439026, "learning_rate": 4.1747654936814485e-07, "loss": 0.0054, "step": 13356 }, { "epoch": 3.039135381114903, "grad_norm": 0.46636875392263244, "learning_rate": 4.1739226940441454e-07, "loss": 0.0019, "step": 13357 }, { "epoch": 3.039362912400455, "grad_norm": 0.3506948399879121, "learning_rate": 4.1730799368380593e-07, "loss": 0.0025, "step": 13358 }, { "epoch": 3.0395904436860066, "grad_norm": 0.5087745921470026, "learning_rate": 4.172237222080418e-07, "loss": 0.0079, "step": 13359 }, { "epoch": 3.039817974971559, "grad_norm": 0.8071053591063705, "learning_rate": 4.1713945497884405e-07, "loss": 0.0035, "step": 13360 }, { "epoch": 3.0400455062571106, "grad_norm": 1.3096138285410492, "learning_rate": 4.170551919979351e-07, "loss": 0.0061, "step": 13361 }, { "epoch": 3.0402730375426623, "grad_norm": 0.4142553856436488, "learning_rate": 4.1697093326703746e-07, "loss": 0.003, "step": 13362 }, { "epoch": 3.040500568828214, "grad_norm": 0.49454977128709093, "learning_rate": 4.1688667878787266e-07, "loss": 0.0079, "step": 13363 }, { "epoch": 3.040728100113766, "grad_norm": 1.1505653978806945, "learning_rate": 4.1680242856216316e-07, "loss": 0.0044, "step": 13364 }, { "epoch": 3.0409556313993176, "grad_norm": 1.1471565344208976, "learning_rate": 4.1671818259163087e-07, "loss": 0.0067, "step": 13365 }, { "epoch": 3.0411831626848693, "grad_norm": 0.12060786596518992, "learning_rate": 4.1663394087799723e-07, "loss": 0.0005, "step": 13366 }, { "epoch": 3.041410693970421, "grad_norm": 0.4537733380776681, "learning_rate": 4.1654970342298437e-07, "loss": 0.0054, "step": 13367 }, { "epoch": 3.041638225255973, "grad_norm": 0.9021989460575346, "learning_rate": 4.1646547022831355e-07, "loss": 0.0047, "step": 13368 }, { "epoch": 3.0418657565415246, "grad_norm": 0.577658087274182, "learning_rate": 4.16381241295707e-07, "loss": 0.0056, "step": 13369 }, { "epoch": 3.0420932878270763, "grad_norm": 0.6329763082637333, "learning_rate": 4.162970166268855e-07, "loss": 0.0049, "step": 13370 }, { "epoch": 3.042320819112628, "grad_norm": 0.2561689456579047, "learning_rate": 4.16212796223571e-07, "loss": 0.0022, "step": 13371 }, { "epoch": 3.04254835039818, "grad_norm": 0.21884440466311297, "learning_rate": 4.161285800874845e-07, "loss": 0.0022, "step": 13372 }, { "epoch": 3.0427758816837316, "grad_norm": 0.26083224164693913, "learning_rate": 4.160443682203472e-07, "loss": 0.0017, "step": 13373 }, { "epoch": 3.0430034129692833, "grad_norm": 1.5598925052539208, "learning_rate": 4.159601606238804e-07, "loss": 0.003, "step": 13374 }, { "epoch": 3.043230944254835, "grad_norm": 0.47765640062023546, "learning_rate": 4.158759572998049e-07, "loss": 0.0054, "step": 13375 }, { "epoch": 3.043458475540387, "grad_norm": 0.11508438155015333, "learning_rate": 4.157917582498422e-07, "loss": 0.0004, "step": 13376 }, { "epoch": 3.0436860068259386, "grad_norm": 0.4374723945743143, "learning_rate": 4.1570756347571256e-07, "loss": 0.0037, "step": 13377 }, { "epoch": 3.0439135381114903, "grad_norm": 0.2422893720836525, "learning_rate": 4.15623372979137e-07, "loss": 0.0008, "step": 13378 }, { "epoch": 3.044141069397042, "grad_norm": 0.3521758360501481, "learning_rate": 4.155391867618365e-07, "loss": 0.002, "step": 13379 }, { "epoch": 3.044368600682594, "grad_norm": 0.5882261913641719, "learning_rate": 4.154550048255311e-07, "loss": 0.0041, "step": 13380 }, { "epoch": 3.0445961319681456, "grad_norm": 1.2726646377610247, "learning_rate": 4.1537082717194184e-07, "loss": 0.0089, "step": 13381 }, { "epoch": 3.0448236632536974, "grad_norm": 0.31386656516374617, "learning_rate": 4.1528665380278875e-07, "loss": 0.0013, "step": 13382 }, { "epoch": 3.045051194539249, "grad_norm": 0.34350958480316524, "learning_rate": 4.152024847197926e-07, "loss": 0.0027, "step": 13383 }, { "epoch": 3.045278725824801, "grad_norm": 0.16747288976705263, "learning_rate": 4.151183199246735e-07, "loss": 0.0008, "step": 13384 }, { "epoch": 3.0455062571103526, "grad_norm": 0.2774288761685954, "learning_rate": 4.150341594191512e-07, "loss": 0.002, "step": 13385 }, { "epoch": 3.0457337883959044, "grad_norm": 0.46603080318871104, "learning_rate": 4.149500032049465e-07, "loss": 0.0056, "step": 13386 }, { "epoch": 3.045961319681456, "grad_norm": 0.41460474125129654, "learning_rate": 4.148658512837789e-07, "loss": 0.0047, "step": 13387 }, { "epoch": 3.046188850967008, "grad_norm": 37.121593955930905, "learning_rate": 4.147817036573684e-07, "loss": 0.0731, "step": 13388 }, { "epoch": 3.0464163822525596, "grad_norm": 0.7157692736567205, "learning_rate": 4.146975603274349e-07, "loss": 0.0036, "step": 13389 }, { "epoch": 3.0466439135381114, "grad_norm": 0.5277332736042848, "learning_rate": 4.146134212956983e-07, "loss": 0.0029, "step": 13390 }, { "epoch": 3.046871444823663, "grad_norm": 0.03715564094128927, "learning_rate": 4.1452928656387793e-07, "loss": 0.0002, "step": 13391 }, { "epoch": 3.047098976109215, "grad_norm": 0.5217938467425188, "learning_rate": 4.144451561336933e-07, "loss": 0.0044, "step": 13392 }, { "epoch": 3.0473265073947666, "grad_norm": 0.8084496160214651, "learning_rate": 4.143610300068644e-07, "loss": 0.0035, "step": 13393 }, { "epoch": 3.0475540386803184, "grad_norm": 0.5659722076567384, "learning_rate": 4.1427690818511003e-07, "loss": 0.0018, "step": 13394 }, { "epoch": 3.04778156996587, "grad_norm": 0.458491010703312, "learning_rate": 4.1419279067014985e-07, "loss": 0.0021, "step": 13395 }, { "epoch": 3.048009101251422, "grad_norm": 0.7899362798525797, "learning_rate": 4.141086774637028e-07, "loss": 0.005, "step": 13396 }, { "epoch": 3.0482366325369736, "grad_norm": 0.5860351177800834, "learning_rate": 4.1402456856748846e-07, "loss": 0.0032, "step": 13397 }, { "epoch": 3.0484641638225254, "grad_norm": 0.41623756006924834, "learning_rate": 4.1394046398322536e-07, "loss": 0.0035, "step": 13398 }, { "epoch": 3.0486916951080776, "grad_norm": 0.5028144393035772, "learning_rate": 4.138563637126326e-07, "loss": 0.0041, "step": 13399 }, { "epoch": 3.0489192263936293, "grad_norm": 1.3567793939750172, "learning_rate": 4.137722677574293e-07, "loss": 0.0058, "step": 13400 }, { "epoch": 3.049146757679181, "grad_norm": 0.4419470029423704, "learning_rate": 4.136881761193337e-07, "loss": 0.0029, "step": 13401 }, { "epoch": 3.049374288964733, "grad_norm": 0.5542918515362691, "learning_rate": 4.1360408880006487e-07, "loss": 0.0026, "step": 13402 }, { "epoch": 3.0496018202502846, "grad_norm": 0.18855359656223117, "learning_rate": 4.1352000580134153e-07, "loss": 0.0005, "step": 13403 }, { "epoch": 3.0498293515358363, "grad_norm": 1.2334814612621934, "learning_rate": 4.134359271248817e-07, "loss": 0.0062, "step": 13404 }, { "epoch": 3.050056882821388, "grad_norm": 0.46955647557934543, "learning_rate": 4.133518527724042e-07, "loss": 0.0065, "step": 13405 }, { "epoch": 3.05028441410694, "grad_norm": 0.7490338643516604, "learning_rate": 4.1326778274562706e-07, "loss": 0.0098, "step": 13406 }, { "epoch": 3.0505119453924916, "grad_norm": 1.0550305421995627, "learning_rate": 4.1318371704626894e-07, "loss": 0.0073, "step": 13407 }, { "epoch": 3.0507394766780433, "grad_norm": 0.2891686375207977, "learning_rate": 4.1309965567604726e-07, "loss": 0.0063, "step": 13408 }, { "epoch": 3.050967007963595, "grad_norm": 0.4216432515917695, "learning_rate": 4.13015598636681e-07, "loss": 0.0031, "step": 13409 }, { "epoch": 3.051194539249147, "grad_norm": 0.4224543503126501, "learning_rate": 4.1293154592988756e-07, "loss": 0.004, "step": 13410 }, { "epoch": 3.0514220705346986, "grad_norm": 2.1440596074090927, "learning_rate": 4.128474975573847e-07, "loss": 0.0138, "step": 13411 }, { "epoch": 3.0516496018202504, "grad_norm": 1.4632320131830878, "learning_rate": 4.1276345352089083e-07, "loss": 0.0053, "step": 13412 }, { "epoch": 3.051877133105802, "grad_norm": 0.39823616830692926, "learning_rate": 4.12679413822123e-07, "loss": 0.0045, "step": 13413 }, { "epoch": 3.052104664391354, "grad_norm": 0.8847517887142823, "learning_rate": 4.1259537846279926e-07, "loss": 0.0058, "step": 13414 }, { "epoch": 3.0523321956769056, "grad_norm": 0.5729422057617716, "learning_rate": 4.125113474446367e-07, "loss": 0.0074, "step": 13415 }, { "epoch": 3.0525597269624574, "grad_norm": 0.823300743425504, "learning_rate": 4.124273207693534e-07, "loss": 0.0045, "step": 13416 }, { "epoch": 3.052787258248009, "grad_norm": 0.24294167508378706, "learning_rate": 4.1234329843866624e-07, "loss": 0.0019, "step": 13417 }, { "epoch": 3.053014789533561, "grad_norm": 0.3344676500901769, "learning_rate": 4.122592804542925e-07, "loss": 0.0029, "step": 13418 }, { "epoch": 3.0532423208191126, "grad_norm": 0.45921525464459956, "learning_rate": 4.121752668179496e-07, "loss": 0.0043, "step": 13419 }, { "epoch": 3.0534698521046644, "grad_norm": 0.18781767900320662, "learning_rate": 4.1209125753135434e-07, "loss": 0.0011, "step": 13420 }, { "epoch": 3.053697383390216, "grad_norm": 1.3861363826374484, "learning_rate": 4.120072525962239e-07, "loss": 0.0074, "step": 13421 }, { "epoch": 3.053924914675768, "grad_norm": 0.6143434215249761, "learning_rate": 4.119232520142753e-07, "loss": 0.0072, "step": 13422 }, { "epoch": 3.0541524459613196, "grad_norm": 0.6216378174797651, "learning_rate": 4.11839255787225e-07, "loss": 0.0012, "step": 13423 }, { "epoch": 3.0543799772468714, "grad_norm": 0.6893291955864601, "learning_rate": 4.1175526391678997e-07, "loss": 0.005, "step": 13424 }, { "epoch": 3.054607508532423, "grad_norm": 0.07679442885475107, "learning_rate": 4.1167127640468667e-07, "loss": 0.0003, "step": 13425 }, { "epoch": 3.054835039817975, "grad_norm": 0.40285080161312936, "learning_rate": 4.1158729325263205e-07, "loss": 0.0015, "step": 13426 }, { "epoch": 3.0550625711035266, "grad_norm": 0.4081855235069103, "learning_rate": 4.115033144623421e-07, "loss": 0.0055, "step": 13427 }, { "epoch": 3.0552901023890784, "grad_norm": 0.7237126575984394, "learning_rate": 4.1141934003553346e-07, "loss": 0.0079, "step": 13428 }, { "epoch": 3.05551763367463, "grad_norm": 0.15523546182943726, "learning_rate": 4.113353699739225e-07, "loss": 0.0005, "step": 13429 }, { "epoch": 3.055745164960182, "grad_norm": 0.7839829843816255, "learning_rate": 4.112514042792251e-07, "loss": 0.0105, "step": 13430 }, { "epoch": 3.0559726962457336, "grad_norm": 1.6444179025225136, "learning_rate": 4.111674429531576e-07, "loss": 0.0178, "step": 13431 }, { "epoch": 3.0562002275312854, "grad_norm": 0.27913455110881497, "learning_rate": 4.110834859974358e-07, "loss": 0.0019, "step": 13432 }, { "epoch": 3.056427758816837, "grad_norm": 0.1461432439790756, "learning_rate": 4.1099953341377603e-07, "loss": 0.0009, "step": 13433 }, { "epoch": 3.056655290102389, "grad_norm": 0.2717317649778411, "learning_rate": 4.1091558520389375e-07, "loss": 0.0015, "step": 13434 }, { "epoch": 3.0568828213879407, "grad_norm": 0.1458893410909564, "learning_rate": 4.108316413695048e-07, "loss": 0.0009, "step": 13435 }, { "epoch": 3.0571103526734924, "grad_norm": 0.6054178552298642, "learning_rate": 4.10747701912325e-07, "loss": 0.0025, "step": 13436 }, { "epoch": 3.057337883959044, "grad_norm": 0.19832239873305488, "learning_rate": 4.106637668340696e-07, "loss": 0.0006, "step": 13437 }, { "epoch": 3.0575654152445964, "grad_norm": 0.669180290864434, "learning_rate": 4.105798361364544e-07, "loss": 0.0053, "step": 13438 }, { "epoch": 3.057792946530148, "grad_norm": 0.36031407169267665, "learning_rate": 4.1049590982119454e-07, "loss": 0.0053, "step": 13439 }, { "epoch": 3.0580204778157, "grad_norm": 0.2825824264188096, "learning_rate": 4.104119878900056e-07, "loss": 0.0024, "step": 13440 }, { "epoch": 3.0582480091012516, "grad_norm": 0.4159418431110561, "learning_rate": 4.1032807034460263e-07, "loss": 0.0009, "step": 13441 }, { "epoch": 3.0584755403868034, "grad_norm": 0.5351582800555152, "learning_rate": 4.102441571867005e-07, "loss": 0.0037, "step": 13442 }, { "epoch": 3.058703071672355, "grad_norm": 0.37727598926640205, "learning_rate": 4.1016024841801485e-07, "loss": 0.0022, "step": 13443 }, { "epoch": 3.058930602957907, "grad_norm": 0.23095194753585635, "learning_rate": 4.1007634404025996e-07, "loss": 0.0009, "step": 13444 }, { "epoch": 3.0591581342434586, "grad_norm": 0.26289701203846233, "learning_rate": 4.0999244405515117e-07, "loss": 0.002, "step": 13445 }, { "epoch": 3.0593856655290104, "grad_norm": 0.18233068654275902, "learning_rate": 4.099085484644028e-07, "loss": 0.0008, "step": 13446 }, { "epoch": 3.059613196814562, "grad_norm": 0.350369657603455, "learning_rate": 4.098246572697301e-07, "loss": 0.0013, "step": 13447 }, { "epoch": 3.059840728100114, "grad_norm": 1.2788551912255464, "learning_rate": 4.097407704728472e-07, "loss": 0.0137, "step": 13448 }, { "epoch": 3.0600682593856656, "grad_norm": 0.45593650703243216, "learning_rate": 4.096568880754686e-07, "loss": 0.0048, "step": 13449 }, { "epoch": 3.0602957906712174, "grad_norm": 0.7060339585435844, "learning_rate": 4.095730100793091e-07, "loss": 0.0054, "step": 13450 }, { "epoch": 3.060523321956769, "grad_norm": 0.3637271889130551, "learning_rate": 4.0948913648608244e-07, "loss": 0.0011, "step": 13451 }, { "epoch": 3.060750853242321, "grad_norm": 0.32580695928830306, "learning_rate": 4.094052672975033e-07, "loss": 0.0019, "step": 13452 }, { "epoch": 3.0609783845278726, "grad_norm": 0.25887891341868374, "learning_rate": 4.093214025152858e-07, "loss": 0.0006, "step": 13453 }, { "epoch": 3.0612059158134244, "grad_norm": 0.5700918695452434, "learning_rate": 4.092375421411435e-07, "loss": 0.0028, "step": 13454 }, { "epoch": 3.061433447098976, "grad_norm": 0.458789056939547, "learning_rate": 4.091536861767909e-07, "loss": 0.0022, "step": 13455 }, { "epoch": 3.061660978384528, "grad_norm": 0.19888494553381425, "learning_rate": 4.090698346239415e-07, "loss": 0.0007, "step": 13456 }, { "epoch": 3.0618885096700796, "grad_norm": 0.4806523982088987, "learning_rate": 4.089859874843094e-07, "loss": 0.0027, "step": 13457 }, { "epoch": 3.0621160409556314, "grad_norm": 0.26869692608231893, "learning_rate": 4.0890214475960793e-07, "loss": 0.0017, "step": 13458 }, { "epoch": 3.062343572241183, "grad_norm": 0.07978137989008778, "learning_rate": 4.08818306451551e-07, "loss": 0.0003, "step": 13459 }, { "epoch": 3.062571103526735, "grad_norm": 1.5312008424945143, "learning_rate": 4.087344725618521e-07, "loss": 0.0029, "step": 13460 }, { "epoch": 3.0627986348122866, "grad_norm": 0.34570081487922144, "learning_rate": 4.086506430922242e-07, "loss": 0.0027, "step": 13461 }, { "epoch": 3.0630261660978384, "grad_norm": 0.6533307632354496, "learning_rate": 4.085668180443811e-07, "loss": 0.0071, "step": 13462 }, { "epoch": 3.06325369738339, "grad_norm": 1.260368021090018, "learning_rate": 4.084829974200356e-07, "loss": 0.0024, "step": 13463 }, { "epoch": 3.063481228668942, "grad_norm": 0.3874773619513052, "learning_rate": 4.0839918122090156e-07, "loss": 0.0021, "step": 13464 }, { "epoch": 3.0637087599544937, "grad_norm": 0.8850099609789168, "learning_rate": 4.0831536944869106e-07, "loss": 0.013, "step": 13465 }, { "epoch": 3.0639362912400454, "grad_norm": 0.5722101454255005, "learning_rate": 4.0823156210511806e-07, "loss": 0.004, "step": 13466 }, { "epoch": 3.064163822525597, "grad_norm": 0.4667986489886474, "learning_rate": 4.081477591918948e-07, "loss": 0.0038, "step": 13467 }, { "epoch": 3.064391353811149, "grad_norm": 1.4370763581062984, "learning_rate": 4.0806396071073395e-07, "loss": 0.0224, "step": 13468 }, { "epoch": 3.0646188850967007, "grad_norm": 0.6838717032865566, "learning_rate": 4.079801666633487e-07, "loss": 0.005, "step": 13469 }, { "epoch": 3.0648464163822524, "grad_norm": 0.43593279322179335, "learning_rate": 4.0789637705145114e-07, "loss": 0.0035, "step": 13470 }, { "epoch": 3.065073947667804, "grad_norm": 0.6048631691248314, "learning_rate": 4.078125918767543e-07, "loss": 0.003, "step": 13471 }, { "epoch": 3.065301478953356, "grad_norm": 0.059570441778314456, "learning_rate": 4.077288111409703e-07, "loss": 0.0002, "step": 13472 }, { "epoch": 3.0655290102389077, "grad_norm": 0.39881911084698746, "learning_rate": 4.076450348458113e-07, "loss": 0.0054, "step": 13473 }, { "epoch": 3.0657565415244594, "grad_norm": 0.3821858072463764, "learning_rate": 4.075612629929898e-07, "loss": 0.0042, "step": 13474 }, { "epoch": 3.065984072810011, "grad_norm": 0.20173766453026748, "learning_rate": 4.0747749558421785e-07, "loss": 0.0009, "step": 13475 }, { "epoch": 3.066211604095563, "grad_norm": 0.3990339085346847, "learning_rate": 4.073937326212077e-07, "loss": 0.0043, "step": 13476 }, { "epoch": 3.066439135381115, "grad_norm": 0.6785755315565686, "learning_rate": 4.073099741056708e-07, "loss": 0.0051, "step": 13477 }, { "epoch": 3.066666666666667, "grad_norm": 0.3326119769802622, "learning_rate": 4.0722622003931956e-07, "loss": 0.0022, "step": 13478 }, { "epoch": 3.0668941979522186, "grad_norm": 0.2681029833737405, "learning_rate": 4.0714247042386565e-07, "loss": 0.0024, "step": 13479 }, { "epoch": 3.0671217292377704, "grad_norm": 1.951532507459147, "learning_rate": 4.0705872526102043e-07, "loss": 0.0244, "step": 13480 }, { "epoch": 3.067349260523322, "grad_norm": 1.705897947436398, "learning_rate": 4.0697498455249585e-07, "loss": 0.0113, "step": 13481 }, { "epoch": 3.067576791808874, "grad_norm": 0.5774455401958378, "learning_rate": 4.068912483000032e-07, "loss": 0.0075, "step": 13482 }, { "epoch": 3.0678043230944256, "grad_norm": 0.6035768565557693, "learning_rate": 4.0680751650525416e-07, "loss": 0.0041, "step": 13483 }, { "epoch": 3.0680318543799774, "grad_norm": 0.449350340079092, "learning_rate": 4.0672378916995966e-07, "loss": 0.0022, "step": 13484 }, { "epoch": 3.068259385665529, "grad_norm": 0.4578266306953242, "learning_rate": 4.0664006629583125e-07, "loss": 0.0032, "step": 13485 }, { "epoch": 3.068486916951081, "grad_norm": 0.8651822616087936, "learning_rate": 4.0655634788458006e-07, "loss": 0.0025, "step": 13486 }, { "epoch": 3.0687144482366326, "grad_norm": 0.5932394899687546, "learning_rate": 4.064726339379169e-07, "loss": 0.0046, "step": 13487 }, { "epoch": 3.0689419795221844, "grad_norm": 0.20604483164798665, "learning_rate": 4.0638892445755305e-07, "loss": 0.0015, "step": 13488 }, { "epoch": 3.069169510807736, "grad_norm": 0.8388336804415129, "learning_rate": 4.06305219445199e-07, "loss": 0.002, "step": 13489 }, { "epoch": 3.069397042093288, "grad_norm": 0.379847546016707, "learning_rate": 4.06221518902566e-07, "loss": 0.0044, "step": 13490 }, { "epoch": 3.0696245733788396, "grad_norm": 0.4199456790991891, "learning_rate": 4.061378228313644e-07, "loss": 0.0023, "step": 13491 }, { "epoch": 3.0698521046643914, "grad_norm": 0.2821504959836516, "learning_rate": 4.0605413123330476e-07, "loss": 0.0018, "step": 13492 }, { "epoch": 3.070079635949943, "grad_norm": 0.6219998559944057, "learning_rate": 4.059704441100979e-07, "loss": 0.0034, "step": 13493 }, { "epoch": 3.070307167235495, "grad_norm": 0.8040910791459764, "learning_rate": 4.058867614634538e-07, "loss": 0.0046, "step": 13494 }, { "epoch": 3.0705346985210467, "grad_norm": 0.6139647214148617, "learning_rate": 4.0580308329508315e-07, "loss": 0.0084, "step": 13495 }, { "epoch": 3.0707622298065984, "grad_norm": 0.8856898476080379, "learning_rate": 4.057194096066959e-07, "loss": 0.0087, "step": 13496 }, { "epoch": 3.07098976109215, "grad_norm": 0.4028729253205752, "learning_rate": 4.056357404000026e-07, "loss": 0.0043, "step": 13497 }, { "epoch": 3.071217292377702, "grad_norm": 0.8331899048493742, "learning_rate": 4.055520756767128e-07, "loss": 0.0076, "step": 13498 }, { "epoch": 3.0714448236632537, "grad_norm": 0.5124714155087006, "learning_rate": 4.054684154385366e-07, "loss": 0.003, "step": 13499 }, { "epoch": 3.0716723549488054, "grad_norm": 0.601438382940817, "learning_rate": 4.053847596871843e-07, "loss": 0.0042, "step": 13500 }, { "epoch": 3.071899886234357, "grad_norm": 0.7362516188356609, "learning_rate": 4.0530110842436494e-07, "loss": 0.0045, "step": 13501 }, { "epoch": 3.072127417519909, "grad_norm": 0.9889994097453253, "learning_rate": 4.0521746165178865e-07, "loss": 0.0046, "step": 13502 }, { "epoch": 3.0723549488054607, "grad_norm": 0.3826789230679254, "learning_rate": 4.0513381937116487e-07, "loss": 0.003, "step": 13503 }, { "epoch": 3.0725824800910124, "grad_norm": 0.6023931495695178, "learning_rate": 4.050501815842034e-07, "loss": 0.0028, "step": 13504 }, { "epoch": 3.072810011376564, "grad_norm": 0.2553326086661939, "learning_rate": 4.0496654829261323e-07, "loss": 0.0015, "step": 13505 }, { "epoch": 3.073037542662116, "grad_norm": 0.2893468299845937, "learning_rate": 4.048829194981038e-07, "loss": 0.002, "step": 13506 }, { "epoch": 3.0732650739476677, "grad_norm": 0.5265528734443193, "learning_rate": 4.0479929520238456e-07, "loss": 0.0028, "step": 13507 }, { "epoch": 3.0734926052332194, "grad_norm": 0.04006804723294966, "learning_rate": 4.047156754071642e-07, "loss": 0.0002, "step": 13508 }, { "epoch": 3.073720136518771, "grad_norm": 0.6359909732511456, "learning_rate": 4.046320601141522e-07, "loss": 0.0036, "step": 13509 }, { "epoch": 3.073947667804323, "grad_norm": 0.6214212686628351, "learning_rate": 4.045484493250573e-07, "loss": 0.0079, "step": 13510 }, { "epoch": 3.0741751990898747, "grad_norm": 0.2617041400843389, "learning_rate": 4.0446484304158815e-07, "loss": 0.0011, "step": 13511 }, { "epoch": 3.0744027303754264, "grad_norm": 0.8942028585787498, "learning_rate": 4.0438124126545396e-07, "loss": 0.0068, "step": 13512 }, { "epoch": 3.074630261660978, "grad_norm": 0.6104577003704682, "learning_rate": 4.042976439983629e-07, "loss": 0.006, "step": 13513 }, { "epoch": 3.07485779294653, "grad_norm": 0.7426089598347656, "learning_rate": 4.0421405124202407e-07, "loss": 0.0084, "step": 13514 }, { "epoch": 3.0750853242320817, "grad_norm": 0.9095859348016209, "learning_rate": 4.0413046299814547e-07, "loss": 0.0165, "step": 13515 }, { "epoch": 3.075312855517634, "grad_norm": 0.29965037110855863, "learning_rate": 4.0404687926843583e-07, "loss": 0.0009, "step": 13516 }, { "epoch": 3.0755403868031856, "grad_norm": 0.6781928054383012, "learning_rate": 4.039633000546034e-07, "loss": 0.0023, "step": 13517 }, { "epoch": 3.0757679180887374, "grad_norm": 0.5191831293048532, "learning_rate": 4.038797253583561e-07, "loss": 0.0039, "step": 13518 }, { "epoch": 3.075995449374289, "grad_norm": 0.7899624816113024, "learning_rate": 4.0379615518140235e-07, "loss": 0.0054, "step": 13519 }, { "epoch": 3.076222980659841, "grad_norm": 0.2862571879602212, "learning_rate": 4.0371258952545e-07, "loss": 0.0012, "step": 13520 }, { "epoch": 3.0764505119453927, "grad_norm": 0.3609070847752029, "learning_rate": 4.036290283922072e-07, "loss": 0.0046, "step": 13521 }, { "epoch": 3.0766780432309444, "grad_norm": 0.3279330229363698, "learning_rate": 4.0354547178338146e-07, "loss": 0.0013, "step": 13522 }, { "epoch": 3.076905574516496, "grad_norm": 0.8650267264014051, "learning_rate": 4.0346191970068086e-07, "loss": 0.0074, "step": 13523 }, { "epoch": 3.077133105802048, "grad_norm": 0.2980318933682432, "learning_rate": 4.0337837214581294e-07, "loss": 0.0013, "step": 13524 }, { "epoch": 3.0773606370875997, "grad_norm": 0.7031814696212748, "learning_rate": 4.0329482912048504e-07, "loss": 0.0066, "step": 13525 }, { "epoch": 3.0775881683731514, "grad_norm": 0.7250220796937153, "learning_rate": 4.0321129062640484e-07, "loss": 0.0018, "step": 13526 }, { "epoch": 3.077815699658703, "grad_norm": 0.9631171942885438, "learning_rate": 4.031277566652796e-07, "loss": 0.0084, "step": 13527 }, { "epoch": 3.078043230944255, "grad_norm": 0.9843975234270441, "learning_rate": 4.030442272388169e-07, "loss": 0.013, "step": 13528 }, { "epoch": 3.0782707622298067, "grad_norm": 0.5867201855762284, "learning_rate": 4.0296070234872363e-07, "loss": 0.0024, "step": 13529 }, { "epoch": 3.0784982935153584, "grad_norm": 0.4798442575271047, "learning_rate": 4.0287718199670675e-07, "loss": 0.0018, "step": 13530 }, { "epoch": 3.07872582480091, "grad_norm": 0.10622723173889774, "learning_rate": 4.0279366618447375e-07, "loss": 0.0003, "step": 13531 }, { "epoch": 3.078953356086462, "grad_norm": 0.2511788675595275, "learning_rate": 4.0271015491373093e-07, "loss": 0.0019, "step": 13532 }, { "epoch": 3.0791808873720137, "grad_norm": 0.43200785909256784, "learning_rate": 4.0262664818618575e-07, "loss": 0.0048, "step": 13533 }, { "epoch": 3.0794084186575654, "grad_norm": 0.3679149019649993, "learning_rate": 4.0254314600354444e-07, "loss": 0.0025, "step": 13534 }, { "epoch": 3.079635949943117, "grad_norm": 0.7073380711528056, "learning_rate": 4.024596483675139e-07, "loss": 0.0044, "step": 13535 }, { "epoch": 3.079863481228669, "grad_norm": 0.6251814007335117, "learning_rate": 4.023761552798007e-07, "loss": 0.0074, "step": 13536 }, { "epoch": 3.0800910125142207, "grad_norm": 0.16928878365387706, "learning_rate": 4.0229266674211094e-07, "loss": 0.0005, "step": 13537 }, { "epoch": 3.0803185437997724, "grad_norm": 0.6390202507512602, "learning_rate": 4.0220918275615134e-07, "loss": 0.0047, "step": 13538 }, { "epoch": 3.080546075085324, "grad_norm": 1.3119272569521194, "learning_rate": 4.0212570332362783e-07, "loss": 0.0104, "step": 13539 }, { "epoch": 3.080773606370876, "grad_norm": 0.09504452550595477, "learning_rate": 4.0204222844624713e-07, "loss": 0.0004, "step": 13540 }, { "epoch": 3.0810011376564277, "grad_norm": 0.7637773975528258, "learning_rate": 4.0195875812571465e-07, "loss": 0.0069, "step": 13541 }, { "epoch": 3.0812286689419794, "grad_norm": 0.7648693249237041, "learning_rate": 4.018752923637367e-07, "loss": 0.0046, "step": 13542 }, { "epoch": 3.081456200227531, "grad_norm": 0.33109352184381263, "learning_rate": 4.0179183116201943e-07, "loss": 0.0027, "step": 13543 }, { "epoch": 3.081683731513083, "grad_norm": 0.5656691315448165, "learning_rate": 4.01708374522268e-07, "loss": 0.003, "step": 13544 }, { "epoch": 3.0819112627986347, "grad_norm": 0.677299470830207, "learning_rate": 4.0162492244618867e-07, "loss": 0.0066, "step": 13545 }, { "epoch": 3.0821387940841865, "grad_norm": 0.8306678858960598, "learning_rate": 4.0154147493548657e-07, "loss": 0.0073, "step": 13546 }, { "epoch": 3.082366325369738, "grad_norm": 0.14276294860039734, "learning_rate": 4.014580319918678e-07, "loss": 0.0005, "step": 13547 }, { "epoch": 3.08259385665529, "grad_norm": 0.4785240083799407, "learning_rate": 4.0137459361703734e-07, "loss": 0.0038, "step": 13548 }, { "epoch": 3.0828213879408417, "grad_norm": 0.4642119187049624, "learning_rate": 4.0129115981270046e-07, "loss": 0.0037, "step": 13549 }, { "epoch": 3.0830489192263935, "grad_norm": 0.3257355008829364, "learning_rate": 4.012077305805629e-07, "loss": 0.003, "step": 13550 }, { "epoch": 3.083276450511945, "grad_norm": 0.2180387438661478, "learning_rate": 4.011243059223292e-07, "loss": 0.0007, "step": 13551 }, { "epoch": 3.083503981797497, "grad_norm": 0.3715475533272962, "learning_rate": 4.0104088583970477e-07, "loss": 0.0015, "step": 13552 }, { "epoch": 3.0837315130830487, "grad_norm": 0.6099323621188637, "learning_rate": 4.0095747033439436e-07, "loss": 0.0054, "step": 13553 }, { "epoch": 3.0839590443686005, "grad_norm": 0.30146580144497387, "learning_rate": 4.0087405940810314e-07, "loss": 0.0008, "step": 13554 }, { "epoch": 3.0841865756541527, "grad_norm": 0.35610448393313, "learning_rate": 4.0079065306253567e-07, "loss": 0.0023, "step": 13555 }, { "epoch": 3.0844141069397044, "grad_norm": 0.743919526138895, "learning_rate": 4.007072512993965e-07, "loss": 0.0062, "step": 13556 }, { "epoch": 3.084641638225256, "grad_norm": 0.5455529090734754, "learning_rate": 4.006238541203905e-07, "loss": 0.0079, "step": 13557 }, { "epoch": 3.084869169510808, "grad_norm": 0.36175686240764326, "learning_rate": 4.005404615272217e-07, "loss": 0.0025, "step": 13558 }, { "epoch": 3.0850967007963597, "grad_norm": 0.9768796548908047, "learning_rate": 4.00457073521595e-07, "loss": 0.0131, "step": 13559 }, { "epoch": 3.0853242320819114, "grad_norm": 0.10026463431277852, "learning_rate": 4.0037369010521464e-07, "loss": 0.0003, "step": 13560 }, { "epoch": 3.085551763367463, "grad_norm": 0.18264915847032992, "learning_rate": 4.002903112797844e-07, "loss": 0.0011, "step": 13561 }, { "epoch": 3.085779294653015, "grad_norm": 0.4696295407392139, "learning_rate": 4.002069370470088e-07, "loss": 0.0018, "step": 13562 }, { "epoch": 3.0860068259385667, "grad_norm": 0.7347120481714878, "learning_rate": 4.001235674085915e-07, "loss": 0.0111, "step": 13563 }, { "epoch": 3.0862343572241184, "grad_norm": 0.7797820590351052, "learning_rate": 4.0004020236623695e-07, "loss": 0.0068, "step": 13564 }, { "epoch": 3.08646188850967, "grad_norm": 0.46764849593653324, "learning_rate": 3.9995684192164847e-07, "loss": 0.0042, "step": 13565 }, { "epoch": 3.086689419795222, "grad_norm": 0.5584339096724338, "learning_rate": 3.9987348607653005e-07, "loss": 0.0031, "step": 13566 }, { "epoch": 3.0869169510807737, "grad_norm": 0.3861417483976141, "learning_rate": 3.997901348325854e-07, "loss": 0.0016, "step": 13567 }, { "epoch": 3.0871444823663254, "grad_norm": 1.0970534012387603, "learning_rate": 3.997067881915177e-07, "loss": 0.0092, "step": 13568 }, { "epoch": 3.087372013651877, "grad_norm": 1.3697239414868274, "learning_rate": 3.996234461550308e-07, "loss": 0.0071, "step": 13569 }, { "epoch": 3.087599544937429, "grad_norm": 0.5239828565681448, "learning_rate": 3.995401087248278e-07, "loss": 0.0042, "step": 13570 }, { "epoch": 3.0878270762229807, "grad_norm": 0.5265032298043754, "learning_rate": 3.994567759026123e-07, "loss": 0.0031, "step": 13571 }, { "epoch": 3.0880546075085324, "grad_norm": 0.8118432668899249, "learning_rate": 3.993734476900869e-07, "loss": 0.0078, "step": 13572 }, { "epoch": 3.088282138794084, "grad_norm": 0.807972360979287, "learning_rate": 3.992901240889552e-07, "loss": 0.0053, "step": 13573 }, { "epoch": 3.088509670079636, "grad_norm": 0.7744621941523958, "learning_rate": 3.9920680510092015e-07, "loss": 0.0053, "step": 13574 }, { "epoch": 3.0887372013651877, "grad_norm": 1.3629847602181746, "learning_rate": 3.991234907276842e-07, "loss": 0.0041, "step": 13575 }, { "epoch": 3.0889647326507395, "grad_norm": 0.45699384731576764, "learning_rate": 3.9904018097095055e-07, "loss": 0.0031, "step": 13576 }, { "epoch": 3.089192263936291, "grad_norm": 0.4407578260940569, "learning_rate": 3.9895687583242164e-07, "loss": 0.0019, "step": 13577 }, { "epoch": 3.089419795221843, "grad_norm": 0.26474474065535314, "learning_rate": 3.9887357531380053e-07, "loss": 0.002, "step": 13578 }, { "epoch": 3.0896473265073947, "grad_norm": 0.3835659645611336, "learning_rate": 3.987902794167892e-07, "loss": 0.0028, "step": 13579 }, { "epoch": 3.0898748577929465, "grad_norm": 1.1406293941910703, "learning_rate": 3.987069881430902e-07, "loss": 0.0054, "step": 13580 }, { "epoch": 3.090102389078498, "grad_norm": 1.1296369740330046, "learning_rate": 3.986237014944062e-07, "loss": 0.0076, "step": 13581 }, { "epoch": 3.09032992036405, "grad_norm": 0.4110055793642066, "learning_rate": 3.985404194724388e-07, "loss": 0.0032, "step": 13582 }, { "epoch": 3.0905574516496017, "grad_norm": 1.5106815713457586, "learning_rate": 3.9845714207889073e-07, "loss": 0.0079, "step": 13583 }, { "epoch": 3.0907849829351535, "grad_norm": 0.7623898681866864, "learning_rate": 3.983738693154636e-07, "loss": 0.0056, "step": 13584 }, { "epoch": 3.091012514220705, "grad_norm": 0.9677212142897534, "learning_rate": 3.982906011838598e-07, "loss": 0.0077, "step": 13585 }, { "epoch": 3.091240045506257, "grad_norm": 0.39990403284246717, "learning_rate": 3.982073376857808e-07, "loss": 0.0045, "step": 13586 }, { "epoch": 3.0914675767918087, "grad_norm": 0.45909619841024957, "learning_rate": 3.981240788229282e-07, "loss": 0.004, "step": 13587 }, { "epoch": 3.0916951080773605, "grad_norm": 0.709703864886982, "learning_rate": 3.980408245970044e-07, "loss": 0.0024, "step": 13588 }, { "epoch": 3.0919226393629122, "grad_norm": 0.24523024131224455, "learning_rate": 3.979575750097099e-07, "loss": 0.0034, "step": 13589 }, { "epoch": 3.092150170648464, "grad_norm": 0.5246001926924099, "learning_rate": 3.978743300627473e-07, "loss": 0.0028, "step": 13590 }, { "epoch": 3.0923777019340157, "grad_norm": 0.24749212955159777, "learning_rate": 3.977910897578171e-07, "loss": 0.001, "step": 13591 }, { "epoch": 3.0926052332195675, "grad_norm": 0.2643815722689115, "learning_rate": 3.97707854096621e-07, "loss": 0.001, "step": 13592 }, { "epoch": 3.0928327645051192, "grad_norm": 0.08010226423736888, "learning_rate": 3.9762462308086033e-07, "loss": 0.0004, "step": 13593 }, { "epoch": 3.0930602957906714, "grad_norm": 1.2460223077436854, "learning_rate": 3.975413967122356e-07, "loss": 0.0163, "step": 13594 }, { "epoch": 3.093287827076223, "grad_norm": 0.49492700601622125, "learning_rate": 3.9745817499244834e-07, "loss": 0.0047, "step": 13595 }, { "epoch": 3.093515358361775, "grad_norm": 0.42186415625234636, "learning_rate": 3.973749579231992e-07, "loss": 0.0021, "step": 13596 }, { "epoch": 3.0937428896473267, "grad_norm": 0.7661258372900503, "learning_rate": 3.972917455061891e-07, "loss": 0.006, "step": 13597 }, { "epoch": 3.0939704209328784, "grad_norm": 1.2162572569500174, "learning_rate": 3.9720853774311866e-07, "loss": 0.0146, "step": 13598 }, { "epoch": 3.09419795221843, "grad_norm": 0.04116373893204648, "learning_rate": 3.9712533463568846e-07, "loss": 0.0001, "step": 13599 }, { "epoch": 3.094425483503982, "grad_norm": 0.4469404322826402, "learning_rate": 3.970421361855993e-07, "loss": 0.0015, "step": 13600 }, { "epoch": 3.0946530147895337, "grad_norm": 0.41874861178475675, "learning_rate": 3.9695894239455123e-07, "loss": 0.003, "step": 13601 }, { "epoch": 3.0948805460750854, "grad_norm": 0.7112831421924185, "learning_rate": 3.968757532642448e-07, "loss": 0.0079, "step": 13602 }, { "epoch": 3.095108077360637, "grad_norm": 0.8727681559324996, "learning_rate": 3.9679256879637995e-07, "loss": 0.0025, "step": 13603 }, { "epoch": 3.095335608646189, "grad_norm": 0.20211042535559315, "learning_rate": 3.9670938899265735e-07, "loss": 0.0014, "step": 13604 }, { "epoch": 3.0955631399317407, "grad_norm": 0.5685083660316468, "learning_rate": 3.966262138547767e-07, "loss": 0.0072, "step": 13605 }, { "epoch": 3.0957906712172925, "grad_norm": 0.6113028684030618, "learning_rate": 3.9654304338443776e-07, "loss": 0.0046, "step": 13606 }, { "epoch": 3.096018202502844, "grad_norm": 0.3199712441983628, "learning_rate": 3.96459877583341e-07, "loss": 0.002, "step": 13607 }, { "epoch": 3.096245733788396, "grad_norm": 0.2569704743225515, "learning_rate": 3.9637671645318545e-07, "loss": 0.0017, "step": 13608 }, { "epoch": 3.0964732650739477, "grad_norm": 0.18618540799889707, "learning_rate": 3.9629355999567124e-07, "loss": 0.0008, "step": 13609 }, { "epoch": 3.0967007963594995, "grad_norm": 0.974646943218071, "learning_rate": 3.9621040821249767e-07, "loss": 0.0044, "step": 13610 }, { "epoch": 3.096928327645051, "grad_norm": 0.4218843501389423, "learning_rate": 3.9612726110536466e-07, "loss": 0.0019, "step": 13611 }, { "epoch": 3.097155858930603, "grad_norm": 0.5155203616611058, "learning_rate": 3.9604411867597115e-07, "loss": 0.0017, "step": 13612 }, { "epoch": 3.0973833902161547, "grad_norm": 0.30260790680059313, "learning_rate": 3.9596098092601635e-07, "loss": 0.0015, "step": 13613 }, { "epoch": 3.0976109215017065, "grad_norm": 0.15439917004207462, "learning_rate": 3.9587784785719993e-07, "loss": 0.0007, "step": 13614 }, { "epoch": 3.0978384527872582, "grad_norm": 1.0553128705158539, "learning_rate": 3.957947194712204e-07, "loss": 0.0105, "step": 13615 }, { "epoch": 3.09806598407281, "grad_norm": 0.2895590460503937, "learning_rate": 3.9571159576977714e-07, "loss": 0.0022, "step": 13616 }, { "epoch": 3.0982935153583617, "grad_norm": 1.628744705176481, "learning_rate": 3.956284767545691e-07, "loss": 0.0056, "step": 13617 }, { "epoch": 3.0985210466439135, "grad_norm": 1.362853591319931, "learning_rate": 3.9554536242729464e-07, "loss": 0.0109, "step": 13618 }, { "epoch": 3.0987485779294652, "grad_norm": 0.6640887103720029, "learning_rate": 3.9546225278965296e-07, "loss": 0.0093, "step": 13619 }, { "epoch": 3.098976109215017, "grad_norm": 0.5115766380905987, "learning_rate": 3.953791478433423e-07, "loss": 0.0046, "step": 13620 }, { "epoch": 3.0992036405005687, "grad_norm": 0.4605437558378388, "learning_rate": 3.952960475900615e-07, "loss": 0.0037, "step": 13621 }, { "epoch": 3.0994311717861205, "grad_norm": 0.17515985424513525, "learning_rate": 3.952129520315086e-07, "loss": 0.0008, "step": 13622 }, { "epoch": 3.0996587030716722, "grad_norm": 0.31201567324301527, "learning_rate": 3.951298611693823e-07, "loss": 0.0009, "step": 13623 }, { "epoch": 3.099886234357224, "grad_norm": 1.1990414655435335, "learning_rate": 3.9504677500538065e-07, "loss": 0.0096, "step": 13624 }, { "epoch": 3.1001137656427757, "grad_norm": 0.1485949424773666, "learning_rate": 3.9496369354120163e-07, "loss": 0.0006, "step": 13625 }, { "epoch": 3.1003412969283275, "grad_norm": 1.2592668381417882, "learning_rate": 3.948806167785435e-07, "loss": 0.018, "step": 13626 }, { "epoch": 3.1005688282138792, "grad_norm": 2.682347916292992, "learning_rate": 3.947975447191041e-07, "loss": 0.0152, "step": 13627 }, { "epoch": 3.100796359499431, "grad_norm": 0.3353485347968531, "learning_rate": 3.947144773645814e-07, "loss": 0.002, "step": 13628 }, { "epoch": 3.1010238907849828, "grad_norm": 0.3023330750744793, "learning_rate": 3.9463141471667286e-07, "loss": 0.0015, "step": 13629 }, { "epoch": 3.1012514220705345, "grad_norm": 0.47479496366154167, "learning_rate": 3.9454835677707635e-07, "loss": 0.0025, "step": 13630 }, { "epoch": 3.1014789533560863, "grad_norm": 0.2617942482834629, "learning_rate": 3.944653035474896e-07, "loss": 0.0023, "step": 13631 }, { "epoch": 3.101706484641638, "grad_norm": 0.8002766516907905, "learning_rate": 3.943822550296096e-07, "loss": 0.0133, "step": 13632 }, { "epoch": 3.10193401592719, "grad_norm": 1.6892480166903134, "learning_rate": 3.9429921122513397e-07, "loss": 0.0084, "step": 13633 }, { "epoch": 3.102161547212742, "grad_norm": 0.6113343956927337, "learning_rate": 3.9421617213575993e-07, "loss": 0.0036, "step": 13634 }, { "epoch": 3.1023890784982937, "grad_norm": 0.44059278952094516, "learning_rate": 3.941331377631849e-07, "loss": 0.0019, "step": 13635 }, { "epoch": 3.1026166097838455, "grad_norm": 0.6415646315799547, "learning_rate": 3.9405010810910555e-07, "loss": 0.007, "step": 13636 }, { "epoch": 3.102844141069397, "grad_norm": 0.34949190823159526, "learning_rate": 3.939670831752189e-07, "loss": 0.0019, "step": 13637 }, { "epoch": 3.103071672354949, "grad_norm": 0.1733259516058199, "learning_rate": 3.938840629632222e-07, "loss": 0.0009, "step": 13638 }, { "epoch": 3.1032992036405007, "grad_norm": 0.6318318927776045, "learning_rate": 3.9380104747481176e-07, "loss": 0.0072, "step": 13639 }, { "epoch": 3.1035267349260525, "grad_norm": 2.062528814281345, "learning_rate": 3.937180367116847e-07, "loss": 0.0294, "step": 13640 }, { "epoch": 3.103754266211604, "grad_norm": 0.9308917596738533, "learning_rate": 3.9363503067553714e-07, "loss": 0.0076, "step": 13641 }, { "epoch": 3.103981797497156, "grad_norm": 0.4254509614528316, "learning_rate": 3.935520293680661e-07, "loss": 0.002, "step": 13642 }, { "epoch": 3.1042093287827077, "grad_norm": 1.0016524103782354, "learning_rate": 3.9346903279096765e-07, "loss": 0.0033, "step": 13643 }, { "epoch": 3.1044368600682595, "grad_norm": 0.4451380258095032, "learning_rate": 3.9338604094593795e-07, "loss": 0.005, "step": 13644 }, { "epoch": 3.1046643913538112, "grad_norm": 1.243308406480071, "learning_rate": 3.933030538346737e-07, "loss": 0.0098, "step": 13645 }, { "epoch": 3.104891922639363, "grad_norm": 0.672156255885296, "learning_rate": 3.9322007145887043e-07, "loss": 0.0075, "step": 13646 }, { "epoch": 3.1051194539249147, "grad_norm": 0.1439762021610723, "learning_rate": 3.931370938202245e-07, "loss": 0.0009, "step": 13647 }, { "epoch": 3.1053469852104665, "grad_norm": 0.5889517061557841, "learning_rate": 3.9305412092043164e-07, "loss": 0.0041, "step": 13648 }, { "epoch": 3.1055745164960182, "grad_norm": 0.6509246879105185, "learning_rate": 3.9297115276118796e-07, "loss": 0.0047, "step": 13649 }, { "epoch": 3.10580204778157, "grad_norm": 0.28368142148737313, "learning_rate": 3.928881893441889e-07, "loss": 0.0009, "step": 13650 }, { "epoch": 3.1060295790671217, "grad_norm": 0.19530754279962179, "learning_rate": 3.9280523067113003e-07, "loss": 0.0004, "step": 13651 }, { "epoch": 3.1062571103526735, "grad_norm": 0.7249174908674783, "learning_rate": 3.9272227674370725e-07, "loss": 0.0165, "step": 13652 }, { "epoch": 3.1064846416382252, "grad_norm": 0.40069081577872345, "learning_rate": 3.9263932756361535e-07, "loss": 0.0026, "step": 13653 }, { "epoch": 3.106712172923777, "grad_norm": 0.47468622771717817, "learning_rate": 3.9255638313255046e-07, "loss": 0.0097, "step": 13654 }, { "epoch": 3.1069397042093287, "grad_norm": 0.628627861298901, "learning_rate": 3.9247344345220735e-07, "loss": 0.0043, "step": 13655 }, { "epoch": 3.1071672354948805, "grad_norm": 0.9040958783773467, "learning_rate": 3.923905085242808e-07, "loss": 0.0099, "step": 13656 }, { "epoch": 3.1073947667804322, "grad_norm": 1.0075148713436095, "learning_rate": 3.9230757835046666e-07, "loss": 0.0109, "step": 13657 }, { "epoch": 3.107622298065984, "grad_norm": 0.14913482764066552, "learning_rate": 3.922246529324593e-07, "loss": 0.0006, "step": 13658 }, { "epoch": 3.1078498293515358, "grad_norm": 0.38828469601771926, "learning_rate": 3.921417322719537e-07, "loss": 0.002, "step": 13659 }, { "epoch": 3.1080773606370875, "grad_norm": 0.19619748293447392, "learning_rate": 3.920588163706446e-07, "loss": 0.001, "step": 13660 }, { "epoch": 3.1083048919226393, "grad_norm": 0.4761806075633297, "learning_rate": 3.919759052302269e-07, "loss": 0.0021, "step": 13661 }, { "epoch": 3.108532423208191, "grad_norm": 0.204992119961753, "learning_rate": 3.918929988523948e-07, "loss": 0.0012, "step": 13662 }, { "epoch": 3.1087599544937428, "grad_norm": 0.5851799966920752, "learning_rate": 3.918100972388428e-07, "loss": 0.0057, "step": 13663 }, { "epoch": 3.1089874857792945, "grad_norm": 0.42234657244224816, "learning_rate": 3.917272003912656e-07, "loss": 0.0032, "step": 13664 }, { "epoch": 3.1092150170648463, "grad_norm": 1.0857862502361364, "learning_rate": 3.916443083113569e-07, "loss": 0.0118, "step": 13665 }, { "epoch": 3.109442548350398, "grad_norm": 0.44577001949994416, "learning_rate": 3.915614210008113e-07, "loss": 0.0018, "step": 13666 }, { "epoch": 3.1096700796359498, "grad_norm": 0.7954960816072131, "learning_rate": 3.9147853846132294e-07, "loss": 0.0036, "step": 13667 }, { "epoch": 3.1098976109215015, "grad_norm": 0.3604700135763512, "learning_rate": 3.913956606945853e-07, "loss": 0.0041, "step": 13668 }, { "epoch": 3.1101251422070533, "grad_norm": 1.3356676557998635, "learning_rate": 3.913127877022926e-07, "loss": 0.0135, "step": 13669 }, { "epoch": 3.110352673492605, "grad_norm": 0.324854385215188, "learning_rate": 3.912299194861385e-07, "loss": 0.0022, "step": 13670 }, { "epoch": 3.1105802047781568, "grad_norm": 0.5909413239547774, "learning_rate": 3.911470560478169e-07, "loss": 0.0069, "step": 13671 }, { "epoch": 3.110807736063709, "grad_norm": 0.33133842608650405, "learning_rate": 3.9106419738902094e-07, "loss": 0.001, "step": 13672 }, { "epoch": 3.1110352673492607, "grad_norm": 0.4677871329404647, "learning_rate": 3.9098134351144445e-07, "loss": 0.0046, "step": 13673 }, { "epoch": 3.1112627986348125, "grad_norm": 0.6362585149191781, "learning_rate": 3.908984944167809e-07, "loss": 0.0125, "step": 13674 }, { "epoch": 3.1114903299203642, "grad_norm": 0.34866814566909005, "learning_rate": 3.9081565010672313e-07, "loss": 0.0047, "step": 13675 }, { "epoch": 3.111717861205916, "grad_norm": 0.15542793713207181, "learning_rate": 3.907328105829647e-07, "loss": 0.0008, "step": 13676 }, { "epoch": 3.1119453924914677, "grad_norm": 0.39922572626751546, "learning_rate": 3.906499758471984e-07, "loss": 0.0038, "step": 13677 }, { "epoch": 3.1121729237770195, "grad_norm": 0.20654925220352918, "learning_rate": 3.9056714590111775e-07, "loss": 0.0008, "step": 13678 }, { "epoch": 3.1124004550625712, "grad_norm": 0.42238125930934733, "learning_rate": 3.90484320746415e-07, "loss": 0.0039, "step": 13679 }, { "epoch": 3.112627986348123, "grad_norm": 0.2912039929322535, "learning_rate": 3.904015003847833e-07, "loss": 0.003, "step": 13680 }, { "epoch": 3.1128555176336747, "grad_norm": 0.29168321563282246, "learning_rate": 3.903186848179155e-07, "loss": 0.0017, "step": 13681 }, { "epoch": 3.1130830489192265, "grad_norm": 0.09232800625014631, "learning_rate": 3.902358740475037e-07, "loss": 0.0005, "step": 13682 }, { "epoch": 3.1133105802047782, "grad_norm": 0.311335836371491, "learning_rate": 3.9015306807524077e-07, "loss": 0.0014, "step": 13683 }, { "epoch": 3.11353811149033, "grad_norm": 0.8892346958975093, "learning_rate": 3.90070266902819e-07, "loss": 0.0067, "step": 13684 }, { "epoch": 3.1137656427758817, "grad_norm": 0.02619625854866601, "learning_rate": 3.899874705319309e-07, "loss": 0.0001, "step": 13685 }, { "epoch": 3.1139931740614335, "grad_norm": 1.93999507262376, "learning_rate": 3.899046789642684e-07, "loss": 0.0142, "step": 13686 }, { "epoch": 3.1142207053469853, "grad_norm": 0.9089515860908949, "learning_rate": 3.898218922015236e-07, "loss": 0.0048, "step": 13687 }, { "epoch": 3.114448236632537, "grad_norm": 0.16192365359004743, "learning_rate": 3.8973911024538883e-07, "loss": 0.0016, "step": 13688 }, { "epoch": 3.1146757679180888, "grad_norm": 0.2395834955088365, "learning_rate": 3.896563330975556e-07, "loss": 0.0009, "step": 13689 }, { "epoch": 3.1149032992036405, "grad_norm": 0.22433798863448448, "learning_rate": 3.8957356075971593e-07, "loss": 0.0007, "step": 13690 }, { "epoch": 3.1151308304891923, "grad_norm": 0.5925930511882774, "learning_rate": 3.894907932335614e-07, "loss": 0.0062, "step": 13691 }, { "epoch": 3.115358361774744, "grad_norm": 0.23282258116639556, "learning_rate": 3.89408030520784e-07, "loss": 0.0009, "step": 13692 }, { "epoch": 3.1155858930602958, "grad_norm": 1.0136086620432527, "learning_rate": 3.893252726230749e-07, "loss": 0.0077, "step": 13693 }, { "epoch": 3.1158134243458475, "grad_norm": 0.923360102407535, "learning_rate": 3.8924251954212553e-07, "loss": 0.0077, "step": 13694 }, { "epoch": 3.1160409556313993, "grad_norm": 0.4767597083534533, "learning_rate": 3.8915977127962743e-07, "loss": 0.0035, "step": 13695 }, { "epoch": 3.116268486916951, "grad_norm": 0.8675104573714081, "learning_rate": 3.890770278372716e-07, "loss": 0.0167, "step": 13696 }, { "epoch": 3.1164960182025028, "grad_norm": 0.7109895685453009, "learning_rate": 3.889942892167492e-07, "loss": 0.0037, "step": 13697 }, { "epoch": 3.1167235494880545, "grad_norm": 0.23538698876857253, "learning_rate": 3.8891155541975124e-07, "loss": 0.0008, "step": 13698 }, { "epoch": 3.1169510807736063, "grad_norm": 0.519057982336505, "learning_rate": 3.8882882644796894e-07, "loss": 0.0032, "step": 13699 }, { "epoch": 3.117178612059158, "grad_norm": 0.1805006963503221, "learning_rate": 3.887461023030929e-07, "loss": 0.0009, "step": 13700 }, { "epoch": 3.11740614334471, "grad_norm": 0.23956590944262118, "learning_rate": 3.8866338298681363e-07, "loss": 0.0017, "step": 13701 }, { "epoch": 3.1176336746302615, "grad_norm": 0.632867957819569, "learning_rate": 3.8858066850082214e-07, "loss": 0.0032, "step": 13702 }, { "epoch": 3.1178612059158133, "grad_norm": 0.5793730649041166, "learning_rate": 3.8849795884680866e-07, "loss": 0.0035, "step": 13703 }, { "epoch": 3.118088737201365, "grad_norm": 0.7614140072035244, "learning_rate": 3.884152540264639e-07, "loss": 0.0022, "step": 13704 }, { "epoch": 3.118316268486917, "grad_norm": 1.1841656395529123, "learning_rate": 3.8833255404147813e-07, "loss": 0.007, "step": 13705 }, { "epoch": 3.1185437997724685, "grad_norm": 0.8461431408039372, "learning_rate": 3.8824985889354137e-07, "loss": 0.0121, "step": 13706 }, { "epoch": 3.1187713310580203, "grad_norm": 0.3108161910287266, "learning_rate": 3.8816716858434395e-07, "loss": 0.0024, "step": 13707 }, { "epoch": 3.118998862343572, "grad_norm": 0.5177771878719377, "learning_rate": 3.880844831155757e-07, "loss": 0.0027, "step": 13708 }, { "epoch": 3.119226393629124, "grad_norm": 1.203803693583744, "learning_rate": 3.8800180248892706e-07, "loss": 0.0069, "step": 13709 }, { "epoch": 3.1194539249146755, "grad_norm": 0.6464258095834154, "learning_rate": 3.879191267060871e-07, "loss": 0.0139, "step": 13710 }, { "epoch": 3.1196814562002277, "grad_norm": 0.42166763739786556, "learning_rate": 3.878364557687464e-07, "loss": 0.0051, "step": 13711 }, { "epoch": 3.1199089874857795, "grad_norm": 0.9879181143584042, "learning_rate": 3.877537896785942e-07, "loss": 0.0048, "step": 13712 }, { "epoch": 3.1201365187713312, "grad_norm": 0.5118564978039727, "learning_rate": 3.8767112843731966e-07, "loss": 0.0047, "step": 13713 }, { "epoch": 3.120364050056883, "grad_norm": 0.11797516679553596, "learning_rate": 3.87588472046613e-07, "loss": 0.0005, "step": 13714 }, { "epoch": 3.1205915813424348, "grad_norm": 0.285439825605053, "learning_rate": 3.8750582050816295e-07, "loss": 0.0011, "step": 13715 }, { "epoch": 3.1208191126279865, "grad_norm": 0.10603198014329722, "learning_rate": 3.8742317382365905e-07, "loss": 0.0004, "step": 13716 }, { "epoch": 3.1210466439135383, "grad_norm": 2.7612055127028325, "learning_rate": 3.873405319947903e-07, "loss": 0.0206, "step": 13717 }, { "epoch": 3.12127417519909, "grad_norm": 0.39742006613592773, "learning_rate": 3.8725789502324605e-07, "loss": 0.0059, "step": 13718 }, { "epoch": 3.1215017064846418, "grad_norm": 0.6757539044410511, "learning_rate": 3.871752629107149e-07, "loss": 0.009, "step": 13719 }, { "epoch": 3.1217292377701935, "grad_norm": 0.670605518183217, "learning_rate": 3.870926356588857e-07, "loss": 0.0054, "step": 13720 }, { "epoch": 3.1219567690557453, "grad_norm": 0.47585802212261463, "learning_rate": 3.870100132694475e-07, "loss": 0.0068, "step": 13721 }, { "epoch": 3.122184300341297, "grad_norm": 0.8416848246059101, "learning_rate": 3.869273957440886e-07, "loss": 0.007, "step": 13722 }, { "epoch": 3.1224118316268488, "grad_norm": 0.14190827638350229, "learning_rate": 3.8684478308449786e-07, "loss": 0.0005, "step": 13723 }, { "epoch": 3.1226393629124005, "grad_norm": 0.33133199875324293, "learning_rate": 3.8676217529236364e-07, "loss": 0.0024, "step": 13724 }, { "epoch": 3.1228668941979523, "grad_norm": 1.3722489723042055, "learning_rate": 3.866795723693741e-07, "loss": 0.0201, "step": 13725 }, { "epoch": 3.123094425483504, "grad_norm": 0.08007079604652112, "learning_rate": 3.8659697431721767e-07, "loss": 0.0003, "step": 13726 }, { "epoch": 3.1233219567690558, "grad_norm": 0.18148914879755945, "learning_rate": 3.865143811375824e-07, "loss": 0.0009, "step": 13727 }, { "epoch": 3.1235494880546075, "grad_norm": 0.29240548271719863, "learning_rate": 3.864317928321566e-07, "loss": 0.002, "step": 13728 }, { "epoch": 3.1237770193401593, "grad_norm": 0.30769210242878847, "learning_rate": 3.8634920940262785e-07, "loss": 0.0013, "step": 13729 }, { "epoch": 3.124004550625711, "grad_norm": 0.1581618726516665, "learning_rate": 3.8626663085068433e-07, "loss": 0.0008, "step": 13730 }, { "epoch": 3.124232081911263, "grad_norm": 0.6642608163175941, "learning_rate": 3.861840571780137e-07, "loss": 0.0072, "step": 13731 }, { "epoch": 3.1244596131968145, "grad_norm": 0.2848025064950112, "learning_rate": 3.861014883863033e-07, "loss": 0.0011, "step": 13732 }, { "epoch": 3.1246871444823663, "grad_norm": 0.29175770570106624, "learning_rate": 3.8601892447724126e-07, "loss": 0.0033, "step": 13733 }, { "epoch": 3.124914675767918, "grad_norm": 0.07711331966132698, "learning_rate": 3.8593636545251446e-07, "loss": 0.0005, "step": 13734 }, { "epoch": 3.12514220705347, "grad_norm": 0.9993932474910553, "learning_rate": 3.8585381131381076e-07, "loss": 0.0007, "step": 13735 }, { "epoch": 3.1253697383390215, "grad_norm": 0.5578235070697413, "learning_rate": 3.85771262062817e-07, "loss": 0.0027, "step": 13736 }, { "epoch": 3.1255972696245733, "grad_norm": 1.1769227062246579, "learning_rate": 3.856887177012206e-07, "loss": 0.0046, "step": 13737 }, { "epoch": 3.125824800910125, "grad_norm": 0.618958383405469, "learning_rate": 3.856061782307087e-07, "loss": 0.0034, "step": 13738 }, { "epoch": 3.126052332195677, "grad_norm": 0.540809486644964, "learning_rate": 3.855236436529678e-07, "loss": 0.0042, "step": 13739 }, { "epoch": 3.1262798634812285, "grad_norm": 0.22201321715842834, "learning_rate": 3.8544111396968523e-07, "loss": 0.0029, "step": 13740 }, { "epoch": 3.1265073947667803, "grad_norm": 0.49833030628193486, "learning_rate": 3.853585891825473e-07, "loss": 0.0052, "step": 13741 }, { "epoch": 3.126734926052332, "grad_norm": 0.4490746517960452, "learning_rate": 3.852760692932413e-07, "loss": 0.003, "step": 13742 }, { "epoch": 3.126962457337884, "grad_norm": 0.6764058274729505, "learning_rate": 3.8519355430345326e-07, "loss": 0.0084, "step": 13743 }, { "epoch": 3.1271899886234356, "grad_norm": 1.083703163022809, "learning_rate": 3.851110442148696e-07, "loss": 0.0046, "step": 13744 }, { "epoch": 3.1274175199089873, "grad_norm": 0.14206739274224148, "learning_rate": 3.850285390291772e-07, "loss": 0.0012, "step": 13745 }, { "epoch": 3.127645051194539, "grad_norm": 0.4794586328489579, "learning_rate": 3.849460387480617e-07, "loss": 0.0044, "step": 13746 }, { "epoch": 3.127872582480091, "grad_norm": 0.305673804005513, "learning_rate": 3.8486354337320966e-07, "loss": 0.0016, "step": 13747 }, { "epoch": 3.128100113765643, "grad_norm": 0.6102327981909041, "learning_rate": 3.847810529063068e-07, "loss": 0.0062, "step": 13748 }, { "epoch": 3.1283276450511943, "grad_norm": 0.930472055344504, "learning_rate": 3.8469856734903955e-07, "loss": 0.0117, "step": 13749 }, { "epoch": 3.1285551763367465, "grad_norm": 0.5343265822316852, "learning_rate": 3.8461608670309346e-07, "loss": 0.0014, "step": 13750 }, { "epoch": 3.1287827076222983, "grad_norm": 1.2801789885249328, "learning_rate": 3.845336109701542e-07, "loss": 0.0068, "step": 13751 }, { "epoch": 3.12901023890785, "grad_norm": 0.42540130497787915, "learning_rate": 3.844511401519077e-07, "loss": 0.0031, "step": 13752 }, { "epoch": 3.1292377701934018, "grad_norm": 0.2054823142635902, "learning_rate": 3.843686742500391e-07, "loss": 0.0013, "step": 13753 }, { "epoch": 3.1294653014789535, "grad_norm": 1.370218836279152, "learning_rate": 3.8428621326623424e-07, "loss": 0.0073, "step": 13754 }, { "epoch": 3.1296928327645053, "grad_norm": 0.6251035419824865, "learning_rate": 3.842037572021783e-07, "loss": 0.0029, "step": 13755 }, { "epoch": 3.129920364050057, "grad_norm": 1.1554444444651615, "learning_rate": 3.841213060595567e-07, "loss": 0.0087, "step": 13756 }, { "epoch": 3.1301478953356088, "grad_norm": 0.48261413275071285, "learning_rate": 3.8403885984005433e-07, "loss": 0.0045, "step": 13757 }, { "epoch": 3.1303754266211605, "grad_norm": 0.1205590913682102, "learning_rate": 3.8395641854535627e-07, "loss": 0.0004, "step": 13758 }, { "epoch": 3.1306029579067123, "grad_norm": 0.3571060683782041, "learning_rate": 3.8387398217714784e-07, "loss": 0.0037, "step": 13759 }, { "epoch": 3.130830489192264, "grad_norm": 0.4368548184437618, "learning_rate": 3.837915507371133e-07, "loss": 0.004, "step": 13760 }, { "epoch": 3.131058020477816, "grad_norm": 0.508200109929493, "learning_rate": 3.8370912422693795e-07, "loss": 0.0069, "step": 13761 }, { "epoch": 3.1312855517633675, "grad_norm": 0.25982846380126795, "learning_rate": 3.836267026483062e-07, "loss": 0.0017, "step": 13762 }, { "epoch": 3.1315130830489193, "grad_norm": 0.8041545558270563, "learning_rate": 3.835442860029024e-07, "loss": 0.0035, "step": 13763 }, { "epoch": 3.131740614334471, "grad_norm": 0.5766655866245921, "learning_rate": 3.834618742924113e-07, "loss": 0.0076, "step": 13764 }, { "epoch": 3.131968145620023, "grad_norm": 0.887486119373737, "learning_rate": 3.83379467518517e-07, "loss": 0.0105, "step": 13765 }, { "epoch": 3.1321956769055745, "grad_norm": 0.04580635399034981, "learning_rate": 3.8329706568290416e-07, "loss": 0.0002, "step": 13766 }, { "epoch": 3.1324232081911263, "grad_norm": 0.4333683446232595, "learning_rate": 3.8321466878725634e-07, "loss": 0.002, "step": 13767 }, { "epoch": 3.132650739476678, "grad_norm": 0.9091299675922737, "learning_rate": 3.831322768332581e-07, "loss": 0.0034, "step": 13768 }, { "epoch": 3.13287827076223, "grad_norm": 0.542673493271332, "learning_rate": 3.830498898225932e-07, "loss": 0.0022, "step": 13769 }, { "epoch": 3.1331058020477816, "grad_norm": 0.7546970482818709, "learning_rate": 3.829675077569452e-07, "loss": 0.0044, "step": 13770 }, { "epoch": 3.1333333333333333, "grad_norm": 0.15332913062643197, "learning_rate": 3.8288513063799826e-07, "loss": 0.0007, "step": 13771 }, { "epoch": 3.133560864618885, "grad_norm": 0.42689755107493926, "learning_rate": 3.828027584674357e-07, "loss": 0.0027, "step": 13772 }, { "epoch": 3.133788395904437, "grad_norm": 0.39436226645064476, "learning_rate": 3.827203912469414e-07, "loss": 0.0011, "step": 13773 }, { "epoch": 3.1340159271899886, "grad_norm": 0.46358246341982057, "learning_rate": 3.8263802897819847e-07, "loss": 0.0035, "step": 13774 }, { "epoch": 3.1342434584755403, "grad_norm": 1.114931783200684, "learning_rate": 3.8255567166289023e-07, "loss": 0.0219, "step": 13775 }, { "epoch": 3.134470989761092, "grad_norm": 1.0081316863586738, "learning_rate": 3.8247331930270033e-07, "loss": 0.0135, "step": 13776 }, { "epoch": 3.134698521046644, "grad_norm": 0.36157310213690785, "learning_rate": 3.8239097189931124e-07, "loss": 0.0038, "step": 13777 }, { "epoch": 3.1349260523321956, "grad_norm": 0.5879992839261503, "learning_rate": 3.823086294544067e-07, "loss": 0.0014, "step": 13778 }, { "epoch": 3.1351535836177473, "grad_norm": 0.9832363420652118, "learning_rate": 3.822262919696691e-07, "loss": 0.0137, "step": 13779 }, { "epoch": 3.135381114903299, "grad_norm": 6.806958874229219, "learning_rate": 3.821439594467816e-07, "loss": 0.0598, "step": 13780 }, { "epoch": 3.135608646188851, "grad_norm": 0.25177231203811756, "learning_rate": 3.8206163188742695e-07, "loss": 0.0039, "step": 13781 }, { "epoch": 3.1358361774744026, "grad_norm": 0.24306675346847584, "learning_rate": 3.8197930929328724e-07, "loss": 0.001, "step": 13782 }, { "epoch": 3.1360637087599543, "grad_norm": 0.15809793297254185, "learning_rate": 3.818969916660456e-07, "loss": 0.0007, "step": 13783 }, { "epoch": 3.136291240045506, "grad_norm": 2.679306147113145, "learning_rate": 3.8181467900738415e-07, "loss": 0.0072, "step": 13784 }, { "epoch": 3.136518771331058, "grad_norm": 0.3044844809848375, "learning_rate": 3.8173237131898545e-07, "loss": 0.0013, "step": 13785 }, { "epoch": 3.1367463026166096, "grad_norm": 1.2476628777431005, "learning_rate": 3.816500686025314e-07, "loss": 0.0113, "step": 13786 }, { "epoch": 3.136973833902162, "grad_norm": 0.5077326338569348, "learning_rate": 3.815677708597043e-07, "loss": 0.0016, "step": 13787 }, { "epoch": 3.137201365187713, "grad_norm": 0.7801094951785138, "learning_rate": 3.814854780921863e-07, "loss": 0.0135, "step": 13788 }, { "epoch": 3.1374288964732653, "grad_norm": 0.8009936811062793, "learning_rate": 3.8140319030165886e-07, "loss": 0.0095, "step": 13789 }, { "epoch": 3.137656427758817, "grad_norm": 0.36685836448410125, "learning_rate": 3.813209074898043e-07, "loss": 0.0021, "step": 13790 }, { "epoch": 3.137883959044369, "grad_norm": 0.21889868796656664, "learning_rate": 3.8123862965830384e-07, "loss": 0.001, "step": 13791 }, { "epoch": 3.1381114903299205, "grad_norm": 0.2196831257713132, "learning_rate": 3.811563568088398e-07, "loss": 0.0012, "step": 13792 }, { "epoch": 3.1383390216154723, "grad_norm": 0.06855896055582797, "learning_rate": 3.8107408894309306e-07, "loss": 0.0003, "step": 13793 }, { "epoch": 3.138566552901024, "grad_norm": 0.24047993988974756, "learning_rate": 3.80991826062745e-07, "loss": 0.0006, "step": 13794 }, { "epoch": 3.138794084186576, "grad_norm": 1.04000468828695, "learning_rate": 3.8090956816947743e-07, "loss": 0.0093, "step": 13795 }, { "epoch": 3.1390216154721275, "grad_norm": 0.887460079587106, "learning_rate": 3.808273152649711e-07, "loss": 0.0121, "step": 13796 }, { "epoch": 3.1392491467576793, "grad_norm": 0.6506773750855729, "learning_rate": 3.807450673509073e-07, "loss": 0.0048, "step": 13797 }, { "epoch": 3.139476678043231, "grad_norm": 0.9461666561442387, "learning_rate": 3.8066282442896686e-07, "loss": 0.0159, "step": 13798 }, { "epoch": 3.139704209328783, "grad_norm": 0.9426258098077588, "learning_rate": 3.805805865008311e-07, "loss": 0.0078, "step": 13799 }, { "epoch": 3.1399317406143346, "grad_norm": 0.5181153533547658, "learning_rate": 3.8049835356818034e-07, "loss": 0.0051, "step": 13800 }, { "epoch": 3.1401592718998863, "grad_norm": 0.1329353268968666, "learning_rate": 3.804161256326953e-07, "loss": 0.0005, "step": 13801 }, { "epoch": 3.140386803185438, "grad_norm": 0.3384064241472927, "learning_rate": 3.803339026960571e-07, "loss": 0.0019, "step": 13802 }, { "epoch": 3.14061433447099, "grad_norm": 0.37293388105843145, "learning_rate": 3.802516847599455e-07, "loss": 0.0048, "step": 13803 }, { "epoch": 3.1408418657565416, "grad_norm": 0.7181037006859261, "learning_rate": 3.801694718260414e-07, "loss": 0.0076, "step": 13804 }, { "epoch": 3.1410693970420933, "grad_norm": 0.4878836611082951, "learning_rate": 3.8008726389602473e-07, "loss": 0.0047, "step": 13805 }, { "epoch": 3.141296928327645, "grad_norm": 0.3609013044779127, "learning_rate": 3.8000506097157617e-07, "loss": 0.0028, "step": 13806 }, { "epoch": 3.141524459613197, "grad_norm": 0.122561022613689, "learning_rate": 3.7992286305437527e-07, "loss": 0.0006, "step": 13807 }, { "epoch": 3.1417519908987486, "grad_norm": 0.5185134254050605, "learning_rate": 3.7984067014610216e-07, "loss": 0.0014, "step": 13808 }, { "epoch": 3.1419795221843003, "grad_norm": 0.4793432377188145, "learning_rate": 3.7975848224843694e-07, "loss": 0.0034, "step": 13809 }, { "epoch": 3.142207053469852, "grad_norm": 1.1236990918229133, "learning_rate": 3.79676299363059e-07, "loss": 0.0103, "step": 13810 }, { "epoch": 3.142434584755404, "grad_norm": 0.44625833327400766, "learning_rate": 3.7959412149164836e-07, "loss": 0.0032, "step": 13811 }, { "epoch": 3.1426621160409556, "grad_norm": 0.9486763199500354, "learning_rate": 3.795119486358845e-07, "loss": 0.0069, "step": 13812 }, { "epoch": 3.1428896473265073, "grad_norm": 1.1481461298979205, "learning_rate": 3.7942978079744663e-07, "loss": 0.0116, "step": 13813 }, { "epoch": 3.143117178612059, "grad_norm": 0.4543417941333312, "learning_rate": 3.7934761797801437e-07, "loss": 0.0081, "step": 13814 }, { "epoch": 3.143344709897611, "grad_norm": 0.5979782389561871, "learning_rate": 3.792654601792668e-07, "loss": 0.0035, "step": 13815 }, { "epoch": 3.1435722411831626, "grad_norm": 0.1675797609968996, "learning_rate": 3.7918330740288346e-07, "loss": 0.0011, "step": 13816 }, { "epoch": 3.1437997724687143, "grad_norm": 0.8648311478655896, "learning_rate": 3.791011596505428e-07, "loss": 0.0076, "step": 13817 }, { "epoch": 3.144027303754266, "grad_norm": 0.3138328538902706, "learning_rate": 3.790190169239243e-07, "loss": 0.0021, "step": 13818 }, { "epoch": 3.144254835039818, "grad_norm": 0.14981998173588545, "learning_rate": 3.7893687922470676e-07, "loss": 0.001, "step": 13819 }, { "epoch": 3.1444823663253696, "grad_norm": 1.1467254804178162, "learning_rate": 3.7885474655456845e-07, "loss": 0.0081, "step": 13820 }, { "epoch": 3.1447098976109213, "grad_norm": 0.0654667279105405, "learning_rate": 3.787726189151884e-07, "loss": 0.0003, "step": 13821 }, { "epoch": 3.144937428896473, "grad_norm": 1.646352365393351, "learning_rate": 3.786904963082451e-07, "loss": 0.0185, "step": 13822 }, { "epoch": 3.145164960182025, "grad_norm": 0.8038027248444555, "learning_rate": 3.786083787354171e-07, "loss": 0.0077, "step": 13823 }, { "epoch": 3.1453924914675766, "grad_norm": 0.5012267831627788, "learning_rate": 3.785262661983823e-07, "loss": 0.0032, "step": 13824 }, { "epoch": 3.1456200227531284, "grad_norm": 0.4304702312580086, "learning_rate": 3.784441586988194e-07, "loss": 0.0049, "step": 13825 }, { "epoch": 3.1458475540386805, "grad_norm": 0.4160012001154668, "learning_rate": 3.7836205623840653e-07, "loss": 0.0031, "step": 13826 }, { "epoch": 3.146075085324232, "grad_norm": 0.7171864621133354, "learning_rate": 3.782799588188212e-07, "loss": 0.0035, "step": 13827 }, { "epoch": 3.146302616609784, "grad_norm": 0.37115157252034264, "learning_rate": 3.7819786644174184e-07, "loss": 0.0022, "step": 13828 }, { "epoch": 3.146530147895336, "grad_norm": 0.1939127454228389, "learning_rate": 3.78115779108846e-07, "loss": 0.0006, "step": 13829 }, { "epoch": 3.1467576791808876, "grad_norm": 0.4905729783518153, "learning_rate": 3.7803369682181166e-07, "loss": 0.0034, "step": 13830 }, { "epoch": 3.1469852104664393, "grad_norm": 0.42358146782160816, "learning_rate": 3.7795161958231616e-07, "loss": 0.0066, "step": 13831 }, { "epoch": 3.147212741751991, "grad_norm": 0.6284884449783176, "learning_rate": 3.77869547392037e-07, "loss": 0.0039, "step": 13832 }, { "epoch": 3.147440273037543, "grad_norm": 1.2637222062959736, "learning_rate": 3.7778748025265194e-07, "loss": 0.0093, "step": 13833 }, { "epoch": 3.1476678043230946, "grad_norm": 0.21385133912651194, "learning_rate": 3.777054181658377e-07, "loss": 0.0008, "step": 13834 }, { "epoch": 3.1478953356086463, "grad_norm": 0.50506377727536, "learning_rate": 3.776233611332722e-07, "loss": 0.0035, "step": 13835 }, { "epoch": 3.148122866894198, "grad_norm": 0.6638879795696511, "learning_rate": 3.7754130915663187e-07, "loss": 0.0028, "step": 13836 }, { "epoch": 3.14835039817975, "grad_norm": 0.6447244428382642, "learning_rate": 3.7745926223759403e-07, "loss": 0.003, "step": 13837 }, { "epoch": 3.1485779294653016, "grad_norm": 0.9581257370044817, "learning_rate": 3.773772203778357e-07, "loss": 0.0042, "step": 13838 }, { "epoch": 3.1488054607508533, "grad_norm": 0.44530004525932304, "learning_rate": 3.772951835790332e-07, "loss": 0.0033, "step": 13839 }, { "epoch": 3.149032992036405, "grad_norm": 0.33285807076948815, "learning_rate": 3.772131518428637e-07, "loss": 0.0046, "step": 13840 }, { "epoch": 3.149260523321957, "grad_norm": 0.3035039340151652, "learning_rate": 3.771311251710034e-07, "loss": 0.0024, "step": 13841 }, { "epoch": 3.1494880546075086, "grad_norm": 0.6245127147640289, "learning_rate": 3.770491035651292e-07, "loss": 0.0082, "step": 13842 }, { "epoch": 3.1497155858930603, "grad_norm": 0.6586785397319362, "learning_rate": 3.7696708702691695e-07, "loss": 0.0013, "step": 13843 }, { "epoch": 3.149943117178612, "grad_norm": 0.23847987463587475, "learning_rate": 3.768850755580433e-07, "loss": 0.0015, "step": 13844 }, { "epoch": 3.150170648464164, "grad_norm": 0.899429044234229, "learning_rate": 3.7680306916018457e-07, "loss": 0.0154, "step": 13845 }, { "epoch": 3.1503981797497156, "grad_norm": 1.0672624748158084, "learning_rate": 3.7672106783501626e-07, "loss": 0.0049, "step": 13846 }, { "epoch": 3.1506257110352673, "grad_norm": 0.4990697486785244, "learning_rate": 3.7663907158421465e-07, "loss": 0.0016, "step": 13847 }, { "epoch": 3.150853242320819, "grad_norm": 1.1723674147228462, "learning_rate": 3.7655708040945554e-07, "loss": 0.0107, "step": 13848 }, { "epoch": 3.151080773606371, "grad_norm": 0.15786786474276895, "learning_rate": 3.7647509431241485e-07, "loss": 0.0005, "step": 13849 }, { "epoch": 3.1513083048919226, "grad_norm": 1.2196002032847655, "learning_rate": 3.7639311329476806e-07, "loss": 0.01, "step": 13850 }, { "epoch": 3.1515358361774743, "grad_norm": 0.5811712812859059, "learning_rate": 3.7631113735819055e-07, "loss": 0.0081, "step": 13851 }, { "epoch": 3.151763367463026, "grad_norm": 1.168890368346021, "learning_rate": 3.762291665043581e-07, "loss": 0.017, "step": 13852 }, { "epoch": 3.151990898748578, "grad_norm": 1.1952351673565775, "learning_rate": 3.7614720073494576e-07, "loss": 0.0101, "step": 13853 }, { "epoch": 3.1522184300341296, "grad_norm": 0.6403771495315274, "learning_rate": 3.7606524005162904e-07, "loss": 0.0025, "step": 13854 }, { "epoch": 3.1524459613196814, "grad_norm": 0.8554189568301218, "learning_rate": 3.759832844560827e-07, "loss": 0.0054, "step": 13855 }, { "epoch": 3.152673492605233, "grad_norm": 0.3303485146116129, "learning_rate": 3.759013339499823e-07, "loss": 0.0034, "step": 13856 }, { "epoch": 3.152901023890785, "grad_norm": 0.07214342516365252, "learning_rate": 3.758193885350022e-07, "loss": 0.0002, "step": 13857 }, { "epoch": 3.1531285551763366, "grad_norm": 18.505861604963602, "learning_rate": 3.757374482128174e-07, "loss": 0.0378, "step": 13858 }, { "epoch": 3.1533560864618884, "grad_norm": 0.5849260835242925, "learning_rate": 3.756555129851029e-07, "loss": 0.0026, "step": 13859 }, { "epoch": 3.15358361774744, "grad_norm": 0.43836367020367495, "learning_rate": 3.755735828535328e-07, "loss": 0.0036, "step": 13860 }, { "epoch": 3.153811149032992, "grad_norm": 0.7041953086618142, "learning_rate": 3.7549165781978195e-07, "loss": 0.0069, "step": 13861 }, { "epoch": 3.1540386803185436, "grad_norm": 0.5298338878915716, "learning_rate": 3.7540973788552456e-07, "loss": 0.0039, "step": 13862 }, { "epoch": 3.1542662116040954, "grad_norm": 0.3997491089617089, "learning_rate": 3.7532782305243526e-07, "loss": 0.0036, "step": 13863 }, { "epoch": 3.154493742889647, "grad_norm": 0.24821970576312638, "learning_rate": 3.752459133221879e-07, "loss": 0.001, "step": 13864 }, { "epoch": 3.1547212741751993, "grad_norm": 0.43253435149621905, "learning_rate": 3.7516400869645655e-07, "loss": 0.0032, "step": 13865 }, { "epoch": 3.1549488054607506, "grad_norm": 0.19626052386709428, "learning_rate": 3.7508210917691556e-07, "loss": 0.0013, "step": 13866 }, { "epoch": 3.155176336746303, "grad_norm": 1.190011932259634, "learning_rate": 3.750002147652383e-07, "loss": 0.0079, "step": 13867 }, { "epoch": 3.1554038680318546, "grad_norm": 0.3571084898787252, "learning_rate": 3.7491832546309894e-07, "loss": 0.0017, "step": 13868 }, { "epoch": 3.1556313993174063, "grad_norm": 0.24961102835315369, "learning_rate": 3.748364412721711e-07, "loss": 0.001, "step": 13869 }, { "epoch": 3.155858930602958, "grad_norm": 0.7576726872213839, "learning_rate": 3.74754562194128e-07, "loss": 0.009, "step": 13870 }, { "epoch": 3.15608646188851, "grad_norm": 0.43809380046402646, "learning_rate": 3.7467268823064354e-07, "loss": 0.0045, "step": 13871 }, { "epoch": 3.1563139931740616, "grad_norm": 0.37272691330419955, "learning_rate": 3.745908193833907e-07, "loss": 0.0012, "step": 13872 }, { "epoch": 3.1565415244596133, "grad_norm": 0.5530625149938723, "learning_rate": 3.7450895565404316e-07, "loss": 0.0043, "step": 13873 }, { "epoch": 3.156769055745165, "grad_norm": 0.30195753436296574, "learning_rate": 3.7442709704427357e-07, "loss": 0.0026, "step": 13874 }, { "epoch": 3.156996587030717, "grad_norm": 0.8781019152267231, "learning_rate": 3.7434524355575527e-07, "loss": 0.0117, "step": 13875 }, { "epoch": 3.1572241183162686, "grad_norm": 0.7405564579332585, "learning_rate": 3.7426339519016133e-07, "loss": 0.0065, "step": 13876 }, { "epoch": 3.1574516496018203, "grad_norm": 0.39547372579165224, "learning_rate": 3.7418155194916415e-07, "loss": 0.0014, "step": 13877 }, { "epoch": 3.157679180887372, "grad_norm": 1.0563832727156828, "learning_rate": 3.7409971383443684e-07, "loss": 0.0044, "step": 13878 }, { "epoch": 3.157906712172924, "grad_norm": 0.9456869127418189, "learning_rate": 3.740178808476517e-07, "loss": 0.007, "step": 13879 }, { "epoch": 3.1581342434584756, "grad_norm": 0.5871000937639509, "learning_rate": 3.739360529904817e-07, "loss": 0.0007, "step": 13880 }, { "epoch": 3.1583617747440274, "grad_norm": 0.7179599852526974, "learning_rate": 3.738542302645988e-07, "loss": 0.0043, "step": 13881 }, { "epoch": 3.158589306029579, "grad_norm": 0.4866805484207952, "learning_rate": 3.737724126716755e-07, "loss": 0.0023, "step": 13882 }, { "epoch": 3.158816837315131, "grad_norm": 0.19672928194569625, "learning_rate": 3.73690600213384e-07, "loss": 0.0006, "step": 13883 }, { "epoch": 3.1590443686006826, "grad_norm": 0.678741880453794, "learning_rate": 3.7360879289139633e-07, "loss": 0.0121, "step": 13884 }, { "epoch": 3.1592718998862344, "grad_norm": 0.2313553824474523, "learning_rate": 3.7352699070738455e-07, "loss": 0.0019, "step": 13885 }, { "epoch": 3.159499431171786, "grad_norm": 0.32622222532739537, "learning_rate": 3.734451936630204e-07, "loss": 0.0019, "step": 13886 }, { "epoch": 3.159726962457338, "grad_norm": 1.2258531350101953, "learning_rate": 3.733634017599762e-07, "loss": 0.0046, "step": 13887 }, { "epoch": 3.1599544937428896, "grad_norm": 0.5283895342309067, "learning_rate": 3.732816149999229e-07, "loss": 0.0051, "step": 13888 }, { "epoch": 3.1601820250284414, "grad_norm": 1.1406592006373812, "learning_rate": 3.731998333845324e-07, "loss": 0.0109, "step": 13889 }, { "epoch": 3.160409556313993, "grad_norm": 1.0801539905342832, "learning_rate": 3.731180569154763e-07, "loss": 0.0155, "step": 13890 }, { "epoch": 3.160637087599545, "grad_norm": 0.6247526502511481, "learning_rate": 3.730362855944256e-07, "loss": 0.0043, "step": 13891 }, { "epoch": 3.1608646188850966, "grad_norm": 0.45473206269572414, "learning_rate": 3.7295451942305187e-07, "loss": 0.0033, "step": 13892 }, { "epoch": 3.1610921501706484, "grad_norm": 0.5229607311259904, "learning_rate": 3.728727584030261e-07, "loss": 0.002, "step": 13893 }, { "epoch": 3.1613196814562, "grad_norm": 0.5706170474218496, "learning_rate": 3.7279100253601963e-07, "loss": 0.0019, "step": 13894 }, { "epoch": 3.161547212741752, "grad_norm": 0.6555545397269104, "learning_rate": 3.7270925182370306e-07, "loss": 0.005, "step": 13895 }, { "epoch": 3.1617747440273036, "grad_norm": 0.7031081219494405, "learning_rate": 3.7262750626774714e-07, "loss": 0.0035, "step": 13896 }, { "epoch": 3.1620022753128554, "grad_norm": 0.06463721113076089, "learning_rate": 3.725457658698231e-07, "loss": 0.0003, "step": 13897 }, { "epoch": 3.162229806598407, "grad_norm": 0.054481797399774996, "learning_rate": 3.724640306316009e-07, "loss": 0.0001, "step": 13898 }, { "epoch": 3.162457337883959, "grad_norm": 0.8901439332796516, "learning_rate": 3.723823005547518e-07, "loss": 0.0094, "step": 13899 }, { "epoch": 3.1626848691695106, "grad_norm": 1.2259159443615564, "learning_rate": 3.7230057564094574e-07, "loss": 0.0089, "step": 13900 }, { "epoch": 3.1629124004550624, "grad_norm": 0.11589783113234055, "learning_rate": 3.7221885589185286e-07, "loss": 0.0009, "step": 13901 }, { "epoch": 3.163139931740614, "grad_norm": 0.6510534291461003, "learning_rate": 3.721371413091439e-07, "loss": 0.0033, "step": 13902 }, { "epoch": 3.163367463026166, "grad_norm": 0.7838898895618971, "learning_rate": 3.720554318944884e-07, "loss": 0.001, "step": 13903 }, { "epoch": 3.163594994311718, "grad_norm": 0.5103634220499009, "learning_rate": 3.7197372764955683e-07, "loss": 0.0032, "step": 13904 }, { "epoch": 3.1638225255972694, "grad_norm": 0.32748971585385706, "learning_rate": 3.718920285760187e-07, "loss": 0.0028, "step": 13905 }, { "epoch": 3.1640500568828216, "grad_norm": 1.218154082883578, "learning_rate": 3.718103346755443e-07, "loss": 0.0044, "step": 13906 }, { "epoch": 3.1642775881683733, "grad_norm": 0.7648296403752921, "learning_rate": 3.717286459498028e-07, "loss": 0.0044, "step": 13907 }, { "epoch": 3.164505119453925, "grad_norm": 0.4613469768585305, "learning_rate": 3.716469624004637e-07, "loss": 0.0007, "step": 13908 }, { "epoch": 3.164732650739477, "grad_norm": 0.17357956834115196, "learning_rate": 3.715652840291971e-07, "loss": 0.0006, "step": 13909 }, { "epoch": 3.1649601820250286, "grad_norm": 0.18593315528438667, "learning_rate": 3.7148361083767155e-07, "loss": 0.0013, "step": 13910 }, { "epoch": 3.1651877133105804, "grad_norm": 3.1581377005852085, "learning_rate": 3.714019428275569e-07, "loss": 0.0212, "step": 13911 }, { "epoch": 3.165415244596132, "grad_norm": 0.2212581789382093, "learning_rate": 3.7132028000052196e-07, "loss": 0.001, "step": 13912 }, { "epoch": 3.165642775881684, "grad_norm": 0.955525919610157, "learning_rate": 3.712386223582362e-07, "loss": 0.0095, "step": 13913 }, { "epoch": 3.1658703071672356, "grad_norm": 2.1834859911603512, "learning_rate": 3.711569699023681e-07, "loss": 0.011, "step": 13914 }, { "epoch": 3.1660978384527874, "grad_norm": 0.40647111383351037, "learning_rate": 3.7107532263458653e-07, "loss": 0.0016, "step": 13915 }, { "epoch": 3.166325369738339, "grad_norm": 0.9991817770359136, "learning_rate": 3.7099368055656054e-07, "loss": 0.0117, "step": 13916 }, { "epoch": 3.166552901023891, "grad_norm": 0.7794847933791793, "learning_rate": 3.709120436699583e-07, "loss": 0.0057, "step": 13917 }, { "epoch": 3.1667804323094426, "grad_norm": 0.16256103685272222, "learning_rate": 3.708304119764487e-07, "loss": 0.0007, "step": 13918 }, { "epoch": 3.1670079635949944, "grad_norm": 0.292545662492898, "learning_rate": 3.7074878547770005e-07, "loss": 0.0014, "step": 13919 }, { "epoch": 3.167235494880546, "grad_norm": 0.7918512719776449, "learning_rate": 3.7066716417538035e-07, "loss": 0.0169, "step": 13920 }, { "epoch": 3.167463026166098, "grad_norm": 1.0881656195635738, "learning_rate": 3.705855480711582e-07, "loss": 0.0072, "step": 13921 }, { "epoch": 3.1676905574516496, "grad_norm": 0.5062558037795301, "learning_rate": 3.7050393716670135e-07, "loss": 0.0045, "step": 13922 }, { "epoch": 3.1679180887372014, "grad_norm": 0.711084356722007, "learning_rate": 3.7042233146367816e-07, "loss": 0.0103, "step": 13923 }, { "epoch": 3.168145620022753, "grad_norm": 0.25480822906801315, "learning_rate": 3.7034073096375607e-07, "loss": 0.0009, "step": 13924 }, { "epoch": 3.168373151308305, "grad_norm": 0.5431028713293794, "learning_rate": 3.702591356686032e-07, "loss": 0.0029, "step": 13925 }, { "epoch": 3.1686006825938566, "grad_norm": 0.01474442456664322, "learning_rate": 3.7017754557988716e-07, "loss": 0.0, "step": 13926 }, { "epoch": 3.1688282138794084, "grad_norm": 1.556203967815051, "learning_rate": 3.7009596069927516e-07, "loss": 0.0099, "step": 13927 }, { "epoch": 3.16905574516496, "grad_norm": 0.2770606482137358, "learning_rate": 3.70014381028435e-07, "loss": 0.0023, "step": 13928 }, { "epoch": 3.169283276450512, "grad_norm": 0.8955102496537233, "learning_rate": 3.6993280656903376e-07, "loss": 0.0059, "step": 13929 }, { "epoch": 3.1695108077360636, "grad_norm": 0.4815040301651737, "learning_rate": 3.698512373227391e-07, "loss": 0.0012, "step": 13930 }, { "epoch": 3.1697383390216154, "grad_norm": 0.3866394271020985, "learning_rate": 3.697696732912176e-07, "loss": 0.0024, "step": 13931 }, { "epoch": 3.169965870307167, "grad_norm": 0.8180956409378682, "learning_rate": 3.6968811447613677e-07, "loss": 0.0139, "step": 13932 }, { "epoch": 3.170193401592719, "grad_norm": 0.42154590932278396, "learning_rate": 3.6960656087916324e-07, "loss": 0.0028, "step": 13933 }, { "epoch": 3.1704209328782706, "grad_norm": 0.2654397479619021, "learning_rate": 3.695250125019638e-07, "loss": 0.0023, "step": 13934 }, { "epoch": 3.1706484641638224, "grad_norm": 0.6289621786089779, "learning_rate": 3.694434693462053e-07, "loss": 0.0063, "step": 13935 }, { "epoch": 3.170875995449374, "grad_norm": 0.5906649364153275, "learning_rate": 3.69361931413554e-07, "loss": 0.0048, "step": 13936 }, { "epoch": 3.171103526734926, "grad_norm": 0.59773070575826, "learning_rate": 3.69280398705677e-07, "loss": 0.0044, "step": 13937 }, { "epoch": 3.1713310580204777, "grad_norm": 1.1544241484338296, "learning_rate": 3.6919887122424015e-07, "loss": 0.0093, "step": 13938 }, { "epoch": 3.1715585893060294, "grad_norm": 0.30046874622427217, "learning_rate": 3.691173489709099e-07, "loss": 0.0026, "step": 13939 }, { "epoch": 3.171786120591581, "grad_norm": 0.04023049616474261, "learning_rate": 3.6903583194735256e-07, "loss": 0.0002, "step": 13940 }, { "epoch": 3.172013651877133, "grad_norm": 0.09722069449681296, "learning_rate": 3.6895432015523375e-07, "loss": 0.0003, "step": 13941 }, { "epoch": 3.1722411831626847, "grad_norm": 0.4861167686791173, "learning_rate": 3.6887281359621996e-07, "loss": 0.0026, "step": 13942 }, { "epoch": 3.172468714448237, "grad_norm": 0.43790938212235914, "learning_rate": 3.6879131227197657e-07, "loss": 0.0069, "step": 13943 }, { "epoch": 3.172696245733788, "grad_norm": 0.7162690351650292, "learning_rate": 3.687098161841698e-07, "loss": 0.0053, "step": 13944 }, { "epoch": 3.1729237770193404, "grad_norm": 0.7748018684802185, "learning_rate": 3.6862832533446483e-07, "loss": 0.0083, "step": 13945 }, { "epoch": 3.173151308304892, "grad_norm": 0.6872188878265746, "learning_rate": 3.6854683972452734e-07, "loss": 0.0068, "step": 13946 }, { "epoch": 3.173378839590444, "grad_norm": 0.3007901659225943, "learning_rate": 3.6846535935602295e-07, "loss": 0.0031, "step": 13947 }, { "epoch": 3.1736063708759956, "grad_norm": 1.6832744476588282, "learning_rate": 3.6838388423061655e-07, "loss": 0.0255, "step": 13948 }, { "epoch": 3.1738339021615474, "grad_norm": 0.143645029419539, "learning_rate": 3.6830241434997374e-07, "loss": 0.0006, "step": 13949 }, { "epoch": 3.174061433447099, "grad_norm": 0.9386342303250517, "learning_rate": 3.6822094971575936e-07, "loss": 0.0096, "step": 13950 }, { "epoch": 3.174288964732651, "grad_norm": 0.30474850725047165, "learning_rate": 3.681394903296387e-07, "loss": 0.0021, "step": 13951 }, { "epoch": 3.1745164960182026, "grad_norm": 0.21528809399333185, "learning_rate": 3.680580361932764e-07, "loss": 0.0012, "step": 13952 }, { "epoch": 3.1747440273037544, "grad_norm": 0.9286450571607849, "learning_rate": 3.6797658730833704e-07, "loss": 0.0139, "step": 13953 }, { "epoch": 3.174971558589306, "grad_norm": 0.5652859135158181, "learning_rate": 3.6789514367648584e-07, "loss": 0.0034, "step": 13954 }, { "epoch": 3.175199089874858, "grad_norm": 1.3195954116779143, "learning_rate": 3.6781370529938666e-07, "loss": 0.0082, "step": 13955 }, { "epoch": 3.1754266211604096, "grad_norm": 1.255291686727104, "learning_rate": 3.677322721787047e-07, "loss": 0.0119, "step": 13956 }, { "epoch": 3.1756541524459614, "grad_norm": 0.12964831988467454, "learning_rate": 3.6765084431610394e-07, "loss": 0.0008, "step": 13957 }, { "epoch": 3.175881683731513, "grad_norm": 0.5772327363776648, "learning_rate": 3.675694217132482e-07, "loss": 0.0095, "step": 13958 }, { "epoch": 3.176109215017065, "grad_norm": 0.2884624443240457, "learning_rate": 3.674880043718024e-07, "loss": 0.0024, "step": 13959 }, { "epoch": 3.1763367463026166, "grad_norm": 0.3978323671832626, "learning_rate": 3.6740659229342985e-07, "loss": 0.0011, "step": 13960 }, { "epoch": 3.1765642775881684, "grad_norm": 1.2018933134237404, "learning_rate": 3.6732518547979503e-07, "loss": 0.0104, "step": 13961 }, { "epoch": 3.17679180887372, "grad_norm": 0.35320765370452034, "learning_rate": 3.672437839325614e-07, "loss": 0.0045, "step": 13962 }, { "epoch": 3.177019340159272, "grad_norm": 0.7315138581333726, "learning_rate": 3.6716238765339295e-07, "loss": 0.0049, "step": 13963 }, { "epoch": 3.1772468714448237, "grad_norm": 0.2629835820822845, "learning_rate": 3.67080996643953e-07, "loss": 0.0014, "step": 13964 }, { "epoch": 3.1774744027303754, "grad_norm": 0.668277699934555, "learning_rate": 3.6699961090590497e-07, "loss": 0.0072, "step": 13965 }, { "epoch": 3.177701934015927, "grad_norm": 1.8748276643939148, "learning_rate": 3.6691823044091267e-07, "loss": 0.0029, "step": 13966 }, { "epoch": 3.177929465301479, "grad_norm": 0.05310195487085882, "learning_rate": 3.668368552506388e-07, "loss": 0.0003, "step": 13967 }, { "epoch": 3.1781569965870307, "grad_norm": 0.19047746843867117, "learning_rate": 3.667554853367469e-07, "loss": 0.0008, "step": 13968 }, { "epoch": 3.1783845278725824, "grad_norm": 0.522786222989364, "learning_rate": 3.6667412070089986e-07, "loss": 0.0054, "step": 13969 }, { "epoch": 3.178612059158134, "grad_norm": 0.8829828165659652, "learning_rate": 3.6659276134476106e-07, "loss": 0.0054, "step": 13970 }, { "epoch": 3.178839590443686, "grad_norm": 0.3805669202680814, "learning_rate": 3.665114072699927e-07, "loss": 0.0022, "step": 13971 }, { "epoch": 3.1790671217292377, "grad_norm": 0.21873642233541166, "learning_rate": 3.6643005847825765e-07, "loss": 0.0012, "step": 13972 }, { "epoch": 3.1792946530147894, "grad_norm": 1.8453865583073807, "learning_rate": 3.6634871497121887e-07, "loss": 0.0092, "step": 13973 }, { "epoch": 3.179522184300341, "grad_norm": 0.3848005050356219, "learning_rate": 3.662673767505385e-07, "loss": 0.0016, "step": 13974 }, { "epoch": 3.179749715585893, "grad_norm": 0.4593582056822358, "learning_rate": 3.661860438178792e-07, "loss": 0.0042, "step": 13975 }, { "epoch": 3.1799772468714447, "grad_norm": 1.085916689688083, "learning_rate": 3.661047161749033e-07, "loss": 0.0041, "step": 13976 }, { "epoch": 3.1802047781569964, "grad_norm": 0.6988514856711284, "learning_rate": 3.660233938232725e-07, "loss": 0.0077, "step": 13977 }, { "epoch": 3.180432309442548, "grad_norm": 0.36817705851228477, "learning_rate": 3.659420767646495e-07, "loss": 0.0021, "step": 13978 }, { "epoch": 3.1806598407281, "grad_norm": 0.48649153824488167, "learning_rate": 3.6586076500069583e-07, "loss": 0.0034, "step": 13979 }, { "epoch": 3.1808873720136517, "grad_norm": 0.6003848853130246, "learning_rate": 3.657794585330738e-07, "loss": 0.0061, "step": 13980 }, { "epoch": 3.1811149032992034, "grad_norm": 0.9499134352216103, "learning_rate": 3.6569815736344467e-07, "loss": 0.0108, "step": 13981 }, { "epoch": 3.1813424345847556, "grad_norm": 0.5379779769987447, "learning_rate": 3.6561686149347034e-07, "loss": 0.01, "step": 13982 }, { "epoch": 3.181569965870307, "grad_norm": 0.44723755072389054, "learning_rate": 3.6553557092481254e-07, "loss": 0.0091, "step": 13983 }, { "epoch": 3.181797497155859, "grad_norm": 0.5638187476471587, "learning_rate": 3.6545428565913225e-07, "loss": 0.0028, "step": 13984 }, { "epoch": 3.182025028441411, "grad_norm": 0.5727700426842129, "learning_rate": 3.653730056980911e-07, "loss": 0.0027, "step": 13985 }, { "epoch": 3.1822525597269626, "grad_norm": 0.4881166608293664, "learning_rate": 3.6529173104335013e-07, "loss": 0.0027, "step": 13986 }, { "epoch": 3.1824800910125144, "grad_norm": 0.43276658854174654, "learning_rate": 3.6521046169657073e-07, "loss": 0.0039, "step": 13987 }, { "epoch": 3.182707622298066, "grad_norm": 0.5939016314828092, "learning_rate": 3.651291976594136e-07, "loss": 0.0055, "step": 13988 }, { "epoch": 3.182935153583618, "grad_norm": 0.3860263665340987, "learning_rate": 3.650479389335397e-07, "loss": 0.0029, "step": 13989 }, { "epoch": 3.1831626848691696, "grad_norm": 0.4620858779554169, "learning_rate": 3.6496668552061007e-07, "loss": 0.0067, "step": 13990 }, { "epoch": 3.1833902161547214, "grad_norm": 0.2511699333343254, "learning_rate": 3.6488543742228473e-07, "loss": 0.0018, "step": 13991 }, { "epoch": 3.183617747440273, "grad_norm": 0.5776684275456709, "learning_rate": 3.6480419464022504e-07, "loss": 0.0076, "step": 13992 }, { "epoch": 3.183845278725825, "grad_norm": 0.5228991589445829, "learning_rate": 3.647229571760909e-07, "loss": 0.003, "step": 13993 }, { "epoch": 3.1840728100113767, "grad_norm": 1.092618093979578, "learning_rate": 3.64641725031543e-07, "loss": 0.0057, "step": 13994 }, { "epoch": 3.1843003412969284, "grad_norm": 0.7689273590426942, "learning_rate": 3.645604982082415e-07, "loss": 0.0071, "step": 13995 }, { "epoch": 3.18452787258248, "grad_norm": 0.5779514713271144, "learning_rate": 3.6447927670784623e-07, "loss": 0.0047, "step": 13996 }, { "epoch": 3.184755403868032, "grad_norm": 0.4504889221729089, "learning_rate": 3.643980605320174e-07, "loss": 0.0033, "step": 13997 }, { "epoch": 3.1849829351535837, "grad_norm": 0.34788583330974737, "learning_rate": 3.643168496824148e-07, "loss": 0.0025, "step": 13998 }, { "epoch": 3.1852104664391354, "grad_norm": 0.46975725790000045, "learning_rate": 3.6423564416069884e-07, "loss": 0.0046, "step": 13999 }, { "epoch": 3.185437997724687, "grad_norm": 0.7847251725498421, "learning_rate": 3.6415444396852835e-07, "loss": 0.01, "step": 14000 }, { "epoch": 3.185665529010239, "grad_norm": 1.4493419820789137, "learning_rate": 3.640732491075636e-07, "loss": 0.0201, "step": 14001 }, { "epoch": 3.1858930602957907, "grad_norm": 0.9532080049333187, "learning_rate": 3.6399205957946384e-07, "loss": 0.0164, "step": 14002 }, { "epoch": 3.1861205915813424, "grad_norm": 0.9185709993153962, "learning_rate": 3.6391087538588797e-07, "loss": 0.007, "step": 14003 }, { "epoch": 3.186348122866894, "grad_norm": 0.633833362567459, "learning_rate": 3.638296965284957e-07, "loss": 0.0098, "step": 14004 }, { "epoch": 3.186575654152446, "grad_norm": 0.5133864709689034, "learning_rate": 3.6374852300894615e-07, "loss": 0.0032, "step": 14005 }, { "epoch": 3.1868031854379977, "grad_norm": 0.17844583879186848, "learning_rate": 3.636673548288986e-07, "loss": 0.0012, "step": 14006 }, { "epoch": 3.1870307167235494, "grad_norm": 0.708823438981112, "learning_rate": 3.6358619199001173e-07, "loss": 0.0082, "step": 14007 }, { "epoch": 3.187258248009101, "grad_norm": 0.43531773759920983, "learning_rate": 3.635050344939441e-07, "loss": 0.0049, "step": 14008 }, { "epoch": 3.187485779294653, "grad_norm": 0.5686474679684584, "learning_rate": 3.634238823423548e-07, "loss": 0.0046, "step": 14009 }, { "epoch": 3.1877133105802047, "grad_norm": 0.21163670287969968, "learning_rate": 3.633427355369021e-07, "loss": 0.0004, "step": 14010 }, { "epoch": 3.1879408418657564, "grad_norm": 0.4052170359151791, "learning_rate": 3.6326159407924464e-07, "loss": 0.004, "step": 14011 }, { "epoch": 3.188168373151308, "grad_norm": 0.4829855211541494, "learning_rate": 3.6318045797104086e-07, "loss": 0.0024, "step": 14012 }, { "epoch": 3.18839590443686, "grad_norm": 0.4353208556529922, "learning_rate": 3.6309932721394936e-07, "loss": 0.0013, "step": 14013 }, { "epoch": 3.1886234357224117, "grad_norm": 0.8117979621431132, "learning_rate": 3.630182018096279e-07, "loss": 0.0021, "step": 14014 }, { "epoch": 3.1888509670079634, "grad_norm": 0.725031738119499, "learning_rate": 3.629370817597343e-07, "loss": 0.0087, "step": 14015 }, { "epoch": 3.189078498293515, "grad_norm": 0.4598227421818844, "learning_rate": 3.6285596706592705e-07, "loss": 0.0023, "step": 14016 }, { "epoch": 3.189306029579067, "grad_norm": 1.2376012305663888, "learning_rate": 3.6277485772986336e-07, "loss": 0.0111, "step": 14017 }, { "epoch": 3.1895335608646187, "grad_norm": 0.4990831767248238, "learning_rate": 3.6269375375320174e-07, "loss": 0.0024, "step": 14018 }, { "epoch": 3.1897610921501705, "grad_norm": 0.8987279849096599, "learning_rate": 3.626126551375992e-07, "loss": 0.0061, "step": 14019 }, { "epoch": 3.189988623435722, "grad_norm": 1.7356581724962872, "learning_rate": 3.6253156188471363e-07, "loss": 0.0061, "step": 14020 }, { "epoch": 3.1902161547212744, "grad_norm": 0.22422291054927107, "learning_rate": 3.624504739962023e-07, "loss": 0.0012, "step": 14021 }, { "epoch": 3.1904436860068257, "grad_norm": 1.1699080800540143, "learning_rate": 3.6236939147372223e-07, "loss": 0.0198, "step": 14022 }, { "epoch": 3.190671217292378, "grad_norm": 1.0049715456908286, "learning_rate": 3.6228831431893074e-07, "loss": 0.0076, "step": 14023 }, { "epoch": 3.1908987485779297, "grad_norm": 0.262947989870802, "learning_rate": 3.6220724253348503e-07, "loss": 0.0015, "step": 14024 }, { "epoch": 3.1911262798634814, "grad_norm": 0.9141958472514216, "learning_rate": 3.621261761190422e-07, "loss": 0.0094, "step": 14025 }, { "epoch": 3.191353811149033, "grad_norm": 0.37087849330392003, "learning_rate": 3.6204511507725894e-07, "loss": 0.0011, "step": 14026 }, { "epoch": 3.191581342434585, "grad_norm": 0.24947697955913092, "learning_rate": 3.619640594097917e-07, "loss": 0.0006, "step": 14027 }, { "epoch": 3.1918088737201367, "grad_norm": 0.21502338651988875, "learning_rate": 3.618830091182976e-07, "loss": 0.0007, "step": 14028 }, { "epoch": 3.1920364050056884, "grad_norm": 0.5888355745951184, "learning_rate": 3.618019642044326e-07, "loss": 0.0022, "step": 14029 }, { "epoch": 3.19226393629124, "grad_norm": 0.47228553639991944, "learning_rate": 3.6172092466985346e-07, "loss": 0.007, "step": 14030 }, { "epoch": 3.192491467576792, "grad_norm": 0.07812285992255757, "learning_rate": 3.6163989051621647e-07, "loss": 0.0002, "step": 14031 }, { "epoch": 3.1927189988623437, "grad_norm": 0.7246769613763653, "learning_rate": 3.6155886174517804e-07, "loss": 0.0049, "step": 14032 }, { "epoch": 3.1929465301478954, "grad_norm": 0.32976607115264617, "learning_rate": 3.61477838358394e-07, "loss": 0.0025, "step": 14033 }, { "epoch": 3.193174061433447, "grad_norm": 0.3689073460323305, "learning_rate": 3.6139682035752017e-07, "loss": 0.006, "step": 14034 }, { "epoch": 3.193401592718999, "grad_norm": 1.07887881293858, "learning_rate": 3.613158077442127e-07, "loss": 0.0063, "step": 14035 }, { "epoch": 3.1936291240045507, "grad_norm": 0.27845136049081703, "learning_rate": 3.6123480052012705e-07, "loss": 0.0014, "step": 14036 }, { "epoch": 3.1938566552901024, "grad_norm": 0.5335783595828189, "learning_rate": 3.611537986869189e-07, "loss": 0.0059, "step": 14037 }, { "epoch": 3.194084186575654, "grad_norm": 0.47982658861277366, "learning_rate": 3.6107280224624396e-07, "loss": 0.0035, "step": 14038 }, { "epoch": 3.194311717861206, "grad_norm": 0.6950290498369845, "learning_rate": 3.6099181119975775e-07, "loss": 0.0033, "step": 14039 }, { "epoch": 3.1945392491467577, "grad_norm": 0.5204717028986449, "learning_rate": 3.609108255491155e-07, "loss": 0.0091, "step": 14040 }, { "epoch": 3.1947667804323094, "grad_norm": 2.199182067896185, "learning_rate": 3.6082984529597205e-07, "loss": 0.0182, "step": 14041 }, { "epoch": 3.194994311717861, "grad_norm": 2.9055477306218815, "learning_rate": 3.60748870441983e-07, "loss": 0.0412, "step": 14042 }, { "epoch": 3.195221843003413, "grad_norm": 0.6379050542273406, "learning_rate": 3.6066790098880283e-07, "loss": 0.0042, "step": 14043 }, { "epoch": 3.1954493742889647, "grad_norm": 1.0607260167577448, "learning_rate": 3.605869369380867e-07, "loss": 0.0182, "step": 14044 }, { "epoch": 3.1956769055745164, "grad_norm": 0.6229726616168231, "learning_rate": 3.6050597829148957e-07, "loss": 0.0093, "step": 14045 }, { "epoch": 3.195904436860068, "grad_norm": 0.879215365155623, "learning_rate": 3.604250250506656e-07, "loss": 0.0054, "step": 14046 }, { "epoch": 3.19613196814562, "grad_norm": 0.2401252403000306, "learning_rate": 3.6034407721726975e-07, "loss": 0.0006, "step": 14047 }, { "epoch": 3.1963594994311717, "grad_norm": 0.6208213404275003, "learning_rate": 3.602631347929561e-07, "loss": 0.0049, "step": 14048 }, { "epoch": 3.1965870307167235, "grad_norm": 0.316964177707174, "learning_rate": 3.601821977793794e-07, "loss": 0.0015, "step": 14049 }, { "epoch": 3.196814562002275, "grad_norm": 0.33541425500031957, "learning_rate": 3.601012661781932e-07, "loss": 0.0026, "step": 14050 }, { "epoch": 3.197042093287827, "grad_norm": 1.079298475263081, "learning_rate": 3.600203399910521e-07, "loss": 0.0128, "step": 14051 }, { "epoch": 3.1972696245733787, "grad_norm": 0.4688119261480565, "learning_rate": 3.5993941921961017e-07, "loss": 0.001, "step": 14052 }, { "epoch": 3.1974971558589305, "grad_norm": 0.21881603338751696, "learning_rate": 3.5985850386552083e-07, "loss": 0.0012, "step": 14053 }, { "epoch": 3.197724687144482, "grad_norm": 0.6440217981230283, "learning_rate": 3.5977759393043834e-07, "loss": 0.0062, "step": 14054 }, { "epoch": 3.197952218430034, "grad_norm": 0.2369528846567227, "learning_rate": 3.5969668941601587e-07, "loss": 0.0008, "step": 14055 }, { "epoch": 3.1981797497155857, "grad_norm": 0.06000064557499165, "learning_rate": 3.5961579032390737e-07, "loss": 0.0002, "step": 14056 }, { "epoch": 3.1984072810011375, "grad_norm": 0.3856061273453108, "learning_rate": 3.5953489665576594e-07, "loss": 0.0043, "step": 14057 }, { "epoch": 3.198634812286689, "grad_norm": 0.3638915681740627, "learning_rate": 3.59454008413245e-07, "loss": 0.0024, "step": 14058 }, { "epoch": 3.198862343572241, "grad_norm": 1.022273597497242, "learning_rate": 3.5937312559799804e-07, "loss": 0.0027, "step": 14059 }, { "epoch": 3.199089874857793, "grad_norm": 0.4501261357440516, "learning_rate": 3.592922482116777e-07, "loss": 0.0011, "step": 14060 }, { "epoch": 3.1993174061433445, "grad_norm": 0.2137546154364581, "learning_rate": 3.5921137625593737e-07, "loss": 0.001, "step": 14061 }, { "epoch": 3.1995449374288967, "grad_norm": 0.42042907871276464, "learning_rate": 3.591305097324295e-07, "loss": 0.0021, "step": 14062 }, { "epoch": 3.1997724687144484, "grad_norm": 0.3937722618858295, "learning_rate": 3.5904964864280745e-07, "loss": 0.0031, "step": 14063 }, { "epoch": 3.2, "grad_norm": 0.10536140796264139, "learning_rate": 3.589687929887232e-07, "loss": 0.0003, "step": 14064 }, { "epoch": 3.200227531285552, "grad_norm": 0.3952142031355048, "learning_rate": 3.588879427718296e-07, "loss": 0.0034, "step": 14065 }, { "epoch": 3.2004550625711037, "grad_norm": 0.6341216509594844, "learning_rate": 3.588070979937793e-07, "loss": 0.0049, "step": 14066 }, { "epoch": 3.2006825938566554, "grad_norm": 0.4314017960219125, "learning_rate": 3.5872625865622423e-07, "loss": 0.0041, "step": 14067 }, { "epoch": 3.200910125142207, "grad_norm": 0.5412830831361235, "learning_rate": 3.58645424760817e-07, "loss": 0.0082, "step": 14068 }, { "epoch": 3.201137656427759, "grad_norm": 0.17103028441401102, "learning_rate": 3.585645963092092e-07, "loss": 0.0005, "step": 14069 }, { "epoch": 3.2013651877133107, "grad_norm": 0.6681012908551024, "learning_rate": 3.5848377330305335e-07, "loss": 0.0045, "step": 14070 }, { "epoch": 3.2015927189988624, "grad_norm": 0.19462056471901917, "learning_rate": 3.5840295574400087e-07, "loss": 0.001, "step": 14071 }, { "epoch": 3.201820250284414, "grad_norm": 0.3657791000931397, "learning_rate": 3.5832214363370366e-07, "loss": 0.0039, "step": 14072 }, { "epoch": 3.202047781569966, "grad_norm": 0.3333688031074531, "learning_rate": 3.582413369738137e-07, "loss": 0.0033, "step": 14073 }, { "epoch": 3.2022753128555177, "grad_norm": 0.5216197084910532, "learning_rate": 3.581605357659821e-07, "loss": 0.0039, "step": 14074 }, { "epoch": 3.2025028441410694, "grad_norm": 0.6664500627681497, "learning_rate": 3.580797400118607e-07, "loss": 0.0023, "step": 14075 }, { "epoch": 3.202730375426621, "grad_norm": 0.3965770570205664, "learning_rate": 3.579989497131002e-07, "loss": 0.0023, "step": 14076 }, { "epoch": 3.202957906712173, "grad_norm": 0.6094352975987201, "learning_rate": 3.5791816487135256e-07, "loss": 0.0062, "step": 14077 }, { "epoch": 3.2031854379977247, "grad_norm": 0.1561110126291996, "learning_rate": 3.5783738548826814e-07, "loss": 0.0006, "step": 14078 }, { "epoch": 3.2034129692832765, "grad_norm": 0.8413577473312134, "learning_rate": 3.5775661156549834e-07, "loss": 0.0033, "step": 14079 }, { "epoch": 3.203640500568828, "grad_norm": 2.318209518377296, "learning_rate": 3.5767584310469424e-07, "loss": 0.0134, "step": 14080 }, { "epoch": 3.20386803185438, "grad_norm": 0.6570824882527502, "learning_rate": 3.57595080107506e-07, "loss": 0.0062, "step": 14081 }, { "epoch": 3.2040955631399317, "grad_norm": 0.026374807309821863, "learning_rate": 3.575143225755849e-07, "loss": 0.0001, "step": 14082 }, { "epoch": 3.2043230944254835, "grad_norm": 0.614645611104066, "learning_rate": 3.574335705105811e-07, "loss": 0.0019, "step": 14083 }, { "epoch": 3.204550625711035, "grad_norm": 0.5233351597454896, "learning_rate": 3.5735282391414467e-07, "loss": 0.0025, "step": 14084 }, { "epoch": 3.204778156996587, "grad_norm": 0.7976996527307297, "learning_rate": 3.5727208278792683e-07, "loss": 0.0087, "step": 14085 }, { "epoch": 3.2050056882821387, "grad_norm": 0.16164815760137155, "learning_rate": 3.5719134713357704e-07, "loss": 0.0006, "step": 14086 }, { "epoch": 3.2052332195676905, "grad_norm": 0.7470310076058179, "learning_rate": 3.5711061695274585e-07, "loss": 0.0067, "step": 14087 }, { "epoch": 3.2054607508532422, "grad_norm": 0.7008712410289921, "learning_rate": 3.570298922470829e-07, "loss": 0.0055, "step": 14088 }, { "epoch": 3.205688282138794, "grad_norm": 0.504485466961715, "learning_rate": 3.5694917301823826e-07, "loss": 0.0045, "step": 14089 }, { "epoch": 3.2059158134243457, "grad_norm": 0.5944643170480736, "learning_rate": 3.568684592678615e-07, "loss": 0.006, "step": 14090 }, { "epoch": 3.2061433447098975, "grad_norm": 0.5782437192112367, "learning_rate": 3.567877509976023e-07, "loss": 0.0067, "step": 14091 }, { "epoch": 3.2063708759954492, "grad_norm": 0.24688517707156202, "learning_rate": 3.5670704820911063e-07, "loss": 0.0016, "step": 14092 }, { "epoch": 3.206598407281001, "grad_norm": 1.0275339501674492, "learning_rate": 3.5662635090403517e-07, "loss": 0.0075, "step": 14093 }, { "epoch": 3.2068259385665527, "grad_norm": 0.26630994749327214, "learning_rate": 3.565456590840259e-07, "loss": 0.0031, "step": 14094 }, { "epoch": 3.2070534698521045, "grad_norm": 1.1000448902866566, "learning_rate": 3.564649727507316e-07, "loss": 0.0075, "step": 14095 }, { "epoch": 3.2072810011376562, "grad_norm": 0.7992462451432828, "learning_rate": 3.563842919058014e-07, "loss": 0.0042, "step": 14096 }, { "epoch": 3.207508532423208, "grad_norm": 0.7348163073262458, "learning_rate": 3.5630361655088415e-07, "loss": 0.0052, "step": 14097 }, { "epoch": 3.2077360637087597, "grad_norm": 1.4836007203679313, "learning_rate": 3.5622294668762896e-07, "loss": 0.0214, "step": 14098 }, { "epoch": 3.207963594994312, "grad_norm": 0.8490779350579114, "learning_rate": 3.561422823176848e-07, "loss": 0.0032, "step": 14099 }, { "epoch": 3.2081911262798632, "grad_norm": 0.7189121869304027, "learning_rate": 3.560616234426997e-07, "loss": 0.0076, "step": 14100 }, { "epoch": 3.2084186575654154, "grad_norm": 0.5358656693869333, "learning_rate": 3.559809700643227e-07, "loss": 0.0042, "step": 14101 }, { "epoch": 3.208646188850967, "grad_norm": 0.4263852302141992, "learning_rate": 3.5590032218420204e-07, "loss": 0.0046, "step": 14102 }, { "epoch": 3.208873720136519, "grad_norm": 0.30306864129979494, "learning_rate": 3.5581967980398564e-07, "loss": 0.0017, "step": 14103 }, { "epoch": 3.2091012514220707, "grad_norm": 0.33974083717093745, "learning_rate": 3.557390429253221e-07, "loss": 0.0027, "step": 14104 }, { "epoch": 3.2093287827076225, "grad_norm": 0.2557435685339923, "learning_rate": 3.5565841154985933e-07, "loss": 0.0019, "step": 14105 }, { "epoch": 3.209556313993174, "grad_norm": 0.5032032685434423, "learning_rate": 3.5557778567924554e-07, "loss": 0.0051, "step": 14106 }, { "epoch": 3.209783845278726, "grad_norm": 0.10816527636654966, "learning_rate": 3.5549716531512824e-07, "loss": 0.0005, "step": 14107 }, { "epoch": 3.2100113765642777, "grad_norm": 1.1346087079376113, "learning_rate": 3.5541655045915556e-07, "loss": 0.0071, "step": 14108 }, { "epoch": 3.2102389078498295, "grad_norm": 0.7333090772767044, "learning_rate": 3.553359411129748e-07, "loss": 0.0082, "step": 14109 }, { "epoch": 3.210466439135381, "grad_norm": 0.22827003240804586, "learning_rate": 3.552553372782334e-07, "loss": 0.0016, "step": 14110 }, { "epoch": 3.210693970420933, "grad_norm": 0.7858339889974061, "learning_rate": 3.5517473895657877e-07, "loss": 0.0099, "step": 14111 }, { "epoch": 3.2109215017064847, "grad_norm": 0.5807652558349599, "learning_rate": 3.550941461496583e-07, "loss": 0.0066, "step": 14112 }, { "epoch": 3.2111490329920365, "grad_norm": 0.7125170655623186, "learning_rate": 3.5501355885911957e-07, "loss": 0.0165, "step": 14113 }, { "epoch": 3.211376564277588, "grad_norm": 0.4997392883890054, "learning_rate": 3.549329770866092e-07, "loss": 0.0054, "step": 14114 }, { "epoch": 3.21160409556314, "grad_norm": 0.7039870713128461, "learning_rate": 3.5485240083377376e-07, "loss": 0.0085, "step": 14115 }, { "epoch": 3.2118316268486917, "grad_norm": 0.7343497185752715, "learning_rate": 3.54771830102261e-07, "loss": 0.003, "step": 14116 }, { "epoch": 3.2120591581342435, "grad_norm": 0.6348544956581941, "learning_rate": 3.546912648937167e-07, "loss": 0.0056, "step": 14117 }, { "epoch": 3.2122866894197952, "grad_norm": 0.36708253347040526, "learning_rate": 3.5461070520978797e-07, "loss": 0.0014, "step": 14118 }, { "epoch": 3.212514220705347, "grad_norm": 0.29139850373166426, "learning_rate": 3.5453015105212117e-07, "loss": 0.0021, "step": 14119 }, { "epoch": 3.2127417519908987, "grad_norm": 0.8036418073798035, "learning_rate": 3.54449602422363e-07, "loss": 0.0069, "step": 14120 }, { "epoch": 3.2129692832764505, "grad_norm": 0.6662208086642227, "learning_rate": 3.543690593221595e-07, "loss": 0.0087, "step": 14121 }, { "epoch": 3.2131968145620022, "grad_norm": 0.5851125971881366, "learning_rate": 3.5428852175315656e-07, "loss": 0.0036, "step": 14122 }, { "epoch": 3.213424345847554, "grad_norm": 0.5667302081987708, "learning_rate": 3.5420798971700074e-07, "loss": 0.0034, "step": 14123 }, { "epoch": 3.2136518771331057, "grad_norm": 0.6507208072493805, "learning_rate": 3.541274632153373e-07, "loss": 0.0068, "step": 14124 }, { "epoch": 3.2138794084186575, "grad_norm": 0.72241816954674, "learning_rate": 3.540469422498125e-07, "loss": 0.0092, "step": 14125 }, { "epoch": 3.2141069397042092, "grad_norm": 0.619330691580553, "learning_rate": 3.5396642682207204e-07, "loss": 0.0018, "step": 14126 }, { "epoch": 3.214334470989761, "grad_norm": 0.3112123583079504, "learning_rate": 3.538859169337616e-07, "loss": 0.0025, "step": 14127 }, { "epoch": 3.2145620022753127, "grad_norm": 0.2549672543226317, "learning_rate": 3.538054125865265e-07, "loss": 0.001, "step": 14128 }, { "epoch": 3.2147895335608645, "grad_norm": 0.08836250209476817, "learning_rate": 3.537249137820119e-07, "loss": 0.0003, "step": 14129 }, { "epoch": 3.2150170648464163, "grad_norm": 0.24344250706493142, "learning_rate": 3.536444205218634e-07, "loss": 0.0018, "step": 14130 }, { "epoch": 3.215244596131968, "grad_norm": 0.2952028662328581, "learning_rate": 3.535639328077258e-07, "loss": 0.0027, "step": 14131 }, { "epoch": 3.2154721274175198, "grad_norm": 0.454055124870473, "learning_rate": 3.534834506412443e-07, "loss": 0.0033, "step": 14132 }, { "epoch": 3.2156996587030715, "grad_norm": 1.1671036301299411, "learning_rate": 3.534029740240641e-07, "loss": 0.0072, "step": 14133 }, { "epoch": 3.2159271899886233, "grad_norm": 0.26856012832399256, "learning_rate": 3.5332250295782937e-07, "loss": 0.0009, "step": 14134 }, { "epoch": 3.216154721274175, "grad_norm": 0.21243500024584655, "learning_rate": 3.5324203744418544e-07, "loss": 0.0009, "step": 14135 }, { "epoch": 3.2163822525597268, "grad_norm": 0.21249977007457715, "learning_rate": 3.5316157748477625e-07, "loss": 0.001, "step": 14136 }, { "epoch": 3.2166097838452785, "grad_norm": 0.23300636100908578, "learning_rate": 3.5308112308124685e-07, "loss": 0.0016, "step": 14137 }, { "epoch": 3.2168373151308307, "grad_norm": 0.8344487891777497, "learning_rate": 3.5300067423524086e-07, "loss": 0.0083, "step": 14138 }, { "epoch": 3.217064846416382, "grad_norm": 0.029943170098958485, "learning_rate": 3.5292023094840336e-07, "loss": 0.0001, "step": 14139 }, { "epoch": 3.217292377701934, "grad_norm": 0.8453657908146244, "learning_rate": 3.5283979322237804e-07, "loss": 0.0069, "step": 14140 }, { "epoch": 3.217519908987486, "grad_norm": 0.3754339279312153, "learning_rate": 3.527593610588087e-07, "loss": 0.0015, "step": 14141 }, { "epoch": 3.2177474402730377, "grad_norm": 0.3994928157781002, "learning_rate": 3.5267893445933967e-07, "loss": 0.0023, "step": 14142 }, { "epoch": 3.2179749715585895, "grad_norm": 0.9554912454927296, "learning_rate": 3.525985134256143e-07, "loss": 0.0047, "step": 14143 }, { "epoch": 3.218202502844141, "grad_norm": 0.6500862364624078, "learning_rate": 3.5251809795927637e-07, "loss": 0.0085, "step": 14144 }, { "epoch": 3.218430034129693, "grad_norm": 0.5006227708393104, "learning_rate": 3.5243768806196954e-07, "loss": 0.0054, "step": 14145 }, { "epoch": 3.2186575654152447, "grad_norm": 0.5143258978483557, "learning_rate": 3.5235728373533736e-07, "loss": 0.0072, "step": 14146 }, { "epoch": 3.2188850967007965, "grad_norm": 0.9988131953982449, "learning_rate": 3.522768849810231e-07, "loss": 0.0071, "step": 14147 }, { "epoch": 3.2191126279863482, "grad_norm": 0.49722725123976846, "learning_rate": 3.521964918006694e-07, "loss": 0.0025, "step": 14148 }, { "epoch": 3.2193401592719, "grad_norm": 1.4160039253212922, "learning_rate": 3.521161041959202e-07, "loss": 0.0075, "step": 14149 }, { "epoch": 3.2195676905574517, "grad_norm": 0.6772094591995972, "learning_rate": 3.520357221684178e-07, "loss": 0.0092, "step": 14150 }, { "epoch": 3.2197952218430035, "grad_norm": 0.19798965141909394, "learning_rate": 3.519553457198053e-07, "loss": 0.0014, "step": 14151 }, { "epoch": 3.2200227531285552, "grad_norm": 0.3415472812624786, "learning_rate": 3.518749748517257e-07, "loss": 0.0065, "step": 14152 }, { "epoch": 3.220250284414107, "grad_norm": 0.3779695491474782, "learning_rate": 3.517946095658212e-07, "loss": 0.003, "step": 14153 }, { "epoch": 3.2204778156996587, "grad_norm": 0.6287369180407836, "learning_rate": 3.5171424986373477e-07, "loss": 0.0026, "step": 14154 }, { "epoch": 3.2207053469852105, "grad_norm": 0.2503942951484098, "learning_rate": 3.516338957471083e-07, "loss": 0.0012, "step": 14155 }, { "epoch": 3.2209328782707622, "grad_norm": 0.3632230947833914, "learning_rate": 3.515535472175846e-07, "loss": 0.0031, "step": 14156 }, { "epoch": 3.221160409556314, "grad_norm": 0.7226344886723451, "learning_rate": 3.5147320427680543e-07, "loss": 0.0027, "step": 14157 }, { "epoch": 3.2213879408418657, "grad_norm": 0.3302686052441469, "learning_rate": 3.51392866926413e-07, "loss": 0.0047, "step": 14158 }, { "epoch": 3.2216154721274175, "grad_norm": 0.3183042734499546, "learning_rate": 3.513125351680495e-07, "loss": 0.0016, "step": 14159 }, { "epoch": 3.2218430034129693, "grad_norm": 1.0128568492997232, "learning_rate": 3.5123220900335623e-07, "loss": 0.0049, "step": 14160 }, { "epoch": 3.222070534698521, "grad_norm": 0.3388364182813109, "learning_rate": 3.5115188843397557e-07, "loss": 0.0029, "step": 14161 }, { "epoch": 3.2222980659840728, "grad_norm": 1.1610042525560134, "learning_rate": 3.510715734615485e-07, "loss": 0.0082, "step": 14162 }, { "epoch": 3.2225255972696245, "grad_norm": 0.8755807651004172, "learning_rate": 3.5099126408771707e-07, "loss": 0.0074, "step": 14163 }, { "epoch": 3.2227531285551763, "grad_norm": 0.6032817693253427, "learning_rate": 3.509109603141221e-07, "loss": 0.0027, "step": 14164 }, { "epoch": 3.222980659840728, "grad_norm": 0.6050689741704921, "learning_rate": 3.5083066214240513e-07, "loss": 0.0024, "step": 14165 }, { "epoch": 3.2232081911262798, "grad_norm": 0.4550229073480171, "learning_rate": 3.507503695742076e-07, "loss": 0.0015, "step": 14166 }, { "epoch": 3.2234357224118315, "grad_norm": 0.311532505303128, "learning_rate": 3.5067008261116994e-07, "loss": 0.0012, "step": 14167 }, { "epoch": 3.2236632536973833, "grad_norm": 1.3683347319622314, "learning_rate": 3.5058980125493366e-07, "loss": 0.0122, "step": 14168 }, { "epoch": 3.223890784982935, "grad_norm": 4.57151073703537, "learning_rate": 3.5050952550713906e-07, "loss": 0.0661, "step": 14169 }, { "epoch": 3.2241183162684868, "grad_norm": 0.46972698516488853, "learning_rate": 3.5042925536942727e-07, "loss": 0.0038, "step": 14170 }, { "epoch": 3.2243458475540385, "grad_norm": 0.8210059005125667, "learning_rate": 3.5034899084343835e-07, "loss": 0.0134, "step": 14171 }, { "epoch": 3.2245733788395903, "grad_norm": 0.23665402124061943, "learning_rate": 3.5026873193081316e-07, "loss": 0.0007, "step": 14172 }, { "epoch": 3.224800910125142, "grad_norm": 0.29690613335698174, "learning_rate": 3.5018847863319215e-07, "loss": 0.0036, "step": 14173 }, { "epoch": 3.225028441410694, "grad_norm": 0.5775319211689307, "learning_rate": 3.501082309522151e-07, "loss": 0.0034, "step": 14174 }, { "epoch": 3.2252559726962455, "grad_norm": 0.43642213137805164, "learning_rate": 3.500279888895226e-07, "loss": 0.0038, "step": 14175 }, { "epoch": 3.2254835039817973, "grad_norm": 0.28106641942682664, "learning_rate": 3.499477524467541e-07, "loss": 0.0025, "step": 14176 }, { "epoch": 3.2257110352673495, "grad_norm": 0.843807639328657, "learning_rate": 3.498675216255502e-07, "loss": 0.0105, "step": 14177 }, { "epoch": 3.225938566552901, "grad_norm": 0.07647314188838444, "learning_rate": 3.4978729642754993e-07, "loss": 0.0002, "step": 14178 }, { "epoch": 3.226166097838453, "grad_norm": 0.4843920271324386, "learning_rate": 3.4970707685439335e-07, "loss": 0.0028, "step": 14179 }, { "epoch": 3.2263936291240047, "grad_norm": 0.42895741681171523, "learning_rate": 3.4962686290772015e-07, "loss": 0.0016, "step": 14180 }, { "epoch": 3.2266211604095565, "grad_norm": 0.8917078582389659, "learning_rate": 3.495466545891693e-07, "loss": 0.0099, "step": 14181 }, { "epoch": 3.2268486916951082, "grad_norm": 0.620205289926425, "learning_rate": 3.494664519003806e-07, "loss": 0.0052, "step": 14182 }, { "epoch": 3.22707622298066, "grad_norm": 0.4523694941073517, "learning_rate": 3.4938625484299286e-07, "loss": 0.0044, "step": 14183 }, { "epoch": 3.2273037542662117, "grad_norm": 0.23443968581387228, "learning_rate": 3.493060634186454e-07, "loss": 0.0014, "step": 14184 }, { "epoch": 3.2275312855517635, "grad_norm": 0.5009536464850688, "learning_rate": 3.4922587762897695e-07, "loss": 0.003, "step": 14185 }, { "epoch": 3.2277588168373152, "grad_norm": 0.22111289630146305, "learning_rate": 3.4914569747562645e-07, "loss": 0.0009, "step": 14186 }, { "epoch": 3.227986348122867, "grad_norm": 0.24206373572584353, "learning_rate": 3.4906552296023297e-07, "loss": 0.0017, "step": 14187 }, { "epoch": 3.2282138794084188, "grad_norm": 1.7993887635296344, "learning_rate": 3.4898535408443466e-07, "loss": 0.0051, "step": 14188 }, { "epoch": 3.2284414106939705, "grad_norm": 0.39995866475554614, "learning_rate": 3.4890519084987044e-07, "loss": 0.0014, "step": 14189 }, { "epoch": 3.2286689419795223, "grad_norm": 1.3558697380281102, "learning_rate": 3.488250332581784e-07, "loss": 0.0109, "step": 14190 }, { "epoch": 3.228896473265074, "grad_norm": 0.7332269484336676, "learning_rate": 3.4874488131099673e-07, "loss": 0.005, "step": 14191 }, { "epoch": 3.2291240045506258, "grad_norm": 0.6694959405181727, "learning_rate": 3.4866473500996367e-07, "loss": 0.0097, "step": 14192 }, { "epoch": 3.2293515358361775, "grad_norm": 0.5786303186097296, "learning_rate": 3.4858459435671734e-07, "loss": 0.0085, "step": 14193 }, { "epoch": 3.2295790671217293, "grad_norm": 0.26494963824461415, "learning_rate": 3.485044593528959e-07, "loss": 0.0008, "step": 14194 }, { "epoch": 3.229806598407281, "grad_norm": 0.2315602406218596, "learning_rate": 3.4842433000013677e-07, "loss": 0.0015, "step": 14195 }, { "epoch": 3.2300341296928328, "grad_norm": 0.5621681491497378, "learning_rate": 3.48344206300078e-07, "loss": 0.0044, "step": 14196 }, { "epoch": 3.2302616609783845, "grad_norm": 0.08716651158464903, "learning_rate": 3.4826408825435704e-07, "loss": 0.0005, "step": 14197 }, { "epoch": 3.2304891922639363, "grad_norm": 1.164828080861236, "learning_rate": 3.4818397586461106e-07, "loss": 0.0043, "step": 14198 }, { "epoch": 3.230716723549488, "grad_norm": 0.2837678562321079, "learning_rate": 3.4810386913247755e-07, "loss": 0.001, "step": 14199 }, { "epoch": 3.2309442548350398, "grad_norm": 0.27062370362989624, "learning_rate": 3.4802376805959393e-07, "loss": 0.0011, "step": 14200 }, { "epoch": 3.2311717861205915, "grad_norm": 1.4958566149079981, "learning_rate": 3.4794367264759746e-07, "loss": 0.0038, "step": 14201 }, { "epoch": 3.2313993174061433, "grad_norm": 2.1166004392103885, "learning_rate": 3.478635828981249e-07, "loss": 0.0104, "step": 14202 }, { "epoch": 3.231626848691695, "grad_norm": 0.8327263072384081, "learning_rate": 3.4778349881281286e-07, "loss": 0.0042, "step": 14203 }, { "epoch": 3.231854379977247, "grad_norm": 0.7624621439065516, "learning_rate": 3.477034203932987e-07, "loss": 0.005, "step": 14204 }, { "epoch": 3.2320819112627985, "grad_norm": 0.16509533524415174, "learning_rate": 3.476233476412183e-07, "loss": 0.001, "step": 14205 }, { "epoch": 3.2323094425483503, "grad_norm": 0.17948681926288554, "learning_rate": 3.475432805582092e-07, "loss": 0.0009, "step": 14206 }, { "epoch": 3.232536973833902, "grad_norm": 0.7508664742064951, "learning_rate": 3.474632191459071e-07, "loss": 0.0088, "step": 14207 }, { "epoch": 3.232764505119454, "grad_norm": 0.30967008164728005, "learning_rate": 3.4738316340594866e-07, "loss": 0.0016, "step": 14208 }, { "epoch": 3.2329920364050055, "grad_norm": 0.31680066149968245, "learning_rate": 3.4730311333996997e-07, "loss": 0.0025, "step": 14209 }, { "epoch": 3.2332195676905573, "grad_norm": 0.18164358584830095, "learning_rate": 3.472230689496068e-07, "loss": 0.0015, "step": 14210 }, { "epoch": 3.233447098976109, "grad_norm": 0.5352783696953575, "learning_rate": 3.4714303023649536e-07, "loss": 0.0069, "step": 14211 }, { "epoch": 3.233674630261661, "grad_norm": 0.6797703000811797, "learning_rate": 3.470629972022715e-07, "loss": 0.0014, "step": 14212 }, { "epoch": 3.2339021615472126, "grad_norm": 0.5058287357597611, "learning_rate": 3.4698296984857134e-07, "loss": 0.0014, "step": 14213 }, { "epoch": 3.2341296928327643, "grad_norm": 1.46737693037793, "learning_rate": 3.4690294817702974e-07, "loss": 0.0141, "step": 14214 }, { "epoch": 3.234357224118316, "grad_norm": 0.48818319909378416, "learning_rate": 3.468229321892828e-07, "loss": 0.0044, "step": 14215 }, { "epoch": 3.2345847554038683, "grad_norm": 0.9675438586036409, "learning_rate": 3.467429218869657e-07, "loss": 0.0046, "step": 14216 }, { "epoch": 3.23481228668942, "grad_norm": 0.15255199134465805, "learning_rate": 3.466629172717135e-07, "loss": 0.0006, "step": 14217 }, { "epoch": 3.2350398179749718, "grad_norm": 0.6064371490817917, "learning_rate": 3.4658291834516145e-07, "loss": 0.0062, "step": 14218 }, { "epoch": 3.2352673492605235, "grad_norm": 0.40139239509301966, "learning_rate": 3.465029251089447e-07, "loss": 0.0013, "step": 14219 }, { "epoch": 3.2354948805460753, "grad_norm": 0.769996383752103, "learning_rate": 3.464229375646983e-07, "loss": 0.0019, "step": 14220 }, { "epoch": 3.235722411831627, "grad_norm": 0.4663442329398157, "learning_rate": 3.4634295571405686e-07, "loss": 0.0034, "step": 14221 }, { "epoch": 3.2359499431171788, "grad_norm": 0.3719491179596309, "learning_rate": 3.4626297955865485e-07, "loss": 0.0077, "step": 14222 }, { "epoch": 3.2361774744027305, "grad_norm": 1.7654858371827866, "learning_rate": 3.461830091001274e-07, "loss": 0.0769, "step": 14223 }, { "epoch": 3.2364050056882823, "grad_norm": 0.29663585775786255, "learning_rate": 3.461030443401082e-07, "loss": 0.003, "step": 14224 }, { "epoch": 3.236632536973834, "grad_norm": 0.5854154586673112, "learning_rate": 3.460230852802321e-07, "loss": 0.0034, "step": 14225 }, { "epoch": 3.2368600682593858, "grad_norm": 0.3752989601978667, "learning_rate": 3.459431319221332e-07, "loss": 0.003, "step": 14226 }, { "epoch": 3.2370875995449375, "grad_norm": 0.5440466012763658, "learning_rate": 3.4586318426744585e-07, "loss": 0.0031, "step": 14227 }, { "epoch": 3.2373151308304893, "grad_norm": 0.3583721337411654, "learning_rate": 3.457832423178038e-07, "loss": 0.0029, "step": 14228 }, { "epoch": 3.237542662116041, "grad_norm": 0.3475870771617719, "learning_rate": 3.457033060748406e-07, "loss": 0.0032, "step": 14229 }, { "epoch": 3.2377701934015928, "grad_norm": 0.27307917935510945, "learning_rate": 3.4562337554019066e-07, "loss": 0.0027, "step": 14230 }, { "epoch": 3.2379977246871445, "grad_norm": 1.3173488705685164, "learning_rate": 3.45543450715487e-07, "loss": 0.0165, "step": 14231 }, { "epoch": 3.2382252559726963, "grad_norm": 0.9382785212052192, "learning_rate": 3.454635316023634e-07, "loss": 0.0105, "step": 14232 }, { "epoch": 3.238452787258248, "grad_norm": 0.6079201799351314, "learning_rate": 3.453836182024533e-07, "loss": 0.0051, "step": 14233 }, { "epoch": 3.2386803185438, "grad_norm": 0.6641900290041964, "learning_rate": 3.4530371051739024e-07, "loss": 0.0022, "step": 14234 }, { "epoch": 3.2389078498293515, "grad_norm": 0.7851986387596928, "learning_rate": 3.4522380854880695e-07, "loss": 0.0074, "step": 14235 }, { "epoch": 3.2391353811149033, "grad_norm": 0.25950981170804155, "learning_rate": 3.451439122983365e-07, "loss": 0.0012, "step": 14236 }, { "epoch": 3.239362912400455, "grad_norm": 0.18675165105216668, "learning_rate": 3.450640217676121e-07, "loss": 0.0009, "step": 14237 }, { "epoch": 3.239590443686007, "grad_norm": 0.2855627271165137, "learning_rate": 3.4498413695826624e-07, "loss": 0.0017, "step": 14238 }, { "epoch": 3.2398179749715585, "grad_norm": 0.29506581771400825, "learning_rate": 3.449042578719318e-07, "loss": 0.0006, "step": 14239 }, { "epoch": 3.2400455062571103, "grad_norm": 0.3307138830178998, "learning_rate": 3.4482438451024154e-07, "loss": 0.0023, "step": 14240 }, { "epoch": 3.240273037542662, "grad_norm": 2.289654514659615, "learning_rate": 3.4474451687482756e-07, "loss": 0.0072, "step": 14241 }, { "epoch": 3.240500568828214, "grad_norm": 0.9948729731505431, "learning_rate": 3.446646549673226e-07, "loss": 0.0071, "step": 14242 }, { "epoch": 3.2407281001137656, "grad_norm": 0.7344827309341448, "learning_rate": 3.4458479878935844e-07, "loss": 0.009, "step": 14243 }, { "epoch": 3.2409556313993173, "grad_norm": 1.3045573143644822, "learning_rate": 3.4450494834256776e-07, "loss": 0.0082, "step": 14244 }, { "epoch": 3.241183162684869, "grad_norm": 0.6502547026182495, "learning_rate": 3.4442510362858187e-07, "loss": 0.002, "step": 14245 }, { "epoch": 3.241410693970421, "grad_norm": 0.9623958676235812, "learning_rate": 3.443452646490331e-07, "loss": 0.0039, "step": 14246 }, { "epoch": 3.2416382252559726, "grad_norm": 0.2773673161476276, "learning_rate": 3.442654314055534e-07, "loss": 0.0009, "step": 14247 }, { "epoch": 3.2418657565415243, "grad_norm": 0.41194133134276634, "learning_rate": 3.441856038997737e-07, "loss": 0.0041, "step": 14248 }, { "epoch": 3.242093287827076, "grad_norm": 0.12134211537416477, "learning_rate": 3.4410578213332645e-07, "loss": 0.0003, "step": 14249 }, { "epoch": 3.242320819112628, "grad_norm": 2.8828192197897384, "learning_rate": 3.440259661078422e-07, "loss": 0.0374, "step": 14250 }, { "epoch": 3.2425483503981796, "grad_norm": 0.3795748421866553, "learning_rate": 3.4394615582495295e-07, "loss": 0.0022, "step": 14251 }, { "epoch": 3.2427758816837313, "grad_norm": 0.2602217075858129, "learning_rate": 3.4386635128628916e-07, "loss": 0.0022, "step": 14252 }, { "epoch": 3.243003412969283, "grad_norm": 0.20926190994382668, "learning_rate": 3.437865524934824e-07, "loss": 0.0015, "step": 14253 }, { "epoch": 3.243230944254835, "grad_norm": 0.6162227155296895, "learning_rate": 3.437067594481637e-07, "loss": 0.0036, "step": 14254 }, { "epoch": 3.243458475540387, "grad_norm": 0.5009582748765699, "learning_rate": 3.4362697215196347e-07, "loss": 0.0034, "step": 14255 }, { "epoch": 3.2436860068259388, "grad_norm": 0.6360089700305348, "learning_rate": 3.4354719060651286e-07, "loss": 0.0039, "step": 14256 }, { "epoch": 3.2439135381114905, "grad_norm": 1.2169357412316382, "learning_rate": 3.434674148134419e-07, "loss": 0.0104, "step": 14257 }, { "epoch": 3.2441410693970423, "grad_norm": 0.43142631819020233, "learning_rate": 3.433876447743817e-07, "loss": 0.0026, "step": 14258 }, { "epoch": 3.244368600682594, "grad_norm": 0.18835902687251208, "learning_rate": 3.4330788049096196e-07, "loss": 0.0012, "step": 14259 }, { "epoch": 3.244596131968146, "grad_norm": 0.6040757628775055, "learning_rate": 3.432281219648133e-07, "loss": 0.0053, "step": 14260 }, { "epoch": 3.2448236632536975, "grad_norm": 0.25240938969058424, "learning_rate": 3.431483691975661e-07, "loss": 0.0015, "step": 14261 }, { "epoch": 3.2450511945392493, "grad_norm": 0.20136556435888933, "learning_rate": 3.430686221908497e-07, "loss": 0.0006, "step": 14262 }, { "epoch": 3.245278725824801, "grad_norm": 0.46425726481070945, "learning_rate": 3.429888809462946e-07, "loss": 0.0032, "step": 14263 }, { "epoch": 3.245506257110353, "grad_norm": 0.5651383285557547, "learning_rate": 3.4290914546553006e-07, "loss": 0.0053, "step": 14264 }, { "epoch": 3.2457337883959045, "grad_norm": 0.38178479171168056, "learning_rate": 3.428294157501859e-07, "loss": 0.0019, "step": 14265 }, { "epoch": 3.2459613196814563, "grad_norm": 1.3554911244563563, "learning_rate": 3.4274969180189203e-07, "loss": 0.0137, "step": 14266 }, { "epoch": 3.246188850967008, "grad_norm": 0.42152522112570673, "learning_rate": 3.426699736222773e-07, "loss": 0.006, "step": 14267 }, { "epoch": 3.24641638225256, "grad_norm": 0.8161003941883703, "learning_rate": 3.4259026121297145e-07, "loss": 0.0102, "step": 14268 }, { "epoch": 3.2466439135381115, "grad_norm": 0.32758775167785814, "learning_rate": 3.4251055457560326e-07, "loss": 0.004, "step": 14269 }, { "epoch": 3.2468714448236633, "grad_norm": 1.0237995744603852, "learning_rate": 3.4243085371180223e-07, "loss": 0.0064, "step": 14270 }, { "epoch": 3.247098976109215, "grad_norm": 1.2159991514501378, "learning_rate": 3.423511586231967e-07, "loss": 0.0105, "step": 14271 }, { "epoch": 3.247326507394767, "grad_norm": 0.8609530923947994, "learning_rate": 3.42271469311416e-07, "loss": 0.0026, "step": 14272 }, { "epoch": 3.2475540386803186, "grad_norm": 0.46442084259253774, "learning_rate": 3.4219178577808874e-07, "loss": 0.0021, "step": 14273 }, { "epoch": 3.2477815699658703, "grad_norm": 0.6212958820172476, "learning_rate": 3.421121080248433e-07, "loss": 0.0018, "step": 14274 }, { "epoch": 3.248009101251422, "grad_norm": 0.7719885005180686, "learning_rate": 3.4203243605330854e-07, "loss": 0.0052, "step": 14275 }, { "epoch": 3.248236632536974, "grad_norm": 0.32887200557872714, "learning_rate": 3.419527698651123e-07, "loss": 0.0036, "step": 14276 }, { "epoch": 3.2484641638225256, "grad_norm": 0.3233971147011152, "learning_rate": 3.4187310946188323e-07, "loss": 0.0014, "step": 14277 }, { "epoch": 3.2486916951080773, "grad_norm": 0.4967817444300952, "learning_rate": 3.4179345484524907e-07, "loss": 0.002, "step": 14278 }, { "epoch": 3.248919226393629, "grad_norm": 0.37973665455393035, "learning_rate": 3.417138060168381e-07, "loss": 0.0013, "step": 14279 }, { "epoch": 3.249146757679181, "grad_norm": 1.1443154312883408, "learning_rate": 3.4163416297827835e-07, "loss": 0.0168, "step": 14280 }, { "epoch": 3.2493742889647326, "grad_norm": 0.49279094359903125, "learning_rate": 3.415545257311971e-07, "loss": 0.0029, "step": 14281 }, { "epoch": 3.2496018202502843, "grad_norm": 0.38243426529753494, "learning_rate": 3.4147489427722236e-07, "loss": 0.0015, "step": 14282 }, { "epoch": 3.249829351535836, "grad_norm": 0.10176896663611819, "learning_rate": 3.4139526861798143e-07, "loss": 0.0004, "step": 14283 }, { "epoch": 3.250056882821388, "grad_norm": 0.44201379829903303, "learning_rate": 3.4131564875510206e-07, "loss": 0.003, "step": 14284 }, { "epoch": 3.2502844141069396, "grad_norm": 0.6367594594199756, "learning_rate": 3.4123603469021095e-07, "loss": 0.0054, "step": 14285 }, { "epoch": 3.2505119453924913, "grad_norm": 0.18248929145372397, "learning_rate": 3.4115642642493565e-07, "loss": 0.0009, "step": 14286 }, { "epoch": 3.250739476678043, "grad_norm": 0.20424464340575793, "learning_rate": 3.4107682396090345e-07, "loss": 0.0012, "step": 14287 }, { "epoch": 3.250967007963595, "grad_norm": 0.9799575840555046, "learning_rate": 3.409972272997407e-07, "loss": 0.0058, "step": 14288 }, { "epoch": 3.2511945392491466, "grad_norm": 0.07179798800020944, "learning_rate": 3.409176364430747e-07, "loss": 0.0002, "step": 14289 }, { "epoch": 3.2514220705346983, "grad_norm": 0.5282419865534194, "learning_rate": 3.4083805139253174e-07, "loss": 0.0028, "step": 14290 }, { "epoch": 3.25164960182025, "grad_norm": 0.12396248044525754, "learning_rate": 3.407584721497388e-07, "loss": 0.0006, "step": 14291 }, { "epoch": 3.2518771331058023, "grad_norm": 0.2920957491339719, "learning_rate": 3.406788987163219e-07, "loss": 0.0012, "step": 14292 }, { "epoch": 3.2521046643913536, "grad_norm": 0.17689594944651157, "learning_rate": 3.4059933109390766e-07, "loss": 0.0011, "step": 14293 }, { "epoch": 3.252332195676906, "grad_norm": 0.6548980028769739, "learning_rate": 3.4051976928412237e-07, "loss": 0.003, "step": 14294 }, { "epoch": 3.252559726962457, "grad_norm": 1.0089821479432697, "learning_rate": 3.404402132885919e-07, "loss": 0.012, "step": 14295 }, { "epoch": 3.2527872582480093, "grad_norm": 0.11393754270653086, "learning_rate": 3.403606631089424e-07, "loss": 0.0003, "step": 14296 }, { "epoch": 3.253014789533561, "grad_norm": 1.1692414851172253, "learning_rate": 3.402811187467997e-07, "loss": 0.0069, "step": 14297 }, { "epoch": 3.253242320819113, "grad_norm": 0.7090305919518911, "learning_rate": 3.402015802037893e-07, "loss": 0.0073, "step": 14298 }, { "epoch": 3.2534698521046646, "grad_norm": 0.09504394155234512, "learning_rate": 3.40122047481537e-07, "loss": 0.0005, "step": 14299 }, { "epoch": 3.2536973833902163, "grad_norm": 0.06792093448340042, "learning_rate": 3.4004252058166833e-07, "loss": 0.0002, "step": 14300 }, { "epoch": 3.253924914675768, "grad_norm": 0.35619122178321794, "learning_rate": 3.399629995058089e-07, "loss": 0.0039, "step": 14301 }, { "epoch": 3.25415244596132, "grad_norm": 0.43057185436232875, "learning_rate": 3.398834842555835e-07, "loss": 0.005, "step": 14302 }, { "epoch": 3.2543799772468716, "grad_norm": 0.06250827199036885, "learning_rate": 3.3980397483261775e-07, "loss": 0.0002, "step": 14303 }, { "epoch": 3.2546075085324233, "grad_norm": 0.4036349672612533, "learning_rate": 3.3972447123853644e-07, "loss": 0.0031, "step": 14304 }, { "epoch": 3.254835039817975, "grad_norm": 0.2219724713554763, "learning_rate": 3.396449734749643e-07, "loss": 0.0009, "step": 14305 }, { "epoch": 3.255062571103527, "grad_norm": 0.964502999760496, "learning_rate": 3.395654815435262e-07, "loss": 0.0119, "step": 14306 }, { "epoch": 3.2552901023890786, "grad_norm": 0.4470693451197466, "learning_rate": 3.3948599544584697e-07, "loss": 0.0015, "step": 14307 }, { "epoch": 3.2555176336746303, "grad_norm": 0.2881297217687933, "learning_rate": 3.394065151835513e-07, "loss": 0.0027, "step": 14308 }, { "epoch": 3.255745164960182, "grad_norm": 0.07181958302120031, "learning_rate": 3.3932704075826344e-07, "loss": 0.0003, "step": 14309 }, { "epoch": 3.255972696245734, "grad_norm": 0.7608969394572772, "learning_rate": 3.3924757217160745e-07, "loss": 0.0046, "step": 14310 }, { "epoch": 3.2562002275312856, "grad_norm": 0.3280470638077614, "learning_rate": 3.391681094252079e-07, "loss": 0.0025, "step": 14311 }, { "epoch": 3.2564277588168373, "grad_norm": 0.5852634096714957, "learning_rate": 3.3908865252068866e-07, "loss": 0.0054, "step": 14312 }, { "epoch": 3.256655290102389, "grad_norm": 0.6274562008582578, "learning_rate": 3.390092014596736e-07, "loss": 0.0019, "step": 14313 }, { "epoch": 3.256882821387941, "grad_norm": 0.3702236785448362, "learning_rate": 3.3892975624378677e-07, "loss": 0.0031, "step": 14314 }, { "epoch": 3.2571103526734926, "grad_norm": 0.6045802798289465, "learning_rate": 3.3885031687465197e-07, "loss": 0.0028, "step": 14315 }, { "epoch": 3.2573378839590443, "grad_norm": 0.46591907956893736, "learning_rate": 3.3877088335389273e-07, "loss": 0.0037, "step": 14316 }, { "epoch": 3.257565415244596, "grad_norm": 0.15035432429249665, "learning_rate": 3.386914556831321e-07, "loss": 0.0004, "step": 14317 }, { "epoch": 3.257792946530148, "grad_norm": 0.5942569279983371, "learning_rate": 3.386120338639941e-07, "loss": 0.0073, "step": 14318 }, { "epoch": 3.2580204778156996, "grad_norm": 1.1763885768999562, "learning_rate": 3.385326178981013e-07, "loss": 0.0025, "step": 14319 }, { "epoch": 3.2582480091012513, "grad_norm": 0.13731204626236018, "learning_rate": 3.3845320778707726e-07, "loss": 0.0004, "step": 14320 }, { "epoch": 3.258475540386803, "grad_norm": 0.4181637289395453, "learning_rate": 3.383738035325447e-07, "loss": 0.0023, "step": 14321 }, { "epoch": 3.258703071672355, "grad_norm": 1.03354629045266, "learning_rate": 3.3829440513612697e-07, "loss": 0.0053, "step": 14322 }, { "epoch": 3.2589306029579066, "grad_norm": 0.3298622014813958, "learning_rate": 3.382150125994466e-07, "loss": 0.0018, "step": 14323 }, { "epoch": 3.2591581342434583, "grad_norm": 0.28705599041987323, "learning_rate": 3.3813562592412586e-07, "loss": 0.0031, "step": 14324 }, { "epoch": 3.25938566552901, "grad_norm": 0.4380226908128975, "learning_rate": 3.3805624511178784e-07, "loss": 0.0022, "step": 14325 }, { "epoch": 3.259613196814562, "grad_norm": 0.3952725417977495, "learning_rate": 3.379768701640541e-07, "loss": 0.0032, "step": 14326 }, { "epoch": 3.2598407281001136, "grad_norm": 0.15837876689536642, "learning_rate": 3.3789750108254803e-07, "loss": 0.0003, "step": 14327 }, { "epoch": 3.2600682593856654, "grad_norm": 0.6626088849081666, "learning_rate": 3.378181378688912e-07, "loss": 0.004, "step": 14328 }, { "epoch": 3.260295790671217, "grad_norm": 0.45400925283877447, "learning_rate": 3.3773878052470544e-07, "loss": 0.0012, "step": 14329 }, { "epoch": 3.260523321956769, "grad_norm": 0.13804303478583096, "learning_rate": 3.3765942905161317e-07, "loss": 0.0007, "step": 14330 }, { "epoch": 3.260750853242321, "grad_norm": 0.1558900728059799, "learning_rate": 3.3758008345123565e-07, "loss": 0.0004, "step": 14331 }, { "epoch": 3.2609783845278724, "grad_norm": 1.3949665789248682, "learning_rate": 3.375007437251949e-07, "loss": 0.0137, "step": 14332 }, { "epoch": 3.2612059158134246, "grad_norm": 0.6002535155577537, "learning_rate": 3.374214098751124e-07, "loss": 0.0019, "step": 14333 }, { "epoch": 3.261433447098976, "grad_norm": 0.689615169749543, "learning_rate": 3.373420819026098e-07, "loss": 0.004, "step": 14334 }, { "epoch": 3.261660978384528, "grad_norm": 0.24577247142436204, "learning_rate": 3.3726275980930826e-07, "loss": 0.002, "step": 14335 }, { "epoch": 3.26188850967008, "grad_norm": 0.6389355264104996, "learning_rate": 3.371834435968287e-07, "loss": 0.0039, "step": 14336 }, { "epoch": 3.2621160409556316, "grad_norm": 0.47753910483212475, "learning_rate": 3.371041332667927e-07, "loss": 0.0034, "step": 14337 }, { "epoch": 3.2623435722411833, "grad_norm": 0.17081660519056546, "learning_rate": 3.370248288208207e-07, "loss": 0.0004, "step": 14338 }, { "epoch": 3.262571103526735, "grad_norm": 1.318983755652027, "learning_rate": 3.369455302605338e-07, "loss": 0.0092, "step": 14339 }, { "epoch": 3.262798634812287, "grad_norm": 0.7113606410492864, "learning_rate": 3.368662375875527e-07, "loss": 0.005, "step": 14340 }, { "epoch": 3.2630261660978386, "grad_norm": 0.1683125138409986, "learning_rate": 3.367869508034983e-07, "loss": 0.0007, "step": 14341 }, { "epoch": 3.2632536973833903, "grad_norm": 0.2070536543486333, "learning_rate": 3.3670766990999075e-07, "loss": 0.0005, "step": 14342 }, { "epoch": 3.263481228668942, "grad_norm": 0.27779711259439055, "learning_rate": 3.3662839490865016e-07, "loss": 0.0013, "step": 14343 }, { "epoch": 3.263708759954494, "grad_norm": 1.153546994913953, "learning_rate": 3.365491258010974e-07, "loss": 0.0078, "step": 14344 }, { "epoch": 3.2639362912400456, "grad_norm": 0.17188534230312147, "learning_rate": 3.364698625889519e-07, "loss": 0.0006, "step": 14345 }, { "epoch": 3.2641638225255973, "grad_norm": 1.683090943402373, "learning_rate": 3.3639060527383403e-07, "loss": 0.0091, "step": 14346 }, { "epoch": 3.264391353811149, "grad_norm": 0.8400944129399044, "learning_rate": 3.3631135385736385e-07, "loss": 0.0058, "step": 14347 }, { "epoch": 3.264618885096701, "grad_norm": 0.30813515556251325, "learning_rate": 3.362321083411607e-07, "loss": 0.0019, "step": 14348 }, { "epoch": 3.2648464163822526, "grad_norm": 0.6694839099025628, "learning_rate": 3.361528687268446e-07, "loss": 0.0075, "step": 14349 }, { "epoch": 3.2650739476678043, "grad_norm": 2.0997307903075484, "learning_rate": 3.3607363501603457e-07, "loss": 0.0124, "step": 14350 }, { "epoch": 3.265301478953356, "grad_norm": 0.270185539947247, "learning_rate": 3.359944072103506e-07, "loss": 0.0018, "step": 14351 }, { "epoch": 3.265529010238908, "grad_norm": 0.46333698275507695, "learning_rate": 3.3591518531141146e-07, "loss": 0.0046, "step": 14352 }, { "epoch": 3.2657565415244596, "grad_norm": 0.5674593517733076, "learning_rate": 3.3583596932083645e-07, "loss": 0.002, "step": 14353 }, { "epoch": 3.2659840728100114, "grad_norm": 0.5277503662866756, "learning_rate": 3.3575675924024483e-07, "loss": 0.0017, "step": 14354 }, { "epoch": 3.266211604095563, "grad_norm": 0.6566915066339861, "learning_rate": 3.3567755507125513e-07, "loss": 0.0061, "step": 14355 }, { "epoch": 3.266439135381115, "grad_norm": 0.40035400316556935, "learning_rate": 3.355983568154866e-07, "loss": 0.0043, "step": 14356 }, { "epoch": 3.2666666666666666, "grad_norm": 0.3670612548653154, "learning_rate": 3.355191644745574e-07, "loss": 0.0011, "step": 14357 }, { "epoch": 3.2668941979522184, "grad_norm": 1.4752279915189463, "learning_rate": 3.354399780500866e-07, "loss": 0.0193, "step": 14358 }, { "epoch": 3.26712172923777, "grad_norm": 0.24817378659287403, "learning_rate": 3.3536079754369206e-07, "loss": 0.0008, "step": 14359 }, { "epoch": 3.267349260523322, "grad_norm": 0.19607523861739726, "learning_rate": 3.352816229569923e-07, "loss": 0.0008, "step": 14360 }, { "epoch": 3.2675767918088736, "grad_norm": 0.32218401288977594, "learning_rate": 3.3520245429160596e-07, "loss": 0.0017, "step": 14361 }, { "epoch": 3.2678043230944254, "grad_norm": 0.3827697536758245, "learning_rate": 3.3512329154915033e-07, "loss": 0.0018, "step": 14362 }, { "epoch": 3.268031854379977, "grad_norm": 0.41567490445087935, "learning_rate": 3.3504413473124415e-07, "loss": 0.0019, "step": 14363 }, { "epoch": 3.268259385665529, "grad_norm": 0.1193766358749132, "learning_rate": 3.349649838395044e-07, "loss": 0.0004, "step": 14364 }, { "epoch": 3.2684869169510806, "grad_norm": 0.6510426660675915, "learning_rate": 3.348858388755495e-07, "loss": 0.0051, "step": 14365 }, { "epoch": 3.2687144482366324, "grad_norm": 0.5413578704777489, "learning_rate": 3.348066998409966e-07, "loss": 0.0035, "step": 14366 }, { "epoch": 3.268941979522184, "grad_norm": 0.48313430945554, "learning_rate": 3.347275667374632e-07, "loss": 0.0039, "step": 14367 }, { "epoch": 3.269169510807736, "grad_norm": 1.2937139155518356, "learning_rate": 3.3464843956656694e-07, "loss": 0.0041, "step": 14368 }, { "epoch": 3.2693970420932876, "grad_norm": 0.7746944250205968, "learning_rate": 3.3456931832992465e-07, "loss": 0.0092, "step": 14369 }, { "epoch": 3.26962457337884, "grad_norm": 1.351113455659074, "learning_rate": 3.344902030291538e-07, "loss": 0.0037, "step": 14370 }, { "epoch": 3.269852104664391, "grad_norm": 0.2874691498609565, "learning_rate": 3.3441109366587095e-07, "loss": 0.0009, "step": 14371 }, { "epoch": 3.2700796359499433, "grad_norm": 0.21867001637894304, "learning_rate": 3.343319902416933e-07, "loss": 0.0007, "step": 14372 }, { "epoch": 3.2703071672354946, "grad_norm": 0.8492008539162741, "learning_rate": 3.3425289275823724e-07, "loss": 0.0051, "step": 14373 }, { "epoch": 3.270534698521047, "grad_norm": 0.166074256586089, "learning_rate": 3.341738012171196e-07, "loss": 0.0006, "step": 14374 }, { "epoch": 3.2707622298065986, "grad_norm": 0.41543822526588065, "learning_rate": 3.340947156199571e-07, "loss": 0.0015, "step": 14375 }, { "epoch": 3.2709897610921503, "grad_norm": 0.5598123129329727, "learning_rate": 3.3401563596836556e-07, "loss": 0.003, "step": 14376 }, { "epoch": 3.271217292377702, "grad_norm": 0.34848694871716845, "learning_rate": 3.339365622639618e-07, "loss": 0.0019, "step": 14377 }, { "epoch": 3.271444823663254, "grad_norm": 1.0026238942337258, "learning_rate": 3.338574945083614e-07, "loss": 0.0102, "step": 14378 }, { "epoch": 3.2716723549488056, "grad_norm": 0.6297190566506379, "learning_rate": 3.337784327031808e-07, "loss": 0.0106, "step": 14379 }, { "epoch": 3.2718998862343573, "grad_norm": 0.1313155913668039, "learning_rate": 3.3369937685003546e-07, "loss": 0.0006, "step": 14380 }, { "epoch": 3.272127417519909, "grad_norm": 0.1366521249497639, "learning_rate": 3.3362032695054144e-07, "loss": 0.0008, "step": 14381 }, { "epoch": 3.272354948805461, "grad_norm": 0.3439752889907554, "learning_rate": 3.335412830063145e-07, "loss": 0.0013, "step": 14382 }, { "epoch": 3.2725824800910126, "grad_norm": 0.1843654724035726, "learning_rate": 3.3346224501896963e-07, "loss": 0.0026, "step": 14383 }, { "epoch": 3.2728100113765644, "grad_norm": 25.171007605682668, "learning_rate": 3.3338321299012285e-07, "loss": 0.0793, "step": 14384 }, { "epoch": 3.273037542662116, "grad_norm": 0.274993079055697, "learning_rate": 3.333041869213892e-07, "loss": 0.0013, "step": 14385 }, { "epoch": 3.273265073947668, "grad_norm": 0.5142239537751572, "learning_rate": 3.332251668143831e-07, "loss": 0.0048, "step": 14386 }, { "epoch": 3.2734926052332196, "grad_norm": 0.4252750508202569, "learning_rate": 3.331461526707208e-07, "loss": 0.0033, "step": 14387 }, { "epoch": 3.2737201365187714, "grad_norm": 0.1452372564895674, "learning_rate": 3.3306714449201647e-07, "loss": 0.0003, "step": 14388 }, { "epoch": 3.273947667804323, "grad_norm": 1.2161839381392987, "learning_rate": 3.329881422798852e-07, "loss": 0.0079, "step": 14389 }, { "epoch": 3.274175199089875, "grad_norm": 0.5087926134092695, "learning_rate": 3.3290914603594136e-07, "loss": 0.0061, "step": 14390 }, { "epoch": 3.2744027303754266, "grad_norm": 0.4915358629050351, "learning_rate": 3.328301557617998e-07, "loss": 0.0038, "step": 14391 }, { "epoch": 3.2746302616609784, "grad_norm": 0.40151137945577914, "learning_rate": 3.3275117145907457e-07, "loss": 0.0032, "step": 14392 }, { "epoch": 3.27485779294653, "grad_norm": 0.5837827653402694, "learning_rate": 3.3267219312938014e-07, "loss": 0.0014, "step": 14393 }, { "epoch": 3.275085324232082, "grad_norm": 0.4846771257394226, "learning_rate": 3.325932207743309e-07, "loss": 0.0009, "step": 14394 }, { "epoch": 3.2753128555176336, "grad_norm": 0.656135465774693, "learning_rate": 3.3251425439554054e-07, "loss": 0.0053, "step": 14395 }, { "epoch": 3.2755403868031854, "grad_norm": 0.7927297299763933, "learning_rate": 3.3243529399462336e-07, "loss": 0.0079, "step": 14396 }, { "epoch": 3.275767918088737, "grad_norm": 1.0081173916155768, "learning_rate": 3.323563395731928e-07, "loss": 0.0118, "step": 14397 }, { "epoch": 3.275995449374289, "grad_norm": 0.7047157137666978, "learning_rate": 3.322773911328629e-07, "loss": 0.0025, "step": 14398 }, { "epoch": 3.2762229806598406, "grad_norm": 0.4246638282812041, "learning_rate": 3.321984486752468e-07, "loss": 0.0042, "step": 14399 }, { "epoch": 3.2764505119453924, "grad_norm": 9.419341083346131, "learning_rate": 3.3211951220195813e-07, "loss": 0.008, "step": 14400 }, { "epoch": 3.276678043230944, "grad_norm": 0.6022795987159323, "learning_rate": 3.320405817146105e-07, "loss": 0.0062, "step": 14401 }, { "epoch": 3.276905574516496, "grad_norm": 0.6514640216576498, "learning_rate": 3.319616572148166e-07, "loss": 0.0055, "step": 14402 }, { "epoch": 3.2771331058020476, "grad_norm": 1.0800248115506212, "learning_rate": 3.3188273870419e-07, "loss": 0.0046, "step": 14403 }, { "epoch": 3.2773606370875994, "grad_norm": 0.07484562131612997, "learning_rate": 3.3180382618434344e-07, "loss": 0.0001, "step": 14404 }, { "epoch": 3.277588168373151, "grad_norm": 0.09145384380403651, "learning_rate": 3.3172491965688947e-07, "loss": 0.0003, "step": 14405 }, { "epoch": 3.277815699658703, "grad_norm": 1.2515159220961618, "learning_rate": 3.3164601912344096e-07, "loss": 0.0034, "step": 14406 }, { "epoch": 3.2780432309442546, "grad_norm": 0.7838327554985699, "learning_rate": 3.3156712458561057e-07, "loss": 0.0088, "step": 14407 }, { "epoch": 3.2782707622298064, "grad_norm": 22.562732156745543, "learning_rate": 3.3148823604501114e-07, "loss": 0.0095, "step": 14408 }, { "epoch": 3.2784982935153586, "grad_norm": 0.626438448013314, "learning_rate": 3.314093535032542e-07, "loss": 0.0023, "step": 14409 }, { "epoch": 3.27872582480091, "grad_norm": 0.8186481253082292, "learning_rate": 3.313304769619527e-07, "loss": 0.0022, "step": 14410 }, { "epoch": 3.278953356086462, "grad_norm": 1.1332563080916915, "learning_rate": 3.312516064227185e-07, "loss": 0.0022, "step": 14411 }, { "epoch": 3.2791808873720134, "grad_norm": 0.3576445117102249, "learning_rate": 3.311727418871631e-07, "loss": 0.0017, "step": 14412 }, { "epoch": 3.2794084186575656, "grad_norm": 0.8752930618339653, "learning_rate": 3.3109388335689885e-07, "loss": 0.0061, "step": 14413 }, { "epoch": 3.2796359499431174, "grad_norm": 1.1454336061080508, "learning_rate": 3.3101503083353736e-07, "loss": 0.0073, "step": 14414 }, { "epoch": 3.279863481228669, "grad_norm": 0.10485752630863186, "learning_rate": 3.3093618431869043e-07, "loss": 0.0005, "step": 14415 }, { "epoch": 3.280091012514221, "grad_norm": 0.24988107544431631, "learning_rate": 3.3085734381396937e-07, "loss": 0.0018, "step": 14416 }, { "epoch": 3.2803185437997726, "grad_norm": 0.6049075557898941, "learning_rate": 3.307785093209852e-07, "loss": 0.0047, "step": 14417 }, { "epoch": 3.2805460750853244, "grad_norm": 1.4345009837801985, "learning_rate": 3.306996808413498e-07, "loss": 0.0112, "step": 14418 }, { "epoch": 3.280773606370876, "grad_norm": 0.15775411224851815, "learning_rate": 3.306208583766736e-07, "loss": 0.0005, "step": 14419 }, { "epoch": 3.281001137656428, "grad_norm": 0.511409970937522, "learning_rate": 3.305420419285679e-07, "loss": 0.004, "step": 14420 }, { "epoch": 3.2812286689419796, "grad_norm": 1.0190519589886244, "learning_rate": 3.3046323149864377e-07, "loss": 0.0063, "step": 14421 }, { "epoch": 3.2814562002275314, "grad_norm": 0.48740599959109776, "learning_rate": 3.303844270885118e-07, "loss": 0.0032, "step": 14422 }, { "epoch": 3.281683731513083, "grad_norm": 0.24329384492679162, "learning_rate": 3.303056286997827e-07, "loss": 0.0016, "step": 14423 }, { "epoch": 3.281911262798635, "grad_norm": 0.1240416228339175, "learning_rate": 3.302268363340666e-07, "loss": 0.0003, "step": 14424 }, { "epoch": 3.2821387940841866, "grad_norm": 1.168305271973839, "learning_rate": 3.3014804999297433e-07, "loss": 0.0076, "step": 14425 }, { "epoch": 3.2823663253697384, "grad_norm": 0.3267113839807941, "learning_rate": 3.3006926967811566e-07, "loss": 0.0036, "step": 14426 }, { "epoch": 3.28259385665529, "grad_norm": 0.6507419073510992, "learning_rate": 3.29990495391101e-07, "loss": 0.0066, "step": 14427 }, { "epoch": 3.282821387940842, "grad_norm": 1.10611609504279, "learning_rate": 3.299117271335403e-07, "loss": 0.0079, "step": 14428 }, { "epoch": 3.2830489192263936, "grad_norm": 1.6491150824367968, "learning_rate": 3.298329649070438e-07, "loss": 0.0035, "step": 14429 }, { "epoch": 3.2832764505119454, "grad_norm": 0.30079216907558837, "learning_rate": 3.2975420871322087e-07, "loss": 0.0027, "step": 14430 }, { "epoch": 3.283503981797497, "grad_norm": 0.35600882840598, "learning_rate": 3.2967545855368094e-07, "loss": 0.0021, "step": 14431 }, { "epoch": 3.283731513083049, "grad_norm": 1.4126219073783577, "learning_rate": 3.2959671443003395e-07, "loss": 0.0123, "step": 14432 }, { "epoch": 3.2839590443686006, "grad_norm": 0.2985960171876106, "learning_rate": 3.2951797634388894e-07, "loss": 0.0018, "step": 14433 }, { "epoch": 3.2841865756541524, "grad_norm": 1.0911949228837332, "learning_rate": 3.294392442968554e-07, "loss": 0.0125, "step": 14434 }, { "epoch": 3.284414106939704, "grad_norm": 0.6471356291293934, "learning_rate": 3.293605182905426e-07, "loss": 0.0025, "step": 14435 }, { "epoch": 3.284641638225256, "grad_norm": 0.635695714200421, "learning_rate": 3.2928179832655916e-07, "loss": 0.0071, "step": 14436 }, { "epoch": 3.2848691695108077, "grad_norm": 0.8125783084141657, "learning_rate": 3.292030844065144e-07, "loss": 0.0101, "step": 14437 }, { "epoch": 3.2850967007963594, "grad_norm": 0.42840066939535026, "learning_rate": 3.291243765320166e-07, "loss": 0.0016, "step": 14438 }, { "epoch": 3.285324232081911, "grad_norm": 0.34182493875090425, "learning_rate": 3.290456747046749e-07, "loss": 0.002, "step": 14439 }, { "epoch": 3.285551763367463, "grad_norm": 0.2980961035909062, "learning_rate": 3.289669789260974e-07, "loss": 0.0026, "step": 14440 }, { "epoch": 3.2857792946530147, "grad_norm": 0.1813535544487996, "learning_rate": 3.288882891978927e-07, "loss": 0.0008, "step": 14441 }, { "epoch": 3.2860068259385664, "grad_norm": 0.3191553861048454, "learning_rate": 3.2880960552166926e-07, "loss": 0.0014, "step": 14442 }, { "epoch": 3.286234357224118, "grad_norm": 0.5325367660940024, "learning_rate": 3.287309278990349e-07, "loss": 0.0055, "step": 14443 }, { "epoch": 3.28646188850967, "grad_norm": 0.3537714386711151, "learning_rate": 3.286522563315979e-07, "loss": 0.0015, "step": 14444 }, { "epoch": 3.2866894197952217, "grad_norm": 0.26579483183418845, "learning_rate": 3.2857359082096576e-07, "loss": 0.0022, "step": 14445 }, { "epoch": 3.2869169510807734, "grad_norm": 0.7281246862810632, "learning_rate": 3.284949313687469e-07, "loss": 0.0028, "step": 14446 }, { "epoch": 3.287144482366325, "grad_norm": 0.17925248966207283, "learning_rate": 3.284162779765481e-07, "loss": 0.0005, "step": 14447 }, { "epoch": 3.2873720136518774, "grad_norm": 0.5872568965483076, "learning_rate": 3.283376306459779e-07, "loss": 0.0045, "step": 14448 }, { "epoch": 3.2875995449374287, "grad_norm": 0.9943335448723596, "learning_rate": 3.2825898937864325e-07, "loss": 0.0089, "step": 14449 }, { "epoch": 3.287827076222981, "grad_norm": 0.3815167088026231, "learning_rate": 3.2818035417615107e-07, "loss": 0.0022, "step": 14450 }, { "epoch": 3.288054607508532, "grad_norm": 0.08037284704073734, "learning_rate": 3.2810172504010917e-07, "loss": 0.0003, "step": 14451 }, { "epoch": 3.2882821387940844, "grad_norm": 0.39406254185369943, "learning_rate": 3.28023101972124e-07, "loss": 0.0023, "step": 14452 }, { "epoch": 3.288509670079636, "grad_norm": 0.952884186152421, "learning_rate": 3.2794448497380283e-07, "loss": 0.0022, "step": 14453 }, { "epoch": 3.288737201365188, "grad_norm": 0.388120505593154, "learning_rate": 3.2786587404675246e-07, "loss": 0.0022, "step": 14454 }, { "epoch": 3.2889647326507396, "grad_norm": 1.3871773334562585, "learning_rate": 3.277872691925793e-07, "loss": 0.0116, "step": 14455 }, { "epoch": 3.2891922639362914, "grad_norm": 0.6764208819388681, "learning_rate": 3.2770867041289024e-07, "loss": 0.0081, "step": 14456 }, { "epoch": 3.289419795221843, "grad_norm": 0.2674087939280082, "learning_rate": 3.2763007770929125e-07, "loss": 0.0021, "step": 14457 }, { "epoch": 3.289647326507395, "grad_norm": 1.8984799538140844, "learning_rate": 3.2755149108338907e-07, "loss": 0.0252, "step": 14458 }, { "epoch": 3.2898748577929466, "grad_norm": 0.20607414328427118, "learning_rate": 3.2747291053678953e-07, "loss": 0.0008, "step": 14459 }, { "epoch": 3.2901023890784984, "grad_norm": 0.48415215764923153, "learning_rate": 3.2739433607109865e-07, "loss": 0.0058, "step": 14460 }, { "epoch": 3.29032992036405, "grad_norm": 0.2749049266232476, "learning_rate": 3.2731576768792283e-07, "loss": 0.0007, "step": 14461 }, { "epoch": 3.290557451649602, "grad_norm": 1.342442337401741, "learning_rate": 3.2723720538886725e-07, "loss": 0.0092, "step": 14462 }, { "epoch": 3.2907849829351536, "grad_norm": 0.6729569172913258, "learning_rate": 3.2715864917553826e-07, "loss": 0.008, "step": 14463 }, { "epoch": 3.2910125142207054, "grad_norm": 0.1776088335994558, "learning_rate": 3.2708009904954055e-07, "loss": 0.0009, "step": 14464 }, { "epoch": 3.291240045506257, "grad_norm": 0.6313356154421601, "learning_rate": 3.2700155501248043e-07, "loss": 0.0048, "step": 14465 }, { "epoch": 3.291467576791809, "grad_norm": 0.2588343599659278, "learning_rate": 3.269230170659624e-07, "loss": 0.0009, "step": 14466 }, { "epoch": 3.2916951080773607, "grad_norm": 0.18512006482970686, "learning_rate": 3.2684448521159206e-07, "loss": 0.001, "step": 14467 }, { "epoch": 3.2919226393629124, "grad_norm": 0.6281507566084357, "learning_rate": 3.2676595945097465e-07, "loss": 0.0024, "step": 14468 }, { "epoch": 3.292150170648464, "grad_norm": 0.15233546849924745, "learning_rate": 3.266874397857145e-07, "loss": 0.0006, "step": 14469 }, { "epoch": 3.292377701934016, "grad_norm": 0.6676302904485315, "learning_rate": 3.2660892621741706e-07, "loss": 0.0053, "step": 14470 }, { "epoch": 3.2926052332195677, "grad_norm": 1.537463245933098, "learning_rate": 3.2653041874768645e-07, "loss": 0.0089, "step": 14471 }, { "epoch": 3.2928327645051194, "grad_norm": 0.31680947699179735, "learning_rate": 3.2645191737812766e-07, "loss": 0.0027, "step": 14472 }, { "epoch": 3.293060295790671, "grad_norm": 0.6859083180425345, "learning_rate": 3.263734221103447e-07, "loss": 0.0063, "step": 14473 }, { "epoch": 3.293287827076223, "grad_norm": 0.7099373457198672, "learning_rate": 3.262949329459421e-07, "loss": 0.0033, "step": 14474 }, { "epoch": 3.2935153583617747, "grad_norm": 0.3988082853482622, "learning_rate": 3.262164498865243e-07, "loss": 0.0045, "step": 14475 }, { "epoch": 3.2937428896473264, "grad_norm": 0.5411971086547992, "learning_rate": 3.261379729336948e-07, "loss": 0.0063, "step": 14476 }, { "epoch": 3.293970420932878, "grad_norm": 0.03271610267006796, "learning_rate": 3.2605950208905793e-07, "loss": 0.0001, "step": 14477 }, { "epoch": 3.29419795221843, "grad_norm": 0.09821186063916233, "learning_rate": 3.2598103735421723e-07, "loss": 0.0003, "step": 14478 }, { "epoch": 3.2944254835039817, "grad_norm": 0.11543823510940168, "learning_rate": 3.2590257873077673e-07, "loss": 0.0005, "step": 14479 }, { "epoch": 3.2946530147895334, "grad_norm": 0.23194855212465, "learning_rate": 3.2582412622033945e-07, "loss": 0.0014, "step": 14480 }, { "epoch": 3.294880546075085, "grad_norm": 0.48882422332371733, "learning_rate": 3.2574567982450913e-07, "loss": 0.0038, "step": 14481 }, { "epoch": 3.295108077360637, "grad_norm": 0.5359602773886692, "learning_rate": 3.2566723954488933e-07, "loss": 0.0063, "step": 14482 }, { "epoch": 3.2953356086461887, "grad_norm": 0.10463324110841381, "learning_rate": 3.255888053830827e-07, "loss": 0.0006, "step": 14483 }, { "epoch": 3.2955631399317404, "grad_norm": 0.4056866430722305, "learning_rate": 3.255103773406928e-07, "loss": 0.0024, "step": 14484 }, { "epoch": 3.295790671217292, "grad_norm": 0.5359907899338038, "learning_rate": 3.254319554193221e-07, "loss": 0.0037, "step": 14485 }, { "epoch": 3.296018202502844, "grad_norm": 0.13548275061717294, "learning_rate": 3.253535396205737e-07, "loss": 0.0007, "step": 14486 }, { "epoch": 3.296245733788396, "grad_norm": 0.4058509415614038, "learning_rate": 3.2527512994605e-07, "loss": 0.0018, "step": 14487 }, { "epoch": 3.2964732650739474, "grad_norm": 1.1140529083470145, "learning_rate": 3.251967263973538e-07, "loss": 0.008, "step": 14488 }, { "epoch": 3.2967007963594996, "grad_norm": 0.6842275376838135, "learning_rate": 3.251183289760876e-07, "loss": 0.005, "step": 14489 }, { "epoch": 3.296928327645051, "grad_norm": 0.3423122602276473, "learning_rate": 3.2503993768385325e-07, "loss": 0.0048, "step": 14490 }, { "epoch": 3.297155858930603, "grad_norm": 0.44604401187195336, "learning_rate": 3.249615525222536e-07, "loss": 0.006, "step": 14491 }, { "epoch": 3.297383390216155, "grad_norm": 1.348527885511469, "learning_rate": 3.248831734928903e-07, "loss": 0.0072, "step": 14492 }, { "epoch": 3.2976109215017066, "grad_norm": 0.44756173224847406, "learning_rate": 3.2480480059736495e-07, "loss": 0.0022, "step": 14493 }, { "epoch": 3.2978384527872584, "grad_norm": 0.27257230622049533, "learning_rate": 3.247264338372798e-07, "loss": 0.0017, "step": 14494 }, { "epoch": 3.29806598407281, "grad_norm": 0.24268898255724963, "learning_rate": 3.246480732142364e-07, "loss": 0.0011, "step": 14495 }, { "epoch": 3.298293515358362, "grad_norm": 0.5187902623135718, "learning_rate": 3.245697187298365e-07, "loss": 0.0049, "step": 14496 }, { "epoch": 3.2985210466439137, "grad_norm": 0.09040348179208105, "learning_rate": 3.244913703856811e-07, "loss": 0.0005, "step": 14497 }, { "epoch": 3.2987485779294654, "grad_norm": 0.46903805236594853, "learning_rate": 3.2441302818337205e-07, "loss": 0.0033, "step": 14498 }, { "epoch": 3.298976109215017, "grad_norm": 1.1222450692285943, "learning_rate": 3.243346921245101e-07, "loss": 0.0027, "step": 14499 }, { "epoch": 3.299203640500569, "grad_norm": 0.2236003610019604, "learning_rate": 3.2425636221069633e-07, "loss": 0.002, "step": 14500 }, { "epoch": 3.2994311717861207, "grad_norm": 0.38375015781274063, "learning_rate": 3.2417803844353156e-07, "loss": 0.0017, "step": 14501 }, { "epoch": 3.2996587030716724, "grad_norm": 1.2337317655862887, "learning_rate": 3.240997208246168e-07, "loss": 0.0077, "step": 14502 }, { "epoch": 3.299886234357224, "grad_norm": 0.2726596006367319, "learning_rate": 3.2402140935555297e-07, "loss": 0.0024, "step": 14503 }, { "epoch": 3.300113765642776, "grad_norm": 0.2636298551743699, "learning_rate": 3.2394310403794005e-07, "loss": 0.0018, "step": 14504 }, { "epoch": 3.3003412969283277, "grad_norm": 0.23135213217197123, "learning_rate": 3.238648048733789e-07, "loss": 0.0015, "step": 14505 }, { "epoch": 3.3005688282138794, "grad_norm": 1.034898361779245, "learning_rate": 3.237865118634697e-07, "loss": 0.0104, "step": 14506 }, { "epoch": 3.300796359499431, "grad_norm": 0.48760726019750766, "learning_rate": 3.2370822500981213e-07, "loss": 0.0049, "step": 14507 }, { "epoch": 3.301023890784983, "grad_norm": 0.5661201141761545, "learning_rate": 3.2362994431400703e-07, "loss": 0.0031, "step": 14508 }, { "epoch": 3.3012514220705347, "grad_norm": 0.8786482277394804, "learning_rate": 3.2355166977765367e-07, "loss": 0.0053, "step": 14509 }, { "epoch": 3.3014789533560864, "grad_norm": 0.5068143668069204, "learning_rate": 3.2347340140235243e-07, "loss": 0.0032, "step": 14510 }, { "epoch": 3.301706484641638, "grad_norm": 0.5999140514373337, "learning_rate": 3.2339513918970266e-07, "loss": 0.0051, "step": 14511 }, { "epoch": 3.30193401592719, "grad_norm": 1.8787901225727746, "learning_rate": 3.2331688314130355e-07, "loss": 0.0154, "step": 14512 }, { "epoch": 3.3021615472127417, "grad_norm": 1.567060612957885, "learning_rate": 3.232386332587549e-07, "loss": 0.0195, "step": 14513 }, { "epoch": 3.3023890784982934, "grad_norm": 0.13387951720203858, "learning_rate": 3.231603895436559e-07, "loss": 0.0007, "step": 14514 }, { "epoch": 3.302616609783845, "grad_norm": 0.18245597153141035, "learning_rate": 3.23082151997606e-07, "loss": 0.0007, "step": 14515 }, { "epoch": 3.302844141069397, "grad_norm": 0.46809867202221866, "learning_rate": 3.230039206222037e-07, "loss": 0.0077, "step": 14516 }, { "epoch": 3.3030716723549487, "grad_norm": 0.4268297293308298, "learning_rate": 3.229256954190485e-07, "loss": 0.0015, "step": 14517 }, { "epoch": 3.3032992036405004, "grad_norm": 0.31169244039424887, "learning_rate": 3.2284747638973877e-07, "loss": 0.0021, "step": 14518 }, { "epoch": 3.303526734926052, "grad_norm": 0.8211210957338959, "learning_rate": 3.227692635358731e-07, "loss": 0.0089, "step": 14519 }, { "epoch": 3.303754266211604, "grad_norm": 0.6593450318753775, "learning_rate": 3.2269105685905023e-07, "loss": 0.0043, "step": 14520 }, { "epoch": 3.3039817974971557, "grad_norm": 0.3168308968317672, "learning_rate": 3.2261285636086837e-07, "loss": 0.0017, "step": 14521 }, { "epoch": 3.3042093287827075, "grad_norm": 0.5668745670323232, "learning_rate": 3.2253466204292624e-07, "loss": 0.0047, "step": 14522 }, { "epoch": 3.304436860068259, "grad_norm": 0.8243452750718552, "learning_rate": 3.2245647390682146e-07, "loss": 0.0069, "step": 14523 }, { "epoch": 3.304664391353811, "grad_norm": 0.3375387453095839, "learning_rate": 3.2237829195415244e-07, "loss": 0.0018, "step": 14524 }, { "epoch": 3.3048919226393627, "grad_norm": 0.24351703159861124, "learning_rate": 3.223001161865169e-07, "loss": 0.0014, "step": 14525 }, { "epoch": 3.305119453924915, "grad_norm": 0.19093566136487658, "learning_rate": 3.222219466055125e-07, "loss": 0.0009, "step": 14526 }, { "epoch": 3.305346985210466, "grad_norm": 0.6033470738648667, "learning_rate": 3.2214378321273694e-07, "loss": 0.004, "step": 14527 }, { "epoch": 3.3055745164960184, "grad_norm": 0.17100907819869465, "learning_rate": 3.220656260097877e-07, "loss": 0.0006, "step": 14528 }, { "epoch": 3.3058020477815697, "grad_norm": 0.39252440491265045, "learning_rate": 3.219874749982626e-07, "loss": 0.0027, "step": 14529 }, { "epoch": 3.306029579067122, "grad_norm": 1.1052622897982762, "learning_rate": 3.219093301797585e-07, "loss": 0.0048, "step": 14530 }, { "epoch": 3.3062571103526737, "grad_norm": 0.5660256651820837, "learning_rate": 3.218311915558725e-07, "loss": 0.001, "step": 14531 }, { "epoch": 3.3064846416382254, "grad_norm": 0.9912584834143767, "learning_rate": 3.2175305912820184e-07, "loss": 0.0046, "step": 14532 }, { "epoch": 3.306712172923777, "grad_norm": 0.2716616914457254, "learning_rate": 3.21674932898343e-07, "loss": 0.0015, "step": 14533 }, { "epoch": 3.306939704209329, "grad_norm": 0.5021277241593143, "learning_rate": 3.2159681286789314e-07, "loss": 0.0051, "step": 14534 }, { "epoch": 3.3071672354948807, "grad_norm": 0.25846357288399024, "learning_rate": 3.2151869903844876e-07, "loss": 0.0011, "step": 14535 }, { "epoch": 3.3073947667804324, "grad_norm": 0.5772738393144264, "learning_rate": 3.214405914116066e-07, "loss": 0.0021, "step": 14536 }, { "epoch": 3.307622298065984, "grad_norm": 0.21887745010291096, "learning_rate": 3.2136248998896273e-07, "loss": 0.0014, "step": 14537 }, { "epoch": 3.307849829351536, "grad_norm": 0.40865560544292157, "learning_rate": 3.212843947721133e-07, "loss": 0.0043, "step": 14538 }, { "epoch": 3.3080773606370877, "grad_norm": 0.9034416116902438, "learning_rate": 3.212063057626548e-07, "loss": 0.0165, "step": 14539 }, { "epoch": 3.3083048919226394, "grad_norm": 0.15629324227590566, "learning_rate": 3.2112822296218287e-07, "loss": 0.0004, "step": 14540 }, { "epoch": 3.308532423208191, "grad_norm": 0.30273544963133303, "learning_rate": 3.2105014637229357e-07, "loss": 0.0013, "step": 14541 }, { "epoch": 3.308759954493743, "grad_norm": 1.2199753916657652, "learning_rate": 3.209720759945828e-07, "loss": 0.0098, "step": 14542 }, { "epoch": 3.3089874857792947, "grad_norm": 0.7279543348446583, "learning_rate": 3.208940118306457e-07, "loss": 0.0052, "step": 14543 }, { "epoch": 3.3092150170648464, "grad_norm": 0.7304791608609112, "learning_rate": 3.208159538820783e-07, "loss": 0.0112, "step": 14544 }, { "epoch": 3.309442548350398, "grad_norm": 0.3338792421767982, "learning_rate": 3.207379021504756e-07, "loss": 0.0019, "step": 14545 }, { "epoch": 3.30967007963595, "grad_norm": 0.32198231895176593, "learning_rate": 3.206598566374332e-07, "loss": 0.0009, "step": 14546 }, { "epoch": 3.3098976109215017, "grad_norm": 0.6514547193576653, "learning_rate": 3.205818173445456e-07, "loss": 0.0019, "step": 14547 }, { "epoch": 3.3101251422070535, "grad_norm": 0.4903428952159103, "learning_rate": 3.2050378427340816e-07, "loss": 0.0033, "step": 14548 }, { "epoch": 3.310352673492605, "grad_norm": 0.31500534252220735, "learning_rate": 3.2042575742561604e-07, "loss": 0.0014, "step": 14549 }, { "epoch": 3.310580204778157, "grad_norm": 0.4088377341375805, "learning_rate": 3.2034773680276327e-07, "loss": 0.0013, "step": 14550 }, { "epoch": 3.3108077360637087, "grad_norm": 1.5970121032354914, "learning_rate": 3.202697224064451e-07, "loss": 0.0142, "step": 14551 }, { "epoch": 3.3110352673492605, "grad_norm": 0.4707954420924691, "learning_rate": 3.2019171423825547e-07, "loss": 0.0031, "step": 14552 }, { "epoch": 3.311262798634812, "grad_norm": 0.4525016335219407, "learning_rate": 3.2011371229978925e-07, "loss": 0.0023, "step": 14553 }, { "epoch": 3.311490329920364, "grad_norm": 0.14630655150408106, "learning_rate": 3.200357165926402e-07, "loss": 0.0004, "step": 14554 }, { "epoch": 3.3117178612059157, "grad_norm": 0.6534009007829198, "learning_rate": 3.199577271184025e-07, "loss": 0.0052, "step": 14555 }, { "epoch": 3.3119453924914675, "grad_norm": 0.5761256273448039, "learning_rate": 3.1987974387867037e-07, "loss": 0.0054, "step": 14556 }, { "epoch": 3.312172923777019, "grad_norm": 0.5388946033613642, "learning_rate": 3.198017668750374e-07, "loss": 0.0068, "step": 14557 }, { "epoch": 3.312400455062571, "grad_norm": 0.4414743147988591, "learning_rate": 3.1972379610909743e-07, "loss": 0.003, "step": 14558 }, { "epoch": 3.3126279863481227, "grad_norm": 0.25583641550685615, "learning_rate": 3.1964583158244383e-07, "loss": 0.0025, "step": 14559 }, { "epoch": 3.3128555176336745, "grad_norm": 0.17926910571100896, "learning_rate": 3.195678732966705e-07, "loss": 0.0007, "step": 14560 }, { "epoch": 3.3130830489192262, "grad_norm": 0.4123011462350274, "learning_rate": 3.1948992125337015e-07, "loss": 0.0043, "step": 14561 }, { "epoch": 3.313310580204778, "grad_norm": 0.6113853509410017, "learning_rate": 3.1941197545413633e-07, "loss": 0.0068, "step": 14562 }, { "epoch": 3.3135381114903297, "grad_norm": 1.1199552735594613, "learning_rate": 3.1933403590056227e-07, "loss": 0.0029, "step": 14563 }, { "epoch": 3.3137656427758815, "grad_norm": 0.06436601519800765, "learning_rate": 3.1925610259424047e-07, "loss": 0.0002, "step": 14564 }, { "epoch": 3.3139931740614337, "grad_norm": 0.22108663054186228, "learning_rate": 3.191781755367643e-07, "loss": 0.0015, "step": 14565 }, { "epoch": 3.314220705346985, "grad_norm": 0.5077942181370465, "learning_rate": 3.191002547297259e-07, "loss": 0.003, "step": 14566 }, { "epoch": 3.314448236632537, "grad_norm": 0.2517415971247871, "learning_rate": 3.190223401747182e-07, "loss": 0.0018, "step": 14567 }, { "epoch": 3.3146757679180885, "grad_norm": 0.695065501386204, "learning_rate": 3.189444318733333e-07, "loss": 0.011, "step": 14568 }, { "epoch": 3.3149032992036407, "grad_norm": 1.000801045923925, "learning_rate": 3.188665298271638e-07, "loss": 0.0083, "step": 14569 }, { "epoch": 3.3151308304891924, "grad_norm": 0.3650435285848313, "learning_rate": 3.1878863403780184e-07, "loss": 0.0069, "step": 14570 }, { "epoch": 3.315358361774744, "grad_norm": 0.9879351455599517, "learning_rate": 3.187107445068393e-07, "loss": 0.0138, "step": 14571 }, { "epoch": 3.315585893060296, "grad_norm": 0.6928308444809486, "learning_rate": 3.1863286123586844e-07, "loss": 0.0097, "step": 14572 }, { "epoch": 3.3158134243458477, "grad_norm": 0.47067845931694696, "learning_rate": 3.185549842264805e-07, "loss": 0.003, "step": 14573 }, { "epoch": 3.3160409556313994, "grad_norm": 0.5612381365719853, "learning_rate": 3.184771134802675e-07, "loss": 0.0054, "step": 14574 }, { "epoch": 3.316268486916951, "grad_norm": 0.3207987870224865, "learning_rate": 3.1839924899882106e-07, "loss": 0.0012, "step": 14575 }, { "epoch": 3.316496018202503, "grad_norm": 1.5568294260424476, "learning_rate": 3.1832139078373234e-07, "loss": 0.0175, "step": 14576 }, { "epoch": 3.3167235494880547, "grad_norm": 0.3912799576918923, "learning_rate": 3.18243538836593e-07, "loss": 0.0021, "step": 14577 }, { "epoch": 3.3169510807736065, "grad_norm": 1.5020958448519883, "learning_rate": 3.1816569315899353e-07, "loss": 0.015, "step": 14578 }, { "epoch": 3.317178612059158, "grad_norm": 0.7218793898955301, "learning_rate": 3.1808785375252573e-07, "loss": 0.0081, "step": 14579 }, { "epoch": 3.31740614334471, "grad_norm": 0.5321915578368301, "learning_rate": 3.1801002061877985e-07, "loss": 0.0025, "step": 14580 }, { "epoch": 3.3176336746302617, "grad_norm": 0.6408866859953859, "learning_rate": 3.1793219375934693e-07, "loss": 0.0051, "step": 14581 }, { "epoch": 3.3178612059158135, "grad_norm": 0.2275747572505347, "learning_rate": 3.178543731758178e-07, "loss": 0.0013, "step": 14582 }, { "epoch": 3.318088737201365, "grad_norm": 0.19776970176534578, "learning_rate": 3.177765588697826e-07, "loss": 0.0009, "step": 14583 }, { "epoch": 3.318316268486917, "grad_norm": 0.3819779579349653, "learning_rate": 3.1769875084283205e-07, "loss": 0.0033, "step": 14584 }, { "epoch": 3.3185437997724687, "grad_norm": 0.2513042862286722, "learning_rate": 3.17620949096556e-07, "loss": 0.0014, "step": 14585 }, { "epoch": 3.3187713310580205, "grad_norm": 0.6330290426992012, "learning_rate": 3.175431536325451e-07, "loss": 0.0042, "step": 14586 }, { "epoch": 3.318998862343572, "grad_norm": 0.26036232820916827, "learning_rate": 3.174653644523888e-07, "loss": 0.0012, "step": 14587 }, { "epoch": 3.319226393629124, "grad_norm": 0.254445147629532, "learning_rate": 3.1738758155767725e-07, "loss": 0.0018, "step": 14588 }, { "epoch": 3.3194539249146757, "grad_norm": 0.4430560263626908, "learning_rate": 3.1730980495000034e-07, "loss": 0.0024, "step": 14589 }, { "epoch": 3.3196814562002275, "grad_norm": 0.7125502691961386, "learning_rate": 3.172320346309474e-07, "loss": 0.0031, "step": 14590 }, { "epoch": 3.3199089874857792, "grad_norm": 0.9811658722214146, "learning_rate": 3.171542706021081e-07, "loss": 0.0047, "step": 14591 }, { "epoch": 3.320136518771331, "grad_norm": 0.39093838878040016, "learning_rate": 3.1707651286507155e-07, "loss": 0.0043, "step": 14592 }, { "epoch": 3.3203640500568827, "grad_norm": 1.1383861966902382, "learning_rate": 3.1699876142142737e-07, "loss": 0.0194, "step": 14593 }, { "epoch": 3.3205915813424345, "grad_norm": 0.6875500751956498, "learning_rate": 3.1692101627276424e-07, "loss": 0.0033, "step": 14594 }, { "epoch": 3.3208191126279862, "grad_norm": 2.389852979182777, "learning_rate": 3.1684327742067124e-07, "loss": 0.0081, "step": 14595 }, { "epoch": 3.321046643913538, "grad_norm": 0.2861737926443745, "learning_rate": 3.1676554486673756e-07, "loss": 0.0009, "step": 14596 }, { "epoch": 3.3212741751990897, "grad_norm": 0.5850770178936487, "learning_rate": 3.1668781861255145e-07, "loss": 0.0046, "step": 14597 }, { "epoch": 3.3215017064846415, "grad_norm": 0.5691720726838901, "learning_rate": 3.1661009865970183e-07, "loss": 0.0029, "step": 14598 }, { "epoch": 3.3217292377701932, "grad_norm": 0.14227179849330793, "learning_rate": 3.1653238500977705e-07, "loss": 0.0007, "step": 14599 }, { "epoch": 3.321956769055745, "grad_norm": 0.7874754886149006, "learning_rate": 3.164546776643651e-07, "loss": 0.0114, "step": 14600 }, { "epoch": 3.3221843003412967, "grad_norm": 0.06133366372582414, "learning_rate": 3.1637697662505454e-07, "loss": 0.0002, "step": 14601 }, { "epoch": 3.3224118316268485, "grad_norm": 0.16712755767211557, "learning_rate": 3.162992818934334e-07, "loss": 0.0006, "step": 14602 }, { "epoch": 3.3226393629124003, "grad_norm": 0.25400000856676236, "learning_rate": 3.1622159347108967e-07, "loss": 0.002, "step": 14603 }, { "epoch": 3.3228668941979524, "grad_norm": 0.6066311363665631, "learning_rate": 3.16143911359611e-07, "loss": 0.0029, "step": 14604 }, { "epoch": 3.3230944254835038, "grad_norm": 0.5096945747015263, "learning_rate": 3.160662355605852e-07, "loss": 0.0057, "step": 14605 }, { "epoch": 3.323321956769056, "grad_norm": 0.40469486599701865, "learning_rate": 3.1598856607559986e-07, "loss": 0.0018, "step": 14606 }, { "epoch": 3.3235494880546073, "grad_norm": 0.13847651386811313, "learning_rate": 3.159109029062421e-07, "loss": 0.0008, "step": 14607 }, { "epoch": 3.3237770193401595, "grad_norm": 0.6127743404750639, "learning_rate": 3.1583324605409927e-07, "loss": 0.0028, "step": 14608 }, { "epoch": 3.324004550625711, "grad_norm": 0.7072593517447371, "learning_rate": 3.1575559552075885e-07, "loss": 0.0031, "step": 14609 }, { "epoch": 3.324232081911263, "grad_norm": 0.6020306916295789, "learning_rate": 3.1567795130780787e-07, "loss": 0.0044, "step": 14610 }, { "epoch": 3.3244596131968147, "grad_norm": 0.34872027180833776, "learning_rate": 3.156003134168328e-07, "loss": 0.0013, "step": 14611 }, { "epoch": 3.3246871444823665, "grad_norm": 0.4835301088227863, "learning_rate": 3.1552268184942094e-07, "loss": 0.0019, "step": 14612 }, { "epoch": 3.324914675767918, "grad_norm": 0.3010757893079136, "learning_rate": 3.154450566071587e-07, "loss": 0.0019, "step": 14613 }, { "epoch": 3.32514220705347, "grad_norm": 1.8436916536794012, "learning_rate": 3.153674376916323e-07, "loss": 0.0224, "step": 14614 }, { "epoch": 3.3253697383390217, "grad_norm": 0.26429462093901346, "learning_rate": 3.152898251044285e-07, "loss": 0.0011, "step": 14615 }, { "epoch": 3.3255972696245735, "grad_norm": 0.2548713019777427, "learning_rate": 3.1521221884713335e-07, "loss": 0.0016, "step": 14616 }, { "epoch": 3.3258248009101252, "grad_norm": 0.26234738817489467, "learning_rate": 3.151346189213334e-07, "loss": 0.0012, "step": 14617 }, { "epoch": 3.326052332195677, "grad_norm": 0.5538947069939764, "learning_rate": 3.1505702532861434e-07, "loss": 0.0046, "step": 14618 }, { "epoch": 3.3262798634812287, "grad_norm": 0.45834543478763784, "learning_rate": 3.149794380705618e-07, "loss": 0.0015, "step": 14619 }, { "epoch": 3.3265073947667805, "grad_norm": 1.2821170993131936, "learning_rate": 3.149018571487621e-07, "loss": 0.01, "step": 14620 }, { "epoch": 3.3267349260523322, "grad_norm": 0.7089984747151015, "learning_rate": 3.148242825648001e-07, "loss": 0.0033, "step": 14621 }, { "epoch": 3.326962457337884, "grad_norm": 1.2777297103208762, "learning_rate": 3.147467143202619e-07, "loss": 0.0151, "step": 14622 }, { "epoch": 3.3271899886234357, "grad_norm": 0.5713969062315206, "learning_rate": 3.146691524167325e-07, "loss": 0.0055, "step": 14623 }, { "epoch": 3.3274175199089875, "grad_norm": 0.8941363558031219, "learning_rate": 3.145915968557976e-07, "loss": 0.0054, "step": 14624 }, { "epoch": 3.3276450511945392, "grad_norm": 0.09152606448533385, "learning_rate": 3.1451404763904193e-07, "loss": 0.0003, "step": 14625 }, { "epoch": 3.327872582480091, "grad_norm": 0.37947128650836015, "learning_rate": 3.1443650476805033e-07, "loss": 0.0018, "step": 14626 }, { "epoch": 3.3281001137656427, "grad_norm": 1.117221223385932, "learning_rate": 3.14358968244408e-07, "loss": 0.0119, "step": 14627 }, { "epoch": 3.3283276450511945, "grad_norm": 0.8595312106484242, "learning_rate": 3.1428143806969896e-07, "loss": 0.0119, "step": 14628 }, { "epoch": 3.3285551763367462, "grad_norm": 0.683446447169021, "learning_rate": 3.1420391424550875e-07, "loss": 0.0047, "step": 14629 }, { "epoch": 3.328782707622298, "grad_norm": 0.22508132951936496, "learning_rate": 3.141263967734211e-07, "loss": 0.001, "step": 14630 }, { "epoch": 3.3290102389078498, "grad_norm": 0.03131238266396795, "learning_rate": 3.1404888565502086e-07, "loss": 0.0001, "step": 14631 }, { "epoch": 3.3292377701934015, "grad_norm": 1.1522533267979047, "learning_rate": 3.139713808918918e-07, "loss": 0.0222, "step": 14632 }, { "epoch": 3.3294653014789533, "grad_norm": 0.37194228121255357, "learning_rate": 3.1389388248561787e-07, "loss": 0.0035, "step": 14633 }, { "epoch": 3.329692832764505, "grad_norm": 0.6089051303716257, "learning_rate": 3.1381639043778334e-07, "loss": 0.0055, "step": 14634 }, { "epoch": 3.3299203640500568, "grad_norm": 0.521080600645258, "learning_rate": 3.1373890474997176e-07, "loss": 0.0047, "step": 14635 }, { "epoch": 3.3301478953356085, "grad_norm": 2.2967077850626922, "learning_rate": 3.136614254237673e-07, "loss": 0.0276, "step": 14636 }, { "epoch": 3.3303754266211603, "grad_norm": 0.25983223554612306, "learning_rate": 3.1358395246075296e-07, "loss": 0.0019, "step": 14637 }, { "epoch": 3.330602957906712, "grad_norm": 0.48377782207841985, "learning_rate": 3.135064858625121e-07, "loss": 0.0048, "step": 14638 }, { "epoch": 3.3308304891922638, "grad_norm": 0.7288915086129899, "learning_rate": 3.134290256306285e-07, "loss": 0.0046, "step": 14639 }, { "epoch": 3.3310580204778155, "grad_norm": 0.9641445139056555, "learning_rate": 3.133515717666847e-07, "loss": 0.01, "step": 14640 }, { "epoch": 3.3312855517633673, "grad_norm": 1.115628586746884, "learning_rate": 3.13274124272264e-07, "loss": 0.0136, "step": 14641 }, { "epoch": 3.331513083048919, "grad_norm": 0.32603034531028113, "learning_rate": 3.1319668314894943e-07, "loss": 0.0012, "step": 14642 }, { "epoch": 3.331740614334471, "grad_norm": 1.6356599110264334, "learning_rate": 3.131192483983237e-07, "loss": 0.0058, "step": 14643 }, { "epoch": 3.3319681456200225, "grad_norm": 0.3930901980370049, "learning_rate": 3.130418200219694e-07, "loss": 0.0031, "step": 14644 }, { "epoch": 3.3321956769055747, "grad_norm": 0.5920321398672017, "learning_rate": 3.129643980214687e-07, "loss": 0.0034, "step": 14645 }, { "epoch": 3.3324232081911265, "grad_norm": 0.7846206460340607, "learning_rate": 3.128869823984046e-07, "loss": 0.0057, "step": 14646 }, { "epoch": 3.3326507394766782, "grad_norm": 0.5343879418712614, "learning_rate": 3.1280957315435857e-07, "loss": 0.0041, "step": 14647 }, { "epoch": 3.33287827076223, "grad_norm": 0.16837356711934, "learning_rate": 3.127321702909132e-07, "loss": 0.0008, "step": 14648 }, { "epoch": 3.3331058020477817, "grad_norm": 0.922588834628939, "learning_rate": 3.1265477380965067e-07, "loss": 0.005, "step": 14649 }, { "epoch": 3.3333333333333335, "grad_norm": 0.20877160601035152, "learning_rate": 3.125773837121522e-07, "loss": 0.0005, "step": 14650 }, { "epoch": 3.3335608646188852, "grad_norm": 0.2779232039500669, "learning_rate": 3.125000000000002e-07, "loss": 0.0018, "step": 14651 }, { "epoch": 3.333788395904437, "grad_norm": 0.5946082412129695, "learning_rate": 3.124226226747755e-07, "loss": 0.0032, "step": 14652 }, { "epoch": 3.3340159271899887, "grad_norm": 0.25489704155569204, "learning_rate": 3.123452517380602e-07, "loss": 0.0017, "step": 14653 }, { "epoch": 3.3342434584755405, "grad_norm": 0.4389555228123648, "learning_rate": 3.1226788719143515e-07, "loss": 0.0009, "step": 14654 }, { "epoch": 3.3344709897610922, "grad_norm": 0.4923549270296928, "learning_rate": 3.1219052903648177e-07, "loss": 0.0033, "step": 14655 }, { "epoch": 3.334698521046644, "grad_norm": 0.6386595258772676, "learning_rate": 3.121131772747813e-07, "loss": 0.007, "step": 14656 }, { "epoch": 3.3349260523321957, "grad_norm": 0.5294733088279308, "learning_rate": 3.120358319079142e-07, "loss": 0.0031, "step": 14657 }, { "epoch": 3.3351535836177475, "grad_norm": 0.5489947468770096, "learning_rate": 3.119584929374618e-07, "loss": 0.0054, "step": 14658 }, { "epoch": 3.3353811149032992, "grad_norm": 1.8778002748643263, "learning_rate": 3.1188116036500427e-07, "loss": 0.0088, "step": 14659 }, { "epoch": 3.335608646188851, "grad_norm": 0.40729946884050394, "learning_rate": 3.1180383419212254e-07, "loss": 0.0043, "step": 14660 }, { "epoch": 3.3358361774744028, "grad_norm": 0.3215082123744462, "learning_rate": 3.1172651442039665e-07, "loss": 0.0017, "step": 14661 }, { "epoch": 3.3360637087599545, "grad_norm": 0.3809537788740849, "learning_rate": 3.116492010514071e-07, "loss": 0.0022, "step": 14662 }, { "epoch": 3.3362912400455063, "grad_norm": 0.206472116909785, "learning_rate": 3.115718940867342e-07, "loss": 0.0012, "step": 14663 }, { "epoch": 3.336518771331058, "grad_norm": 0.8288489569852674, "learning_rate": 3.1149459352795757e-07, "loss": 0.0096, "step": 14664 }, { "epoch": 3.3367463026166098, "grad_norm": 0.8177091285258762, "learning_rate": 3.114172993766575e-07, "loss": 0.0045, "step": 14665 }, { "epoch": 3.3369738339021615, "grad_norm": 0.8345696669260446, "learning_rate": 3.1134001163441325e-07, "loss": 0.0062, "step": 14666 }, { "epoch": 3.3372013651877133, "grad_norm": 1.310824665304604, "learning_rate": 3.1126273030280507e-07, "loss": 0.0104, "step": 14667 }, { "epoch": 3.337428896473265, "grad_norm": 0.36631900029983033, "learning_rate": 3.111854553834118e-07, "loss": 0.0021, "step": 14668 }, { "epoch": 3.3376564277588168, "grad_norm": 0.9674366585661982, "learning_rate": 3.1110818687781314e-07, "loss": 0.0086, "step": 14669 }, { "epoch": 3.3378839590443685, "grad_norm": 0.38491985613014806, "learning_rate": 3.1103092478758846e-07, "loss": 0.0028, "step": 14670 }, { "epoch": 3.3381114903299203, "grad_norm": 1.9198683362311397, "learning_rate": 3.109536691143164e-07, "loss": 0.0248, "step": 14671 }, { "epoch": 3.338339021615472, "grad_norm": 0.6898116338027593, "learning_rate": 3.108764198595765e-07, "loss": 0.0079, "step": 14672 }, { "epoch": 3.3385665529010238, "grad_norm": 0.9739513244322119, "learning_rate": 3.10799177024947e-07, "loss": 0.0095, "step": 14673 }, { "epoch": 3.3387940841865755, "grad_norm": 0.6482999960721432, "learning_rate": 3.107219406120072e-07, "loss": 0.0066, "step": 14674 }, { "epoch": 3.3390216154721273, "grad_norm": 0.27099335489070225, "learning_rate": 3.106447106223351e-07, "loss": 0.0031, "step": 14675 }, { "epoch": 3.339249146757679, "grad_norm": 0.8461005028598987, "learning_rate": 3.1056748705750935e-07, "loss": 0.0052, "step": 14676 }, { "epoch": 3.339476678043231, "grad_norm": 0.3271907721060226, "learning_rate": 3.1049026991910856e-07, "loss": 0.0008, "step": 14677 }, { "epoch": 3.3397042093287825, "grad_norm": 0.9840461894188248, "learning_rate": 3.1041305920871047e-07, "loss": 0.0134, "step": 14678 }, { "epoch": 3.3399317406143343, "grad_norm": 1.1924533713177508, "learning_rate": 3.1033585492789347e-07, "loss": 0.0088, "step": 14679 }, { "epoch": 3.3401592718998865, "grad_norm": 0.4439432304302222, "learning_rate": 3.102586570782351e-07, "loss": 0.0011, "step": 14680 }, { "epoch": 3.340386803185438, "grad_norm": 0.2908992797402719, "learning_rate": 3.101814656613136e-07, "loss": 0.0016, "step": 14681 }, { "epoch": 3.34061433447099, "grad_norm": 1.4482754627138328, "learning_rate": 3.101042806787062e-07, "loss": 0.0055, "step": 14682 }, { "epoch": 3.3408418657565413, "grad_norm": 0.5260055868768609, "learning_rate": 3.1002710213199055e-07, "loss": 0.0045, "step": 14683 }, { "epoch": 3.3410693970420935, "grad_norm": 0.37009326563527484, "learning_rate": 3.099499300227443e-07, "loss": 0.0017, "step": 14684 }, { "epoch": 3.3412969283276452, "grad_norm": 0.1877634991047841, "learning_rate": 3.0987276435254435e-07, "loss": 0.001, "step": 14685 }, { "epoch": 3.341524459613197, "grad_norm": 0.8958311046733302, "learning_rate": 3.097956051229681e-07, "loss": 0.0055, "step": 14686 }, { "epoch": 3.3417519908987487, "grad_norm": 0.11075822562241451, "learning_rate": 3.097184523355925e-07, "loss": 0.0006, "step": 14687 }, { "epoch": 3.3419795221843005, "grad_norm": 0.5799106089090466, "learning_rate": 3.09641305991994e-07, "loss": 0.0036, "step": 14688 }, { "epoch": 3.3422070534698523, "grad_norm": 0.32334325632391714, "learning_rate": 3.095641660937497e-07, "loss": 0.0007, "step": 14689 }, { "epoch": 3.342434584755404, "grad_norm": 0.38447198602286153, "learning_rate": 3.0948703264243614e-07, "loss": 0.0037, "step": 14690 }, { "epoch": 3.3426621160409558, "grad_norm": 5.085672540571008, "learning_rate": 3.0940990563963004e-07, "loss": 0.0509, "step": 14691 }, { "epoch": 3.3428896473265075, "grad_norm": 0.6434910979194458, "learning_rate": 3.0933278508690724e-07, "loss": 0.0014, "step": 14692 }, { "epoch": 3.3431171786120593, "grad_norm": 0.3053836140733857, "learning_rate": 3.092556709858444e-07, "loss": 0.0015, "step": 14693 }, { "epoch": 3.343344709897611, "grad_norm": 1.0445100493480375, "learning_rate": 3.0917856333801744e-07, "loss": 0.0113, "step": 14694 }, { "epoch": 3.3435722411831628, "grad_norm": 0.5271889406891226, "learning_rate": 3.091014621450018e-07, "loss": 0.0033, "step": 14695 }, { "epoch": 3.3437997724687145, "grad_norm": 1.6316545266541895, "learning_rate": 3.090243674083742e-07, "loss": 0.0123, "step": 14696 }, { "epoch": 3.3440273037542663, "grad_norm": 0.2900896087591255, "learning_rate": 3.0894727912970954e-07, "loss": 0.0014, "step": 14697 }, { "epoch": 3.344254835039818, "grad_norm": 0.5329893967526861, "learning_rate": 3.0887019731058397e-07, "loss": 0.0026, "step": 14698 }, { "epoch": 3.3444823663253698, "grad_norm": 1.5759987782477098, "learning_rate": 3.0879312195257235e-07, "loss": 0.0148, "step": 14699 }, { "epoch": 3.3447098976109215, "grad_norm": 0.5394739822889245, "learning_rate": 3.087160530572505e-07, "loss": 0.0027, "step": 14700 }, { "epoch": 3.3449374288964733, "grad_norm": 0.13432013160169945, "learning_rate": 3.0863899062619296e-07, "loss": 0.0005, "step": 14701 }, { "epoch": 3.345164960182025, "grad_norm": 1.648928254538013, "learning_rate": 3.085619346609751e-07, "loss": 0.0157, "step": 14702 }, { "epoch": 3.345392491467577, "grad_norm": 0.8460408880113941, "learning_rate": 3.08484885163172e-07, "loss": 0.0096, "step": 14703 }, { "epoch": 3.3456200227531285, "grad_norm": 0.6768555402080791, "learning_rate": 3.084078421343579e-07, "loss": 0.0041, "step": 14704 }, { "epoch": 3.3458475540386803, "grad_norm": 0.9987627850311813, "learning_rate": 3.08330805576108e-07, "loss": 0.0103, "step": 14705 }, { "epoch": 3.346075085324232, "grad_norm": 0.6137223628500738, "learning_rate": 3.082537754899964e-07, "loss": 0.0042, "step": 14706 }, { "epoch": 3.346302616609784, "grad_norm": 1.580918315178426, "learning_rate": 3.0817675187759735e-07, "loss": 0.0068, "step": 14707 }, { "epoch": 3.3465301478953355, "grad_norm": 0.5723634408323635, "learning_rate": 3.080997347404853e-07, "loss": 0.0024, "step": 14708 }, { "epoch": 3.3467576791808873, "grad_norm": 0.5271699991987798, "learning_rate": 3.080227240802342e-07, "loss": 0.0035, "step": 14709 }, { "epoch": 3.346985210466439, "grad_norm": 0.5368539445634555, "learning_rate": 3.0794571989841845e-07, "loss": 0.0031, "step": 14710 }, { "epoch": 3.347212741751991, "grad_norm": 0.4979413900646961, "learning_rate": 3.0786872219661116e-07, "loss": 0.0022, "step": 14711 }, { "epoch": 3.3474402730375425, "grad_norm": 1.081672122934087, "learning_rate": 3.077917309763868e-07, "loss": 0.0059, "step": 14712 }, { "epoch": 3.3476678043230943, "grad_norm": 0.6810615609823865, "learning_rate": 3.077147462393184e-07, "loss": 0.0015, "step": 14713 }, { "epoch": 3.347895335608646, "grad_norm": 0.19006815768424756, "learning_rate": 3.076377679869793e-07, "loss": 0.0007, "step": 14714 }, { "epoch": 3.348122866894198, "grad_norm": 0.1426370967845766, "learning_rate": 3.0756079622094294e-07, "loss": 0.0005, "step": 14715 }, { "epoch": 3.3483503981797496, "grad_norm": 0.6708446120455815, "learning_rate": 3.074838309427826e-07, "loss": 0.0111, "step": 14716 }, { "epoch": 3.3485779294653013, "grad_norm": 0.5644075241241383, "learning_rate": 3.074068721540715e-07, "loss": 0.0014, "step": 14717 }, { "epoch": 3.348805460750853, "grad_norm": 1.1018665519077948, "learning_rate": 3.07329919856382e-07, "loss": 0.0043, "step": 14718 }, { "epoch": 3.3490329920364053, "grad_norm": 1.2774360836337042, "learning_rate": 3.0725297405128746e-07, "loss": 0.0118, "step": 14719 }, { "epoch": 3.3492605233219566, "grad_norm": 0.07024816336676773, "learning_rate": 3.071760347403602e-07, "loss": 0.0003, "step": 14720 }, { "epoch": 3.3494880546075088, "grad_norm": 0.404146710655921, "learning_rate": 3.070991019251724e-07, "loss": 0.0006, "step": 14721 }, { "epoch": 3.34971558589306, "grad_norm": 0.5434182416401144, "learning_rate": 3.0702217560729685e-07, "loss": 0.0032, "step": 14722 }, { "epoch": 3.3499431171786123, "grad_norm": 0.8743725464565837, "learning_rate": 3.0694525578830564e-07, "loss": 0.0147, "step": 14723 }, { "epoch": 3.350170648464164, "grad_norm": 0.19019018483894337, "learning_rate": 3.068683424697711e-07, "loss": 0.0007, "step": 14724 }, { "epoch": 3.3503981797497158, "grad_norm": 1.4209363921268208, "learning_rate": 3.0679143565326503e-07, "loss": 0.0243, "step": 14725 }, { "epoch": 3.3506257110352675, "grad_norm": 0.6577105315622972, "learning_rate": 3.06714535340359e-07, "loss": 0.0073, "step": 14726 }, { "epoch": 3.3508532423208193, "grad_norm": 0.9023447735560397, "learning_rate": 3.0663764153262524e-07, "loss": 0.0097, "step": 14727 }, { "epoch": 3.351080773606371, "grad_norm": 0.7660955845793473, "learning_rate": 3.065607542316348e-07, "loss": 0.0055, "step": 14728 }, { "epoch": 3.3513083048919228, "grad_norm": 0.5023616151602297, "learning_rate": 3.0648387343895936e-07, "loss": 0.0015, "step": 14729 }, { "epoch": 3.3515358361774745, "grad_norm": 0.36890350469166294, "learning_rate": 3.0640699915617013e-07, "loss": 0.0031, "step": 14730 }, { "epoch": 3.3517633674630263, "grad_norm": 0.29313046971594275, "learning_rate": 3.063301313848387e-07, "loss": 0.0016, "step": 14731 }, { "epoch": 3.351990898748578, "grad_norm": 0.7553847892385294, "learning_rate": 3.062532701265357e-07, "loss": 0.007, "step": 14732 }, { "epoch": 3.35221843003413, "grad_norm": 0.5704561019852156, "learning_rate": 3.0617641538283186e-07, "loss": 0.007, "step": 14733 }, { "epoch": 3.3524459613196815, "grad_norm": 0.5297349080461073, "learning_rate": 3.0609956715529845e-07, "loss": 0.0035, "step": 14734 }, { "epoch": 3.3526734926052333, "grad_norm": 0.6631457690616018, "learning_rate": 3.060227254455056e-07, "loss": 0.0026, "step": 14735 }, { "epoch": 3.352901023890785, "grad_norm": 0.462358406727253, "learning_rate": 3.0594589025502413e-07, "loss": 0.0036, "step": 14736 }, { "epoch": 3.353128555176337, "grad_norm": 0.30199559003649185, "learning_rate": 3.058690615854243e-07, "loss": 0.0023, "step": 14737 }, { "epoch": 3.3533560864618885, "grad_norm": 0.37721098705164074, "learning_rate": 3.057922394382766e-07, "loss": 0.0023, "step": 14738 }, { "epoch": 3.3535836177474403, "grad_norm": 0.1341763326244921, "learning_rate": 3.057154238151509e-07, "loss": 0.0003, "step": 14739 }, { "epoch": 3.353811149032992, "grad_norm": 0.1556776811567092, "learning_rate": 3.05638614717617e-07, "loss": 0.0006, "step": 14740 }, { "epoch": 3.354038680318544, "grad_norm": 0.11041244845515535, "learning_rate": 3.055618121472452e-07, "loss": 0.0006, "step": 14741 }, { "epoch": 3.3542662116040955, "grad_norm": 0.09908941844939793, "learning_rate": 3.0548501610560467e-07, "loss": 0.0005, "step": 14742 }, { "epoch": 3.3544937428896473, "grad_norm": 0.9729334023042104, "learning_rate": 3.0540822659426524e-07, "loss": 0.0084, "step": 14743 }, { "epoch": 3.354721274175199, "grad_norm": 0.937386629994392, "learning_rate": 3.053314436147966e-07, "loss": 0.012, "step": 14744 }, { "epoch": 3.354948805460751, "grad_norm": 0.38578677027368413, "learning_rate": 3.0525466716876756e-07, "loss": 0.0027, "step": 14745 }, { "epoch": 3.3551763367463026, "grad_norm": 0.45943910598725046, "learning_rate": 3.051778972577478e-07, "loss": 0.0023, "step": 14746 }, { "epoch": 3.3554038680318543, "grad_norm": 1.0331330282978706, "learning_rate": 3.051011338833058e-07, "loss": 0.0116, "step": 14747 }, { "epoch": 3.355631399317406, "grad_norm": 1.769047460446382, "learning_rate": 3.050243770470111e-07, "loss": 0.004, "step": 14748 }, { "epoch": 3.355858930602958, "grad_norm": 0.29635155882315856, "learning_rate": 3.049476267504317e-07, "loss": 0.0016, "step": 14749 }, { "epoch": 3.3560864618885096, "grad_norm": 0.8477131268663862, "learning_rate": 3.0487088299513684e-07, "loss": 0.0047, "step": 14750 }, { "epoch": 3.3563139931740613, "grad_norm": 0.4003658442116063, "learning_rate": 3.04794145782695e-07, "loss": 0.0027, "step": 14751 }, { "epoch": 3.356541524459613, "grad_norm": 0.2862616652183203, "learning_rate": 3.0471741511467416e-07, "loss": 0.0018, "step": 14752 }, { "epoch": 3.356769055745165, "grad_norm": 0.4296526556754382, "learning_rate": 3.04640690992643e-07, "loss": 0.0014, "step": 14753 }, { "epoch": 3.3569965870307166, "grad_norm": 1.0421345654733196, "learning_rate": 3.045639734181693e-07, "loss": 0.0027, "step": 14754 }, { "epoch": 3.3572241183162683, "grad_norm": 0.09489918265631363, "learning_rate": 3.0448726239282097e-07, "loss": 0.0006, "step": 14755 }, { "epoch": 3.35745164960182, "grad_norm": 0.25128483693650366, "learning_rate": 3.044105579181663e-07, "loss": 0.0004, "step": 14756 }, { "epoch": 3.357679180887372, "grad_norm": 0.8061641734762043, "learning_rate": 3.043338599957725e-07, "loss": 0.0116, "step": 14757 }, { "epoch": 3.357906712172924, "grad_norm": 0.7595785160434811, "learning_rate": 3.042571686272075e-07, "loss": 0.0094, "step": 14758 }, { "epoch": 3.3581342434584753, "grad_norm": 0.535641379728902, "learning_rate": 3.041804838140384e-07, "loss": 0.0025, "step": 14759 }, { "epoch": 3.3583617747440275, "grad_norm": 0.5532327159518169, "learning_rate": 3.0410380555783283e-07, "loss": 0.0074, "step": 14760 }, { "epoch": 3.358589306029579, "grad_norm": 0.6476044193634631, "learning_rate": 3.0402713386015754e-07, "loss": 0.0054, "step": 14761 }, { "epoch": 3.358816837315131, "grad_norm": 0.4339046352865238, "learning_rate": 3.0395046872257985e-07, "loss": 0.003, "step": 14762 }, { "epoch": 3.359044368600683, "grad_norm": 0.14816448918559727, "learning_rate": 3.0387381014666676e-07, "loss": 0.0005, "step": 14763 }, { "epoch": 3.3592718998862345, "grad_norm": 0.31542040261798904, "learning_rate": 3.037971581339847e-07, "loss": 0.0011, "step": 14764 }, { "epoch": 3.3594994311717863, "grad_norm": 0.906764723258262, "learning_rate": 3.0372051268610074e-07, "loss": 0.0128, "step": 14765 }, { "epoch": 3.359726962457338, "grad_norm": 0.3531229797837723, "learning_rate": 3.036438738045808e-07, "loss": 0.0013, "step": 14766 }, { "epoch": 3.35995449374289, "grad_norm": 0.17633900004153544, "learning_rate": 3.035672414909918e-07, "loss": 0.0004, "step": 14767 }, { "epoch": 3.3601820250284415, "grad_norm": 0.3374429921250332, "learning_rate": 3.0349061574689955e-07, "loss": 0.0009, "step": 14768 }, { "epoch": 3.3604095563139933, "grad_norm": 0.588805284978515, "learning_rate": 3.034139965738702e-07, "loss": 0.008, "step": 14769 }, { "epoch": 3.360637087599545, "grad_norm": 0.33440498267323576, "learning_rate": 3.0333738397347003e-07, "loss": 0.0014, "step": 14770 }, { "epoch": 3.360864618885097, "grad_norm": 0.22738295971543576, "learning_rate": 3.0326077794726446e-07, "loss": 0.0016, "step": 14771 }, { "epoch": 3.3610921501706486, "grad_norm": 0.3403945293057373, "learning_rate": 3.031841784968196e-07, "loss": 0.0017, "step": 14772 }, { "epoch": 3.3613196814562003, "grad_norm": 0.23200650885522012, "learning_rate": 3.031075856237005e-07, "loss": 0.0007, "step": 14773 }, { "epoch": 3.361547212741752, "grad_norm": 0.34354146429016924, "learning_rate": 3.0303099932947306e-07, "loss": 0.0015, "step": 14774 }, { "epoch": 3.361774744027304, "grad_norm": 0.38098575631565496, "learning_rate": 3.029544196157021e-07, "loss": 0.0043, "step": 14775 }, { "epoch": 3.3620022753128556, "grad_norm": 0.19293232967377158, "learning_rate": 3.0287784648395295e-07, "loss": 0.0012, "step": 14776 }, { "epoch": 3.3622298065984073, "grad_norm": 0.14160793443458466, "learning_rate": 3.0280127993579095e-07, "loss": 0.0006, "step": 14777 }, { "epoch": 3.362457337883959, "grad_norm": 0.5538337529783864, "learning_rate": 3.027247199727805e-07, "loss": 0.0029, "step": 14778 }, { "epoch": 3.362684869169511, "grad_norm": 0.28182389264251473, "learning_rate": 3.026481665964868e-07, "loss": 0.0017, "step": 14779 }, { "epoch": 3.3629124004550626, "grad_norm": 0.40831433622256313, "learning_rate": 3.025716198084739e-07, "loss": 0.0031, "step": 14780 }, { "epoch": 3.3631399317406143, "grad_norm": 0.4477153114254186, "learning_rate": 3.024950796103069e-07, "loss": 0.0028, "step": 14781 }, { "epoch": 3.363367463026166, "grad_norm": 0.15484224122576137, "learning_rate": 3.0241854600354954e-07, "loss": 0.0008, "step": 14782 }, { "epoch": 3.363594994311718, "grad_norm": 0.9681107094938387, "learning_rate": 3.023420189897664e-07, "loss": 0.0058, "step": 14783 }, { "epoch": 3.3638225255972696, "grad_norm": 0.953986213377787, "learning_rate": 3.022654985705217e-07, "loss": 0.008, "step": 14784 }, { "epoch": 3.3640500568828213, "grad_norm": 0.3421316123924003, "learning_rate": 3.0218898474737883e-07, "loss": 0.0016, "step": 14785 }, { "epoch": 3.364277588168373, "grad_norm": 0.3127888474699428, "learning_rate": 3.021124775219022e-07, "loss": 0.0051, "step": 14786 }, { "epoch": 3.364505119453925, "grad_norm": 0.14716040080992399, "learning_rate": 3.0203597689565495e-07, "loss": 0.0007, "step": 14787 }, { "epoch": 3.3647326507394766, "grad_norm": 0.6863372727971593, "learning_rate": 3.0195948287020106e-07, "loss": 0.0048, "step": 14788 }, { "epoch": 3.3649601820250283, "grad_norm": 0.43254643746418964, "learning_rate": 3.018829954471035e-07, "loss": 0.0029, "step": 14789 }, { "epoch": 3.36518771331058, "grad_norm": 0.3936000640997224, "learning_rate": 3.018065146279258e-07, "loss": 0.0011, "step": 14790 }, { "epoch": 3.365415244596132, "grad_norm": 0.8353037296035992, "learning_rate": 3.017300404142312e-07, "loss": 0.002, "step": 14791 }, { "epoch": 3.3656427758816836, "grad_norm": 0.7674829373632199, "learning_rate": 3.016535728075824e-07, "loss": 0.0063, "step": 14792 }, { "epoch": 3.3658703071672353, "grad_norm": 0.6048115448134055, "learning_rate": 3.015771118095425e-07, "loss": 0.0081, "step": 14793 }, { "epoch": 3.366097838452787, "grad_norm": 3.234892746357166, "learning_rate": 3.0150065742167417e-07, "loss": 0.015, "step": 14794 }, { "epoch": 3.366325369738339, "grad_norm": 0.3904079662555942, "learning_rate": 3.014242096455397e-07, "loss": 0.0043, "step": 14795 }, { "epoch": 3.3665529010238906, "grad_norm": 0.681133508763016, "learning_rate": 3.0134776848270183e-07, "loss": 0.0036, "step": 14796 }, { "epoch": 3.366780432309443, "grad_norm": 0.3129775821301946, "learning_rate": 3.012713339347228e-07, "loss": 0.0017, "step": 14797 }, { "epoch": 3.367007963594994, "grad_norm": 0.6744265600176337, "learning_rate": 3.01194906003165e-07, "loss": 0.0043, "step": 14798 }, { "epoch": 3.3672354948805463, "grad_norm": 0.7689572196513859, "learning_rate": 3.0111848468959016e-07, "loss": 0.0065, "step": 14799 }, { "epoch": 3.3674630261660976, "grad_norm": 0.7369514949384628, "learning_rate": 3.0104206999556053e-07, "loss": 0.0073, "step": 14800 }, { "epoch": 3.36769055745165, "grad_norm": 1.3894441811827016, "learning_rate": 3.009656619226377e-07, "loss": 0.011, "step": 14801 }, { "epoch": 3.3679180887372016, "grad_norm": 0.48875311678819217, "learning_rate": 3.0088926047238303e-07, "loss": 0.0031, "step": 14802 }, { "epoch": 3.3681456200227533, "grad_norm": 0.26798964481199905, "learning_rate": 3.0081286564635826e-07, "loss": 0.0054, "step": 14803 }, { "epoch": 3.368373151308305, "grad_norm": 0.4887264157953789, "learning_rate": 3.0073647744612485e-07, "loss": 0.0022, "step": 14804 }, { "epoch": 3.368600682593857, "grad_norm": 0.8262919143975711, "learning_rate": 3.0066009587324424e-07, "loss": 0.0025, "step": 14805 }, { "epoch": 3.3688282138794086, "grad_norm": 0.29802252370141114, "learning_rate": 3.0058372092927704e-07, "loss": 0.0029, "step": 14806 }, { "epoch": 3.3690557451649603, "grad_norm": 0.23755940234335676, "learning_rate": 3.0050735261578465e-07, "loss": 0.0009, "step": 14807 }, { "epoch": 3.369283276450512, "grad_norm": 0.6579644599685505, "learning_rate": 3.004309909343277e-07, "loss": 0.0033, "step": 14808 }, { "epoch": 3.369510807736064, "grad_norm": 0.5322069030122475, "learning_rate": 3.003546358864666e-07, "loss": 0.0024, "step": 14809 }, { "epoch": 3.3697383390216156, "grad_norm": 0.7186043393791868, "learning_rate": 3.0027828747376217e-07, "loss": 0.0081, "step": 14810 }, { "epoch": 3.3699658703071673, "grad_norm": 0.035726846163499096, "learning_rate": 3.002019456977749e-07, "loss": 0.0001, "step": 14811 }, { "epoch": 3.370193401592719, "grad_norm": 1.4259361960669632, "learning_rate": 3.0012561056006526e-07, "loss": 0.0117, "step": 14812 }, { "epoch": 3.370420932878271, "grad_norm": 0.6000169744479306, "learning_rate": 3.0004928206219316e-07, "loss": 0.0087, "step": 14813 }, { "epoch": 3.3706484641638226, "grad_norm": 0.8619395475872911, "learning_rate": 2.999729602057183e-07, "loss": 0.0051, "step": 14814 }, { "epoch": 3.3708759954493743, "grad_norm": 1.3856054704489331, "learning_rate": 2.998966449922012e-07, "loss": 0.0115, "step": 14815 }, { "epoch": 3.371103526734926, "grad_norm": 1.687962140319161, "learning_rate": 2.998203364232008e-07, "loss": 0.007, "step": 14816 }, { "epoch": 3.371331058020478, "grad_norm": 1.5023004183964028, "learning_rate": 2.9974403450027765e-07, "loss": 0.0089, "step": 14817 }, { "epoch": 3.3715585893060296, "grad_norm": 0.47910853106836054, "learning_rate": 2.9966773922499047e-07, "loss": 0.0039, "step": 14818 }, { "epoch": 3.3717861205915813, "grad_norm": 0.685194396536098, "learning_rate": 2.9959145059889916e-07, "loss": 0.0027, "step": 14819 }, { "epoch": 3.372013651877133, "grad_norm": 0.5204802066842632, "learning_rate": 2.9951516862356265e-07, "loss": 0.0086, "step": 14820 }, { "epoch": 3.372241183162685, "grad_norm": 0.49825933357339236, "learning_rate": 2.994388933005397e-07, "loss": 0.0029, "step": 14821 }, { "epoch": 3.3724687144482366, "grad_norm": 0.4096609697941933, "learning_rate": 2.993626246313897e-07, "loss": 0.0036, "step": 14822 }, { "epoch": 3.3726962457337883, "grad_norm": 0.8491867621206096, "learning_rate": 2.9928636261767105e-07, "loss": 0.0037, "step": 14823 }, { "epoch": 3.37292377701934, "grad_norm": 0.4291326521433602, "learning_rate": 2.99210107260943e-07, "loss": 0.0007, "step": 14824 }, { "epoch": 3.373151308304892, "grad_norm": 0.38748851438240217, "learning_rate": 2.991338585627634e-07, "loss": 0.0024, "step": 14825 }, { "epoch": 3.3733788395904436, "grad_norm": 0.18942184945875787, "learning_rate": 2.9905761652469124e-07, "loss": 0.0009, "step": 14826 }, { "epoch": 3.3736063708759954, "grad_norm": 0.3619593608305918, "learning_rate": 2.989813811482845e-07, "loss": 0.0026, "step": 14827 }, { "epoch": 3.373833902161547, "grad_norm": 0.6825079976154103, "learning_rate": 2.989051524351009e-07, "loss": 0.0025, "step": 14828 }, { "epoch": 3.374061433447099, "grad_norm": 0.302806510545422, "learning_rate": 2.9882893038669883e-07, "loss": 0.0012, "step": 14829 }, { "epoch": 3.3742889647326506, "grad_norm": 0.05177795817831769, "learning_rate": 2.987527150046361e-07, "loss": 0.0001, "step": 14830 }, { "epoch": 3.3745164960182024, "grad_norm": 0.3010940476590635, "learning_rate": 2.986765062904706e-07, "loss": 0.0021, "step": 14831 }, { "epoch": 3.374744027303754, "grad_norm": 0.2901299256996753, "learning_rate": 2.9860030424575965e-07, "loss": 0.0024, "step": 14832 }, { "epoch": 3.374971558589306, "grad_norm": 0.5133496715953355, "learning_rate": 2.985241088720604e-07, "loss": 0.0023, "step": 14833 }, { "epoch": 3.3751990898748576, "grad_norm": 0.8392297795113307, "learning_rate": 2.984479201709308e-07, "loss": 0.0031, "step": 14834 }, { "epoch": 3.3754266211604094, "grad_norm": 0.7972155525126281, "learning_rate": 2.9837173814392736e-07, "loss": 0.0077, "step": 14835 }, { "epoch": 3.3756541524459616, "grad_norm": 0.31488149456792, "learning_rate": 2.982955627926075e-07, "loss": 0.0023, "step": 14836 }, { "epoch": 3.375881683731513, "grad_norm": 0.24773479446003357, "learning_rate": 2.9821939411852787e-07, "loss": 0.0008, "step": 14837 }, { "epoch": 3.376109215017065, "grad_norm": 0.3039887420036145, "learning_rate": 2.9814323212324564e-07, "loss": 0.0016, "step": 14838 }, { "epoch": 3.3763367463026164, "grad_norm": 0.47194433185471785, "learning_rate": 2.980670768083172e-07, "loss": 0.0057, "step": 14839 }, { "epoch": 3.3765642775881686, "grad_norm": 0.2760847403437947, "learning_rate": 2.9799092817529867e-07, "loss": 0.0009, "step": 14840 }, { "epoch": 3.3767918088737203, "grad_norm": 0.41031597753265237, "learning_rate": 2.9791478622574693e-07, "loss": 0.0018, "step": 14841 }, { "epoch": 3.377019340159272, "grad_norm": 0.8464884351150707, "learning_rate": 2.978386509612177e-07, "loss": 0.0045, "step": 14842 }, { "epoch": 3.377246871444824, "grad_norm": 0.16884125435927663, "learning_rate": 2.9776252238326725e-07, "loss": 0.0011, "step": 14843 }, { "epoch": 3.3774744027303756, "grad_norm": 0.31138897276426164, "learning_rate": 2.9768640049345157e-07, "loss": 0.002, "step": 14844 }, { "epoch": 3.3777019340159273, "grad_norm": 1.4141834697304805, "learning_rate": 2.9761028529332667e-07, "loss": 0.0042, "step": 14845 }, { "epoch": 3.377929465301479, "grad_norm": 0.2246983994091843, "learning_rate": 2.9753417678444785e-07, "loss": 0.0031, "step": 14846 }, { "epoch": 3.378156996587031, "grad_norm": 0.32344171432033947, "learning_rate": 2.974580749683706e-07, "loss": 0.0021, "step": 14847 }, { "epoch": 3.3783845278725826, "grad_norm": 0.6918619906376737, "learning_rate": 2.9738197984665063e-07, "loss": 0.0009, "step": 14848 }, { "epoch": 3.3786120591581343, "grad_norm": 0.4875949976355605, "learning_rate": 2.9730589142084274e-07, "loss": 0.0019, "step": 14849 }, { "epoch": 3.378839590443686, "grad_norm": 0.6539265884734718, "learning_rate": 2.972298096925023e-07, "loss": 0.0031, "step": 14850 }, { "epoch": 3.379067121729238, "grad_norm": 0.5011974553609775, "learning_rate": 2.971537346631845e-07, "loss": 0.0045, "step": 14851 }, { "epoch": 3.3792946530147896, "grad_norm": 1.2875201082610956, "learning_rate": 2.970776663344437e-07, "loss": 0.0064, "step": 14852 }, { "epoch": 3.3795221843003413, "grad_norm": 1.0909042730162455, "learning_rate": 2.9700160470783507e-07, "loss": 0.0099, "step": 14853 }, { "epoch": 3.379749715585893, "grad_norm": 1.047697155068411, "learning_rate": 2.969255497849127e-07, "loss": 0.0088, "step": 14854 }, { "epoch": 3.379977246871445, "grad_norm": 0.9405262460389096, "learning_rate": 2.968495015672315e-07, "loss": 0.0059, "step": 14855 }, { "epoch": 3.3802047781569966, "grad_norm": 2.070363379383587, "learning_rate": 2.967734600563453e-07, "loss": 0.029, "step": 14856 }, { "epoch": 3.3804323094425484, "grad_norm": 1.0306945261991918, "learning_rate": 2.966974252538084e-07, "loss": 0.0059, "step": 14857 }, { "epoch": 3.3806598407281, "grad_norm": 0.3720775329433418, "learning_rate": 2.966213971611752e-07, "loss": 0.0019, "step": 14858 }, { "epoch": 3.380887372013652, "grad_norm": 0.05332967783908869, "learning_rate": 2.9654537577999897e-07, "loss": 0.0002, "step": 14859 }, { "epoch": 3.3811149032992036, "grad_norm": 0.7181007925655054, "learning_rate": 2.964693611118339e-07, "loss": 0.004, "step": 14860 }, { "epoch": 3.3813424345847554, "grad_norm": 1.2094132057354643, "learning_rate": 2.9639335315823323e-07, "loss": 0.0092, "step": 14861 }, { "epoch": 3.381569965870307, "grad_norm": 0.25212443628838627, "learning_rate": 2.963173519207508e-07, "loss": 0.0019, "step": 14862 }, { "epoch": 3.381797497155859, "grad_norm": 0.12532285874734272, "learning_rate": 2.9624135740093956e-07, "loss": 0.0005, "step": 14863 }, { "epoch": 3.3820250284414106, "grad_norm": 0.1501265248585643, "learning_rate": 2.961653696003529e-07, "loss": 0.0012, "step": 14864 }, { "epoch": 3.3822525597269624, "grad_norm": 0.1857104194479308, "learning_rate": 2.9608938852054405e-07, "loss": 0.0007, "step": 14865 }, { "epoch": 3.382480091012514, "grad_norm": 0.05414116256966545, "learning_rate": 2.9601341416306545e-07, "loss": 0.0002, "step": 14866 }, { "epoch": 3.382707622298066, "grad_norm": 0.25091789941145703, "learning_rate": 2.9593744652947044e-07, "loss": 0.0007, "step": 14867 }, { "epoch": 3.3829351535836176, "grad_norm": 0.3067440755911079, "learning_rate": 2.958614856213111e-07, "loss": 0.0009, "step": 14868 }, { "epoch": 3.3831626848691694, "grad_norm": 1.6702324803819648, "learning_rate": 2.9578553144014047e-07, "loss": 0.0059, "step": 14869 }, { "epoch": 3.383390216154721, "grad_norm": 0.29766012142378095, "learning_rate": 2.957095839875104e-07, "loss": 0.0011, "step": 14870 }, { "epoch": 3.383617747440273, "grad_norm": 0.6590188443372222, "learning_rate": 2.956336432649734e-07, "loss": 0.0049, "step": 14871 }, { "epoch": 3.3838452787258246, "grad_norm": 0.7958355586009561, "learning_rate": 2.955577092740817e-07, "loss": 0.0025, "step": 14872 }, { "epoch": 3.3840728100113764, "grad_norm": 0.5052715846685174, "learning_rate": 2.954817820163869e-07, "loss": 0.0072, "step": 14873 }, { "epoch": 3.384300341296928, "grad_norm": 0.21294044753093194, "learning_rate": 2.9540586149344126e-07, "loss": 0.001, "step": 14874 }, { "epoch": 3.3845278725824803, "grad_norm": 0.9112082859023934, "learning_rate": 2.9532994770679585e-07, "loss": 0.0068, "step": 14875 }, { "epoch": 3.3847554038680316, "grad_norm": 0.4896665929369818, "learning_rate": 2.952540406580026e-07, "loss": 0.0029, "step": 14876 }, { "epoch": 3.384982935153584, "grad_norm": 0.2851632123281949, "learning_rate": 2.951781403486131e-07, "loss": 0.0014, "step": 14877 }, { "epoch": 3.385210466439135, "grad_norm": 0.4942291126305877, "learning_rate": 2.951022467801781e-07, "loss": 0.0043, "step": 14878 }, { "epoch": 3.3854379977246873, "grad_norm": 0.5388514806066088, "learning_rate": 2.9502635995424926e-07, "loss": 0.0027, "step": 14879 }, { "epoch": 3.385665529010239, "grad_norm": 0.6351734456325785, "learning_rate": 2.949504798723771e-07, "loss": 0.0019, "step": 14880 }, { "epoch": 3.385893060295791, "grad_norm": 0.39459493245131777, "learning_rate": 2.9487460653611285e-07, "loss": 0.002, "step": 14881 }, { "epoch": 3.3861205915813426, "grad_norm": 1.332922910032315, "learning_rate": 2.947987399470068e-07, "loss": 0.0094, "step": 14882 }, { "epoch": 3.3863481228668944, "grad_norm": 0.18380092312672183, "learning_rate": 2.947228801066098e-07, "loss": 0.0007, "step": 14883 }, { "epoch": 3.386575654152446, "grad_norm": 0.4039574552145477, "learning_rate": 2.9464702701647254e-07, "loss": 0.0048, "step": 14884 }, { "epoch": 3.386803185437998, "grad_norm": 0.07776903895483772, "learning_rate": 2.9457118067814476e-07, "loss": 0.0003, "step": 14885 }, { "epoch": 3.3870307167235496, "grad_norm": 0.789885993703824, "learning_rate": 2.944953410931771e-07, "loss": 0.0054, "step": 14886 }, { "epoch": 3.3872582480091014, "grad_norm": 0.2800290103395284, "learning_rate": 2.9441950826311917e-07, "loss": 0.0027, "step": 14887 }, { "epoch": 3.387485779294653, "grad_norm": 1.1275079179642253, "learning_rate": 2.9434368218952134e-07, "loss": 0.0072, "step": 14888 }, { "epoch": 3.387713310580205, "grad_norm": 0.3835347453907643, "learning_rate": 2.9426786287393273e-07, "loss": 0.0035, "step": 14889 }, { "epoch": 3.3879408418657566, "grad_norm": 0.49350135023756275, "learning_rate": 2.941920503179034e-07, "loss": 0.0036, "step": 14890 }, { "epoch": 3.3881683731513084, "grad_norm": 0.5588885168924206, "learning_rate": 2.9411624452298294e-07, "loss": 0.0026, "step": 14891 }, { "epoch": 3.38839590443686, "grad_norm": 0.5795402072462155, "learning_rate": 2.9404044549072015e-07, "loss": 0.0052, "step": 14892 }, { "epoch": 3.388623435722412, "grad_norm": 0.15639420687232067, "learning_rate": 2.9396465322266486e-07, "loss": 0.001, "step": 14893 }, { "epoch": 3.3888509670079636, "grad_norm": 0.8702095815918617, "learning_rate": 2.9388886772036554e-07, "loss": 0.0041, "step": 14894 }, { "epoch": 3.3890784982935154, "grad_norm": 0.7049550280408233, "learning_rate": 2.938130889853715e-07, "loss": 0.0028, "step": 14895 }, { "epoch": 3.389306029579067, "grad_norm": 0.5992568711841696, "learning_rate": 2.9373731701923124e-07, "loss": 0.0086, "step": 14896 }, { "epoch": 3.389533560864619, "grad_norm": 0.3128639547761124, "learning_rate": 2.936615518234935e-07, "loss": 0.0027, "step": 14897 }, { "epoch": 3.3897610921501706, "grad_norm": 0.4131808799017607, "learning_rate": 2.935857933997071e-07, "loss": 0.0038, "step": 14898 }, { "epoch": 3.3899886234357224, "grad_norm": 1.3965184710387524, "learning_rate": 2.935100417494199e-07, "loss": 0.0204, "step": 14899 }, { "epoch": 3.390216154721274, "grad_norm": 0.1840486516360665, "learning_rate": 2.9343429687418053e-07, "loss": 0.0009, "step": 14900 }, { "epoch": 3.390443686006826, "grad_norm": 0.34909836165787766, "learning_rate": 2.9335855877553695e-07, "loss": 0.0018, "step": 14901 }, { "epoch": 3.3906712172923776, "grad_norm": 0.27105264595797446, "learning_rate": 2.932828274550367e-07, "loss": 0.0017, "step": 14902 }, { "epoch": 3.3908987485779294, "grad_norm": 0.5092225021897222, "learning_rate": 2.932071029142281e-07, "loss": 0.0056, "step": 14903 }, { "epoch": 3.391126279863481, "grad_norm": 0.43700441696877457, "learning_rate": 2.931313851546586e-07, "loss": 0.0043, "step": 14904 }, { "epoch": 3.391353811149033, "grad_norm": 0.6667775572036557, "learning_rate": 2.9305567417787605e-07, "loss": 0.0098, "step": 14905 }, { "epoch": 3.3915813424345846, "grad_norm": 1.0810726413335003, "learning_rate": 2.9297996998542734e-07, "loss": 0.0082, "step": 14906 }, { "epoch": 3.3918088737201364, "grad_norm": 0.29915687270160557, "learning_rate": 2.929042725788602e-07, "loss": 0.0034, "step": 14907 }, { "epoch": 3.392036405005688, "grad_norm": 0.6022391647465698, "learning_rate": 2.928285819597215e-07, "loss": 0.0039, "step": 14908 }, { "epoch": 3.39226393629124, "grad_norm": 1.1752508474334709, "learning_rate": 2.9275289812955814e-07, "loss": 0.0141, "step": 14909 }, { "epoch": 3.3924914675767917, "grad_norm": 0.5894276033086081, "learning_rate": 2.9267722108991697e-07, "loss": 0.0026, "step": 14910 }, { "epoch": 3.3927189988623434, "grad_norm": 0.6762351918269361, "learning_rate": 2.926015508423447e-07, "loss": 0.0045, "step": 14911 }, { "epoch": 3.392946530147895, "grad_norm": 0.8876933177219656, "learning_rate": 2.9252588738838837e-07, "loss": 0.0041, "step": 14912 }, { "epoch": 3.393174061433447, "grad_norm": 0.09211323760884785, "learning_rate": 2.9245023072959367e-07, "loss": 0.0006, "step": 14913 }, { "epoch": 3.393401592718999, "grad_norm": 0.1620894065806776, "learning_rate": 2.9237458086750744e-07, "loss": 0.0005, "step": 14914 }, { "epoch": 3.3936291240045504, "grad_norm": 0.7378159258795475, "learning_rate": 2.9229893780367566e-07, "loss": 0.001, "step": 14915 }, { "epoch": 3.3938566552901026, "grad_norm": 0.2609429869551764, "learning_rate": 2.9222330153964396e-07, "loss": 0.005, "step": 14916 }, { "epoch": 3.394084186575654, "grad_norm": 0.20077606862872355, "learning_rate": 2.9214767207695856e-07, "loss": 0.0006, "step": 14917 }, { "epoch": 3.394311717861206, "grad_norm": 1.3172994212149285, "learning_rate": 2.920720494171652e-07, "loss": 0.0174, "step": 14918 }, { "epoch": 3.394539249146758, "grad_norm": 0.38714242623609746, "learning_rate": 2.919964335618097e-07, "loss": 0.005, "step": 14919 }, { "epoch": 3.3947667804323096, "grad_norm": 0.47070616132631754, "learning_rate": 2.919208245124371e-07, "loss": 0.0035, "step": 14920 }, { "epoch": 3.3949943117178614, "grad_norm": 0.35234100866221163, "learning_rate": 2.9184522227059266e-07, "loss": 0.0009, "step": 14921 }, { "epoch": 3.395221843003413, "grad_norm": 0.06272659453129034, "learning_rate": 2.91769626837822e-07, "loss": 0.0002, "step": 14922 }, { "epoch": 3.395449374288965, "grad_norm": 0.6087352312523102, "learning_rate": 2.916940382156696e-07, "loss": 0.0043, "step": 14923 }, { "epoch": 3.3956769055745166, "grad_norm": 0.20020613402565618, "learning_rate": 2.9161845640568063e-07, "loss": 0.001, "step": 14924 }, { "epoch": 3.3959044368600684, "grad_norm": 0.6535136225173411, "learning_rate": 2.915428814093999e-07, "loss": 0.0025, "step": 14925 }, { "epoch": 3.39613196814562, "grad_norm": 0.10797873963634484, "learning_rate": 2.914673132283722e-07, "loss": 0.0005, "step": 14926 }, { "epoch": 3.396359499431172, "grad_norm": 0.2776277326898101, "learning_rate": 2.913917518641418e-07, "loss": 0.0017, "step": 14927 }, { "epoch": 3.3965870307167236, "grad_norm": 0.42180594778762803, "learning_rate": 2.9131619731825274e-07, "loss": 0.0027, "step": 14928 }, { "epoch": 3.3968145620022754, "grad_norm": 0.44331486053964153, "learning_rate": 2.9124064959224976e-07, "loss": 0.0038, "step": 14929 }, { "epoch": 3.397042093287827, "grad_norm": 0.6319797088003573, "learning_rate": 2.911651086876764e-07, "loss": 0.0105, "step": 14930 }, { "epoch": 3.397269624573379, "grad_norm": 0.03879080288097349, "learning_rate": 2.9108957460607683e-07, "loss": 0.0002, "step": 14931 }, { "epoch": 3.3974971558589306, "grad_norm": 0.6345742614743396, "learning_rate": 2.910140473489948e-07, "loss": 0.0024, "step": 14932 }, { "epoch": 3.3977246871444824, "grad_norm": 0.9123272659544914, "learning_rate": 2.909385269179742e-07, "loss": 0.0067, "step": 14933 }, { "epoch": 3.397952218430034, "grad_norm": 0.5796232614983045, "learning_rate": 2.908630133145584e-07, "loss": 0.0059, "step": 14934 }, { "epoch": 3.398179749715586, "grad_norm": 0.3761352116762306, "learning_rate": 2.9078750654029037e-07, "loss": 0.0035, "step": 14935 }, { "epoch": 3.3984072810011376, "grad_norm": 0.361960823655268, "learning_rate": 2.9071200659671386e-07, "loss": 0.0023, "step": 14936 }, { "epoch": 3.3986348122866894, "grad_norm": 0.8618596897358021, "learning_rate": 2.906365134853713e-07, "loss": 0.0082, "step": 14937 }, { "epoch": 3.398862343572241, "grad_norm": 0.4345435802749013, "learning_rate": 2.905610272078064e-07, "loss": 0.0038, "step": 14938 }, { "epoch": 3.399089874857793, "grad_norm": 1.0754292473920326, "learning_rate": 2.9048554776556166e-07, "loss": 0.0069, "step": 14939 }, { "epoch": 3.3993174061433447, "grad_norm": 0.5235359036651026, "learning_rate": 2.9041007516017946e-07, "loss": 0.0019, "step": 14940 }, { "epoch": 3.3995449374288964, "grad_norm": 0.49280304795275326, "learning_rate": 2.9033460939320283e-07, "loss": 0.0038, "step": 14941 }, { "epoch": 3.399772468714448, "grad_norm": 0.3898289998572331, "learning_rate": 2.902591504661736e-07, "loss": 0.0022, "step": 14942 }, { "epoch": 3.4, "grad_norm": 0.5206848813356955, "learning_rate": 2.901836983806343e-07, "loss": 0.0042, "step": 14943 }, { "epoch": 3.4002275312855517, "grad_norm": 0.44545950389351285, "learning_rate": 2.90108253138127e-07, "loss": 0.0029, "step": 14944 }, { "epoch": 3.4004550625711034, "grad_norm": 0.8817112850831297, "learning_rate": 2.9003281474019393e-07, "loss": 0.0067, "step": 14945 }, { "epoch": 3.400682593856655, "grad_norm": 0.797473231463469, "learning_rate": 2.8995738318837665e-07, "loss": 0.0059, "step": 14946 }, { "epoch": 3.400910125142207, "grad_norm": 0.6177234224957123, "learning_rate": 2.898819584842166e-07, "loss": 0.0026, "step": 14947 }, { "epoch": 3.4011376564277587, "grad_norm": 0.308818369429757, "learning_rate": 2.898065406292559e-07, "loss": 0.0017, "step": 14948 }, { "epoch": 3.4013651877133104, "grad_norm": 0.5577990531948294, "learning_rate": 2.8973112962503525e-07, "loss": 0.0049, "step": 14949 }, { "epoch": 3.401592718998862, "grad_norm": 0.44484007470396536, "learning_rate": 2.896557254730964e-07, "loss": 0.0032, "step": 14950 }, { "epoch": 3.401820250284414, "grad_norm": 0.3483341148552075, "learning_rate": 2.895803281749804e-07, "loss": 0.0033, "step": 14951 }, { "epoch": 3.4020477815699657, "grad_norm": 1.5640354473145457, "learning_rate": 2.895049377322284e-07, "loss": 0.0172, "step": 14952 }, { "epoch": 3.402275312855518, "grad_norm": 0.1470981172414834, "learning_rate": 2.8942955414638094e-07, "loss": 0.0009, "step": 14953 }, { "epoch": 3.402502844141069, "grad_norm": 0.2521530788192634, "learning_rate": 2.893541774189787e-07, "loss": 0.0012, "step": 14954 }, { "epoch": 3.4027303754266214, "grad_norm": 1.268579157229921, "learning_rate": 2.8927880755156257e-07, "loss": 0.0067, "step": 14955 }, { "epoch": 3.4029579067121727, "grad_norm": 0.9811763678176044, "learning_rate": 2.8920344454567257e-07, "loss": 0.0064, "step": 14956 }, { "epoch": 3.403185437997725, "grad_norm": 0.6123248863704933, "learning_rate": 2.8912808840284913e-07, "loss": 0.0022, "step": 14957 }, { "epoch": 3.4034129692832766, "grad_norm": 0.07604972376835623, "learning_rate": 2.890527391246327e-07, "loss": 0.0004, "step": 14958 }, { "epoch": 3.4036405005688284, "grad_norm": 0.40130340440118467, "learning_rate": 2.8897739671256283e-07, "loss": 0.0013, "step": 14959 }, { "epoch": 3.40386803185438, "grad_norm": 0.33045013040297516, "learning_rate": 2.889020611681798e-07, "loss": 0.0069, "step": 14960 }, { "epoch": 3.404095563139932, "grad_norm": 0.653298391715356, "learning_rate": 2.888267324930228e-07, "loss": 0.0063, "step": 14961 }, { "epoch": 3.4043230944254836, "grad_norm": 0.2930178163546423, "learning_rate": 2.8875141068863193e-07, "loss": 0.0006, "step": 14962 }, { "epoch": 3.4045506257110354, "grad_norm": 2.377366234613021, "learning_rate": 2.8867609575654625e-07, "loss": 0.064, "step": 14963 }, { "epoch": 3.404778156996587, "grad_norm": 0.31145950998563293, "learning_rate": 2.8860078769830526e-07, "loss": 0.0006, "step": 14964 }, { "epoch": 3.405005688282139, "grad_norm": 0.95041265410482, "learning_rate": 2.885254865154483e-07, "loss": 0.0117, "step": 14965 }, { "epoch": 3.4052332195676907, "grad_norm": 29.522083152717, "learning_rate": 2.8845019220951384e-07, "loss": 0.299, "step": 14966 }, { "epoch": 3.4054607508532424, "grad_norm": 0.3397923428718924, "learning_rate": 2.8837490478204147e-07, "loss": 0.003, "step": 14967 }, { "epoch": 3.405688282138794, "grad_norm": 1.1906702512458542, "learning_rate": 2.8829962423456917e-07, "loss": 0.0116, "step": 14968 }, { "epoch": 3.405915813424346, "grad_norm": 0.7914439233044608, "learning_rate": 2.882243505686362e-07, "loss": 0.0134, "step": 14969 }, { "epoch": 3.4061433447098977, "grad_norm": 0.2589140836600221, "learning_rate": 2.8814908378578056e-07, "loss": 0.0012, "step": 14970 }, { "epoch": 3.4063708759954494, "grad_norm": 0.29850114872758166, "learning_rate": 2.8807382388754067e-07, "loss": 0.0029, "step": 14971 }, { "epoch": 3.406598407281001, "grad_norm": 1.9456756750949453, "learning_rate": 2.87998570875455e-07, "loss": 0.0076, "step": 14972 }, { "epoch": 3.406825938566553, "grad_norm": 0.18444558015718662, "learning_rate": 2.8792332475106114e-07, "loss": 0.0012, "step": 14973 }, { "epoch": 3.4070534698521047, "grad_norm": 1.2722625968819372, "learning_rate": 2.8784808551589745e-07, "loss": 0.005, "step": 14974 }, { "epoch": 3.4072810011376564, "grad_norm": 0.5849606517509109, "learning_rate": 2.8777285317150116e-07, "loss": 0.0069, "step": 14975 }, { "epoch": 3.407508532423208, "grad_norm": 1.084060323800571, "learning_rate": 2.8769762771941037e-07, "loss": 0.0121, "step": 14976 }, { "epoch": 3.40773606370876, "grad_norm": 0.4569698118726321, "learning_rate": 2.8762240916116213e-07, "loss": 0.0065, "step": 14977 }, { "epoch": 3.4079635949943117, "grad_norm": 0.2350467895815259, "learning_rate": 2.875471974982939e-07, "loss": 0.0006, "step": 14978 }, { "epoch": 3.4081911262798634, "grad_norm": 0.7579166924434921, "learning_rate": 2.8747199273234325e-07, "loss": 0.0055, "step": 14979 }, { "epoch": 3.408418657565415, "grad_norm": 1.0402363650075495, "learning_rate": 2.8739679486484664e-07, "loss": 0.0051, "step": 14980 }, { "epoch": 3.408646188850967, "grad_norm": 0.6694927752847732, "learning_rate": 2.8732160389734157e-07, "loss": 0.0038, "step": 14981 }, { "epoch": 3.4088737201365187, "grad_norm": 0.7798658028860543, "learning_rate": 2.872464198313642e-07, "loss": 0.0059, "step": 14982 }, { "epoch": 3.4091012514220704, "grad_norm": 0.7115401471226052, "learning_rate": 2.871712426684516e-07, "loss": 0.0062, "step": 14983 }, { "epoch": 3.409328782707622, "grad_norm": 0.8686713641510782, "learning_rate": 2.8709607241013993e-07, "loss": 0.0038, "step": 14984 }, { "epoch": 3.409556313993174, "grad_norm": 0.8285752522390052, "learning_rate": 2.870209090579657e-07, "loss": 0.0066, "step": 14985 }, { "epoch": 3.4097838452787257, "grad_norm": 0.09643608840905647, "learning_rate": 2.869457526134653e-07, "loss": 0.0001, "step": 14986 }, { "epoch": 3.4100113765642774, "grad_norm": 0.7324303582941036, "learning_rate": 2.868706030781744e-07, "loss": 0.0045, "step": 14987 }, { "epoch": 3.410238907849829, "grad_norm": 0.1455082081595924, "learning_rate": 2.867954604536294e-07, "loss": 0.0005, "step": 14988 }, { "epoch": 3.410466439135381, "grad_norm": 0.519472169093649, "learning_rate": 2.867203247413657e-07, "loss": 0.0052, "step": 14989 }, { "epoch": 3.4106939704209327, "grad_norm": 0.6315997038967311, "learning_rate": 2.866451959429189e-07, "loss": 0.0038, "step": 14990 }, { "epoch": 3.4109215017064844, "grad_norm": 0.059322676191105955, "learning_rate": 2.8657007405982457e-07, "loss": 0.0004, "step": 14991 }, { "epoch": 3.4111490329920366, "grad_norm": 0.6853640329160814, "learning_rate": 2.8649495909361817e-07, "loss": 0.0024, "step": 14992 }, { "epoch": 3.411376564277588, "grad_norm": 0.3576325201424231, "learning_rate": 2.86419851045835e-07, "loss": 0.0009, "step": 14993 }, { "epoch": 3.41160409556314, "grad_norm": 0.6000328134203492, "learning_rate": 2.8634474991800984e-07, "loss": 0.0034, "step": 14994 }, { "epoch": 3.4118316268486915, "grad_norm": 0.778990038023882, "learning_rate": 2.8626965571167793e-07, "loss": 0.0076, "step": 14995 }, { "epoch": 3.4120591581342437, "grad_norm": 0.4628121385999185, "learning_rate": 2.8619456842837394e-07, "loss": 0.0018, "step": 14996 }, { "epoch": 3.4122866894197954, "grad_norm": 0.17851129789200476, "learning_rate": 2.8611948806963206e-07, "loss": 0.0017, "step": 14997 }, { "epoch": 3.412514220705347, "grad_norm": 0.17394098021735402, "learning_rate": 2.8604441463698763e-07, "loss": 0.0007, "step": 14998 }, { "epoch": 3.412741751990899, "grad_norm": 0.27592226890939536, "learning_rate": 2.859693481319744e-07, "loss": 0.0006, "step": 14999 }, { "epoch": 3.4129692832764507, "grad_norm": 0.5414232713764048, "learning_rate": 2.85894288556127e-07, "loss": 0.0044, "step": 15000 }, { "epoch": 3.4131968145620024, "grad_norm": 0.3819068503867737, "learning_rate": 2.8581923591097904e-07, "loss": 0.0012, "step": 15001 }, { "epoch": 3.413424345847554, "grad_norm": 0.6962645488339461, "learning_rate": 2.85744190198065e-07, "loss": 0.0046, "step": 15002 }, { "epoch": 3.413651877133106, "grad_norm": 0.7430901862532121, "learning_rate": 2.856691514189181e-07, "loss": 0.0092, "step": 15003 }, { "epoch": 3.4138794084186577, "grad_norm": 0.21809525769589497, "learning_rate": 2.8559411957507234e-07, "loss": 0.0005, "step": 15004 }, { "epoch": 3.4141069397042094, "grad_norm": 0.24008640216753432, "learning_rate": 2.855190946680614e-07, "loss": 0.0014, "step": 15005 }, { "epoch": 3.414334470989761, "grad_norm": 2.0020483636013298, "learning_rate": 2.854440766994182e-07, "loss": 0.0165, "step": 15006 }, { "epoch": 3.414562002275313, "grad_norm": 0.36536431140120507, "learning_rate": 2.8536906567067647e-07, "loss": 0.0027, "step": 15007 }, { "epoch": 3.4147895335608647, "grad_norm": 0.500234672561151, "learning_rate": 2.85294061583369e-07, "loss": 0.0047, "step": 15008 }, { "epoch": 3.4150170648464164, "grad_norm": 0.6705053759949247, "learning_rate": 2.8521906443902856e-07, "loss": 0.0103, "step": 15009 }, { "epoch": 3.415244596131968, "grad_norm": 0.9102469947389688, "learning_rate": 2.8514407423918816e-07, "loss": 0.0147, "step": 15010 }, { "epoch": 3.41547212741752, "grad_norm": 0.5660377792340773, "learning_rate": 2.8506909098538046e-07, "loss": 0.0044, "step": 15011 }, { "epoch": 3.4156996587030717, "grad_norm": 0.1125624168935126, "learning_rate": 2.849941146791382e-07, "loss": 0.0003, "step": 15012 }, { "epoch": 3.4159271899886234, "grad_norm": 1.3513587123048376, "learning_rate": 2.8491914532199334e-07, "loss": 0.0055, "step": 15013 }, { "epoch": 3.416154721274175, "grad_norm": 1.813517626588242, "learning_rate": 2.8484418291547853e-07, "loss": 0.0149, "step": 15014 }, { "epoch": 3.416382252559727, "grad_norm": 0.8192339038016409, "learning_rate": 2.8476922746112567e-07, "loss": 0.0053, "step": 15015 }, { "epoch": 3.4166097838452787, "grad_norm": 0.20747565278957225, "learning_rate": 2.846942789604665e-07, "loss": 0.001, "step": 15016 }, { "epoch": 3.4168373151308304, "grad_norm": 0.09582787665518298, "learning_rate": 2.8461933741503296e-07, "loss": 0.0003, "step": 15017 }, { "epoch": 3.417064846416382, "grad_norm": 0.13363593326179599, "learning_rate": 2.845444028263568e-07, "loss": 0.0006, "step": 15018 }, { "epoch": 3.417292377701934, "grad_norm": 0.9193027219390261, "learning_rate": 2.844694751959698e-07, "loss": 0.0074, "step": 15019 }, { "epoch": 3.4175199089874857, "grad_norm": 0.760292412977701, "learning_rate": 2.843945545254028e-07, "loss": 0.0029, "step": 15020 }, { "epoch": 3.4177474402730375, "grad_norm": 0.3403971952116927, "learning_rate": 2.843196408161875e-07, "loss": 0.0019, "step": 15021 }, { "epoch": 3.417974971558589, "grad_norm": 0.3053631547802761, "learning_rate": 2.8424473406985494e-07, "loss": 0.0023, "step": 15022 }, { "epoch": 3.418202502844141, "grad_norm": 0.329439163559577, "learning_rate": 2.8416983428793563e-07, "loss": 0.0016, "step": 15023 }, { "epoch": 3.4184300341296927, "grad_norm": 0.5400979894204992, "learning_rate": 2.8409494147196075e-07, "loss": 0.0038, "step": 15024 }, { "epoch": 3.4186575654152445, "grad_norm": 0.37002413026224806, "learning_rate": 2.840200556234609e-07, "loss": 0.0025, "step": 15025 }, { "epoch": 3.418885096700796, "grad_norm": 1.2788635186623625, "learning_rate": 2.839451767439669e-07, "loss": 0.0082, "step": 15026 }, { "epoch": 3.419112627986348, "grad_norm": 0.06883754479741228, "learning_rate": 2.838703048350089e-07, "loss": 0.0002, "step": 15027 }, { "epoch": 3.4193401592718997, "grad_norm": 0.9843069620186022, "learning_rate": 2.837954398981168e-07, "loss": 0.0058, "step": 15028 }, { "epoch": 3.4195676905574515, "grad_norm": 0.4967842645684991, "learning_rate": 2.8372058193482134e-07, "loss": 0.0018, "step": 15029 }, { "epoch": 3.419795221843003, "grad_norm": 0.1838240285496831, "learning_rate": 2.8364573094665185e-07, "loss": 0.0005, "step": 15030 }, { "epoch": 3.4200227531285554, "grad_norm": 0.46096549875670934, "learning_rate": 2.835708869351385e-07, "loss": 0.002, "step": 15031 }, { "epoch": 3.4202502844141067, "grad_norm": 0.2733882699665609, "learning_rate": 2.834960499018109e-07, "loss": 0.0015, "step": 15032 }, { "epoch": 3.420477815699659, "grad_norm": 0.3938436732807016, "learning_rate": 2.8342121984819895e-07, "loss": 0.0014, "step": 15033 }, { "epoch": 3.4207053469852102, "grad_norm": 0.2571616409699401, "learning_rate": 2.833463967758315e-07, "loss": 0.0007, "step": 15034 }, { "epoch": 3.4209328782707624, "grad_norm": 1.2089812107965383, "learning_rate": 2.8327158068623797e-07, "loss": 0.006, "step": 15035 }, { "epoch": 3.421160409556314, "grad_norm": 0.4373598695942766, "learning_rate": 2.831967715809476e-07, "loss": 0.0047, "step": 15036 }, { "epoch": 3.421387940841866, "grad_norm": 0.9706769188267569, "learning_rate": 2.8312196946148897e-07, "loss": 0.0063, "step": 15037 }, { "epoch": 3.4216154721274177, "grad_norm": 0.42460037099785075, "learning_rate": 2.8304717432939116e-07, "loss": 0.0012, "step": 15038 }, { "epoch": 3.4218430034129694, "grad_norm": 0.45315231247868765, "learning_rate": 2.8297238618618295e-07, "loss": 0.0016, "step": 15039 }, { "epoch": 3.422070534698521, "grad_norm": 0.3198169019155831, "learning_rate": 2.828976050333929e-07, "loss": 0.0021, "step": 15040 }, { "epoch": 3.422298065984073, "grad_norm": 0.09532025814072362, "learning_rate": 2.828228308725494e-07, "loss": 0.0003, "step": 15041 }, { "epoch": 3.4225255972696247, "grad_norm": 0.25289918632349895, "learning_rate": 2.827480637051802e-07, "loss": 0.002, "step": 15042 }, { "epoch": 3.4227531285551764, "grad_norm": 0.4312028392742241, "learning_rate": 2.8267330353281405e-07, "loss": 0.0023, "step": 15043 }, { "epoch": 3.422980659840728, "grad_norm": 0.7734635340780897, "learning_rate": 2.825985503569784e-07, "loss": 0.0079, "step": 15044 }, { "epoch": 3.42320819112628, "grad_norm": 0.6107853999748889, "learning_rate": 2.825238041792014e-07, "loss": 0.0077, "step": 15045 }, { "epoch": 3.4234357224118317, "grad_norm": 0.5630871584651279, "learning_rate": 2.8244906500101076e-07, "loss": 0.0076, "step": 15046 }, { "epoch": 3.4236632536973834, "grad_norm": 0.1045993216554125, "learning_rate": 2.8237433282393367e-07, "loss": 0.0008, "step": 15047 }, { "epoch": 3.423890784982935, "grad_norm": 0.5936116113838263, "learning_rate": 2.8229960764949797e-07, "loss": 0.0066, "step": 15048 }, { "epoch": 3.424118316268487, "grad_norm": 0.24956577881302017, "learning_rate": 2.822248894792304e-07, "loss": 0.0006, "step": 15049 }, { "epoch": 3.4243458475540387, "grad_norm": 1.3234863523137794, "learning_rate": 2.821501783146586e-07, "loss": 0.0091, "step": 15050 }, { "epoch": 3.4245733788395905, "grad_norm": 1.3588685841526644, "learning_rate": 2.8207547415730894e-07, "loss": 0.0101, "step": 15051 }, { "epoch": 3.424800910125142, "grad_norm": 1.9544797576172679, "learning_rate": 2.820007770087087e-07, "loss": 0.0237, "step": 15052 }, { "epoch": 3.425028441410694, "grad_norm": 0.2932319997392286, "learning_rate": 2.819260868703845e-07, "loss": 0.0006, "step": 15053 }, { "epoch": 3.4252559726962457, "grad_norm": 0.6844526123210493, "learning_rate": 2.818514037438626e-07, "loss": 0.0056, "step": 15054 }, { "epoch": 3.4254835039817975, "grad_norm": 0.24493588810738237, "learning_rate": 2.817767276306697e-07, "loss": 0.0005, "step": 15055 }, { "epoch": 3.425711035267349, "grad_norm": 0.9100166509544722, "learning_rate": 2.817020585323318e-07, "loss": 0.0021, "step": 15056 }, { "epoch": 3.425938566552901, "grad_norm": 0.1798867351993494, "learning_rate": 2.816273964503753e-07, "loss": 0.0007, "step": 15057 }, { "epoch": 3.4261660978384527, "grad_norm": 0.052854968452995005, "learning_rate": 2.8155274138632545e-07, "loss": 0.0002, "step": 15058 }, { "epoch": 3.4263936291240045, "grad_norm": 0.33942932560863454, "learning_rate": 2.814780933417091e-07, "loss": 0.0019, "step": 15059 }, { "epoch": 3.426621160409556, "grad_norm": 0.19659013940354847, "learning_rate": 2.814034523180514e-07, "loss": 0.0005, "step": 15060 }, { "epoch": 3.426848691695108, "grad_norm": 0.07168541564905484, "learning_rate": 2.8132881831687756e-07, "loss": 0.0002, "step": 15061 }, { "epoch": 3.4270762229806597, "grad_norm": 0.8048809644593855, "learning_rate": 2.8125419133971354e-07, "loss": 0.0012, "step": 15062 }, { "epoch": 3.4273037542662115, "grad_norm": 1.4184243675820456, "learning_rate": 2.811795713880841e-07, "loss": 0.0049, "step": 15063 }, { "epoch": 3.4275312855517632, "grad_norm": 1.1643910320388045, "learning_rate": 2.8110495846351446e-07, "loss": 0.0084, "step": 15064 }, { "epoch": 3.427758816837315, "grad_norm": 0.4803827213836409, "learning_rate": 2.8103035256753e-07, "loss": 0.0053, "step": 15065 }, { "epoch": 3.4279863481228667, "grad_norm": 0.0703764887545943, "learning_rate": 2.809557537016549e-07, "loss": 0.0002, "step": 15066 }, { "epoch": 3.4282138794084185, "grad_norm": 0.07426808267293178, "learning_rate": 2.8088116186741435e-07, "loss": 0.0003, "step": 15067 }, { "epoch": 3.4284414106939702, "grad_norm": 0.9085637763321069, "learning_rate": 2.808065770663324e-07, "loss": 0.0064, "step": 15068 }, { "epoch": 3.428668941979522, "grad_norm": 0.4303552866424408, "learning_rate": 2.8073199929993384e-07, "loss": 0.0008, "step": 15069 }, { "epoch": 3.428896473265074, "grad_norm": 1.378368717953752, "learning_rate": 2.806574285697425e-07, "loss": 0.0224, "step": 15070 }, { "epoch": 3.4291240045506255, "grad_norm": 1.1893846543577928, "learning_rate": 2.805828648772827e-07, "loss": 0.0037, "step": 15071 }, { "epoch": 3.4293515358361777, "grad_norm": 0.5225883780184007, "learning_rate": 2.8050830822407856e-07, "loss": 0.0024, "step": 15072 }, { "epoch": 3.429579067121729, "grad_norm": 0.5224267172929234, "learning_rate": 2.804337586116535e-07, "loss": 0.0088, "step": 15073 }, { "epoch": 3.429806598407281, "grad_norm": 0.37889855732842664, "learning_rate": 2.8035921604153163e-07, "loss": 0.0023, "step": 15074 }, { "epoch": 3.430034129692833, "grad_norm": 0.6332127829816002, "learning_rate": 2.8028468051523596e-07, "loss": 0.0052, "step": 15075 }, { "epoch": 3.4302616609783847, "grad_norm": 0.20242605934027705, "learning_rate": 2.802101520342903e-07, "loss": 0.0015, "step": 15076 }, { "epoch": 3.4304891922639364, "grad_norm": 0.43198772583338924, "learning_rate": 2.801356306002175e-07, "loss": 0.0032, "step": 15077 }, { "epoch": 3.430716723549488, "grad_norm": 0.6039645998010343, "learning_rate": 2.800611162145408e-07, "loss": 0.0022, "step": 15078 }, { "epoch": 3.43094425483504, "grad_norm": 0.5273049415394784, "learning_rate": 2.799866088787834e-07, "loss": 0.0034, "step": 15079 }, { "epoch": 3.4311717861205917, "grad_norm": 0.18825564442855577, "learning_rate": 2.7991210859446757e-07, "loss": 0.0004, "step": 15080 }, { "epoch": 3.4313993174061435, "grad_norm": 0.22567120115423453, "learning_rate": 2.7983761536311654e-07, "loss": 0.0006, "step": 15081 }, { "epoch": 3.431626848691695, "grad_norm": 0.4592897851868263, "learning_rate": 2.7976312918625225e-07, "loss": 0.0032, "step": 15082 }, { "epoch": 3.431854379977247, "grad_norm": 0.35407692456042483, "learning_rate": 2.796886500653975e-07, "loss": 0.0014, "step": 15083 }, { "epoch": 3.4320819112627987, "grad_norm": 0.11211686881692448, "learning_rate": 2.796141780020742e-07, "loss": 0.0004, "step": 15084 }, { "epoch": 3.4323094425483505, "grad_norm": 0.1921072009721091, "learning_rate": 2.7953971299780454e-07, "loss": 0.0011, "step": 15085 }, { "epoch": 3.432536973833902, "grad_norm": 1.4379618267186731, "learning_rate": 2.794652550541107e-07, "loss": 0.009, "step": 15086 }, { "epoch": 3.432764505119454, "grad_norm": 0.14242211642191133, "learning_rate": 2.7939080417251395e-07, "loss": 0.0004, "step": 15087 }, { "epoch": 3.4329920364050057, "grad_norm": 0.0694851432980155, "learning_rate": 2.7931636035453646e-07, "loss": 0.0002, "step": 15088 }, { "epoch": 3.4332195676905575, "grad_norm": 1.1105875310313593, "learning_rate": 2.7924192360169927e-07, "loss": 0.0136, "step": 15089 }, { "epoch": 3.4334470989761092, "grad_norm": 0.7477212156317495, "learning_rate": 2.791674939155242e-07, "loss": 0.0106, "step": 15090 }, { "epoch": 3.433674630261661, "grad_norm": 0.5597317563928156, "learning_rate": 2.790930712975318e-07, "loss": 0.001, "step": 15091 }, { "epoch": 3.4339021615472127, "grad_norm": 1.067590750468674, "learning_rate": 2.7901865574924375e-07, "loss": 0.0072, "step": 15092 }, { "epoch": 3.4341296928327645, "grad_norm": 0.49401847450872677, "learning_rate": 2.789442472721808e-07, "loss": 0.0069, "step": 15093 }, { "epoch": 3.4343572241183162, "grad_norm": 0.3453740820323069, "learning_rate": 2.788698458678635e-07, "loss": 0.0032, "step": 15094 }, { "epoch": 3.434584755403868, "grad_norm": 0.5538272496803932, "learning_rate": 2.787954515378129e-07, "loss": 0.0066, "step": 15095 }, { "epoch": 3.4348122866894197, "grad_norm": 0.6551239886595295, "learning_rate": 2.787210642835492e-07, "loss": 0.0057, "step": 15096 }, { "epoch": 3.4350398179749715, "grad_norm": 1.4552203918902398, "learning_rate": 2.786466841065925e-07, "loss": 0.005, "step": 15097 }, { "epoch": 3.4352673492605232, "grad_norm": 0.4925028749715436, "learning_rate": 2.7857231100846324e-07, "loss": 0.0019, "step": 15098 }, { "epoch": 3.435494880546075, "grad_norm": 0.45924503127189054, "learning_rate": 2.7849794499068155e-07, "loss": 0.0056, "step": 15099 }, { "epoch": 3.4357224118316267, "grad_norm": 0.864093914404415, "learning_rate": 2.784235860547675e-07, "loss": 0.0106, "step": 15100 }, { "epoch": 3.4359499431171785, "grad_norm": 0.4993652660955865, "learning_rate": 2.783492342022404e-07, "loss": 0.0052, "step": 15101 }, { "epoch": 3.4361774744027302, "grad_norm": 1.6431511376097978, "learning_rate": 2.782748894346203e-07, "loss": 0.0072, "step": 15102 }, { "epoch": 3.436405005688282, "grad_norm": 0.1333832398080592, "learning_rate": 2.7820055175342647e-07, "loss": 0.0006, "step": 15103 }, { "epoch": 3.4366325369738338, "grad_norm": 0.4116287069485271, "learning_rate": 2.78126221160178e-07, "loss": 0.0059, "step": 15104 }, { "epoch": 3.4368600682593855, "grad_norm": 0.343860446055768, "learning_rate": 2.780518976563943e-07, "loss": 0.0018, "step": 15105 }, { "epoch": 3.4370875995449373, "grad_norm": 0.24250538313877662, "learning_rate": 2.779775812435944e-07, "loss": 0.0009, "step": 15106 }, { "epoch": 3.437315130830489, "grad_norm": 0.24695406212892176, "learning_rate": 2.779032719232975e-07, "loss": 0.0012, "step": 15107 }, { "epoch": 3.4375426621160408, "grad_norm": 0.3780697504056101, "learning_rate": 2.778289696970218e-07, "loss": 0.0013, "step": 15108 }, { "epoch": 3.437770193401593, "grad_norm": 0.3167460577415568, "learning_rate": 2.7775467456628623e-07, "loss": 0.0003, "step": 15109 }, { "epoch": 3.4379977246871443, "grad_norm": 0.12949704536074394, "learning_rate": 2.776803865326093e-07, "loss": 0.0008, "step": 15110 }, { "epoch": 3.4382252559726965, "grad_norm": 0.5546677166940766, "learning_rate": 2.776061055975089e-07, "loss": 0.0062, "step": 15111 }, { "epoch": 3.4384527872582478, "grad_norm": 0.6548043563588964, "learning_rate": 2.775318317625035e-07, "loss": 0.0052, "step": 15112 }, { "epoch": 3.4386803185438, "grad_norm": 0.2521238716494059, "learning_rate": 2.774575650291111e-07, "loss": 0.0009, "step": 15113 }, { "epoch": 3.4389078498293517, "grad_norm": 1.2798804315151975, "learning_rate": 2.773833053988498e-07, "loss": 0.0024, "step": 15114 }, { "epoch": 3.4391353811149035, "grad_norm": 1.1206426290997626, "learning_rate": 2.7730905287323706e-07, "loss": 0.0104, "step": 15115 }, { "epoch": 3.439362912400455, "grad_norm": 0.47808958311989136, "learning_rate": 2.772348074537904e-07, "loss": 0.0032, "step": 15116 }, { "epoch": 3.439590443686007, "grad_norm": 0.34628468384692457, "learning_rate": 2.7716056914202755e-07, "loss": 0.0041, "step": 15117 }, { "epoch": 3.4398179749715587, "grad_norm": 0.46796316455059594, "learning_rate": 2.7708633793946537e-07, "loss": 0.0018, "step": 15118 }, { "epoch": 3.4400455062571105, "grad_norm": 0.8873530593082574, "learning_rate": 2.770121138476213e-07, "loss": 0.0059, "step": 15119 }, { "epoch": 3.4402730375426622, "grad_norm": 0.26020985400007357, "learning_rate": 2.7693789686801224e-07, "loss": 0.0009, "step": 15120 }, { "epoch": 3.440500568828214, "grad_norm": 0.6394830322369316, "learning_rate": 2.768636870021554e-07, "loss": 0.0096, "step": 15121 }, { "epoch": 3.4407281001137657, "grad_norm": 0.8392460024709847, "learning_rate": 2.7678948425156726e-07, "loss": 0.009, "step": 15122 }, { "epoch": 3.4409556313993175, "grad_norm": 0.1808338636556926, "learning_rate": 2.767152886177641e-07, "loss": 0.0011, "step": 15123 }, { "epoch": 3.4411831626848692, "grad_norm": 0.32672983649922205, "learning_rate": 2.766411001022626e-07, "loss": 0.0012, "step": 15124 }, { "epoch": 3.441410693970421, "grad_norm": 0.28698229364122485, "learning_rate": 2.7656691870657893e-07, "loss": 0.0011, "step": 15125 }, { "epoch": 3.4416382252559727, "grad_norm": 1.5905336508357273, "learning_rate": 2.764927444322296e-07, "loss": 0.0093, "step": 15126 }, { "epoch": 3.4418657565415245, "grad_norm": 0.46835925034014, "learning_rate": 2.7641857728073013e-07, "loss": 0.0113, "step": 15127 }, { "epoch": 3.4420932878270762, "grad_norm": 0.1473487648282159, "learning_rate": 2.763444172535967e-07, "loss": 0.0005, "step": 15128 }, { "epoch": 3.442320819112628, "grad_norm": 1.5222644428280687, "learning_rate": 2.762702643523449e-07, "loss": 0.0117, "step": 15129 }, { "epoch": 3.4425483503981797, "grad_norm": 0.9234770392627072, "learning_rate": 2.7619611857849e-07, "loss": 0.0142, "step": 15130 }, { "epoch": 3.4427758816837315, "grad_norm": 0.641982917223709, "learning_rate": 2.761219799335476e-07, "loss": 0.0059, "step": 15131 }, { "epoch": 3.4430034129692833, "grad_norm": 0.7412279835770336, "learning_rate": 2.76047848419033e-07, "loss": 0.0044, "step": 15132 }, { "epoch": 3.443230944254835, "grad_norm": 0.5211565604059721, "learning_rate": 2.7597372403646155e-07, "loss": 0.003, "step": 15133 }, { "epoch": 3.4434584755403868, "grad_norm": 0.5688458995117145, "learning_rate": 2.7589960678734796e-07, "loss": 0.0041, "step": 15134 }, { "epoch": 3.4436860068259385, "grad_norm": 0.7198754675631932, "learning_rate": 2.7582549667320683e-07, "loss": 0.0056, "step": 15135 }, { "epoch": 3.4439135381114903, "grad_norm": 0.5758234811127095, "learning_rate": 2.757513936955533e-07, "loss": 0.0025, "step": 15136 }, { "epoch": 3.444141069397042, "grad_norm": 0.5893085486365249, "learning_rate": 2.756772978559014e-07, "loss": 0.0049, "step": 15137 }, { "epoch": 3.4443686006825938, "grad_norm": 0.46820913984780094, "learning_rate": 2.756032091557658e-07, "loss": 0.003, "step": 15138 }, { "epoch": 3.4445961319681455, "grad_norm": 0.7698514441421431, "learning_rate": 2.7552912759666074e-07, "loss": 0.0058, "step": 15139 }, { "epoch": 3.4448236632536973, "grad_norm": 0.11062558576694033, "learning_rate": 2.754550531801005e-07, "loss": 0.0003, "step": 15140 }, { "epoch": 3.445051194539249, "grad_norm": 1.4292823911941082, "learning_rate": 2.7538098590759887e-07, "loss": 0.0087, "step": 15141 }, { "epoch": 3.4452787258248008, "grad_norm": 0.10323871900160363, "learning_rate": 2.7530692578066925e-07, "loss": 0.0004, "step": 15142 }, { "epoch": 3.4455062571103525, "grad_norm": 0.46825538312612963, "learning_rate": 2.752328728008259e-07, "loss": 0.0035, "step": 15143 }, { "epoch": 3.4457337883959043, "grad_norm": 0.37067053422568613, "learning_rate": 2.7515882696958185e-07, "loss": 0.0012, "step": 15144 }, { "epoch": 3.445961319681456, "grad_norm": 0.4618017655236481, "learning_rate": 2.7508478828845067e-07, "loss": 0.0044, "step": 15145 }, { "epoch": 3.4461888509670078, "grad_norm": 0.4118283248663299, "learning_rate": 2.7501075675894556e-07, "loss": 0.002, "step": 15146 }, { "epoch": 3.4464163822525595, "grad_norm": 1.2059437520691474, "learning_rate": 2.749367323825799e-07, "loss": 0.0062, "step": 15147 }, { "epoch": 3.4466439135381117, "grad_norm": 0.5318763489722147, "learning_rate": 2.748627151608663e-07, "loss": 0.002, "step": 15148 }, { "epoch": 3.446871444823663, "grad_norm": 1.058027575240638, "learning_rate": 2.7478870509531725e-07, "loss": 0.0088, "step": 15149 }, { "epoch": 3.4470989761092152, "grad_norm": 0.7117495412478256, "learning_rate": 2.747147021874459e-07, "loss": 0.0038, "step": 15150 }, { "epoch": 3.4473265073947665, "grad_norm": 0.44948050413883517, "learning_rate": 2.746407064387644e-07, "loss": 0.0019, "step": 15151 }, { "epoch": 3.4475540386803187, "grad_norm": 0.15637706303731866, "learning_rate": 2.7456671785078515e-07, "loss": 0.0009, "step": 15152 }, { "epoch": 3.4477815699658705, "grad_norm": 0.38672028902318395, "learning_rate": 2.7449273642502064e-07, "loss": 0.0016, "step": 15153 }, { "epoch": 3.4480091012514222, "grad_norm": 1.1663576213756834, "learning_rate": 2.744187621629825e-07, "loss": 0.0059, "step": 15154 }, { "epoch": 3.448236632536974, "grad_norm": 0.6394944366985155, "learning_rate": 2.7434479506618296e-07, "loss": 0.0037, "step": 15155 }, { "epoch": 3.4484641638225257, "grad_norm": 0.16519864585228505, "learning_rate": 2.742708351361334e-07, "loss": 0.0006, "step": 15156 }, { "epoch": 3.4486916951080775, "grad_norm": 0.1328633216578064, "learning_rate": 2.7419688237434587e-07, "loss": 0.0004, "step": 15157 }, { "epoch": 3.4489192263936292, "grad_norm": 0.27263100049494426, "learning_rate": 2.7412293678233136e-07, "loss": 0.0018, "step": 15158 }, { "epoch": 3.449146757679181, "grad_norm": 0.19863871518486526, "learning_rate": 2.740489983616014e-07, "loss": 0.0006, "step": 15159 }, { "epoch": 3.4493742889647327, "grad_norm": 0.31002072785019624, "learning_rate": 2.739750671136675e-07, "loss": 0.0009, "step": 15160 }, { "epoch": 3.4496018202502845, "grad_norm": 0.13823365100051002, "learning_rate": 2.7390114304004004e-07, "loss": 0.0008, "step": 15161 }, { "epoch": 3.4498293515358363, "grad_norm": 0.02132060973850818, "learning_rate": 2.738272261422304e-07, "loss": 0.0001, "step": 15162 }, { "epoch": 3.450056882821388, "grad_norm": 0.08111146615236327, "learning_rate": 2.73753316421749e-07, "loss": 0.0002, "step": 15163 }, { "epoch": 3.4502844141069398, "grad_norm": 1.1854711234203417, "learning_rate": 2.7367941388010666e-07, "loss": 0.0095, "step": 15164 }, { "epoch": 3.4505119453924915, "grad_norm": 0.8952150952207776, "learning_rate": 2.736055185188136e-07, "loss": 0.0049, "step": 15165 }, { "epoch": 3.4507394766780433, "grad_norm": 0.7263767136514907, "learning_rate": 2.735316303393801e-07, "loss": 0.0043, "step": 15166 }, { "epoch": 3.450967007963595, "grad_norm": 0.4970135917371101, "learning_rate": 2.734577493433166e-07, "loss": 0.0042, "step": 15167 }, { "epoch": 3.4511945392491468, "grad_norm": 0.18475365553865006, "learning_rate": 2.733838755321327e-07, "loss": 0.0007, "step": 15168 }, { "epoch": 3.4514220705346985, "grad_norm": 1.6576755657464137, "learning_rate": 2.733100089073386e-07, "loss": 0.0043, "step": 15169 }, { "epoch": 3.4516496018202503, "grad_norm": 0.43389560807214567, "learning_rate": 2.7323614947044367e-07, "loss": 0.0021, "step": 15170 }, { "epoch": 3.451877133105802, "grad_norm": 1.9536614675769088, "learning_rate": 2.7316229722295777e-07, "loss": 0.0023, "step": 15171 }, { "epoch": 3.4521046643913538, "grad_norm": 0.7303016082999729, "learning_rate": 2.7308845216639e-07, "loss": 0.0064, "step": 15172 }, { "epoch": 3.4523321956769055, "grad_norm": 0.08247340500286614, "learning_rate": 2.7301461430224977e-07, "loss": 0.0003, "step": 15173 }, { "epoch": 3.4525597269624573, "grad_norm": 0.7109306360511196, "learning_rate": 2.7294078363204634e-07, "loss": 0.0018, "step": 15174 }, { "epoch": 3.452787258248009, "grad_norm": 0.6557675475648835, "learning_rate": 2.7286696015728837e-07, "loss": 0.0059, "step": 15175 }, { "epoch": 3.453014789533561, "grad_norm": 0.5341545994867485, "learning_rate": 2.72793143879485e-07, "loss": 0.0037, "step": 15176 }, { "epoch": 3.4532423208191125, "grad_norm": 0.3794355757938408, "learning_rate": 2.727193348001446e-07, "loss": 0.0026, "step": 15177 }, { "epoch": 3.4534698521046643, "grad_norm": 0.16541516397683317, "learning_rate": 2.72645532920776e-07, "loss": 0.0007, "step": 15178 }, { "epoch": 3.453697383390216, "grad_norm": 0.6871754696282407, "learning_rate": 2.725717382428872e-07, "loss": 0.0057, "step": 15179 }, { "epoch": 3.453924914675768, "grad_norm": 1.3972643862102185, "learning_rate": 2.724979507679865e-07, "loss": 0.0158, "step": 15180 }, { "epoch": 3.4541524459613195, "grad_norm": 0.41148531162040347, "learning_rate": 2.7242417049758256e-07, "loss": 0.0015, "step": 15181 }, { "epoch": 3.4543799772468713, "grad_norm": 0.4290060383203278, "learning_rate": 2.7235039743318243e-07, "loss": 0.0024, "step": 15182 }, { "epoch": 3.454607508532423, "grad_norm": 0.1715722585335586, "learning_rate": 2.7227663157629465e-07, "loss": 0.0009, "step": 15183 }, { "epoch": 3.454835039817975, "grad_norm": 0.1912274727361876, "learning_rate": 2.7220287292842657e-07, "loss": 0.0005, "step": 15184 }, { "epoch": 3.4550625711035265, "grad_norm": 0.4592759893697386, "learning_rate": 2.721291214910851e-07, "loss": 0.0048, "step": 15185 }, { "epoch": 3.4552901023890783, "grad_norm": 1.239343263187903, "learning_rate": 2.7205537726577864e-07, "loss": 0.0102, "step": 15186 }, { "epoch": 3.4555176336746305, "grad_norm": 0.907366191797762, "learning_rate": 2.719816402540137e-07, "loss": 0.0084, "step": 15187 }, { "epoch": 3.455745164960182, "grad_norm": 0.9917915602369876, "learning_rate": 2.719079104572977e-07, "loss": 0.009, "step": 15188 }, { "epoch": 3.455972696245734, "grad_norm": 1.2746835707900637, "learning_rate": 2.718341878771371e-07, "loss": 0.0114, "step": 15189 }, { "epoch": 3.4562002275312853, "grad_norm": 0.7887282383601081, "learning_rate": 2.717604725150392e-07, "loss": 0.0089, "step": 15190 }, { "epoch": 3.4564277588168375, "grad_norm": 1.7502726530281532, "learning_rate": 2.7168676437251004e-07, "loss": 0.0035, "step": 15191 }, { "epoch": 3.4566552901023893, "grad_norm": 0.45800748032452954, "learning_rate": 2.716130634510563e-07, "loss": 0.0029, "step": 15192 }, { "epoch": 3.456882821387941, "grad_norm": 0.21798356561023155, "learning_rate": 2.715393697521847e-07, "loss": 0.0013, "step": 15193 }, { "epoch": 3.4571103526734928, "grad_norm": 1.0007912436442958, "learning_rate": 2.714656832774007e-07, "loss": 0.007, "step": 15194 }, { "epoch": 3.4573378839590445, "grad_norm": 0.2560282740041878, "learning_rate": 2.713920040282109e-07, "loss": 0.0015, "step": 15195 }, { "epoch": 3.4575654152445963, "grad_norm": 0.654892917876421, "learning_rate": 2.713183320061208e-07, "loss": 0.0013, "step": 15196 }, { "epoch": 3.457792946530148, "grad_norm": 0.3352818081878217, "learning_rate": 2.712446672126364e-07, "loss": 0.0043, "step": 15197 }, { "epoch": 3.4580204778156998, "grad_norm": 0.2845378046947495, "learning_rate": 2.71171009649263e-07, "loss": 0.0019, "step": 15198 }, { "epoch": 3.4582480091012515, "grad_norm": 0.4425151817647733, "learning_rate": 2.7109735931750605e-07, "loss": 0.0041, "step": 15199 }, { "epoch": 3.4584755403868033, "grad_norm": 0.11281588606039619, "learning_rate": 2.7102371621887123e-07, "loss": 0.0003, "step": 15200 }, { "epoch": 3.458703071672355, "grad_norm": 0.2905222087957507, "learning_rate": 2.7095008035486313e-07, "loss": 0.0011, "step": 15201 }, { "epoch": 3.4589306029579068, "grad_norm": 1.0846551868916574, "learning_rate": 2.708764517269872e-07, "loss": 0.012, "step": 15202 }, { "epoch": 3.4591581342434585, "grad_norm": 0.6548648568582272, "learning_rate": 2.7080283033674807e-07, "loss": 0.0039, "step": 15203 }, { "epoch": 3.4593856655290103, "grad_norm": 1.6266339584037737, "learning_rate": 2.7072921618565014e-07, "loss": 0.0209, "step": 15204 }, { "epoch": 3.459613196814562, "grad_norm": 0.6920066692202498, "learning_rate": 2.706556092751982e-07, "loss": 0.0037, "step": 15205 }, { "epoch": 3.459840728100114, "grad_norm": 0.6753071307409846, "learning_rate": 2.705820096068967e-07, "loss": 0.0073, "step": 15206 }, { "epoch": 3.4600682593856655, "grad_norm": 0.8377227392213179, "learning_rate": 2.7050841718225e-07, "loss": 0.0057, "step": 15207 }, { "epoch": 3.4602957906712173, "grad_norm": 0.13054682124983552, "learning_rate": 2.704348320027619e-07, "loss": 0.0006, "step": 15208 }, { "epoch": 3.460523321956769, "grad_norm": 0.291157148620576, "learning_rate": 2.7036125406993664e-07, "loss": 0.0015, "step": 15209 }, { "epoch": 3.460750853242321, "grad_norm": 0.42245764325195684, "learning_rate": 2.7028768338527784e-07, "loss": 0.0028, "step": 15210 }, { "epoch": 3.4609783845278725, "grad_norm": 0.26045086435486386, "learning_rate": 2.7021411995028906e-07, "loss": 0.0017, "step": 15211 }, { "epoch": 3.4612059158134243, "grad_norm": 0.4869334320778816, "learning_rate": 2.7014056376647375e-07, "loss": 0.0036, "step": 15212 }, { "epoch": 3.461433447098976, "grad_norm": 0.659312602533059, "learning_rate": 2.700670148353356e-07, "loss": 0.0088, "step": 15213 }, { "epoch": 3.461660978384528, "grad_norm": 1.7492826361281038, "learning_rate": 2.699934731583777e-07, "loss": 0.015, "step": 15214 }, { "epoch": 3.4618885096700796, "grad_norm": 0.6536530544307855, "learning_rate": 2.6991993873710285e-07, "loss": 0.0021, "step": 15215 }, { "epoch": 3.4621160409556313, "grad_norm": 0.42069259513251966, "learning_rate": 2.698464115730144e-07, "loss": 0.0039, "step": 15216 }, { "epoch": 3.462343572241183, "grad_norm": 0.8148958579858051, "learning_rate": 2.697728916676149e-07, "loss": 0.0074, "step": 15217 }, { "epoch": 3.462571103526735, "grad_norm": 0.5119323171797298, "learning_rate": 2.696993790224067e-07, "loss": 0.0024, "step": 15218 }, { "epoch": 3.4627986348122866, "grad_norm": 0.6546285536529314, "learning_rate": 2.696258736388924e-07, "loss": 0.0028, "step": 15219 }, { "epoch": 3.4630261660978383, "grad_norm": 1.5505241871058064, "learning_rate": 2.695523755185745e-07, "loss": 0.0169, "step": 15220 }, { "epoch": 3.46325369738339, "grad_norm": 0.4921377764354985, "learning_rate": 2.694788846629553e-07, "loss": 0.0032, "step": 15221 }, { "epoch": 3.463481228668942, "grad_norm": 0.9632605993147966, "learning_rate": 2.6940540107353656e-07, "loss": 0.0103, "step": 15222 }, { "epoch": 3.4637087599544936, "grad_norm": 0.2817546897603156, "learning_rate": 2.6933192475181997e-07, "loss": 0.0021, "step": 15223 }, { "epoch": 3.4639362912400453, "grad_norm": 0.5077348720734118, "learning_rate": 2.6925845569930767e-07, "loss": 0.0031, "step": 15224 }, { "epoch": 3.464163822525597, "grad_norm": 0.36989145565111003, "learning_rate": 2.691849939175008e-07, "loss": 0.0036, "step": 15225 }, { "epoch": 3.4643913538111493, "grad_norm": 0.661117410247879, "learning_rate": 2.6911153940790103e-07, "loss": 0.0023, "step": 15226 }, { "epoch": 3.4646188850967006, "grad_norm": 0.49133845583426666, "learning_rate": 2.6903809217200957e-07, "loss": 0.0026, "step": 15227 }, { "epoch": 3.4648464163822528, "grad_norm": 1.177095845197867, "learning_rate": 2.689646522113278e-07, "loss": 0.003, "step": 15228 }, { "epoch": 3.465073947667804, "grad_norm": 0.640349706776051, "learning_rate": 2.6889121952735657e-07, "loss": 0.0038, "step": 15229 }, { "epoch": 3.4653014789533563, "grad_norm": 0.2874778994707409, "learning_rate": 2.688177941215964e-07, "loss": 0.0005, "step": 15230 }, { "epoch": 3.465529010238908, "grad_norm": 0.33253542481154785, "learning_rate": 2.687443759955484e-07, "loss": 0.001, "step": 15231 }, { "epoch": 3.4657565415244598, "grad_norm": 0.39794371634227066, "learning_rate": 2.6867096515071267e-07, "loss": 0.001, "step": 15232 }, { "epoch": 3.4659840728100115, "grad_norm": 0.6963154177306968, "learning_rate": 2.685975615885898e-07, "loss": 0.0031, "step": 15233 }, { "epoch": 3.4662116040955633, "grad_norm": 0.257186265835131, "learning_rate": 2.6852416531068014e-07, "loss": 0.0017, "step": 15234 }, { "epoch": 3.466439135381115, "grad_norm": 1.7408933046774564, "learning_rate": 2.684507763184838e-07, "loss": 0.0034, "step": 15235 }, { "epoch": 3.466666666666667, "grad_norm": 0.8450016161260546, "learning_rate": 2.683773946135007e-07, "loss": 0.0052, "step": 15236 }, { "epoch": 3.4668941979522185, "grad_norm": 1.0035443142390659, "learning_rate": 2.6830402019723026e-07, "loss": 0.0053, "step": 15237 }, { "epoch": 3.4671217292377703, "grad_norm": 1.7528857840734946, "learning_rate": 2.6823065307117263e-07, "loss": 0.0179, "step": 15238 }, { "epoch": 3.467349260523322, "grad_norm": 0.8733256810272018, "learning_rate": 2.6815729323682683e-07, "loss": 0.0029, "step": 15239 }, { "epoch": 3.467576791808874, "grad_norm": 0.2575769809470281, "learning_rate": 2.680839406956924e-07, "loss": 0.0005, "step": 15240 }, { "epoch": 3.4678043230944255, "grad_norm": 0.34226003102331237, "learning_rate": 2.6801059544926883e-07, "loss": 0.004, "step": 15241 }, { "epoch": 3.4680318543799773, "grad_norm": 0.6969201971849638, "learning_rate": 2.679372574990546e-07, "loss": 0.0049, "step": 15242 }, { "epoch": 3.468259385665529, "grad_norm": 0.30938780676234284, "learning_rate": 2.6786392684654926e-07, "loss": 0.0016, "step": 15243 }, { "epoch": 3.468486916951081, "grad_norm": 0.25240268129815174, "learning_rate": 2.6779060349325085e-07, "loss": 0.0011, "step": 15244 }, { "epoch": 3.4687144482366326, "grad_norm": 1.410681661876961, "learning_rate": 2.677172874406583e-07, "loss": 0.0091, "step": 15245 }, { "epoch": 3.4689419795221843, "grad_norm": 0.7617637620654898, "learning_rate": 2.6764397869027013e-07, "loss": 0.0027, "step": 15246 }, { "epoch": 3.469169510807736, "grad_norm": 0.9582897453938677, "learning_rate": 2.6757067724358473e-07, "loss": 0.0067, "step": 15247 }, { "epoch": 3.469397042093288, "grad_norm": 0.04656540597834979, "learning_rate": 2.6749738310210015e-07, "loss": 0.0001, "step": 15248 }, { "epoch": 3.4696245733788396, "grad_norm": 0.3369637605951823, "learning_rate": 2.6742409626731405e-07, "loss": 0.0006, "step": 15249 }, { "epoch": 3.4698521046643913, "grad_norm": 0.27747088021464583, "learning_rate": 2.673508167407248e-07, "loss": 0.0014, "step": 15250 }, { "epoch": 3.470079635949943, "grad_norm": 0.4595825058201091, "learning_rate": 2.6727754452382967e-07, "loss": 0.0031, "step": 15251 }, { "epoch": 3.470307167235495, "grad_norm": 0.09960180664056154, "learning_rate": 2.672042796181263e-07, "loss": 0.0005, "step": 15252 }, { "epoch": 3.4705346985210466, "grad_norm": 0.6443412936054486, "learning_rate": 2.671310220251122e-07, "loss": 0.0065, "step": 15253 }, { "epoch": 3.4707622298065983, "grad_norm": 0.13229597858245074, "learning_rate": 2.6705777174628486e-07, "loss": 0.0003, "step": 15254 }, { "epoch": 3.47098976109215, "grad_norm": 0.14549978497724364, "learning_rate": 2.6698452878314113e-07, "loss": 0.0003, "step": 15255 }, { "epoch": 3.471217292377702, "grad_norm": 1.349321283135413, "learning_rate": 2.6691129313717776e-07, "loss": 0.01, "step": 15256 }, { "epoch": 3.4714448236632536, "grad_norm": 0.8157115738480342, "learning_rate": 2.668380648098919e-07, "loss": 0.0135, "step": 15257 }, { "epoch": 3.4716723549488053, "grad_norm": 0.598770221166162, "learning_rate": 2.6676484380277985e-07, "loss": 0.0055, "step": 15258 }, { "epoch": 3.471899886234357, "grad_norm": 0.3517172618063293, "learning_rate": 2.6669163011733836e-07, "loss": 0.003, "step": 15259 }, { "epoch": 3.472127417519909, "grad_norm": 0.39098778518566163, "learning_rate": 2.6661842375506395e-07, "loss": 0.0019, "step": 15260 }, { "epoch": 3.4723549488054606, "grad_norm": 0.4741021523353443, "learning_rate": 2.6654522471745237e-07, "loss": 0.0026, "step": 15261 }, { "epoch": 3.4725824800910123, "grad_norm": 0.696392766662692, "learning_rate": 2.664720330060001e-07, "loss": 0.004, "step": 15262 }, { "epoch": 3.472810011376564, "grad_norm": 2.02244150564933, "learning_rate": 2.663988486222027e-07, "loss": 0.0025, "step": 15263 }, { "epoch": 3.473037542662116, "grad_norm": 0.7968200859127745, "learning_rate": 2.6632567156755633e-07, "loss": 0.0061, "step": 15264 }, { "epoch": 3.473265073947668, "grad_norm": 0.21971670582717245, "learning_rate": 2.6625250184355605e-07, "loss": 0.0009, "step": 15265 }, { "epoch": 3.4734926052332193, "grad_norm": 0.19542395810727184, "learning_rate": 2.661793394516977e-07, "loss": 0.0011, "step": 15266 }, { "epoch": 3.4737201365187715, "grad_norm": 0.1083018152002733, "learning_rate": 2.661061843934767e-07, "loss": 0.0004, "step": 15267 }, { "epoch": 3.473947667804323, "grad_norm": 0.26234328577145427, "learning_rate": 2.6603303667038773e-07, "loss": 0.0026, "step": 15268 }, { "epoch": 3.474175199089875, "grad_norm": 0.08478313097460632, "learning_rate": 2.6595989628392637e-07, "loss": 0.0002, "step": 15269 }, { "epoch": 3.474402730375427, "grad_norm": 0.27414476403618243, "learning_rate": 2.6588676323558693e-07, "loss": 0.0019, "step": 15270 }, { "epoch": 3.4746302616609785, "grad_norm": 0.4897005837961132, "learning_rate": 2.658136375268646e-07, "loss": 0.0017, "step": 15271 }, { "epoch": 3.4748577929465303, "grad_norm": 0.4346487643683441, "learning_rate": 2.6574051915925344e-07, "loss": 0.0036, "step": 15272 }, { "epoch": 3.475085324232082, "grad_norm": 0.33241164703589465, "learning_rate": 2.6566740813424815e-07, "loss": 0.0025, "step": 15273 }, { "epoch": 3.475312855517634, "grad_norm": 0.14242437307143294, "learning_rate": 2.6559430445334313e-07, "loss": 0.0004, "step": 15274 }, { "epoch": 3.4755403868031856, "grad_norm": 0.21039739715370587, "learning_rate": 2.6552120811803213e-07, "loss": 0.001, "step": 15275 }, { "epoch": 3.4757679180887373, "grad_norm": 1.2404728614232419, "learning_rate": 2.6544811912980954e-07, "loss": 0.0106, "step": 15276 }, { "epoch": 3.475995449374289, "grad_norm": 1.084146813471423, "learning_rate": 2.653750374901686e-07, "loss": 0.0058, "step": 15277 }, { "epoch": 3.476222980659841, "grad_norm": 0.2616657956843443, "learning_rate": 2.6530196320060355e-07, "loss": 0.0019, "step": 15278 }, { "epoch": 3.4764505119453926, "grad_norm": 0.490956711224591, "learning_rate": 2.6522889626260734e-07, "loss": 0.0035, "step": 15279 }, { "epoch": 3.4766780432309443, "grad_norm": 0.4746021166714727, "learning_rate": 2.651558366776736e-07, "loss": 0.0063, "step": 15280 }, { "epoch": 3.476905574516496, "grad_norm": 0.6235832468328756, "learning_rate": 2.650827844472958e-07, "loss": 0.0048, "step": 15281 }, { "epoch": 3.477133105802048, "grad_norm": 0.2933511406056667, "learning_rate": 2.650097395729665e-07, "loss": 0.0019, "step": 15282 }, { "epoch": 3.4773606370875996, "grad_norm": 0.6161365655545592, "learning_rate": 2.64936702056179e-07, "loss": 0.0047, "step": 15283 }, { "epoch": 3.4775881683731513, "grad_norm": 0.9377522586917149, "learning_rate": 2.648636718984258e-07, "loss": 0.005, "step": 15284 }, { "epoch": 3.477815699658703, "grad_norm": 0.49172655699114326, "learning_rate": 2.647906491011997e-07, "loss": 0.0023, "step": 15285 }, { "epoch": 3.478043230944255, "grad_norm": 0.19528855637714834, "learning_rate": 2.6471763366599283e-07, "loss": 0.0005, "step": 15286 }, { "epoch": 3.4782707622298066, "grad_norm": 0.11948499118204844, "learning_rate": 2.646446255942977e-07, "loss": 0.0004, "step": 15287 }, { "epoch": 3.4784982935153583, "grad_norm": 0.15466354305414443, "learning_rate": 2.6457162488760673e-07, "loss": 0.0002, "step": 15288 }, { "epoch": 3.47872582480091, "grad_norm": 0.255789701418576, "learning_rate": 2.644986315474114e-07, "loss": 0.0008, "step": 15289 }, { "epoch": 3.478953356086462, "grad_norm": 1.3960227063248796, "learning_rate": 2.64425645575204e-07, "loss": 0.0117, "step": 15290 }, { "epoch": 3.4791808873720136, "grad_norm": 0.6603605744602985, "learning_rate": 2.643526669724761e-07, "loss": 0.0053, "step": 15291 }, { "epoch": 3.4794084186575653, "grad_norm": 0.5393818914441353, "learning_rate": 2.642796957407189e-07, "loss": 0.0021, "step": 15292 }, { "epoch": 3.479635949943117, "grad_norm": 0.7381565344633324, "learning_rate": 2.642067318814242e-07, "loss": 0.004, "step": 15293 }, { "epoch": 3.479863481228669, "grad_norm": 0.6777504069215352, "learning_rate": 2.6413377539608304e-07, "loss": 0.0047, "step": 15294 }, { "epoch": 3.4800910125142206, "grad_norm": 0.10320089292423545, "learning_rate": 2.640608262861869e-07, "loss": 0.0003, "step": 15295 }, { "epoch": 3.4803185437997723, "grad_norm": 0.6378550250702973, "learning_rate": 2.639878845532262e-07, "loss": 0.0018, "step": 15296 }, { "epoch": 3.480546075085324, "grad_norm": 0.5044884690189949, "learning_rate": 2.639149501986922e-07, "loss": 0.0035, "step": 15297 }, { "epoch": 3.480773606370876, "grad_norm": 0.6159226839475352, "learning_rate": 2.638420232240753e-07, "loss": 0.0051, "step": 15298 }, { "epoch": 3.4810011376564276, "grad_norm": 0.8491955072539791, "learning_rate": 2.637691036308658e-07, "loss": 0.0066, "step": 15299 }, { "epoch": 3.4812286689419794, "grad_norm": 0.8945004554337461, "learning_rate": 2.636961914205543e-07, "loss": 0.0045, "step": 15300 }, { "epoch": 3.481456200227531, "grad_norm": 1.0238224023232143, "learning_rate": 2.6362328659463093e-07, "loss": 0.0126, "step": 15301 }, { "epoch": 3.481683731513083, "grad_norm": 0.5209240864396859, "learning_rate": 2.63550389154586e-07, "loss": 0.0028, "step": 15302 }, { "epoch": 3.4819112627986346, "grad_norm": 0.6102841200075212, "learning_rate": 2.6347749910190887e-07, "loss": 0.0065, "step": 15303 }, { "epoch": 3.482138794084187, "grad_norm": 0.611689725609308, "learning_rate": 2.634046164380898e-07, "loss": 0.0058, "step": 15304 }, { "epoch": 3.482366325369738, "grad_norm": 0.4893939936985107, "learning_rate": 2.6333174116461813e-07, "loss": 0.0028, "step": 15305 }, { "epoch": 3.4825938566552903, "grad_norm": 0.8524805565930987, "learning_rate": 2.6325887328298283e-07, "loss": 0.0037, "step": 15306 }, { "epoch": 3.4828213879408416, "grad_norm": 0.6293071515138392, "learning_rate": 2.631860127946742e-07, "loss": 0.0066, "step": 15307 }, { "epoch": 3.483048919226394, "grad_norm": 0.854587232466734, "learning_rate": 2.631131597011806e-07, "loss": 0.0091, "step": 15308 }, { "epoch": 3.4832764505119456, "grad_norm": 0.527006099701677, "learning_rate": 2.6304031400399146e-07, "loss": 0.001, "step": 15309 }, { "epoch": 3.4835039817974973, "grad_norm": 1.077145232669864, "learning_rate": 2.629674757045954e-07, "loss": 0.013, "step": 15310 }, { "epoch": 3.483731513083049, "grad_norm": 1.174623646187956, "learning_rate": 2.628946448044808e-07, "loss": 0.0257, "step": 15311 }, { "epoch": 3.483959044368601, "grad_norm": 1.1676535442866958, "learning_rate": 2.628218213051366e-07, "loss": 0.0056, "step": 15312 }, { "epoch": 3.4841865756541526, "grad_norm": 1.0784795331354444, "learning_rate": 2.627490052080511e-07, "loss": 0.0128, "step": 15313 }, { "epoch": 3.4844141069397043, "grad_norm": 0.2689770878968667, "learning_rate": 2.626761965147126e-07, "loss": 0.0011, "step": 15314 }, { "epoch": 3.484641638225256, "grad_norm": 1.0607457058510212, "learning_rate": 2.626033952266089e-07, "loss": 0.0113, "step": 15315 }, { "epoch": 3.484869169510808, "grad_norm": 0.16653476185206117, "learning_rate": 2.625306013452284e-07, "loss": 0.0006, "step": 15316 }, { "epoch": 3.4850967007963596, "grad_norm": 0.12401666298795969, "learning_rate": 2.624578148720585e-07, "loss": 0.0005, "step": 15317 }, { "epoch": 3.4853242320819113, "grad_norm": 0.2509956480158934, "learning_rate": 2.623850358085867e-07, "loss": 0.0027, "step": 15318 }, { "epoch": 3.485551763367463, "grad_norm": 0.49152741601257716, "learning_rate": 2.623122641563007e-07, "loss": 0.0027, "step": 15319 }, { "epoch": 3.485779294653015, "grad_norm": 0.5972354822919392, "learning_rate": 2.6223949991668773e-07, "loss": 0.0032, "step": 15320 }, { "epoch": 3.4860068259385666, "grad_norm": 0.6455182843972607, "learning_rate": 2.621667430912353e-07, "loss": 0.0045, "step": 15321 }, { "epoch": 3.4862343572241183, "grad_norm": 0.8506881471277566, "learning_rate": 2.6209399368142987e-07, "loss": 0.0012, "step": 15322 }, { "epoch": 3.48646188850967, "grad_norm": 0.07476120988456339, "learning_rate": 2.620212516887588e-07, "loss": 0.0003, "step": 15323 }, { "epoch": 3.486689419795222, "grad_norm": 0.2293637074044615, "learning_rate": 2.619485171147086e-07, "loss": 0.0007, "step": 15324 }, { "epoch": 3.4869169510807736, "grad_norm": 0.9375506038787945, "learning_rate": 2.618757899607657e-07, "loss": 0.0021, "step": 15325 }, { "epoch": 3.4871444823663253, "grad_norm": 0.1566553857748347, "learning_rate": 2.6180307022841646e-07, "loss": 0.0003, "step": 15326 }, { "epoch": 3.487372013651877, "grad_norm": 0.6790249986449478, "learning_rate": 2.6173035791914735e-07, "loss": 0.0058, "step": 15327 }, { "epoch": 3.487599544937429, "grad_norm": 0.8016112145550106, "learning_rate": 2.616576530344447e-07, "loss": 0.0036, "step": 15328 }, { "epoch": 3.4878270762229806, "grad_norm": 0.4676689215879217, "learning_rate": 2.615849555757941e-07, "loss": 0.0046, "step": 15329 }, { "epoch": 3.4880546075085324, "grad_norm": 0.6462358037497287, "learning_rate": 2.615122655446813e-07, "loss": 0.0034, "step": 15330 }, { "epoch": 3.488282138794084, "grad_norm": 0.16306971456412514, "learning_rate": 2.6143958294259225e-07, "loss": 0.0005, "step": 15331 }, { "epoch": 3.488509670079636, "grad_norm": 0.31994461659097034, "learning_rate": 2.613669077710122e-07, "loss": 0.0011, "step": 15332 }, { "epoch": 3.4887372013651876, "grad_norm": 0.230662359866276, "learning_rate": 2.6129424003142646e-07, "loss": 0.0011, "step": 15333 }, { "epoch": 3.4889647326507394, "grad_norm": 0.9287568448772778, "learning_rate": 2.6122157972532036e-07, "loss": 0.0063, "step": 15334 }, { "epoch": 3.489192263936291, "grad_norm": 0.21175961970123078, "learning_rate": 2.6114892685417926e-07, "loss": 0.0013, "step": 15335 }, { "epoch": 3.489419795221843, "grad_norm": 0.5325215128951258, "learning_rate": 2.610762814194877e-07, "loss": 0.0049, "step": 15336 }, { "epoch": 3.4896473265073946, "grad_norm": 4.642773486295972, "learning_rate": 2.6100364342273016e-07, "loss": 0.0058, "step": 15337 }, { "epoch": 3.4898748577929464, "grad_norm": 0.3039574454768847, "learning_rate": 2.609310128653918e-07, "loss": 0.0008, "step": 15338 }, { "epoch": 3.490102389078498, "grad_norm": 0.2985638668405866, "learning_rate": 2.608583897489566e-07, "loss": 0.0011, "step": 15339 }, { "epoch": 3.49032992036405, "grad_norm": 0.11078815268542899, "learning_rate": 2.6078577407490896e-07, "loss": 0.0005, "step": 15340 }, { "epoch": 3.4905574516496016, "grad_norm": 0.9902809625019157, "learning_rate": 2.6071316584473304e-07, "loss": 0.003, "step": 15341 }, { "epoch": 3.4907849829351534, "grad_norm": 0.5099642095244397, "learning_rate": 2.6064056505991315e-07, "loss": 0.0016, "step": 15342 }, { "epoch": 3.4910125142207056, "grad_norm": 0.5112649558479578, "learning_rate": 2.6056797172193275e-07, "loss": 0.0077, "step": 15343 }, { "epoch": 3.491240045506257, "grad_norm": 0.7246762366665075, "learning_rate": 2.6049538583227543e-07, "loss": 0.0057, "step": 15344 }, { "epoch": 3.491467576791809, "grad_norm": 0.36778732082276955, "learning_rate": 2.6042280739242503e-07, "loss": 0.0008, "step": 15345 }, { "epoch": 3.4916951080773604, "grad_norm": 1.0280468382520214, "learning_rate": 2.603502364038645e-07, "loss": 0.0067, "step": 15346 }, { "epoch": 3.4919226393629126, "grad_norm": 0.49967974505289725, "learning_rate": 2.602776728680774e-07, "loss": 0.0041, "step": 15347 }, { "epoch": 3.4921501706484643, "grad_norm": 0.9594938141830941, "learning_rate": 2.6020511678654695e-07, "loss": 0.0078, "step": 15348 }, { "epoch": 3.492377701934016, "grad_norm": 0.42769811462772783, "learning_rate": 2.6013256816075546e-07, "loss": 0.0045, "step": 15349 }, { "epoch": 3.492605233219568, "grad_norm": 0.39845755932511995, "learning_rate": 2.600600269921864e-07, "loss": 0.0017, "step": 15350 }, { "epoch": 3.4928327645051196, "grad_norm": 0.5370863101144996, "learning_rate": 2.5998749328232174e-07, "loss": 0.0023, "step": 15351 }, { "epoch": 3.4930602957906713, "grad_norm": 0.6998980753930187, "learning_rate": 2.599149670326445e-07, "loss": 0.0023, "step": 15352 }, { "epoch": 3.493287827076223, "grad_norm": 0.4900377229737589, "learning_rate": 2.5984244824463647e-07, "loss": 0.0028, "step": 15353 }, { "epoch": 3.493515358361775, "grad_norm": 0.5181615539990417, "learning_rate": 2.597699369197801e-07, "loss": 0.0039, "step": 15354 }, { "epoch": 3.4937428896473266, "grad_norm": 0.23815108345534308, "learning_rate": 2.5969743305955746e-07, "loss": 0.0008, "step": 15355 }, { "epoch": 3.4939704209328784, "grad_norm": 0.6975778144285902, "learning_rate": 2.596249366654501e-07, "loss": 0.005, "step": 15356 }, { "epoch": 3.49419795221843, "grad_norm": 1.0243164543428627, "learning_rate": 2.595524477389401e-07, "loss": 0.0093, "step": 15357 }, { "epoch": 3.494425483503982, "grad_norm": 0.8845489237395351, "learning_rate": 2.594799662815085e-07, "loss": 0.0026, "step": 15358 }, { "epoch": 3.4946530147895336, "grad_norm": 0.3044953938345193, "learning_rate": 2.5940749229463726e-07, "loss": 0.0022, "step": 15359 }, { "epoch": 3.4948805460750854, "grad_norm": 1.4207443467895287, "learning_rate": 2.593350257798071e-07, "loss": 0.004, "step": 15360 }, { "epoch": 3.495108077360637, "grad_norm": 0.40367385647045295, "learning_rate": 2.5926256673849933e-07, "loss": 0.0043, "step": 15361 }, { "epoch": 3.495335608646189, "grad_norm": 0.40919169011838213, "learning_rate": 2.591901151721951e-07, "loss": 0.0015, "step": 15362 }, { "epoch": 3.4955631399317406, "grad_norm": 0.1822647395256875, "learning_rate": 2.591176710823747e-07, "loss": 0.0004, "step": 15363 }, { "epoch": 3.4957906712172924, "grad_norm": 0.4930973439924128, "learning_rate": 2.590452344705193e-07, "loss": 0.0098, "step": 15364 }, { "epoch": 3.496018202502844, "grad_norm": 0.38084294610252767, "learning_rate": 2.589728053381088e-07, "loss": 0.0007, "step": 15365 }, { "epoch": 3.496245733788396, "grad_norm": 0.681481696942821, "learning_rate": 2.589003836866239e-07, "loss": 0.0065, "step": 15366 }, { "epoch": 3.4964732650739476, "grad_norm": 0.8598908895428453, "learning_rate": 2.5882796951754485e-07, "loss": 0.011, "step": 15367 }, { "epoch": 3.4967007963594994, "grad_norm": 0.3032568667695609, "learning_rate": 2.5875556283235124e-07, "loss": 0.0011, "step": 15368 }, { "epoch": 3.496928327645051, "grad_norm": 0.06816957668565628, "learning_rate": 2.586831636325235e-07, "loss": 0.0004, "step": 15369 }, { "epoch": 3.497155858930603, "grad_norm": 0.6435942582205305, "learning_rate": 2.586107719195407e-07, "loss": 0.0029, "step": 15370 }, { "epoch": 3.4973833902161546, "grad_norm": 0.2276080638706146, "learning_rate": 2.5853838769488297e-07, "loss": 0.0008, "step": 15371 }, { "epoch": 3.4976109215017064, "grad_norm": 0.3590433278802152, "learning_rate": 2.5846601096002925e-07, "loss": 0.0027, "step": 15372 }, { "epoch": 3.497838452787258, "grad_norm": 0.27459835212353045, "learning_rate": 2.5839364171645895e-07, "loss": 0.0016, "step": 15373 }, { "epoch": 3.49806598407281, "grad_norm": 0.987291050165906, "learning_rate": 2.583212799656515e-07, "loss": 0.0119, "step": 15374 }, { "epoch": 3.4982935153583616, "grad_norm": 0.9686954686367137, "learning_rate": 2.5824892570908526e-07, "loss": 0.0043, "step": 15375 }, { "epoch": 3.4985210466439134, "grad_norm": 0.26729197449354514, "learning_rate": 2.5817657894823954e-07, "loss": 0.0009, "step": 15376 }, { "epoch": 3.498748577929465, "grad_norm": 2.32902712325534, "learning_rate": 2.581042396845925e-07, "loss": 0.0125, "step": 15377 }, { "epoch": 3.498976109215017, "grad_norm": 0.38879536219306443, "learning_rate": 2.5803190791962317e-07, "loss": 0.0028, "step": 15378 }, { "epoch": 3.4992036405005686, "grad_norm": 1.1030084227776449, "learning_rate": 2.579595836548093e-07, "loss": 0.0041, "step": 15379 }, { "epoch": 3.4994311717861204, "grad_norm": 0.8135986036834416, "learning_rate": 2.5788726689162926e-07, "loss": 0.0101, "step": 15380 }, { "epoch": 3.499658703071672, "grad_norm": 0.46051476062461105, "learning_rate": 2.5781495763156146e-07, "loss": 0.0046, "step": 15381 }, { "epoch": 3.4998862343572243, "grad_norm": 3.2350089586914783, "learning_rate": 2.5774265587608313e-07, "loss": 0.0346, "step": 15382 }, { "epoch": 3.5001137656427757, "grad_norm": 0.22501287282326438, "learning_rate": 2.5767036162667266e-07, "loss": 0.0008, "step": 15383 }, { "epoch": 3.500341296928328, "grad_norm": 0.30309617905647523, "learning_rate": 2.57598074884807e-07, "loss": 0.0025, "step": 15384 }, { "epoch": 3.500568828213879, "grad_norm": 0.8666714325221557, "learning_rate": 2.57525795651964e-07, "loss": 0.0025, "step": 15385 }, { "epoch": 3.5007963594994314, "grad_norm": 0.26566891863478825, "learning_rate": 2.574535239296206e-07, "loss": 0.0006, "step": 15386 }, { "epoch": 3.5010238907849827, "grad_norm": 0.8549023488906333, "learning_rate": 2.5738125971925406e-07, "loss": 0.0063, "step": 15387 }, { "epoch": 3.501251422070535, "grad_norm": 0.039984207635931086, "learning_rate": 2.5730900302234145e-07, "loss": 0.0001, "step": 15388 }, { "epoch": 3.5014789533560866, "grad_norm": 0.7716453641085621, "learning_rate": 2.5723675384035925e-07, "loss": 0.0099, "step": 15389 }, { "epoch": 3.5017064846416384, "grad_norm": 0.7159304469362151, "learning_rate": 2.571645121747844e-07, "loss": 0.0089, "step": 15390 }, { "epoch": 3.50193401592719, "grad_norm": 3.7579432817925458, "learning_rate": 2.570922780270932e-07, "loss": 0.0028, "step": 15391 }, { "epoch": 3.502161547212742, "grad_norm": 0.41727643707541257, "learning_rate": 2.570200513987622e-07, "loss": 0.0033, "step": 15392 }, { "epoch": 3.5023890784982936, "grad_norm": 0.12551883239010764, "learning_rate": 2.569478322912671e-07, "loss": 0.0003, "step": 15393 }, { "epoch": 3.5026166097838454, "grad_norm": 0.5011618614234995, "learning_rate": 2.5687562070608434e-07, "loss": 0.0049, "step": 15394 }, { "epoch": 3.502844141069397, "grad_norm": 0.1573513247986685, "learning_rate": 2.5680341664469e-07, "loss": 0.0005, "step": 15395 }, { "epoch": 3.503071672354949, "grad_norm": 1.2337051689407377, "learning_rate": 2.5673122010855916e-07, "loss": 0.0105, "step": 15396 }, { "epoch": 3.5032992036405006, "grad_norm": 0.4056665466322732, "learning_rate": 2.56659031099168e-07, "loss": 0.0015, "step": 15397 }, { "epoch": 3.5035267349260524, "grad_norm": 0.4586568247230619, "learning_rate": 2.5658684961799164e-07, "loss": 0.0047, "step": 15398 }, { "epoch": 3.503754266211604, "grad_norm": 0.64298888731243, "learning_rate": 2.5651467566650516e-07, "loss": 0.0028, "step": 15399 }, { "epoch": 3.503981797497156, "grad_norm": 1.9800510312442694, "learning_rate": 2.564425092461839e-07, "loss": 0.0141, "step": 15400 }, { "epoch": 3.5042093287827076, "grad_norm": 0.13912770072845704, "learning_rate": 2.5637035035850274e-07, "loss": 0.0006, "step": 15401 }, { "epoch": 3.5044368600682594, "grad_norm": 0.2972703286699179, "learning_rate": 2.562981990049367e-07, "loss": 0.0016, "step": 15402 }, { "epoch": 3.504664391353811, "grad_norm": 0.6244190755666323, "learning_rate": 2.5622605518695997e-07, "loss": 0.0055, "step": 15403 }, { "epoch": 3.504891922639363, "grad_norm": 3.337494029497492, "learning_rate": 2.561539189060476e-07, "loss": 0.0028, "step": 15404 }, { "epoch": 3.5051194539249146, "grad_norm": 0.11538457881824693, "learning_rate": 2.5608179016367354e-07, "loss": 0.0003, "step": 15405 }, { "epoch": 3.5053469852104664, "grad_norm": 0.726732066922599, "learning_rate": 2.5600966896131187e-07, "loss": 0.0052, "step": 15406 }, { "epoch": 3.505574516496018, "grad_norm": 1.0465978178799795, "learning_rate": 2.559375553004368e-07, "loss": 0.004, "step": 15407 }, { "epoch": 3.50580204778157, "grad_norm": 0.20781481903603644, "learning_rate": 2.5586544918252224e-07, "loss": 0.0009, "step": 15408 }, { "epoch": 3.5060295790671216, "grad_norm": 1.1926808843744992, "learning_rate": 2.5579335060904196e-07, "loss": 0.0111, "step": 15409 }, { "epoch": 3.5062571103526734, "grad_norm": 0.5582434736066456, "learning_rate": 2.5572125958146925e-07, "loss": 0.0027, "step": 15410 }, { "epoch": 3.506484641638225, "grad_norm": 0.3492776698071973, "learning_rate": 2.5564917610127795e-07, "loss": 0.0022, "step": 15411 }, { "epoch": 3.506712172923777, "grad_norm": 0.3941711996000594, "learning_rate": 2.5557710016994105e-07, "loss": 0.0031, "step": 15412 }, { "epoch": 3.5069397042093287, "grad_norm": 0.7727003632534586, "learning_rate": 2.5550503178893143e-07, "loss": 0.0098, "step": 15413 }, { "epoch": 3.5071672354948804, "grad_norm": 0.13314345932415622, "learning_rate": 2.5543297095972224e-07, "loss": 0.0005, "step": 15414 }, { "epoch": 3.507394766780432, "grad_norm": 0.18892869157098577, "learning_rate": 2.553609176837863e-07, "loss": 0.0013, "step": 15415 }, { "epoch": 3.507622298065984, "grad_norm": 0.3664243188106686, "learning_rate": 2.552888719625965e-07, "loss": 0.0015, "step": 15416 }, { "epoch": 3.507849829351536, "grad_norm": 1.0009514702674018, "learning_rate": 2.5521683379762507e-07, "loss": 0.0066, "step": 15417 }, { "epoch": 3.5080773606370874, "grad_norm": 0.08030840104158457, "learning_rate": 2.55144803190344e-07, "loss": 0.0001, "step": 15418 }, { "epoch": 3.5083048919226396, "grad_norm": 0.5456132879763431, "learning_rate": 2.5507278014222614e-07, "loss": 0.0034, "step": 15419 }, { "epoch": 3.508532423208191, "grad_norm": 0.27379789325142745, "learning_rate": 2.550007646547429e-07, "loss": 0.0034, "step": 15420 }, { "epoch": 3.508759954493743, "grad_norm": 0.14275960024944961, "learning_rate": 2.5492875672936637e-07, "loss": 0.0005, "step": 15421 }, { "epoch": 3.5089874857792944, "grad_norm": 0.1967879896593556, "learning_rate": 2.5485675636756834e-07, "loss": 0.0011, "step": 15422 }, { "epoch": 3.5092150170648466, "grad_norm": 0.8801860445491932, "learning_rate": 2.547847635708205e-07, "loss": 0.0057, "step": 15423 }, { "epoch": 3.509442548350398, "grad_norm": 0.3318279386574459, "learning_rate": 2.547127783405941e-07, "loss": 0.0032, "step": 15424 }, { "epoch": 3.50967007963595, "grad_norm": 0.047520972260823496, "learning_rate": 2.5464080067836015e-07, "loss": 0.0002, "step": 15425 }, { "epoch": 3.5098976109215014, "grad_norm": 1.2097517567016944, "learning_rate": 2.545688305855902e-07, "loss": 0.0129, "step": 15426 }, { "epoch": 3.5101251422070536, "grad_norm": 0.3395427638985972, "learning_rate": 2.5449686806375445e-07, "loss": 0.0008, "step": 15427 }, { "epoch": 3.5103526734926054, "grad_norm": 0.4250533462095035, "learning_rate": 2.544249131143247e-07, "loss": 0.0023, "step": 15428 }, { "epoch": 3.510580204778157, "grad_norm": 0.4247269958485166, "learning_rate": 2.5435296573877076e-07, "loss": 0.0012, "step": 15429 }, { "epoch": 3.510807736063709, "grad_norm": 0.7895807227288858, "learning_rate": 2.5428102593856363e-07, "loss": 0.0034, "step": 15430 }, { "epoch": 3.5110352673492606, "grad_norm": 0.8721292529006054, "learning_rate": 2.5420909371517336e-07, "loss": 0.006, "step": 15431 }, { "epoch": 3.5112627986348124, "grad_norm": 0.7249201739269038, "learning_rate": 2.5413716907007e-07, "loss": 0.0035, "step": 15432 }, { "epoch": 3.511490329920364, "grad_norm": 0.5658267670491594, "learning_rate": 2.5406525200472357e-07, "loss": 0.004, "step": 15433 }, { "epoch": 3.511717861205916, "grad_norm": 0.14463504418210316, "learning_rate": 2.5399334252060414e-07, "loss": 0.0005, "step": 15434 }, { "epoch": 3.5119453924914676, "grad_norm": 0.9783810151531211, "learning_rate": 2.5392144061918153e-07, "loss": 0.01, "step": 15435 }, { "epoch": 3.5121729237770194, "grad_norm": 0.18825872564364682, "learning_rate": 2.5384954630192506e-07, "loss": 0.0007, "step": 15436 }, { "epoch": 3.512400455062571, "grad_norm": 0.2977348223105874, "learning_rate": 2.5377765957030397e-07, "loss": 0.0004, "step": 15437 }, { "epoch": 3.512627986348123, "grad_norm": 0.14218977807495645, "learning_rate": 2.537057804257878e-07, "loss": 0.0003, "step": 15438 }, { "epoch": 3.5128555176336747, "grad_norm": 0.5403904770593356, "learning_rate": 2.536339088698453e-07, "loss": 0.0067, "step": 15439 }, { "epoch": 3.5130830489192264, "grad_norm": 0.5545309008473565, "learning_rate": 2.535620449039456e-07, "loss": 0.0034, "step": 15440 }, { "epoch": 3.513310580204778, "grad_norm": 0.21006875049048992, "learning_rate": 2.5349018852955745e-07, "loss": 0.001, "step": 15441 }, { "epoch": 3.51353811149033, "grad_norm": 0.626576997383687, "learning_rate": 2.534183397481496e-07, "loss": 0.0062, "step": 15442 }, { "epoch": 3.5137656427758817, "grad_norm": 2.6628641899301897, "learning_rate": 2.533464985611905e-07, "loss": 0.0163, "step": 15443 }, { "epoch": 3.5139931740614334, "grad_norm": 0.23382386744484973, "learning_rate": 2.53274664970148e-07, "loss": 0.0016, "step": 15444 }, { "epoch": 3.514220705346985, "grad_norm": 0.10483785513006723, "learning_rate": 2.5320283897649075e-07, "loss": 0.0004, "step": 15445 }, { "epoch": 3.514448236632537, "grad_norm": 0.5381046857252806, "learning_rate": 2.531310205816864e-07, "loss": 0.0024, "step": 15446 }, { "epoch": 3.5146757679180887, "grad_norm": 0.6191662629806232, "learning_rate": 2.530592097872029e-07, "loss": 0.0013, "step": 15447 }, { "epoch": 3.5149032992036404, "grad_norm": 0.2836729919560993, "learning_rate": 2.52987406594508e-07, "loss": 0.0017, "step": 15448 }, { "epoch": 3.515130830489192, "grad_norm": 0.2677597199421764, "learning_rate": 2.5291561100506947e-07, "loss": 0.0012, "step": 15449 }, { "epoch": 3.515358361774744, "grad_norm": 0.4950652974460796, "learning_rate": 2.5284382302035434e-07, "loss": 0.0027, "step": 15450 }, { "epoch": 3.5155858930602957, "grad_norm": 0.5556551325347796, "learning_rate": 2.5277204264182974e-07, "loss": 0.0021, "step": 15451 }, { "epoch": 3.5158134243458474, "grad_norm": 0.4191311783552675, "learning_rate": 2.5270026987096306e-07, "loss": 0.0018, "step": 15452 }, { "epoch": 3.516040955631399, "grad_norm": 0.14634896079278556, "learning_rate": 2.526285047092208e-07, "loss": 0.0011, "step": 15453 }, { "epoch": 3.516268486916951, "grad_norm": 0.7787680820089049, "learning_rate": 2.5255674715807e-07, "loss": 0.0071, "step": 15454 }, { "epoch": 3.5164960182025027, "grad_norm": 0.47368056611750914, "learning_rate": 2.524849972189773e-07, "loss": 0.0036, "step": 15455 }, { "epoch": 3.516723549488055, "grad_norm": 0.3261867707120345, "learning_rate": 2.524132548934089e-07, "loss": 0.0014, "step": 15456 }, { "epoch": 3.516951080773606, "grad_norm": 0.18705761222644235, "learning_rate": 2.5234152018283143e-07, "loss": 0.0012, "step": 15457 }, { "epoch": 3.5171786120591584, "grad_norm": 0.38028661996585145, "learning_rate": 2.522697930887105e-07, "loss": 0.0025, "step": 15458 }, { "epoch": 3.5174061433447097, "grad_norm": 0.4323246299620359, "learning_rate": 2.521980736125127e-07, "loss": 0.0057, "step": 15459 }, { "epoch": 3.517633674630262, "grad_norm": 0.10047136200939119, "learning_rate": 2.521263617557032e-07, "loss": 0.0005, "step": 15460 }, { "epoch": 3.517861205915813, "grad_norm": 0.5051720620216931, "learning_rate": 2.520546575197481e-07, "loss": 0.0021, "step": 15461 }, { "epoch": 3.5180887372013654, "grad_norm": 1.3299386184967041, "learning_rate": 2.5198296090611286e-07, "loss": 0.0115, "step": 15462 }, { "epoch": 3.5183162684869167, "grad_norm": 0.4506156440267844, "learning_rate": 2.519112719162626e-07, "loss": 0.0032, "step": 15463 }, { "epoch": 3.518543799772469, "grad_norm": 0.526576525630008, "learning_rate": 2.518395905516629e-07, "loss": 0.0045, "step": 15464 }, { "epoch": 3.51877133105802, "grad_norm": 0.9437988342637598, "learning_rate": 2.517679168137784e-07, "loss": 0.0117, "step": 15465 }, { "epoch": 3.5189988623435724, "grad_norm": 1.7338432074684988, "learning_rate": 2.516962507040742e-07, "loss": 0.0056, "step": 15466 }, { "epoch": 3.519226393629124, "grad_norm": 1.0709216146316123, "learning_rate": 2.516245922240148e-07, "loss": 0.0109, "step": 15467 }, { "epoch": 3.519453924914676, "grad_norm": 0.12291358118606532, "learning_rate": 2.5155294137506495e-07, "loss": 0.0006, "step": 15468 }, { "epoch": 3.5196814562002277, "grad_norm": 0.052935491794612995, "learning_rate": 2.5148129815868926e-07, "loss": 0.0003, "step": 15469 }, { "epoch": 3.5199089874857794, "grad_norm": 0.9758932015891303, "learning_rate": 2.514096625763516e-07, "loss": 0.0072, "step": 15470 }, { "epoch": 3.520136518771331, "grad_norm": 0.6578886643668896, "learning_rate": 2.513380346295164e-07, "loss": 0.0072, "step": 15471 }, { "epoch": 3.520364050056883, "grad_norm": 0.8026348610820674, "learning_rate": 2.512664143196472e-07, "loss": 0.0151, "step": 15472 }, { "epoch": 3.5205915813424347, "grad_norm": 0.08245744686565254, "learning_rate": 2.5119480164820833e-07, "loss": 0.0003, "step": 15473 }, { "epoch": 3.5208191126279864, "grad_norm": 0.8103764573084358, "learning_rate": 2.511231966166628e-07, "loss": 0.0019, "step": 15474 }, { "epoch": 3.521046643913538, "grad_norm": 0.9589908588848076, "learning_rate": 2.5105159922647447e-07, "loss": 0.0105, "step": 15475 }, { "epoch": 3.52127417519909, "grad_norm": 0.6457286691807909, "learning_rate": 2.5098000947910684e-07, "loss": 0.0056, "step": 15476 }, { "epoch": 3.5215017064846417, "grad_norm": 0.3125267347488885, "learning_rate": 2.509084273760226e-07, "loss": 0.0023, "step": 15477 }, { "epoch": 3.5217292377701934, "grad_norm": 1.6774999258330845, "learning_rate": 2.508368529186852e-07, "loss": 0.0106, "step": 15478 }, { "epoch": 3.521956769055745, "grad_norm": 0.07665007218406349, "learning_rate": 2.5076528610855703e-07, "loss": 0.0003, "step": 15479 }, { "epoch": 3.522184300341297, "grad_norm": 1.1262090399632199, "learning_rate": 2.5069372694710125e-07, "loss": 0.0074, "step": 15480 }, { "epoch": 3.5224118316268487, "grad_norm": 0.6469319267321486, "learning_rate": 2.5062217543578003e-07, "loss": 0.0036, "step": 15481 }, { "epoch": 3.5226393629124004, "grad_norm": 0.3083891516037085, "learning_rate": 2.505506315760559e-07, "loss": 0.0024, "step": 15482 }, { "epoch": 3.522866894197952, "grad_norm": 0.5191360868470775, "learning_rate": 2.5047909536939123e-07, "loss": 0.0053, "step": 15483 }, { "epoch": 3.523094425483504, "grad_norm": 0.6037804603408462, "learning_rate": 2.504075668172477e-07, "loss": 0.0044, "step": 15484 }, { "epoch": 3.5233219567690557, "grad_norm": 1.2467429511238795, "learning_rate": 2.503360459210878e-07, "loss": 0.0106, "step": 15485 }, { "epoch": 3.5235494880546074, "grad_norm": 0.4652977012758226, "learning_rate": 2.5026453268237265e-07, "loss": 0.0013, "step": 15486 }, { "epoch": 3.523777019340159, "grad_norm": 0.429792922242713, "learning_rate": 2.5019302710256427e-07, "loss": 0.0084, "step": 15487 }, { "epoch": 3.524004550625711, "grad_norm": 0.4390702980874194, "learning_rate": 2.501215291831241e-07, "loss": 0.003, "step": 15488 }, { "epoch": 3.5242320819112627, "grad_norm": 0.32696862623139916, "learning_rate": 2.500500389255131e-07, "loss": 0.0019, "step": 15489 }, { "epoch": 3.5244596131968144, "grad_norm": 0.780311918399249, "learning_rate": 2.4997855633119287e-07, "loss": 0.0021, "step": 15490 }, { "epoch": 3.524687144482366, "grad_norm": 2.112943535601672, "learning_rate": 2.499070814016239e-07, "loss": 0.0254, "step": 15491 }, { "epoch": 3.524914675767918, "grad_norm": 0.3224566408469174, "learning_rate": 2.4983561413826753e-07, "loss": 0.0013, "step": 15492 }, { "epoch": 3.5251422070534697, "grad_norm": 0.6242419796429848, "learning_rate": 2.4976415454258386e-07, "loss": 0.0051, "step": 15493 }, { "epoch": 3.5253697383390215, "grad_norm": 0.5339941647021503, "learning_rate": 2.4969270261603363e-07, "loss": 0.0028, "step": 15494 }, { "epoch": 3.5255972696245736, "grad_norm": 0.5003412901431155, "learning_rate": 2.496212583600774e-07, "loss": 0.0021, "step": 15495 }, { "epoch": 3.525824800910125, "grad_norm": 0.3356210265015936, "learning_rate": 2.49549821776175e-07, "loss": 0.0027, "step": 15496 }, { "epoch": 3.526052332195677, "grad_norm": 0.6151034153806519, "learning_rate": 2.4947839286578686e-07, "loss": 0.0029, "step": 15497 }, { "epoch": 3.5262798634812285, "grad_norm": 0.9009618374413745, "learning_rate": 2.4940697163037243e-07, "loss": 0.0082, "step": 15498 }, { "epoch": 3.5265073947667807, "grad_norm": 0.5745488227470995, "learning_rate": 2.4933555807139187e-07, "loss": 0.0044, "step": 15499 }, { "epoch": 3.526734926052332, "grad_norm": 0.25003624493984555, "learning_rate": 2.492641521903042e-07, "loss": 0.0025, "step": 15500 }, { "epoch": 3.526962457337884, "grad_norm": 1.3300468001915413, "learning_rate": 2.491927539885692e-07, "loss": 0.008, "step": 15501 }, { "epoch": 3.5271899886234355, "grad_norm": 0.3915610912415768, "learning_rate": 2.4912136346764627e-07, "loss": 0.0025, "step": 15502 }, { "epoch": 3.5274175199089877, "grad_norm": 0.15985308876911644, "learning_rate": 2.49049980628994e-07, "loss": 0.0005, "step": 15503 }, { "epoch": 3.527645051194539, "grad_norm": 0.9020281853994985, "learning_rate": 2.489786054740719e-07, "loss": 0.0141, "step": 15504 }, { "epoch": 3.527872582480091, "grad_norm": 0.4713802277466898, "learning_rate": 2.489072380043384e-07, "loss": 0.0039, "step": 15505 }, { "epoch": 3.528100113765643, "grad_norm": 0.5397933034022778, "learning_rate": 2.48835878221252e-07, "loss": 0.0054, "step": 15506 }, { "epoch": 3.5283276450511947, "grad_norm": 0.4800431679346867, "learning_rate": 2.487645261262713e-07, "loss": 0.0036, "step": 15507 }, { "epoch": 3.5285551763367464, "grad_norm": 0.8682549892013931, "learning_rate": 2.4869318172085467e-07, "loss": 0.0039, "step": 15508 }, { "epoch": 3.528782707622298, "grad_norm": 0.22733112644823872, "learning_rate": 2.486218450064605e-07, "loss": 0.0023, "step": 15509 }, { "epoch": 3.52901023890785, "grad_norm": 0.8955877167798854, "learning_rate": 2.4855051598454626e-07, "loss": 0.0063, "step": 15510 }, { "epoch": 3.5292377701934017, "grad_norm": 0.5386205023098983, "learning_rate": 2.484791946565702e-07, "loss": 0.0029, "step": 15511 }, { "epoch": 3.5294653014789534, "grad_norm": 0.8443056457196988, "learning_rate": 2.484078810239898e-07, "loss": 0.0027, "step": 15512 }, { "epoch": 3.529692832764505, "grad_norm": 0.7624106890091775, "learning_rate": 2.4833657508826256e-07, "loss": 0.005, "step": 15513 }, { "epoch": 3.529920364050057, "grad_norm": 0.3894896992094088, "learning_rate": 2.4826527685084576e-07, "loss": 0.003, "step": 15514 }, { "epoch": 3.5301478953356087, "grad_norm": 0.6835303435529795, "learning_rate": 2.481939863131968e-07, "loss": 0.007, "step": 15515 }, { "epoch": 3.5303754266211604, "grad_norm": 0.48455708594461655, "learning_rate": 2.481227034767729e-07, "loss": 0.0035, "step": 15516 }, { "epoch": 3.530602957906712, "grad_norm": 0.7871345631198706, "learning_rate": 2.480514283430305e-07, "loss": 0.0056, "step": 15517 }, { "epoch": 3.530830489192264, "grad_norm": 1.1035677806470043, "learning_rate": 2.479801609134267e-07, "loss": 0.0084, "step": 15518 }, { "epoch": 3.5310580204778157, "grad_norm": 0.5750406541926759, "learning_rate": 2.4790890118941805e-07, "loss": 0.0092, "step": 15519 }, { "epoch": 3.5312855517633674, "grad_norm": 1.074969972692436, "learning_rate": 2.4783764917246054e-07, "loss": 0.004, "step": 15520 }, { "epoch": 3.531513083048919, "grad_norm": 0.9989401314495124, "learning_rate": 2.4776640486401075e-07, "loss": 0.003, "step": 15521 }, { "epoch": 3.531740614334471, "grad_norm": 0.4169655067077839, "learning_rate": 2.4769516826552484e-07, "loss": 0.0049, "step": 15522 }, { "epoch": 3.5319681456200227, "grad_norm": 1.420205328368173, "learning_rate": 2.4762393937845886e-07, "loss": 0.0064, "step": 15523 }, { "epoch": 3.5321956769055745, "grad_norm": 0.19514343338723214, "learning_rate": 2.4755271820426843e-07, "loss": 0.0013, "step": 15524 }, { "epoch": 3.532423208191126, "grad_norm": 0.37156801538692463, "learning_rate": 2.4748150474440897e-07, "loss": 0.002, "step": 15525 }, { "epoch": 3.532650739476678, "grad_norm": 0.344309311451988, "learning_rate": 2.4741029900033637e-07, "loss": 0.0023, "step": 15526 }, { "epoch": 3.5328782707622297, "grad_norm": 1.2043122067501986, "learning_rate": 2.473391009735055e-07, "loss": 0.0092, "step": 15527 }, { "epoch": 3.5331058020477815, "grad_norm": 0.4346424129352224, "learning_rate": 2.472679106653718e-07, "loss": 0.0021, "step": 15528 }, { "epoch": 3.533333333333333, "grad_norm": 1.6004889365823403, "learning_rate": 2.471967280773902e-07, "loss": 0.0079, "step": 15529 }, { "epoch": 3.533560864618885, "grad_norm": 0.10927056405773208, "learning_rate": 2.4712555321101574e-07, "loss": 0.0005, "step": 15530 }, { "epoch": 3.5337883959044367, "grad_norm": 0.23614278049533882, "learning_rate": 2.47054386067703e-07, "loss": 0.0015, "step": 15531 }, { "epoch": 3.5340159271899885, "grad_norm": 0.49232662386292847, "learning_rate": 2.469832266489062e-07, "loss": 0.0038, "step": 15532 }, { "epoch": 3.5342434584755402, "grad_norm": 0.5765486359888935, "learning_rate": 2.4691207495608006e-07, "loss": 0.0069, "step": 15533 }, { "epoch": 3.5344709897610924, "grad_norm": 0.7487696802367937, "learning_rate": 2.4684093099067856e-07, "loss": 0.0041, "step": 15534 }, { "epoch": 3.5346985210466437, "grad_norm": 1.0286021597609178, "learning_rate": 2.4676979475415586e-07, "loss": 0.0098, "step": 15535 }, { "epoch": 3.534926052332196, "grad_norm": 0.8090154338092855, "learning_rate": 2.466986662479658e-07, "loss": 0.005, "step": 15536 }, { "epoch": 3.5351535836177472, "grad_norm": 1.6622279784839118, "learning_rate": 2.466275454735625e-07, "loss": 0.017, "step": 15537 }, { "epoch": 3.5353811149032994, "grad_norm": 0.3478679276849749, "learning_rate": 2.4655643243239913e-07, "loss": 0.0021, "step": 15538 }, { "epoch": 3.5356086461888507, "grad_norm": 0.47831601294663, "learning_rate": 2.46485327125929e-07, "loss": 0.0034, "step": 15539 }, { "epoch": 3.535836177474403, "grad_norm": 0.9678185086563167, "learning_rate": 2.464142295556058e-07, "loss": 0.0059, "step": 15540 }, { "epoch": 3.5360637087599542, "grad_norm": 0.26292670547108615, "learning_rate": 2.4634313972288214e-07, "loss": 0.0018, "step": 15541 }, { "epoch": 3.5362912400455064, "grad_norm": 0.27682160964042724, "learning_rate": 2.462720576292112e-07, "loss": 0.0014, "step": 15542 }, { "epoch": 3.536518771331058, "grad_norm": 0.42234202358855866, "learning_rate": 2.462009832760461e-07, "loss": 0.0032, "step": 15543 }, { "epoch": 3.53674630261661, "grad_norm": 0.37704179650259984, "learning_rate": 2.461299166648389e-07, "loss": 0.0012, "step": 15544 }, { "epoch": 3.5369738339021617, "grad_norm": 0.5181075977754376, "learning_rate": 2.4605885779704255e-07, "loss": 0.0034, "step": 15545 }, { "epoch": 3.5372013651877134, "grad_norm": 2.0737176848962164, "learning_rate": 2.459878066741089e-07, "loss": 0.0252, "step": 15546 }, { "epoch": 3.537428896473265, "grad_norm": 0.8419550856171398, "learning_rate": 2.459167632974907e-07, "loss": 0.009, "step": 15547 }, { "epoch": 3.537656427758817, "grad_norm": 1.4454899934769845, "learning_rate": 2.458457276686391e-07, "loss": 0.0131, "step": 15548 }, { "epoch": 3.5378839590443687, "grad_norm": 0.7019639163468783, "learning_rate": 2.457746997890068e-07, "loss": 0.0146, "step": 15549 }, { "epoch": 3.5381114903299204, "grad_norm": 1.8775706000551957, "learning_rate": 2.4570367966004517e-07, "loss": 0.0196, "step": 15550 }, { "epoch": 3.538339021615472, "grad_norm": 0.5441235726365402, "learning_rate": 2.4563266728320546e-07, "loss": 0.0023, "step": 15551 }, { "epoch": 3.538566552901024, "grad_norm": 0.598164973104711, "learning_rate": 2.455616626599395e-07, "loss": 0.0033, "step": 15552 }, { "epoch": 3.5387940841865757, "grad_norm": 0.3195660955021229, "learning_rate": 2.4549066579169797e-07, "loss": 0.0034, "step": 15553 }, { "epoch": 3.5390216154721275, "grad_norm": 0.36633308839536294, "learning_rate": 2.454196766799322e-07, "loss": 0.0018, "step": 15554 }, { "epoch": 3.539249146757679, "grad_norm": 0.47006261575638325, "learning_rate": 2.453486953260931e-07, "loss": 0.0052, "step": 15555 }, { "epoch": 3.539476678043231, "grad_norm": 0.6007516941259741, "learning_rate": 2.452777217316316e-07, "loss": 0.0073, "step": 15556 }, { "epoch": 3.5397042093287827, "grad_norm": 0.35608142749095795, "learning_rate": 2.45206755897998e-07, "loss": 0.0048, "step": 15557 }, { "epoch": 3.5399317406143345, "grad_norm": 0.9052677935935244, "learning_rate": 2.4513579782664257e-07, "loss": 0.0086, "step": 15558 }, { "epoch": 3.540159271899886, "grad_norm": 0.10747971339051582, "learning_rate": 2.4506484751901595e-07, "loss": 0.0004, "step": 15559 }, { "epoch": 3.540386803185438, "grad_norm": 0.46745146843402396, "learning_rate": 2.4499390497656784e-07, "loss": 0.0019, "step": 15560 }, { "epoch": 3.5406143344709897, "grad_norm": 0.85979218550729, "learning_rate": 2.4492297020074826e-07, "loss": 0.0062, "step": 15561 }, { "epoch": 3.5408418657565415, "grad_norm": 2.257180430833416, "learning_rate": 2.448520431930074e-07, "loss": 0.0105, "step": 15562 }, { "epoch": 3.5410693970420932, "grad_norm": 0.4307859086637115, "learning_rate": 2.4478112395479433e-07, "loss": 0.0012, "step": 15563 }, { "epoch": 3.541296928327645, "grad_norm": 0.8779780583891088, "learning_rate": 2.4471021248755894e-07, "loss": 0.0084, "step": 15564 }, { "epoch": 3.5415244596131967, "grad_norm": 0.34207233955181454, "learning_rate": 2.446393087927502e-07, "loss": 0.0023, "step": 15565 }, { "epoch": 3.5417519908987485, "grad_norm": 0.5976374024114226, "learning_rate": 2.4456841287181754e-07, "loss": 0.0031, "step": 15566 }, { "epoch": 3.5419795221843002, "grad_norm": 0.3026115085762255, "learning_rate": 2.444975247262096e-07, "loss": 0.0014, "step": 15567 }, { "epoch": 3.542207053469852, "grad_norm": 0.3249583860290832, "learning_rate": 2.4442664435737535e-07, "loss": 0.0017, "step": 15568 }, { "epoch": 3.5424345847554037, "grad_norm": 0.8057065860739001, "learning_rate": 2.443557717667638e-07, "loss": 0.0045, "step": 15569 }, { "epoch": 3.5426621160409555, "grad_norm": 0.7510306287162268, "learning_rate": 2.4428490695582286e-07, "loss": 0.0069, "step": 15570 }, { "epoch": 3.5428896473265072, "grad_norm": 0.1906333864214979, "learning_rate": 2.442140499260014e-07, "loss": 0.0005, "step": 15571 }, { "epoch": 3.543117178612059, "grad_norm": 0.4181980164609579, "learning_rate": 2.441432006787473e-07, "loss": 0.0026, "step": 15572 }, { "epoch": 3.543344709897611, "grad_norm": 0.9399219249562449, "learning_rate": 2.440723592155087e-07, "loss": 0.009, "step": 15573 }, { "epoch": 3.5435722411831625, "grad_norm": 0.6649509591818505, "learning_rate": 2.440015255377333e-07, "loss": 0.0096, "step": 15574 }, { "epoch": 3.5437997724687147, "grad_norm": 0.04851086145280556, "learning_rate": 2.43930699646869e-07, "loss": 0.0002, "step": 15575 }, { "epoch": 3.544027303754266, "grad_norm": 0.35793204977291365, "learning_rate": 2.4385988154436346e-07, "loss": 0.0041, "step": 15576 }, { "epoch": 3.544254835039818, "grad_norm": 0.8476011010203872, "learning_rate": 2.4378907123166373e-07, "loss": 0.0089, "step": 15577 }, { "epoch": 3.5444823663253695, "grad_norm": 2.139880327004407, "learning_rate": 2.437182687102174e-07, "loss": 0.0059, "step": 15578 }, { "epoch": 3.5447098976109217, "grad_norm": 0.9143464253450003, "learning_rate": 2.436474739814712e-07, "loss": 0.0054, "step": 15579 }, { "epoch": 3.544937428896473, "grad_norm": 0.23944544392855902, "learning_rate": 2.435766870468725e-07, "loss": 0.0011, "step": 15580 }, { "epoch": 3.545164960182025, "grad_norm": 0.2557059845582268, "learning_rate": 2.435059079078674e-07, "loss": 0.0017, "step": 15581 }, { "epoch": 3.545392491467577, "grad_norm": 0.428705709759836, "learning_rate": 2.4343513656590303e-07, "loss": 0.0023, "step": 15582 }, { "epoch": 3.5456200227531287, "grad_norm": 1.0172799402961161, "learning_rate": 2.4336437302242574e-07, "loss": 0.0113, "step": 15583 }, { "epoch": 3.5458475540386805, "grad_norm": 0.5833690900800672, "learning_rate": 2.432936172788816e-07, "loss": 0.0062, "step": 15584 }, { "epoch": 3.546075085324232, "grad_norm": 1.4169914663109602, "learning_rate": 2.432228693367171e-07, "loss": 0.0082, "step": 15585 }, { "epoch": 3.546302616609784, "grad_norm": 0.5386813530492699, "learning_rate": 2.431521291973777e-07, "loss": 0.0021, "step": 15586 }, { "epoch": 3.5465301478953357, "grad_norm": 0.9311016180630076, "learning_rate": 2.430813968623096e-07, "loss": 0.0074, "step": 15587 }, { "epoch": 3.5467576791808875, "grad_norm": 0.700097086789804, "learning_rate": 2.430106723329582e-07, "loss": 0.0094, "step": 15588 }, { "epoch": 3.546985210466439, "grad_norm": 0.5394604706155116, "learning_rate": 2.42939955610769e-07, "loss": 0.0045, "step": 15589 }, { "epoch": 3.547212741751991, "grad_norm": 0.8566433372757637, "learning_rate": 2.428692466971877e-07, "loss": 0.0063, "step": 15590 }, { "epoch": 3.5474402730375427, "grad_norm": 0.3325929517279916, "learning_rate": 2.4279854559365886e-07, "loss": 0.0013, "step": 15591 }, { "epoch": 3.5476678043230945, "grad_norm": 0.11354873610268419, "learning_rate": 2.4272785230162806e-07, "loss": 0.0003, "step": 15592 }, { "epoch": 3.5478953356086462, "grad_norm": 0.6645180930529042, "learning_rate": 2.426571668225396e-07, "loss": 0.0084, "step": 15593 }, { "epoch": 3.548122866894198, "grad_norm": 1.6139250335588626, "learning_rate": 2.4258648915783863e-07, "loss": 0.013, "step": 15594 }, { "epoch": 3.5483503981797497, "grad_norm": 0.5177858936694284, "learning_rate": 2.4251581930896925e-07, "loss": 0.0066, "step": 15595 }, { "epoch": 3.5485779294653015, "grad_norm": 1.606550719833483, "learning_rate": 2.424451572773761e-07, "loss": 0.009, "step": 15596 }, { "epoch": 3.5488054607508532, "grad_norm": 0.10252637951944082, "learning_rate": 2.4237450306450346e-07, "loss": 0.0005, "step": 15597 }, { "epoch": 3.549032992036405, "grad_norm": 0.209130745534805, "learning_rate": 2.42303856671795e-07, "loss": 0.0006, "step": 15598 }, { "epoch": 3.5492605233219567, "grad_norm": 1.1883101582059115, "learning_rate": 2.422332181006951e-07, "loss": 0.0126, "step": 15599 }, { "epoch": 3.5494880546075085, "grad_norm": 0.21132548385708783, "learning_rate": 2.4216258735264725e-07, "loss": 0.0008, "step": 15600 }, { "epoch": 3.5497155858930602, "grad_norm": 0.32127894124084866, "learning_rate": 2.420919644290947e-07, "loss": 0.0011, "step": 15601 }, { "epoch": 3.549943117178612, "grad_norm": 0.6843168977392555, "learning_rate": 2.4202134933148117e-07, "loss": 0.0051, "step": 15602 }, { "epoch": 3.5501706484641637, "grad_norm": 0.42063527552475105, "learning_rate": 2.4195074206124986e-07, "loss": 0.005, "step": 15603 }, { "epoch": 3.5503981797497155, "grad_norm": 1.4784291047879765, "learning_rate": 2.418801426198441e-07, "loss": 0.0052, "step": 15604 }, { "epoch": 3.5506257110352673, "grad_norm": 0.3548836781305931, "learning_rate": 2.418095510087063e-07, "loss": 0.0028, "step": 15605 }, { "epoch": 3.550853242320819, "grad_norm": 0.16810964940238296, "learning_rate": 2.4173896722927975e-07, "loss": 0.0008, "step": 15606 }, { "epoch": 3.5510807736063708, "grad_norm": 0.42048804102907356, "learning_rate": 2.416683912830068e-07, "loss": 0.0042, "step": 15607 }, { "epoch": 3.5513083048919225, "grad_norm": 0.11239326695458736, "learning_rate": 2.4159782317132966e-07, "loss": 0.0005, "step": 15608 }, { "epoch": 3.5515358361774743, "grad_norm": 0.26101622086200005, "learning_rate": 2.4152726289569085e-07, "loss": 0.0013, "step": 15609 }, { "epoch": 3.551763367463026, "grad_norm": 0.16080035501226234, "learning_rate": 2.414567104575325e-07, "loss": 0.0005, "step": 15610 }, { "epoch": 3.5519908987485778, "grad_norm": 0.11450697870397827, "learning_rate": 2.413861658582968e-07, "loss": 0.0006, "step": 15611 }, { "epoch": 3.55221843003413, "grad_norm": 0.10211625150878349, "learning_rate": 2.413156290994253e-07, "loss": 0.0005, "step": 15612 }, { "epoch": 3.5524459613196813, "grad_norm": 0.3801817294213101, "learning_rate": 2.4124510018235945e-07, "loss": 0.0019, "step": 15613 }, { "epoch": 3.5526734926052335, "grad_norm": 0.26087042883170136, "learning_rate": 2.411745791085409e-07, "loss": 0.0007, "step": 15614 }, { "epoch": 3.5529010238907848, "grad_norm": 0.5044517976245876, "learning_rate": 2.4110406587941103e-07, "loss": 0.0032, "step": 15615 }, { "epoch": 3.553128555176337, "grad_norm": 0.2829769887563387, "learning_rate": 2.410335604964112e-07, "loss": 0.0013, "step": 15616 }, { "epoch": 3.5533560864618883, "grad_norm": 0.19942878192906044, "learning_rate": 2.4096306296098196e-07, "loss": 0.0009, "step": 15617 }, { "epoch": 3.5535836177474405, "grad_norm": 0.31493047216160164, "learning_rate": 2.408925732745646e-07, "loss": 0.0024, "step": 15618 }, { "epoch": 3.553811149032992, "grad_norm": 0.5538806388775921, "learning_rate": 2.408220914385996e-07, "loss": 0.0015, "step": 15619 }, { "epoch": 3.554038680318544, "grad_norm": 0.43322179935572946, "learning_rate": 2.407516174545273e-07, "loss": 0.0034, "step": 15620 }, { "epoch": 3.5542662116040957, "grad_norm": 0.7889399441542715, "learning_rate": 2.4068115132378814e-07, "loss": 0.0062, "step": 15621 }, { "epoch": 3.5544937428896475, "grad_norm": 0.7048622040542334, "learning_rate": 2.4061069304782243e-07, "loss": 0.0032, "step": 15622 }, { "epoch": 3.5547212741751992, "grad_norm": 0.9937678518968718, "learning_rate": 2.4054024262807036e-07, "loss": 0.0027, "step": 15623 }, { "epoch": 3.554948805460751, "grad_norm": 0.5785866984292645, "learning_rate": 2.404698000659714e-07, "loss": 0.0032, "step": 15624 }, { "epoch": 3.5551763367463027, "grad_norm": 0.7773097123065528, "learning_rate": 2.403993653629658e-07, "loss": 0.0034, "step": 15625 }, { "epoch": 3.5554038680318545, "grad_norm": 0.12865653096039387, "learning_rate": 2.4032893852049274e-07, "loss": 0.0003, "step": 15626 }, { "epoch": 3.5556313993174062, "grad_norm": 0.6223237063708256, "learning_rate": 2.402585195399915e-07, "loss": 0.0077, "step": 15627 }, { "epoch": 3.555858930602958, "grad_norm": 0.1995034217314328, "learning_rate": 2.401881084229014e-07, "loss": 0.0007, "step": 15628 }, { "epoch": 3.5560864618885097, "grad_norm": 0.34051467796679397, "learning_rate": 2.401177051706618e-07, "loss": 0.0015, "step": 15629 }, { "epoch": 3.5563139931740615, "grad_norm": 0.6235693676954123, "learning_rate": 2.400473097847115e-07, "loss": 0.0038, "step": 15630 }, { "epoch": 3.5565415244596132, "grad_norm": 0.9498422424982759, "learning_rate": 2.3997692226648923e-07, "loss": 0.0095, "step": 15631 }, { "epoch": 3.556769055745165, "grad_norm": 1.0882065300906087, "learning_rate": 2.399065426174333e-07, "loss": 0.0143, "step": 15632 }, { "epoch": 3.5569965870307167, "grad_norm": 0.5844752718552233, "learning_rate": 2.398361708389826e-07, "loss": 0.0076, "step": 15633 }, { "epoch": 3.5572241183162685, "grad_norm": 2.145890202711923, "learning_rate": 2.39765806932575e-07, "loss": 0.0377, "step": 15634 }, { "epoch": 3.5574516496018203, "grad_norm": 3.6465083556964513, "learning_rate": 2.3969545089964875e-07, "loss": 0.0295, "step": 15635 }, { "epoch": 3.557679180887372, "grad_norm": 0.29076417213507044, "learning_rate": 2.396251027416418e-07, "loss": 0.0004, "step": 15636 }, { "epoch": 3.5579067121729238, "grad_norm": 0.25805176762022886, "learning_rate": 2.395547624599922e-07, "loss": 0.002, "step": 15637 }, { "epoch": 3.5581342434584755, "grad_norm": 1.0656874871466642, "learning_rate": 2.394844300561373e-07, "loss": 0.0094, "step": 15638 }, { "epoch": 3.5583617747440273, "grad_norm": 0.24273729192123306, "learning_rate": 2.3941410553151446e-07, "loss": 0.0008, "step": 15639 }, { "epoch": 3.558589306029579, "grad_norm": 0.28170963977307556, "learning_rate": 2.393437888875614e-07, "loss": 0.0011, "step": 15640 }, { "epoch": 3.5588168373151308, "grad_norm": 0.780286394252798, "learning_rate": 2.392734801257147e-07, "loss": 0.0083, "step": 15641 }, { "epoch": 3.5590443686006825, "grad_norm": 0.5835224866596822, "learning_rate": 2.392031792474116e-07, "loss": 0.0055, "step": 15642 }, { "epoch": 3.5592718998862343, "grad_norm": 0.7511876926518567, "learning_rate": 2.3913288625408906e-07, "loss": 0.0039, "step": 15643 }, { "epoch": 3.559499431171786, "grad_norm": 72.59708556306197, "learning_rate": 2.390626011471838e-07, "loss": 0.1235, "step": 15644 }, { "epoch": 3.5597269624573378, "grad_norm": 0.32817873715792045, "learning_rate": 2.3899232392813223e-07, "loss": 0.0077, "step": 15645 }, { "epoch": 3.5599544937428895, "grad_norm": 0.8881941506958387, "learning_rate": 2.389220545983704e-07, "loss": 0.0049, "step": 15646 }, { "epoch": 3.5601820250284413, "grad_norm": 0.5609444938988077, "learning_rate": 2.3885179315933483e-07, "loss": 0.0042, "step": 15647 }, { "epoch": 3.560409556313993, "grad_norm": 0.23334753749116116, "learning_rate": 2.3878153961246125e-07, "loss": 0.0014, "step": 15648 }, { "epoch": 3.560637087599545, "grad_norm": 0.6312067397413218, "learning_rate": 2.387112939591857e-07, "loss": 0.0052, "step": 15649 }, { "epoch": 3.5608646188850965, "grad_norm": 1.9460887196608485, "learning_rate": 2.38641056200944e-07, "loss": 0.0042, "step": 15650 }, { "epoch": 3.5610921501706487, "grad_norm": 0.7466041011077913, "learning_rate": 2.385708263391714e-07, "loss": 0.0064, "step": 15651 }, { "epoch": 3.5613196814562, "grad_norm": 0.6321009535531834, "learning_rate": 2.385006043753035e-07, "loss": 0.0053, "step": 15652 }, { "epoch": 3.5615472127417522, "grad_norm": 0.1902404273430676, "learning_rate": 2.3843039031077526e-07, "loss": 0.0009, "step": 15653 }, { "epoch": 3.5617747440273035, "grad_norm": 0.3588911064368835, "learning_rate": 2.3836018414702205e-07, "loss": 0.003, "step": 15654 }, { "epoch": 3.5620022753128557, "grad_norm": 0.06680782256032256, "learning_rate": 2.3828998588547842e-07, "loss": 0.0002, "step": 15655 }, { "epoch": 3.562229806598407, "grad_norm": 1.079600558619967, "learning_rate": 2.3821979552757926e-07, "loss": 0.0124, "step": 15656 }, { "epoch": 3.5624573378839592, "grad_norm": 0.31397059890986406, "learning_rate": 2.381496130747593e-07, "loss": 0.0036, "step": 15657 }, { "epoch": 3.5626848691695105, "grad_norm": 0.19193701237007824, "learning_rate": 2.3807943852845252e-07, "loss": 0.0006, "step": 15658 }, { "epoch": 3.5629124004550627, "grad_norm": 0.22195486781043222, "learning_rate": 2.3800927189009364e-07, "loss": 0.0011, "step": 15659 }, { "epoch": 3.5631399317406145, "grad_norm": 0.08343207394465793, "learning_rate": 2.3793911316111632e-07, "loss": 0.0004, "step": 15660 }, { "epoch": 3.5633674630261662, "grad_norm": 0.624851546792518, "learning_rate": 2.378689623429549e-07, "loss": 0.0063, "step": 15661 }, { "epoch": 3.563594994311718, "grad_norm": 1.3563025959339514, "learning_rate": 2.3779881943704259e-07, "loss": 0.0045, "step": 15662 }, { "epoch": 3.5638225255972698, "grad_norm": 0.2067366845550704, "learning_rate": 2.3772868444481339e-07, "loss": 0.0008, "step": 15663 }, { "epoch": 3.5640500568828215, "grad_norm": 1.0318574068235047, "learning_rate": 2.3765855736770074e-07, "loss": 0.0036, "step": 15664 }, { "epoch": 3.5642775881683733, "grad_norm": 0.5343557262247115, "learning_rate": 2.3758843820713764e-07, "loss": 0.0038, "step": 15665 }, { "epoch": 3.564505119453925, "grad_norm": 0.46505588848515944, "learning_rate": 2.3751832696455749e-07, "loss": 0.0019, "step": 15666 }, { "epoch": 3.5647326507394768, "grad_norm": 2.2787475492565044, "learning_rate": 2.3744822364139295e-07, "loss": 0.0041, "step": 15667 }, { "epoch": 3.5649601820250285, "grad_norm": 0.5341255422168038, "learning_rate": 2.3737812823907718e-07, "loss": 0.0011, "step": 15668 }, { "epoch": 3.5651877133105803, "grad_norm": 0.33237393085278405, "learning_rate": 2.3730804075904238e-07, "loss": 0.0021, "step": 15669 }, { "epoch": 3.565415244596132, "grad_norm": 0.7907761239200911, "learning_rate": 2.3723796120272112e-07, "loss": 0.0048, "step": 15670 }, { "epoch": 3.5656427758816838, "grad_norm": 0.7858283369277481, "learning_rate": 2.3716788957154603e-07, "loss": 0.0035, "step": 15671 }, { "epoch": 3.5658703071672355, "grad_norm": 0.9728462797661518, "learning_rate": 2.370978258669488e-07, "loss": 0.0087, "step": 15672 }, { "epoch": 3.5660978384527873, "grad_norm": 0.3688619105475081, "learning_rate": 2.3702777009036178e-07, "loss": 0.0016, "step": 15673 }, { "epoch": 3.566325369738339, "grad_norm": 0.42134004592676144, "learning_rate": 2.3695772224321643e-07, "loss": 0.0011, "step": 15674 }, { "epoch": 3.5665529010238908, "grad_norm": 0.8979889327690931, "learning_rate": 2.3688768232694456e-07, "loss": 0.0059, "step": 15675 }, { "epoch": 3.5667804323094425, "grad_norm": 0.0337247210196297, "learning_rate": 2.3681765034297783e-07, "loss": 0.0001, "step": 15676 }, { "epoch": 3.5670079635949943, "grad_norm": 1.2644498006597213, "learning_rate": 2.3674762629274726e-07, "loss": 0.002, "step": 15677 }, { "epoch": 3.567235494880546, "grad_norm": 0.4473705982016037, "learning_rate": 2.366776101776843e-07, "loss": 0.0006, "step": 15678 }, { "epoch": 3.567463026166098, "grad_norm": 0.5108111492386535, "learning_rate": 2.3660760199921965e-07, "loss": 0.0012, "step": 15679 }, { "epoch": 3.5676905574516495, "grad_norm": 0.5927637457046773, "learning_rate": 2.3653760175878456e-07, "loss": 0.0026, "step": 15680 }, { "epoch": 3.5679180887372013, "grad_norm": 0.5756820639779598, "learning_rate": 2.3646760945780917e-07, "loss": 0.0014, "step": 15681 }, { "epoch": 3.568145620022753, "grad_norm": 2.887588731873046, "learning_rate": 2.363976250977243e-07, "loss": 0.0401, "step": 15682 }, { "epoch": 3.568373151308305, "grad_norm": 0.48470033315797223, "learning_rate": 2.3632764867996045e-07, "loss": 0.0034, "step": 15683 }, { "epoch": 3.5686006825938565, "grad_norm": 1.2013932663151112, "learning_rate": 2.3625768020594749e-07, "loss": 0.0039, "step": 15684 }, { "epoch": 3.5688282138794083, "grad_norm": 0.9650970778237993, "learning_rate": 2.3618771967711582e-07, "loss": 0.0064, "step": 15685 }, { "epoch": 3.56905574516496, "grad_norm": 0.26011198445311284, "learning_rate": 2.3611776709489485e-07, "loss": 0.0032, "step": 15686 }, { "epoch": 3.569283276450512, "grad_norm": 1.925678008552924, "learning_rate": 2.3604782246071476e-07, "loss": 0.0178, "step": 15687 }, { "epoch": 3.5695108077360636, "grad_norm": 0.36127358125379966, "learning_rate": 2.3597788577600458e-07, "loss": 0.0037, "step": 15688 }, { "epoch": 3.5697383390216153, "grad_norm": 0.49397657656808425, "learning_rate": 2.3590795704219397e-07, "loss": 0.0021, "step": 15689 }, { "epoch": 3.5699658703071675, "grad_norm": 0.7616578629276558, "learning_rate": 2.3583803626071232e-07, "loss": 0.0058, "step": 15690 }, { "epoch": 3.570193401592719, "grad_norm": 0.7482244548922928, "learning_rate": 2.357681234329883e-07, "loss": 0.01, "step": 15691 }, { "epoch": 3.570420932878271, "grad_norm": 0.637329218834373, "learning_rate": 2.3569821856045123e-07, "loss": 0.004, "step": 15692 }, { "epoch": 3.5706484641638223, "grad_norm": 0.4202271086642983, "learning_rate": 2.356283216445294e-07, "loss": 0.0016, "step": 15693 }, { "epoch": 3.5708759954493745, "grad_norm": 0.1660724270977048, "learning_rate": 2.3555843268665176e-07, "loss": 0.0016, "step": 15694 }, { "epoch": 3.571103526734926, "grad_norm": 0.8068878861737283, "learning_rate": 2.354885516882463e-07, "loss": 0.0064, "step": 15695 }, { "epoch": 3.571331058020478, "grad_norm": 1.0678829771148164, "learning_rate": 2.3541867865074147e-07, "loss": 0.007, "step": 15696 }, { "epoch": 3.5715585893060293, "grad_norm": 0.2594914072985646, "learning_rate": 2.3534881357556562e-07, "loss": 0.0012, "step": 15697 }, { "epoch": 3.5717861205915815, "grad_norm": 1.23768757156391, "learning_rate": 2.3527895646414618e-07, "loss": 0.0077, "step": 15698 }, { "epoch": 3.5720136518771333, "grad_norm": 0.1481588711497329, "learning_rate": 2.3520910731791136e-07, "loss": 0.0004, "step": 15699 }, { "epoch": 3.572241183162685, "grad_norm": 0.36158168883959474, "learning_rate": 2.3513926613828828e-07, "loss": 0.0024, "step": 15700 }, { "epoch": 3.5724687144482368, "grad_norm": 0.4155759317888497, "learning_rate": 2.3506943292670482e-07, "loss": 0.0028, "step": 15701 }, { "epoch": 3.5726962457337885, "grad_norm": 0.6690370735497002, "learning_rate": 2.3499960768458778e-07, "loss": 0.0074, "step": 15702 }, { "epoch": 3.5729237770193403, "grad_norm": 0.4648832755564358, "learning_rate": 2.3492979041336455e-07, "loss": 0.0025, "step": 15703 }, { "epoch": 3.573151308304892, "grad_norm": 1.148132082388822, "learning_rate": 2.3485998111446222e-07, "loss": 0.0052, "step": 15704 }, { "epoch": 3.573378839590444, "grad_norm": 0.036801594133423585, "learning_rate": 2.3479017978930722e-07, "loss": 0.0001, "step": 15705 }, { "epoch": 3.5736063708759955, "grad_norm": 0.4700014245144343, "learning_rate": 2.3472038643932645e-07, "loss": 0.0056, "step": 15706 }, { "epoch": 3.5738339021615473, "grad_norm": 0.4667311404109101, "learning_rate": 2.3465060106594626e-07, "loss": 0.0045, "step": 15707 }, { "epoch": 3.574061433447099, "grad_norm": 0.44500481415536824, "learning_rate": 2.3458082367059264e-07, "loss": 0.0036, "step": 15708 }, { "epoch": 3.574288964732651, "grad_norm": 0.6842143491185493, "learning_rate": 2.3451105425469197e-07, "loss": 0.0061, "step": 15709 }, { "epoch": 3.5745164960182025, "grad_norm": 1.843634500794425, "learning_rate": 2.344412928196702e-07, "loss": 0.017, "step": 15710 }, { "epoch": 3.5747440273037543, "grad_norm": 0.2700113410569783, "learning_rate": 2.3437153936695336e-07, "loss": 0.0012, "step": 15711 }, { "epoch": 3.574971558589306, "grad_norm": 0.2507249453832044, "learning_rate": 2.3430179389796665e-07, "loss": 0.0011, "step": 15712 }, { "epoch": 3.575199089874858, "grad_norm": 0.6546803331756255, "learning_rate": 2.3423205641413592e-07, "loss": 0.0071, "step": 15713 }, { "epoch": 3.5754266211604095, "grad_norm": 0.788453001317172, "learning_rate": 2.3416232691688635e-07, "loss": 0.0059, "step": 15714 }, { "epoch": 3.5756541524459613, "grad_norm": 0.340335375416776, "learning_rate": 2.3409260540764284e-07, "loss": 0.0065, "step": 15715 }, { "epoch": 3.575881683731513, "grad_norm": 0.4270946925317336, "learning_rate": 2.3402289188783045e-07, "loss": 0.0018, "step": 15716 }, { "epoch": 3.576109215017065, "grad_norm": 0.06755753073229358, "learning_rate": 2.339531863588742e-07, "loss": 0.0003, "step": 15717 }, { "epoch": 3.5763367463026166, "grad_norm": 0.29866921807500985, "learning_rate": 2.3388348882219887e-07, "loss": 0.001, "step": 15718 }, { "epoch": 3.5765642775881683, "grad_norm": 0.8586431844385688, "learning_rate": 2.3381379927922845e-07, "loss": 0.012, "step": 15719 }, { "epoch": 3.57679180887372, "grad_norm": 0.26865144245990835, "learning_rate": 2.337441177313878e-07, "loss": 0.0014, "step": 15720 }, { "epoch": 3.577019340159272, "grad_norm": 0.7771489169746504, "learning_rate": 2.3367444418010088e-07, "loss": 0.0044, "step": 15721 }, { "epoch": 3.5772468714448236, "grad_norm": 1.2078148911093642, "learning_rate": 2.3360477862679135e-07, "loss": 0.0144, "step": 15722 }, { "epoch": 3.5774744027303753, "grad_norm": 0.23807847875313198, "learning_rate": 2.335351210728834e-07, "loss": 0.001, "step": 15723 }, { "epoch": 3.577701934015927, "grad_norm": 0.7846912558688239, "learning_rate": 2.3346547151980058e-07, "loss": 0.0084, "step": 15724 }, { "epoch": 3.577929465301479, "grad_norm": 1.4527682891646507, "learning_rate": 2.3339582996896675e-07, "loss": 0.0092, "step": 15725 }, { "epoch": 3.5781569965870306, "grad_norm": 0.09700304007489788, "learning_rate": 2.333261964218049e-07, "loss": 0.0003, "step": 15726 }, { "epoch": 3.5783845278725823, "grad_norm": 0.2384876174259115, "learning_rate": 2.332565708797381e-07, "loss": 0.0012, "step": 15727 }, { "epoch": 3.578612059158134, "grad_norm": 0.3650204835160109, "learning_rate": 2.3318695334418974e-07, "loss": 0.0037, "step": 15728 }, { "epoch": 3.5788395904436863, "grad_norm": 0.8433438102863481, "learning_rate": 2.3311734381658228e-07, "loss": 0.006, "step": 15729 }, { "epoch": 3.5790671217292376, "grad_norm": 0.8602534248906869, "learning_rate": 2.3304774229833864e-07, "loss": 0.0091, "step": 15730 }, { "epoch": 3.5792946530147898, "grad_norm": 0.9884690787285435, "learning_rate": 2.329781487908813e-07, "loss": 0.0056, "step": 15731 }, { "epoch": 3.579522184300341, "grad_norm": 0.45135482758296613, "learning_rate": 2.329085632956328e-07, "loss": 0.003, "step": 15732 }, { "epoch": 3.5797497155858933, "grad_norm": 0.3178232318082304, "learning_rate": 2.3283898581401524e-07, "loss": 0.0011, "step": 15733 }, { "epoch": 3.5799772468714446, "grad_norm": 0.05014599313308, "learning_rate": 2.3276941634745039e-07, "loss": 0.0002, "step": 15734 }, { "epoch": 3.580204778156997, "grad_norm": 0.33982114497538657, "learning_rate": 2.3269985489736032e-07, "loss": 0.0014, "step": 15735 }, { "epoch": 3.580432309442548, "grad_norm": 0.10953804902055495, "learning_rate": 2.326303014651668e-07, "loss": 0.0002, "step": 15736 }, { "epoch": 3.5806598407281003, "grad_norm": 0.06242954250852876, "learning_rate": 2.3256075605229148e-07, "loss": 0.0002, "step": 15737 }, { "epoch": 3.580887372013652, "grad_norm": 0.8145274390519632, "learning_rate": 2.3249121866015557e-07, "loss": 0.0078, "step": 15738 }, { "epoch": 3.581114903299204, "grad_norm": 0.8429223971020845, "learning_rate": 2.3242168929018017e-07, "loss": 0.0045, "step": 15739 }, { "epoch": 3.5813424345847555, "grad_norm": 0.9247663809598855, "learning_rate": 2.3235216794378665e-07, "loss": 0.0089, "step": 15740 }, { "epoch": 3.5815699658703073, "grad_norm": 0.5374519981618306, "learning_rate": 2.3228265462239545e-07, "loss": 0.0077, "step": 15741 }, { "epoch": 3.581797497155859, "grad_norm": 0.4406954176599803, "learning_rate": 2.322131493274276e-07, "loss": 0.0058, "step": 15742 }, { "epoch": 3.582025028441411, "grad_norm": 0.4195620981232804, "learning_rate": 2.321436520603036e-07, "loss": 0.0022, "step": 15743 }, { "epoch": 3.5822525597269625, "grad_norm": 0.4002172165046845, "learning_rate": 2.3207416282244419e-07, "loss": 0.0036, "step": 15744 }, { "epoch": 3.5824800910125143, "grad_norm": 0.5458250004244031, "learning_rate": 2.320046816152692e-07, "loss": 0.0031, "step": 15745 }, { "epoch": 3.582707622298066, "grad_norm": 1.0933919428332581, "learning_rate": 2.319352084401985e-07, "loss": 0.0095, "step": 15746 }, { "epoch": 3.582935153583618, "grad_norm": 0.5410410725620374, "learning_rate": 2.3186574329865263e-07, "loss": 0.0039, "step": 15747 }, { "epoch": 3.5831626848691696, "grad_norm": 1.3741507287631556, "learning_rate": 2.3179628619205064e-07, "loss": 0.0119, "step": 15748 }, { "epoch": 3.5833902161547213, "grad_norm": 0.3028385816986504, "learning_rate": 2.3172683712181253e-07, "loss": 0.0012, "step": 15749 }, { "epoch": 3.583617747440273, "grad_norm": 0.9887138508414405, "learning_rate": 2.3165739608935756e-07, "loss": 0.0104, "step": 15750 }, { "epoch": 3.583845278725825, "grad_norm": 0.7074703038391088, "learning_rate": 2.3158796309610528e-07, "loss": 0.0034, "step": 15751 }, { "epoch": 3.5840728100113766, "grad_norm": 0.42702868573309005, "learning_rate": 2.3151853814347453e-07, "loss": 0.0032, "step": 15752 }, { "epoch": 3.5843003412969283, "grad_norm": 0.5118880988210713, "learning_rate": 2.3144912123288407e-07, "loss": 0.004, "step": 15753 }, { "epoch": 3.58452787258248, "grad_norm": 0.2651440526647722, "learning_rate": 2.3137971236575297e-07, "loss": 0.0023, "step": 15754 }, { "epoch": 3.584755403868032, "grad_norm": 0.9938826329073732, "learning_rate": 2.3131031154349947e-07, "loss": 0.007, "step": 15755 }, { "epoch": 3.5849829351535836, "grad_norm": 0.24405593714021956, "learning_rate": 2.3124091876754218e-07, "loss": 0.001, "step": 15756 }, { "epoch": 3.5852104664391353, "grad_norm": 0.15940872052504182, "learning_rate": 2.3117153403929963e-07, "loss": 0.0007, "step": 15757 }, { "epoch": 3.585437997724687, "grad_norm": 0.18534639392843877, "learning_rate": 2.311021573601894e-07, "loss": 0.0005, "step": 15758 }, { "epoch": 3.585665529010239, "grad_norm": 0.8440763065621802, "learning_rate": 2.3103278873162987e-07, "loss": 0.0072, "step": 15759 }, { "epoch": 3.5858930602957906, "grad_norm": 0.32566698081512446, "learning_rate": 2.3096342815503847e-07, "loss": 0.0023, "step": 15760 }, { "epoch": 3.5861205915813423, "grad_norm": 2.1545755758942673, "learning_rate": 2.3089407563183315e-07, "loss": 0.0197, "step": 15761 }, { "epoch": 3.586348122866894, "grad_norm": 0.14476131640943526, "learning_rate": 2.3082473116343096e-07, "loss": 0.0004, "step": 15762 }, { "epoch": 3.586575654152446, "grad_norm": 0.5149234126914334, "learning_rate": 2.3075539475124933e-07, "loss": 0.0032, "step": 15763 }, { "epoch": 3.5868031854379976, "grad_norm": 0.09888338395679991, "learning_rate": 2.3068606639670566e-07, "loss": 0.0006, "step": 15764 }, { "epoch": 3.5870307167235493, "grad_norm": 0.45341108925520995, "learning_rate": 2.306167461012164e-07, "loss": 0.002, "step": 15765 }, { "epoch": 3.587258248009101, "grad_norm": 0.21428576851247305, "learning_rate": 2.305474338661988e-07, "loss": 0.0012, "step": 15766 }, { "epoch": 3.587485779294653, "grad_norm": 0.4890097809126971, "learning_rate": 2.30478129693069e-07, "loss": 0.002, "step": 15767 }, { "epoch": 3.587713310580205, "grad_norm": 0.4273403799331209, "learning_rate": 2.304088335832439e-07, "loss": 0.0033, "step": 15768 }, { "epoch": 3.5879408418657563, "grad_norm": 1.2390397963658693, "learning_rate": 2.3033954553813943e-07, "loss": 0.011, "step": 15769 }, { "epoch": 3.5881683731513085, "grad_norm": 0.5695078753654147, "learning_rate": 2.302702655591718e-07, "loss": 0.0067, "step": 15770 }, { "epoch": 3.58839590443686, "grad_norm": 0.028041001176005026, "learning_rate": 2.3020099364775734e-07, "loss": 0.0001, "step": 15771 }, { "epoch": 3.588623435722412, "grad_norm": 0.6255740248237277, "learning_rate": 2.301317298053112e-07, "loss": 0.0036, "step": 15772 }, { "epoch": 3.5888509670079634, "grad_norm": 0.7312869897069866, "learning_rate": 2.3006247403324965e-07, "loss": 0.0017, "step": 15773 }, { "epoch": 3.5890784982935156, "grad_norm": 0.5002178019540059, "learning_rate": 2.299932263329876e-07, "loss": 0.0016, "step": 15774 }, { "epoch": 3.589306029579067, "grad_norm": 0.9494361681274178, "learning_rate": 2.2992398670594073e-07, "loss": 0.0066, "step": 15775 }, { "epoch": 3.589533560864619, "grad_norm": 0.5099392029762823, "learning_rate": 2.2985475515352385e-07, "loss": 0.0037, "step": 15776 }, { "epoch": 3.589761092150171, "grad_norm": 0.27933294135545383, "learning_rate": 2.297855316771521e-07, "loss": 0.0014, "step": 15777 }, { "epoch": 3.5899886234357226, "grad_norm": 0.41823017566379006, "learning_rate": 2.297163162782405e-07, "loss": 0.0032, "step": 15778 }, { "epoch": 3.5902161547212743, "grad_norm": 0.25814861241189374, "learning_rate": 2.2964710895820323e-07, "loss": 0.0013, "step": 15779 }, { "epoch": 3.590443686006826, "grad_norm": 0.23402801252198271, "learning_rate": 2.2957790971845528e-07, "loss": 0.0008, "step": 15780 }, { "epoch": 3.590671217292378, "grad_norm": 0.4813714977801502, "learning_rate": 2.2950871856041037e-07, "loss": 0.003, "step": 15781 }, { "epoch": 3.5908987485779296, "grad_norm": 1.5028753333743015, "learning_rate": 2.2943953548548324e-07, "loss": 0.0087, "step": 15782 }, { "epoch": 3.5911262798634813, "grad_norm": 0.4543605138897999, "learning_rate": 2.2937036049508727e-07, "loss": 0.0048, "step": 15783 }, { "epoch": 3.591353811149033, "grad_norm": 0.20790477977164346, "learning_rate": 2.293011935906366e-07, "loss": 0.0009, "step": 15784 }, { "epoch": 3.591581342434585, "grad_norm": 3.9221903620598244, "learning_rate": 2.2923203477354515e-07, "loss": 0.0427, "step": 15785 }, { "epoch": 3.5918088737201366, "grad_norm": 0.5687358646875049, "learning_rate": 2.2916288404522576e-07, "loss": 0.0026, "step": 15786 }, { "epoch": 3.5920364050056883, "grad_norm": 0.04658720920277923, "learning_rate": 2.2909374140709233e-07, "loss": 0.0002, "step": 15787 }, { "epoch": 3.59226393629124, "grad_norm": 0.44292403314592144, "learning_rate": 2.2902460686055755e-07, "loss": 0.0031, "step": 15788 }, { "epoch": 3.592491467576792, "grad_norm": 0.19763607919718834, "learning_rate": 2.2895548040703485e-07, "loss": 0.0008, "step": 15789 }, { "epoch": 3.5927189988623436, "grad_norm": 0.7193721253413345, "learning_rate": 2.288863620479366e-07, "loss": 0.0041, "step": 15790 }, { "epoch": 3.5929465301478953, "grad_norm": 0.18056610476572119, "learning_rate": 2.288172517846756e-07, "loss": 0.0004, "step": 15791 }, { "epoch": 3.593174061433447, "grad_norm": 0.4239952638642846, "learning_rate": 2.2874814961866465e-07, "loss": 0.0017, "step": 15792 }, { "epoch": 3.593401592718999, "grad_norm": 0.26514867317948615, "learning_rate": 2.286790555513156e-07, "loss": 0.0014, "step": 15793 }, { "epoch": 3.5936291240045506, "grad_norm": 1.336166599683839, "learning_rate": 2.286099695840411e-07, "loss": 0.0068, "step": 15794 }, { "epoch": 3.5938566552901023, "grad_norm": 0.44038040708003856, "learning_rate": 2.285408917182528e-07, "loss": 0.0021, "step": 15795 }, { "epoch": 3.594084186575654, "grad_norm": 0.8589072182681445, "learning_rate": 2.2847182195536216e-07, "loss": 0.0075, "step": 15796 }, { "epoch": 3.594311717861206, "grad_norm": 0.525909330837615, "learning_rate": 2.2840276029678172e-07, "loss": 0.0015, "step": 15797 }, { "epoch": 3.5945392491467576, "grad_norm": 0.8359901231382255, "learning_rate": 2.283337067439223e-07, "loss": 0.0041, "step": 15798 }, { "epoch": 3.5947667804323093, "grad_norm": 0.37232849776733135, "learning_rate": 2.282646612981957e-07, "loss": 0.0018, "step": 15799 }, { "epoch": 3.594994311717861, "grad_norm": 0.15924104699905908, "learning_rate": 2.2819562396101258e-07, "loss": 0.0008, "step": 15800 }, { "epoch": 3.595221843003413, "grad_norm": 0.30654946480645884, "learning_rate": 2.281265947337844e-07, "loss": 0.001, "step": 15801 }, { "epoch": 3.5954493742889646, "grad_norm": 0.20940525170350138, "learning_rate": 2.2805757361792163e-07, "loss": 0.0012, "step": 15802 }, { "epoch": 3.5956769055745164, "grad_norm": 0.6576826681861495, "learning_rate": 2.2798856061483507e-07, "loss": 0.0078, "step": 15803 }, { "epoch": 3.595904436860068, "grad_norm": 1.1385850572326472, "learning_rate": 2.279195557259354e-07, "loss": 0.0083, "step": 15804 }, { "epoch": 3.59613196814562, "grad_norm": 0.12120261839369825, "learning_rate": 2.2785055895263266e-07, "loss": 0.0005, "step": 15805 }, { "epoch": 3.5963594994311716, "grad_norm": 0.3930417256203286, "learning_rate": 2.2778157029633728e-07, "loss": 0.0018, "step": 15806 }, { "epoch": 3.596587030716724, "grad_norm": 5.455305287630324, "learning_rate": 2.2771258975845902e-07, "loss": 0.0134, "step": 15807 }, { "epoch": 3.596814562002275, "grad_norm": 0.2014176888971954, "learning_rate": 2.27643617340408e-07, "loss": 0.0011, "step": 15808 }, { "epoch": 3.5970420932878273, "grad_norm": 0.3741928814299998, "learning_rate": 2.2757465304359343e-07, "loss": 0.0035, "step": 15809 }, { "epoch": 3.5972696245733786, "grad_norm": 0.7196050978145074, "learning_rate": 2.2750569686942517e-07, "loss": 0.005, "step": 15810 }, { "epoch": 3.597497155858931, "grad_norm": 0.4914392457652968, "learning_rate": 2.2743674881931272e-07, "loss": 0.0039, "step": 15811 }, { "epoch": 3.597724687144482, "grad_norm": 0.5077073133765044, "learning_rate": 2.2736780889466473e-07, "loss": 0.0034, "step": 15812 }, { "epoch": 3.5979522184300343, "grad_norm": 0.4118930250315949, "learning_rate": 2.2729887709689078e-07, "loss": 0.0045, "step": 15813 }, { "epoch": 3.5981797497155856, "grad_norm": 0.4095270755971945, "learning_rate": 2.2722995342739945e-07, "loss": 0.0013, "step": 15814 }, { "epoch": 3.598407281001138, "grad_norm": 0.40758821495470726, "learning_rate": 2.2716103788759908e-07, "loss": 0.0019, "step": 15815 }, { "epoch": 3.5986348122866896, "grad_norm": 0.07867092098929815, "learning_rate": 2.2709213047889859e-07, "loss": 0.0002, "step": 15816 }, { "epoch": 3.5988623435722413, "grad_norm": 0.3723902655928474, "learning_rate": 2.2702323120270618e-07, "loss": 0.005, "step": 15817 }, { "epoch": 3.599089874857793, "grad_norm": 0.45075594779374345, "learning_rate": 2.2695434006043021e-07, "loss": 0.0026, "step": 15818 }, { "epoch": 3.599317406143345, "grad_norm": 0.7989582505522347, "learning_rate": 2.2688545705347843e-07, "loss": 0.0035, "step": 15819 }, { "epoch": 3.5995449374288966, "grad_norm": 0.3623985830777399, "learning_rate": 2.2681658218325894e-07, "loss": 0.0027, "step": 15820 }, { "epoch": 3.5997724687144483, "grad_norm": 0.3375017012177959, "learning_rate": 2.2674771545117936e-07, "loss": 0.0021, "step": 15821 }, { "epoch": 3.6, "grad_norm": 0.466129589456604, "learning_rate": 2.2667885685864677e-07, "loss": 0.0065, "step": 15822 }, { "epoch": 3.600227531285552, "grad_norm": 0.6123746211869712, "learning_rate": 2.2661000640706893e-07, "loss": 0.0039, "step": 15823 }, { "epoch": 3.6004550625711036, "grad_norm": 1.570866122815435, "learning_rate": 2.2654116409785293e-07, "loss": 0.0041, "step": 15824 }, { "epoch": 3.6006825938566553, "grad_norm": 0.41409706303819854, "learning_rate": 2.2647232993240605e-07, "loss": 0.0021, "step": 15825 }, { "epoch": 3.600910125142207, "grad_norm": 0.3513969463060036, "learning_rate": 2.2640350391213462e-07, "loss": 0.0012, "step": 15826 }, { "epoch": 3.601137656427759, "grad_norm": 0.6149763312110867, "learning_rate": 2.2633468603844576e-07, "loss": 0.0029, "step": 15827 }, { "epoch": 3.6013651877133106, "grad_norm": 0.38786465970656225, "learning_rate": 2.2626587631274587e-07, "loss": 0.0039, "step": 15828 }, { "epoch": 3.6015927189988624, "grad_norm": 0.3198372088431216, "learning_rate": 2.2619707473644094e-07, "loss": 0.0033, "step": 15829 }, { "epoch": 3.601820250284414, "grad_norm": 0.18307510557240297, "learning_rate": 2.261282813109375e-07, "loss": 0.0007, "step": 15830 }, { "epoch": 3.602047781569966, "grad_norm": 1.0512979077832483, "learning_rate": 2.2605949603764145e-07, "loss": 0.0071, "step": 15831 }, { "epoch": 3.6022753128555176, "grad_norm": 1.0866790533742265, "learning_rate": 2.2599071891795887e-07, "loss": 0.0042, "step": 15832 }, { "epoch": 3.6025028441410694, "grad_norm": 1.06980626618501, "learning_rate": 2.2592194995329525e-07, "loss": 0.0082, "step": 15833 }, { "epoch": 3.602730375426621, "grad_norm": 0.41394500128473216, "learning_rate": 2.258531891450559e-07, "loss": 0.004, "step": 15834 }, { "epoch": 3.602957906712173, "grad_norm": 0.255824825510578, "learning_rate": 2.2578443649464654e-07, "loss": 0.0012, "step": 15835 }, { "epoch": 3.6031854379977246, "grad_norm": 0.46830618642179267, "learning_rate": 2.25715692003472e-07, "loss": 0.0016, "step": 15836 }, { "epoch": 3.6034129692832764, "grad_norm": 0.32661486639337, "learning_rate": 2.2564695567293745e-07, "loss": 0.0028, "step": 15837 }, { "epoch": 3.603640500568828, "grad_norm": 1.2788144358668458, "learning_rate": 2.2557822750444776e-07, "loss": 0.0063, "step": 15838 }, { "epoch": 3.60386803185438, "grad_norm": 0.3902832933324385, "learning_rate": 2.255095074994078e-07, "loss": 0.0043, "step": 15839 }, { "epoch": 3.6040955631399316, "grad_norm": 0.9863885709331071, "learning_rate": 2.254407956592218e-07, "loss": 0.0087, "step": 15840 }, { "epoch": 3.6043230944254834, "grad_norm": 1.108859481521411, "learning_rate": 2.2537209198529406e-07, "loss": 0.0076, "step": 15841 }, { "epoch": 3.604550625711035, "grad_norm": 0.11342587054949846, "learning_rate": 2.2530339647902902e-07, "loss": 0.0004, "step": 15842 }, { "epoch": 3.604778156996587, "grad_norm": 0.811281640264487, "learning_rate": 2.252347091418304e-07, "loss": 0.0061, "step": 15843 }, { "epoch": 3.6050056882821386, "grad_norm": 0.11390951067020767, "learning_rate": 2.2516602997510218e-07, "loss": 0.0008, "step": 15844 }, { "epoch": 3.6052332195676904, "grad_norm": 1.360623269614006, "learning_rate": 2.2509735898024825e-07, "loss": 0.0176, "step": 15845 }, { "epoch": 3.6054607508532426, "grad_norm": 0.4299372263979608, "learning_rate": 2.2502869615867167e-07, "loss": 0.0035, "step": 15846 }, { "epoch": 3.605688282138794, "grad_norm": 0.5659939684603957, "learning_rate": 2.249600415117763e-07, "loss": 0.0048, "step": 15847 }, { "epoch": 3.605915813424346, "grad_norm": 1.5810172927104917, "learning_rate": 2.2489139504096474e-07, "loss": 0.015, "step": 15848 }, { "epoch": 3.6061433447098974, "grad_norm": 0.17494653948738326, "learning_rate": 2.2482275674764056e-07, "loss": 0.0008, "step": 15849 }, { "epoch": 3.6063708759954496, "grad_norm": 0.3320219336254124, "learning_rate": 2.2475412663320616e-07, "loss": 0.0039, "step": 15850 }, { "epoch": 3.606598407281001, "grad_norm": 0.5379927046546439, "learning_rate": 2.246855046990644e-07, "loss": 0.0031, "step": 15851 }, { "epoch": 3.606825938566553, "grad_norm": 0.3208831840101802, "learning_rate": 2.2461689094661795e-07, "loss": 0.0007, "step": 15852 }, { "epoch": 3.6070534698521044, "grad_norm": 1.980603312126053, "learning_rate": 2.2454828537726875e-07, "loss": 0.014, "step": 15853 }, { "epoch": 3.6072810011376566, "grad_norm": 0.24180103252693597, "learning_rate": 2.2447968799241944e-07, "loss": 0.0013, "step": 15854 }, { "epoch": 3.6075085324232083, "grad_norm": 1.5036633140299518, "learning_rate": 2.244110987934716e-07, "loss": 0.0168, "step": 15855 }, { "epoch": 3.60773606370876, "grad_norm": 0.4531920752766311, "learning_rate": 2.2434251778182725e-07, "loss": 0.0016, "step": 15856 }, { "epoch": 3.607963594994312, "grad_norm": 1.3926464113305614, "learning_rate": 2.2427394495888806e-07, "loss": 0.0091, "step": 15857 }, { "epoch": 3.6081911262798636, "grad_norm": 1.7224639822708456, "learning_rate": 2.2420538032605577e-07, "loss": 0.0061, "step": 15858 }, { "epoch": 3.6084186575654154, "grad_norm": 0.6396392273563831, "learning_rate": 2.2413682388473148e-07, "loss": 0.0047, "step": 15859 }, { "epoch": 3.608646188850967, "grad_norm": 1.412789098889002, "learning_rate": 2.2406827563631612e-07, "loss": 0.0094, "step": 15860 }, { "epoch": 3.608873720136519, "grad_norm": 1.276456652213996, "learning_rate": 2.239997355822112e-07, "loss": 0.0079, "step": 15861 }, { "epoch": 3.6091012514220706, "grad_norm": 0.7456408388775314, "learning_rate": 2.23931203723817e-07, "loss": 0.0047, "step": 15862 }, { "epoch": 3.6093287827076224, "grad_norm": 0.394605206057294, "learning_rate": 2.238626800625345e-07, "loss": 0.0015, "step": 15863 }, { "epoch": 3.609556313993174, "grad_norm": 1.2519715966497351, "learning_rate": 2.2379416459976448e-07, "loss": 0.0065, "step": 15864 }, { "epoch": 3.609783845278726, "grad_norm": 0.2449925856622047, "learning_rate": 2.2372565733690661e-07, "loss": 0.0021, "step": 15865 }, { "epoch": 3.6100113765642776, "grad_norm": 0.3926399195109492, "learning_rate": 2.2365715827536167e-07, "loss": 0.0034, "step": 15866 }, { "epoch": 3.6102389078498294, "grad_norm": 0.2670097065207557, "learning_rate": 2.235886674165292e-07, "loss": 0.0015, "step": 15867 }, { "epoch": 3.610466439135381, "grad_norm": 1.1221389058301612, "learning_rate": 2.2352018476180945e-07, "loss": 0.0124, "step": 15868 }, { "epoch": 3.610693970420933, "grad_norm": 1.0841008980052274, "learning_rate": 2.234517103126016e-07, "loss": 0.0061, "step": 15869 }, { "epoch": 3.6109215017064846, "grad_norm": 0.32517888820950597, "learning_rate": 2.2338324407030537e-07, "loss": 0.0025, "step": 15870 }, { "epoch": 3.6111490329920364, "grad_norm": 0.6176179225562013, "learning_rate": 2.2331478603632038e-07, "loss": 0.0062, "step": 15871 }, { "epoch": 3.611376564277588, "grad_norm": 0.10405854724872562, "learning_rate": 2.2324633621204524e-07, "loss": 0.0005, "step": 15872 }, { "epoch": 3.61160409556314, "grad_norm": 0.724531490534648, "learning_rate": 2.231778945988794e-07, "loss": 0.0041, "step": 15873 }, { "epoch": 3.6118316268486916, "grad_norm": 0.47208442188811695, "learning_rate": 2.2310946119822132e-07, "loss": 0.003, "step": 15874 }, { "epoch": 3.6120591581342434, "grad_norm": 0.8566496772750254, "learning_rate": 2.230410360114701e-07, "loss": 0.0057, "step": 15875 }, { "epoch": 3.612286689419795, "grad_norm": 0.49082658570793525, "learning_rate": 2.229726190400236e-07, "loss": 0.001, "step": 15876 }, { "epoch": 3.612514220705347, "grad_norm": 0.8835559844803296, "learning_rate": 2.229042102852806e-07, "loss": 0.0057, "step": 15877 }, { "epoch": 3.6127417519908986, "grad_norm": 1.9564254694854, "learning_rate": 2.2283580974863932e-07, "loss": 0.0219, "step": 15878 }, { "epoch": 3.6129692832764504, "grad_norm": 0.7098893232772694, "learning_rate": 2.2276741743149733e-07, "loss": 0.0054, "step": 15879 }, { "epoch": 3.613196814562002, "grad_norm": 0.4056008322311634, "learning_rate": 2.2269903333525285e-07, "loss": 0.0029, "step": 15880 }, { "epoch": 3.613424345847554, "grad_norm": 0.3356481692910507, "learning_rate": 2.2263065746130325e-07, "loss": 0.0008, "step": 15881 }, { "epoch": 3.6136518771331056, "grad_norm": 0.6113338603343728, "learning_rate": 2.2256228981104627e-07, "loss": 0.0051, "step": 15882 }, { "epoch": 3.6138794084186574, "grad_norm": 0.08986791146483418, "learning_rate": 2.2249393038587877e-07, "loss": 0.0005, "step": 15883 }, { "epoch": 3.614106939704209, "grad_norm": 0.5629196170126098, "learning_rate": 2.2242557918719827e-07, "loss": 0.0063, "step": 15884 }, { "epoch": 3.6143344709897613, "grad_norm": 2.5985632946208654, "learning_rate": 2.2235723621640185e-07, "loss": 0.0265, "step": 15885 }, { "epoch": 3.6145620022753127, "grad_norm": 0.35989651437018944, "learning_rate": 2.2228890147488587e-07, "loss": 0.0008, "step": 15886 }, { "epoch": 3.614789533560865, "grad_norm": 0.4589799606109037, "learning_rate": 2.2222057496404743e-07, "loss": 0.004, "step": 15887 }, { "epoch": 3.615017064846416, "grad_norm": 0.31510537171231984, "learning_rate": 2.2215225668528255e-07, "loss": 0.0029, "step": 15888 }, { "epoch": 3.6152445961319684, "grad_norm": 0.9635450182274021, "learning_rate": 2.22083946639988e-07, "loss": 0.0126, "step": 15889 }, { "epoch": 3.6154721274175197, "grad_norm": 1.0693275084098113, "learning_rate": 2.2201564482955948e-07, "loss": 0.0055, "step": 15890 }, { "epoch": 3.615699658703072, "grad_norm": 0.4737778734112922, "learning_rate": 2.2194735125539316e-07, "loss": 0.0035, "step": 15891 }, { "epoch": 3.615927189988623, "grad_norm": 0.2712980847064824, "learning_rate": 2.2187906591888497e-07, "loss": 0.0028, "step": 15892 }, { "epoch": 3.6161547212741754, "grad_norm": 2.1510139216690725, "learning_rate": 2.2181078882143026e-07, "loss": 0.0116, "step": 15893 }, { "epoch": 3.616382252559727, "grad_norm": 0.15644774567155897, "learning_rate": 2.2174251996442478e-07, "loss": 0.0006, "step": 15894 }, { "epoch": 3.616609783845279, "grad_norm": 0.8081380139793863, "learning_rate": 2.2167425934926344e-07, "loss": 0.0059, "step": 15895 }, { "epoch": 3.6168373151308306, "grad_norm": 0.676584665260528, "learning_rate": 2.2160600697734187e-07, "loss": 0.0064, "step": 15896 }, { "epoch": 3.6170648464163824, "grad_norm": 0.6544597960341926, "learning_rate": 2.2153776285005444e-07, "loss": 0.0046, "step": 15897 }, { "epoch": 3.617292377701934, "grad_norm": 1.0376805838073966, "learning_rate": 2.2146952696879622e-07, "loss": 0.0066, "step": 15898 }, { "epoch": 3.617519908987486, "grad_norm": 0.9750699808616354, "learning_rate": 2.2140129933496215e-07, "loss": 0.0055, "step": 15899 }, { "epoch": 3.6177474402730376, "grad_norm": 0.6194032227847109, "learning_rate": 2.2133307994994608e-07, "loss": 0.0046, "step": 15900 }, { "epoch": 3.6179749715585894, "grad_norm": 0.3041136455076489, "learning_rate": 2.212648688151428e-07, "loss": 0.0011, "step": 15901 }, { "epoch": 3.618202502844141, "grad_norm": 1.652245967476916, "learning_rate": 2.2119666593194617e-07, "loss": 0.0139, "step": 15902 }, { "epoch": 3.618430034129693, "grad_norm": 0.727747777511932, "learning_rate": 2.2112847130175002e-07, "loss": 0.0053, "step": 15903 }, { "epoch": 3.6186575654152446, "grad_norm": 0.19739242439421578, "learning_rate": 2.210602849259482e-07, "loss": 0.0013, "step": 15904 }, { "epoch": 3.6188850967007964, "grad_norm": 1.1833102990022817, "learning_rate": 2.209921068059344e-07, "loss": 0.0061, "step": 15905 }, { "epoch": 3.619112627986348, "grad_norm": 0.9735744838982858, "learning_rate": 2.2092393694310224e-07, "loss": 0.0124, "step": 15906 }, { "epoch": 3.6193401592719, "grad_norm": 0.7426545094818162, "learning_rate": 2.2085577533884455e-07, "loss": 0.0078, "step": 15907 }, { "epoch": 3.6195676905574516, "grad_norm": 0.40242612677841666, "learning_rate": 2.2078762199455483e-07, "loss": 0.0042, "step": 15908 }, { "epoch": 3.6197952218430034, "grad_norm": 0.8900163930067658, "learning_rate": 2.207194769116259e-07, "loss": 0.0051, "step": 15909 }, { "epoch": 3.620022753128555, "grad_norm": 0.4025284839708747, "learning_rate": 2.2065134009145028e-07, "loss": 0.0027, "step": 15910 }, { "epoch": 3.620250284414107, "grad_norm": 0.5386224113014252, "learning_rate": 2.2058321153542068e-07, "loss": 0.0043, "step": 15911 }, { "epoch": 3.6204778156996587, "grad_norm": 0.17931885759850416, "learning_rate": 2.2051509124492955e-07, "loss": 0.0007, "step": 15912 }, { "epoch": 3.6207053469852104, "grad_norm": 0.8796814794592502, "learning_rate": 2.2044697922136946e-07, "loss": 0.0076, "step": 15913 }, { "epoch": 3.620932878270762, "grad_norm": 0.6121831727338296, "learning_rate": 2.2037887546613193e-07, "loss": 0.0016, "step": 15914 }, { "epoch": 3.621160409556314, "grad_norm": 0.34589165215456896, "learning_rate": 2.2031077998060945e-07, "loss": 0.0018, "step": 15915 }, { "epoch": 3.6213879408418657, "grad_norm": 0.5864364840952287, "learning_rate": 2.2024269276619338e-07, "loss": 0.0022, "step": 15916 }, { "epoch": 3.6216154721274174, "grad_norm": 0.5734423105021703, "learning_rate": 2.2017461382427502e-07, "loss": 0.0028, "step": 15917 }, { "epoch": 3.621843003412969, "grad_norm": 0.26051215703204356, "learning_rate": 2.2010654315624657e-07, "loss": 0.0009, "step": 15918 }, { "epoch": 3.622070534698521, "grad_norm": 0.4330708276998231, "learning_rate": 2.2003848076349866e-07, "loss": 0.0022, "step": 15919 }, { "epoch": 3.6222980659840727, "grad_norm": 0.8743125030805039, "learning_rate": 2.199704266474228e-07, "loss": 0.0102, "step": 15920 }, { "epoch": 3.6225255972696244, "grad_norm": 0.03816877955147695, "learning_rate": 2.1990238080940953e-07, "loss": 0.0001, "step": 15921 }, { "epoch": 3.622753128555176, "grad_norm": 0.22318026179912778, "learning_rate": 2.1983434325084956e-07, "loss": 0.0008, "step": 15922 }, { "epoch": 3.622980659840728, "grad_norm": 0.3710926063699255, "learning_rate": 2.1976631397313353e-07, "loss": 0.0022, "step": 15923 }, { "epoch": 3.62320819112628, "grad_norm": 0.1341530230919009, "learning_rate": 2.1969829297765193e-07, "loss": 0.0007, "step": 15924 }, { "epoch": 3.6234357224118314, "grad_norm": 0.26502749577620277, "learning_rate": 2.196302802657952e-07, "loss": 0.0008, "step": 15925 }, { "epoch": 3.6236632536973836, "grad_norm": 0.19950593539181455, "learning_rate": 2.195622758389529e-07, "loss": 0.0006, "step": 15926 }, { "epoch": 3.623890784982935, "grad_norm": 0.5712890413588881, "learning_rate": 2.1949427969851535e-07, "loss": 0.0032, "step": 15927 }, { "epoch": 3.624118316268487, "grad_norm": 0.43193991055360864, "learning_rate": 2.1942629184587205e-07, "loss": 0.0035, "step": 15928 }, { "epoch": 3.6243458475540384, "grad_norm": 0.25653391553486843, "learning_rate": 2.1935831228241242e-07, "loss": 0.0012, "step": 15929 }, { "epoch": 3.6245733788395906, "grad_norm": 0.18406399018423644, "learning_rate": 2.1929034100952596e-07, "loss": 0.0007, "step": 15930 }, { "epoch": 3.624800910125142, "grad_norm": 0.33135541767080995, "learning_rate": 2.1922237802860186e-07, "loss": 0.0024, "step": 15931 }, { "epoch": 3.625028441410694, "grad_norm": 0.3085693486318131, "learning_rate": 2.191544233410295e-07, "loss": 0.0018, "step": 15932 }, { "epoch": 3.625255972696246, "grad_norm": 0.6541693331133717, "learning_rate": 2.190864769481972e-07, "loss": 0.0065, "step": 15933 }, { "epoch": 3.6254835039817976, "grad_norm": 1.6466917378093602, "learning_rate": 2.1901853885149414e-07, "loss": 0.0155, "step": 15934 }, { "epoch": 3.6257110352673494, "grad_norm": 0.5584828345826126, "learning_rate": 2.1895060905230865e-07, "loss": 0.0024, "step": 15935 }, { "epoch": 3.625938566552901, "grad_norm": 0.26708612799483744, "learning_rate": 2.1888268755202883e-07, "loss": 0.0018, "step": 15936 }, { "epoch": 3.626166097838453, "grad_norm": 0.4383098138509103, "learning_rate": 2.1881477435204308e-07, "loss": 0.0022, "step": 15937 }, { "epoch": 3.6263936291240046, "grad_norm": 0.6206579813424873, "learning_rate": 2.1874686945373953e-07, "loss": 0.0037, "step": 15938 }, { "epoch": 3.6266211604095564, "grad_norm": 0.9371099628111546, "learning_rate": 2.1867897285850606e-07, "loss": 0.0105, "step": 15939 }, { "epoch": 3.626848691695108, "grad_norm": 0.6963927766672215, "learning_rate": 2.1861108456773028e-07, "loss": 0.008, "step": 15940 }, { "epoch": 3.62707622298066, "grad_norm": 0.32563100169132986, "learning_rate": 2.1854320458279948e-07, "loss": 0.002, "step": 15941 }, { "epoch": 3.6273037542662117, "grad_norm": 0.38991635574481326, "learning_rate": 2.184753329051014e-07, "loss": 0.0016, "step": 15942 }, { "epoch": 3.6275312855517634, "grad_norm": 0.7186002523117945, "learning_rate": 2.1840746953602276e-07, "loss": 0.0036, "step": 15943 }, { "epoch": 3.627758816837315, "grad_norm": 0.23176805863743125, "learning_rate": 2.1833961447695084e-07, "loss": 0.0022, "step": 15944 }, { "epoch": 3.627986348122867, "grad_norm": 0.49978104426734016, "learning_rate": 2.182717677292724e-07, "loss": 0.0027, "step": 15945 }, { "epoch": 3.6282138794084187, "grad_norm": 0.508321423447466, "learning_rate": 2.1820392929437433e-07, "loss": 0.0024, "step": 15946 }, { "epoch": 3.6284414106939704, "grad_norm": 0.8531881943418171, "learning_rate": 2.1813609917364303e-07, "loss": 0.0038, "step": 15947 }, { "epoch": 3.628668941979522, "grad_norm": 0.08883010128018151, "learning_rate": 2.180682773684644e-07, "loss": 0.0002, "step": 15948 }, { "epoch": 3.628896473265074, "grad_norm": 0.49019699402014477, "learning_rate": 2.180004638802252e-07, "loss": 0.0024, "step": 15949 }, { "epoch": 3.6291240045506257, "grad_norm": 0.38486909525645735, "learning_rate": 2.179326587103109e-07, "loss": 0.0049, "step": 15950 }, { "epoch": 3.6293515358361774, "grad_norm": 0.7843409732092723, "learning_rate": 2.1786486186010759e-07, "loss": 0.0056, "step": 15951 }, { "epoch": 3.629579067121729, "grad_norm": 0.48086257988453973, "learning_rate": 2.17797073331001e-07, "loss": 0.0036, "step": 15952 }, { "epoch": 3.629806598407281, "grad_norm": 0.6561289860686319, "learning_rate": 2.1772929312437637e-07, "loss": 0.0043, "step": 15953 }, { "epoch": 3.6300341296928327, "grad_norm": 0.8027755963800245, "learning_rate": 2.1766152124161924e-07, "loss": 0.0021, "step": 15954 }, { "epoch": 3.6302616609783844, "grad_norm": 0.9294605420380081, "learning_rate": 2.1759375768411445e-07, "loss": 0.0027, "step": 15955 }, { "epoch": 3.630489192263936, "grad_norm": 0.5484569606636766, "learning_rate": 2.1752600245324724e-07, "loss": 0.0086, "step": 15956 }, { "epoch": 3.630716723549488, "grad_norm": 0.33756584303244325, "learning_rate": 2.174582555504022e-07, "loss": 0.0027, "step": 15957 }, { "epoch": 3.6309442548350397, "grad_norm": 0.21370020056792988, "learning_rate": 2.1739051697696397e-07, "loss": 0.0016, "step": 15958 }, { "epoch": 3.6311717861205914, "grad_norm": 0.45420475520226433, "learning_rate": 2.173227867343173e-07, "loss": 0.0029, "step": 15959 }, { "epoch": 3.631399317406143, "grad_norm": 2.096624835595946, "learning_rate": 2.17255064823846e-07, "loss": 0.0293, "step": 15960 }, { "epoch": 3.631626848691695, "grad_norm": 0.23808755574186818, "learning_rate": 2.171873512469347e-07, "loss": 0.0013, "step": 15961 }, { "epoch": 3.6318543799772467, "grad_norm": 0.16267020678098723, "learning_rate": 2.1711964600496675e-07, "loss": 0.0005, "step": 15962 }, { "epoch": 3.632081911262799, "grad_norm": 0.2928975596915178, "learning_rate": 2.1705194909932655e-07, "loss": 0.0017, "step": 15963 }, { "epoch": 3.63230944254835, "grad_norm": 1.100793732963176, "learning_rate": 2.169842605313971e-07, "loss": 0.0122, "step": 15964 }, { "epoch": 3.6325369738339024, "grad_norm": 0.3094183314340141, "learning_rate": 2.1691658030256218e-07, "loss": 0.0012, "step": 15965 }, { "epoch": 3.6327645051194537, "grad_norm": 0.4717106211144001, "learning_rate": 2.1684890841420517e-07, "loss": 0.0031, "step": 15966 }, { "epoch": 3.632992036405006, "grad_norm": 0.48926788450573094, "learning_rate": 2.1678124486770884e-07, "loss": 0.0036, "step": 15967 }, { "epoch": 3.633219567690557, "grad_norm": 0.8501196141546149, "learning_rate": 2.1671358966445635e-07, "loss": 0.0049, "step": 15968 }, { "epoch": 3.6334470989761094, "grad_norm": 0.5343458746725768, "learning_rate": 2.166459428058302e-07, "loss": 0.0038, "step": 15969 }, { "epoch": 3.6336746302616607, "grad_norm": 0.23268401864394184, "learning_rate": 2.1657830429321333e-07, "loss": 0.0008, "step": 15970 }, { "epoch": 3.633902161547213, "grad_norm": 1.9591781654685927, "learning_rate": 2.1651067412798767e-07, "loss": 0.016, "step": 15971 }, { "epoch": 3.6341296928327647, "grad_norm": 0.14943679946674532, "learning_rate": 2.1644305231153573e-07, "loss": 0.0006, "step": 15972 }, { "epoch": 3.6343572241183164, "grad_norm": 0.6799631773334497, "learning_rate": 2.1637543884523978e-07, "loss": 0.001, "step": 15973 }, { "epoch": 3.634584755403868, "grad_norm": 0.09204266785079641, "learning_rate": 2.1630783373048136e-07, "loss": 0.0003, "step": 15974 }, { "epoch": 3.63481228668942, "grad_norm": 0.3611754223566449, "learning_rate": 2.1624023696864247e-07, "loss": 0.0013, "step": 15975 }, { "epoch": 3.6350398179749717, "grad_norm": 1.4225371449572373, "learning_rate": 2.161726485611043e-07, "loss": 0.0127, "step": 15976 }, { "epoch": 3.6352673492605234, "grad_norm": 0.10078077859197139, "learning_rate": 2.1610506850924875e-07, "loss": 0.0004, "step": 15977 }, { "epoch": 3.635494880546075, "grad_norm": 0.1481020559239009, "learning_rate": 2.1603749681445642e-07, "loss": 0.0007, "step": 15978 }, { "epoch": 3.635722411831627, "grad_norm": 1.5815601883340968, "learning_rate": 2.1596993347810882e-07, "loss": 0.0114, "step": 15979 }, { "epoch": 3.6359499431171787, "grad_norm": 0.48880801754365377, "learning_rate": 2.159023785015868e-07, "loss": 0.0032, "step": 15980 }, { "epoch": 3.6361774744027304, "grad_norm": 0.30794597750979075, "learning_rate": 2.1583483188627074e-07, "loss": 0.0011, "step": 15981 }, { "epoch": 3.636405005688282, "grad_norm": 1.573871378614413, "learning_rate": 2.1576729363354158e-07, "loss": 0.0093, "step": 15982 }, { "epoch": 3.636632536973834, "grad_norm": 0.8029099085829832, "learning_rate": 2.156997637447792e-07, "loss": 0.0038, "step": 15983 }, { "epoch": 3.6368600682593857, "grad_norm": 0.9948051660504613, "learning_rate": 2.1563224222136413e-07, "loss": 0.0083, "step": 15984 }, { "epoch": 3.6370875995449374, "grad_norm": 0.4468079964399961, "learning_rate": 2.1556472906467645e-07, "loss": 0.0051, "step": 15985 }, { "epoch": 3.637315130830489, "grad_norm": 0.4414501546628803, "learning_rate": 2.154972242760957e-07, "loss": 0.0043, "step": 15986 }, { "epoch": 3.637542662116041, "grad_norm": 0.06659847260602453, "learning_rate": 2.1542972785700195e-07, "loss": 0.0003, "step": 15987 }, { "epoch": 3.6377701934015927, "grad_norm": 0.15771192710298496, "learning_rate": 2.1536223980877423e-07, "loss": 0.0008, "step": 15988 }, { "epoch": 3.6379977246871444, "grad_norm": 1.3571678859684375, "learning_rate": 2.1529476013279227e-07, "loss": 0.0081, "step": 15989 }, { "epoch": 3.638225255972696, "grad_norm": 0.158915449442281, "learning_rate": 2.1522728883043492e-07, "loss": 0.0009, "step": 15990 }, { "epoch": 3.638452787258248, "grad_norm": 0.4247939345801617, "learning_rate": 2.1515982590308133e-07, "loss": 0.0028, "step": 15991 }, { "epoch": 3.6386803185437997, "grad_norm": 0.4509476844528247, "learning_rate": 2.1509237135211046e-07, "loss": 0.0043, "step": 15992 }, { "epoch": 3.6389078498293514, "grad_norm": 0.12444785761098469, "learning_rate": 2.1502492517890064e-07, "loss": 0.0004, "step": 15993 }, { "epoch": 3.639135381114903, "grad_norm": 0.4930320111279219, "learning_rate": 2.1495748738483076e-07, "loss": 0.0022, "step": 15994 }, { "epoch": 3.639362912400455, "grad_norm": 0.958233378001298, "learning_rate": 2.1489005797127863e-07, "loss": 0.0057, "step": 15995 }, { "epoch": 3.6395904436860067, "grad_norm": 0.5656843514766836, "learning_rate": 2.1482263693962285e-07, "loss": 0.0058, "step": 15996 }, { "epoch": 3.6398179749715585, "grad_norm": 0.21642973737641308, "learning_rate": 2.1475522429124096e-07, "loss": 0.0009, "step": 15997 }, { "epoch": 3.64004550625711, "grad_norm": 0.5713583724297829, "learning_rate": 2.1468782002751097e-07, "loss": 0.007, "step": 15998 }, { "epoch": 3.640273037542662, "grad_norm": 0.9880220416925699, "learning_rate": 2.1462042414981072e-07, "loss": 0.0124, "step": 15999 }, { "epoch": 3.640500568828214, "grad_norm": 1.2415616661967166, "learning_rate": 2.1455303665951714e-07, "loss": 0.0064, "step": 16000 }, { "epoch": 3.6407281001137655, "grad_norm": 0.25161343578611906, "learning_rate": 2.1448565755800808e-07, "loss": 0.0047, "step": 16001 }, { "epoch": 3.6409556313993177, "grad_norm": 0.3502364261785629, "learning_rate": 2.1441828684666008e-07, "loss": 0.0016, "step": 16002 }, { "epoch": 3.641183162684869, "grad_norm": 1.5188179746019221, "learning_rate": 2.143509245268506e-07, "loss": 0.0143, "step": 16003 }, { "epoch": 3.641410693970421, "grad_norm": 0.1439881861544686, "learning_rate": 2.1428357059995597e-07, "loss": 0.0008, "step": 16004 }, { "epoch": 3.6416382252559725, "grad_norm": 0.9613368427304276, "learning_rate": 2.142162250673529e-07, "loss": 0.0139, "step": 16005 }, { "epoch": 3.6418657565415247, "grad_norm": 0.7071488039617382, "learning_rate": 2.1414888793041813e-07, "loss": 0.0073, "step": 16006 }, { "epoch": 3.642093287827076, "grad_norm": 0.11586864492485503, "learning_rate": 2.1408155919052745e-07, "loss": 0.0003, "step": 16007 }, { "epoch": 3.642320819112628, "grad_norm": 0.41252429033943977, "learning_rate": 2.140142388490573e-07, "loss": 0.003, "step": 16008 }, { "epoch": 3.6425483503981795, "grad_norm": 0.7297200999902189, "learning_rate": 2.1394692690738344e-07, "loss": 0.0078, "step": 16009 }, { "epoch": 3.6427758816837317, "grad_norm": 0.61880660988117, "learning_rate": 2.138796233668814e-07, "loss": 0.0085, "step": 16010 }, { "epoch": 3.6430034129692834, "grad_norm": 0.9437582960444202, "learning_rate": 2.1381232822892694e-07, "loss": 0.0111, "step": 16011 }, { "epoch": 3.643230944254835, "grad_norm": 1.8360118271162715, "learning_rate": 2.1374504149489536e-07, "loss": 0.0056, "step": 16012 }, { "epoch": 3.643458475540387, "grad_norm": 0.6159513196376065, "learning_rate": 2.1367776316616227e-07, "loss": 0.0056, "step": 16013 }, { "epoch": 3.6436860068259387, "grad_norm": 0.40493702866592113, "learning_rate": 2.136104932441021e-07, "loss": 0.0013, "step": 16014 }, { "epoch": 3.6439135381114904, "grad_norm": 1.0385759418994263, "learning_rate": 2.1354323173009027e-07, "loss": 0.0079, "step": 16015 }, { "epoch": 3.644141069397042, "grad_norm": 0.47513169468029415, "learning_rate": 2.1347597862550127e-07, "loss": 0.0063, "step": 16016 }, { "epoch": 3.644368600682594, "grad_norm": 0.4404757588547658, "learning_rate": 2.134087339317093e-07, "loss": 0.0014, "step": 16017 }, { "epoch": 3.6445961319681457, "grad_norm": 1.0777393561286468, "learning_rate": 2.1334149765008909e-07, "loss": 0.0029, "step": 16018 }, { "epoch": 3.6448236632536974, "grad_norm": 1.6962389578252046, "learning_rate": 2.1327426978201476e-07, "loss": 0.0184, "step": 16019 }, { "epoch": 3.645051194539249, "grad_norm": 0.7587215810258429, "learning_rate": 2.1320705032886044e-07, "loss": 0.0065, "step": 16020 }, { "epoch": 3.645278725824801, "grad_norm": 0.699021934888247, "learning_rate": 2.131398392919997e-07, "loss": 0.0045, "step": 16021 }, { "epoch": 3.6455062571103527, "grad_norm": 0.2819184260998263, "learning_rate": 2.1307263667280655e-07, "loss": 0.0021, "step": 16022 }, { "epoch": 3.6457337883959045, "grad_norm": 0.37517283832854087, "learning_rate": 2.1300544247265437e-07, "loss": 0.0037, "step": 16023 }, { "epoch": 3.645961319681456, "grad_norm": 0.3503666867391538, "learning_rate": 2.1293825669291612e-07, "loss": 0.0022, "step": 16024 }, { "epoch": 3.646188850967008, "grad_norm": 1.2377282686199287, "learning_rate": 2.1287107933496524e-07, "loss": 0.0147, "step": 16025 }, { "epoch": 3.6464163822525597, "grad_norm": 0.6241625961233649, "learning_rate": 2.1280391040017473e-07, "loss": 0.0056, "step": 16026 }, { "epoch": 3.6466439135381115, "grad_norm": 0.06330682819028825, "learning_rate": 2.1273674988991764e-07, "loss": 0.0002, "step": 16027 }, { "epoch": 3.646871444823663, "grad_norm": 0.2109125587653197, "learning_rate": 2.1266959780556637e-07, "loss": 0.0014, "step": 16028 }, { "epoch": 3.647098976109215, "grad_norm": 0.8287480258827371, "learning_rate": 2.1260245414849313e-07, "loss": 0.0016, "step": 16029 }, { "epoch": 3.6473265073947667, "grad_norm": 0.7833353248823837, "learning_rate": 2.1253531892007067e-07, "loss": 0.0061, "step": 16030 }, { "epoch": 3.6475540386803185, "grad_norm": 0.7825637312697511, "learning_rate": 2.1246819212167065e-07, "loss": 0.0024, "step": 16031 }, { "epoch": 3.64778156996587, "grad_norm": 1.6364318932723578, "learning_rate": 2.124010737546653e-07, "loss": 0.0141, "step": 16032 }, { "epoch": 3.648009101251422, "grad_norm": 0.8262023795719334, "learning_rate": 2.1233396382042637e-07, "loss": 0.0057, "step": 16033 }, { "epoch": 3.6482366325369737, "grad_norm": 0.5376815930516007, "learning_rate": 2.1226686232032563e-07, "loss": 0.0015, "step": 16034 }, { "epoch": 3.6484641638225255, "grad_norm": 1.8896003741395415, "learning_rate": 2.121997692557344e-07, "loss": 0.0097, "step": 16035 }, { "epoch": 3.6486916951080772, "grad_norm": 3.7215709022154777, "learning_rate": 2.121326846280236e-07, "loss": 0.0292, "step": 16036 }, { "epoch": 3.648919226393629, "grad_norm": 0.3168022218212936, "learning_rate": 2.1206560843856486e-07, "loss": 0.0025, "step": 16037 }, { "epoch": 3.6491467576791807, "grad_norm": 0.1314457271826531, "learning_rate": 2.1199854068872844e-07, "loss": 0.0003, "step": 16038 }, { "epoch": 3.649374288964733, "grad_norm": 0.40201952705693456, "learning_rate": 2.1193148137988582e-07, "loss": 0.0042, "step": 16039 }, { "epoch": 3.6496018202502842, "grad_norm": 0.36992923142733825, "learning_rate": 2.1186443051340705e-07, "loss": 0.0039, "step": 16040 }, { "epoch": 3.6498293515358364, "grad_norm": 0.3806565473719617, "learning_rate": 2.1179738809066292e-07, "loss": 0.0018, "step": 16041 }, { "epoch": 3.6500568828213877, "grad_norm": 0.36656851106600985, "learning_rate": 2.1173035411302333e-07, "loss": 0.0027, "step": 16042 }, { "epoch": 3.65028441410694, "grad_norm": 0.343281679130593, "learning_rate": 2.1166332858185833e-07, "loss": 0.0024, "step": 16043 }, { "epoch": 3.6505119453924912, "grad_norm": 0.7900542525877526, "learning_rate": 2.115963114985379e-07, "loss": 0.0096, "step": 16044 }, { "epoch": 3.6507394766780434, "grad_norm": 0.7184472014591092, "learning_rate": 2.115293028644317e-07, "loss": 0.005, "step": 16045 }, { "epoch": 3.6509670079635947, "grad_norm": 0.44383826351328703, "learning_rate": 2.1146230268090956e-07, "loss": 0.0034, "step": 16046 }, { "epoch": 3.651194539249147, "grad_norm": 0.6839537716353906, "learning_rate": 2.1139531094934059e-07, "loss": 0.0034, "step": 16047 }, { "epoch": 3.6514220705346982, "grad_norm": 0.875527054518681, "learning_rate": 2.1132832767109374e-07, "loss": 0.01, "step": 16048 }, { "epoch": 3.6516496018202504, "grad_norm": 0.6679264905516115, "learning_rate": 2.112613528475385e-07, "loss": 0.0052, "step": 16049 }, { "epoch": 3.651877133105802, "grad_norm": 0.30129605990346114, "learning_rate": 2.111943864800433e-07, "loss": 0.0019, "step": 16050 }, { "epoch": 3.652104664391354, "grad_norm": 0.0645024266500519, "learning_rate": 2.11127428569977e-07, "loss": 0.0002, "step": 16051 }, { "epoch": 3.6523321956769057, "grad_norm": 0.9782513829392228, "learning_rate": 2.1106047911870804e-07, "loss": 0.008, "step": 16052 }, { "epoch": 3.6525597269624575, "grad_norm": 0.6399070190831552, "learning_rate": 2.1099353812760502e-07, "loss": 0.003, "step": 16053 }, { "epoch": 3.652787258248009, "grad_norm": 0.9699049206677801, "learning_rate": 2.1092660559803588e-07, "loss": 0.008, "step": 16054 }, { "epoch": 3.653014789533561, "grad_norm": 0.6932042286673608, "learning_rate": 2.1085968153136834e-07, "loss": 0.0021, "step": 16055 }, { "epoch": 3.6532423208191127, "grad_norm": 0.5823062799299092, "learning_rate": 2.1079276592897067e-07, "loss": 0.0036, "step": 16056 }, { "epoch": 3.6534698521046645, "grad_norm": 0.5212129294940951, "learning_rate": 2.1072585879221008e-07, "loss": 0.0026, "step": 16057 }, { "epoch": 3.653697383390216, "grad_norm": 0.6965254615655665, "learning_rate": 2.1065896012245422e-07, "loss": 0.0111, "step": 16058 }, { "epoch": 3.653924914675768, "grad_norm": 0.5051602626465316, "learning_rate": 2.1059206992107056e-07, "loss": 0.0013, "step": 16059 }, { "epoch": 3.6541524459613197, "grad_norm": 0.2756644406074085, "learning_rate": 2.1052518818942588e-07, "loss": 0.0011, "step": 16060 }, { "epoch": 3.6543799772468715, "grad_norm": 1.1138409318932683, "learning_rate": 2.1045831492888748e-07, "loss": 0.0146, "step": 16061 }, { "epoch": 3.654607508532423, "grad_norm": 0.1417887297446316, "learning_rate": 2.103914501408217e-07, "loss": 0.0006, "step": 16062 }, { "epoch": 3.654835039817975, "grad_norm": 0.7848195908683662, "learning_rate": 2.1032459382659556e-07, "loss": 0.0043, "step": 16063 }, { "epoch": 3.6550625711035267, "grad_norm": 0.3552832206929919, "learning_rate": 2.1025774598757507e-07, "loss": 0.0023, "step": 16064 }, { "epoch": 3.6552901023890785, "grad_norm": 1.4817885287126293, "learning_rate": 2.1019090662512676e-07, "loss": 0.0112, "step": 16065 }, { "epoch": 3.6555176336746302, "grad_norm": 0.542063755671719, "learning_rate": 2.1012407574061677e-07, "loss": 0.0034, "step": 16066 }, { "epoch": 3.655745164960182, "grad_norm": 0.9013295424566131, "learning_rate": 2.1005725333541068e-07, "loss": 0.0028, "step": 16067 }, { "epoch": 3.6559726962457337, "grad_norm": 0.6564647560789099, "learning_rate": 2.0999043941087455e-07, "loss": 0.0086, "step": 16068 }, { "epoch": 3.6562002275312855, "grad_norm": 0.6244109664002289, "learning_rate": 2.0992363396837363e-07, "loss": 0.0025, "step": 16069 }, { "epoch": 3.6564277588168372, "grad_norm": 0.8119683478447534, "learning_rate": 2.098568370092737e-07, "loss": 0.004, "step": 16070 }, { "epoch": 3.656655290102389, "grad_norm": 0.7180070197585985, "learning_rate": 2.0979004853493947e-07, "loss": 0.0022, "step": 16071 }, { "epoch": 3.6568828213879407, "grad_norm": 0.4926542102786679, "learning_rate": 2.0972326854673626e-07, "loss": 0.0025, "step": 16072 }, { "epoch": 3.6571103526734925, "grad_norm": 0.17919728867993467, "learning_rate": 2.0965649704602903e-07, "loss": 0.0008, "step": 16073 }, { "epoch": 3.6573378839590442, "grad_norm": 0.7958188838671745, "learning_rate": 2.0958973403418215e-07, "loss": 0.005, "step": 16074 }, { "epoch": 3.657565415244596, "grad_norm": 0.4169427584134152, "learning_rate": 2.0952297951256057e-07, "loss": 0.0027, "step": 16075 }, { "epoch": 3.6577929465301477, "grad_norm": 0.15928408842494024, "learning_rate": 2.0945623348252814e-07, "loss": 0.0007, "step": 16076 }, { "epoch": 3.6580204778156995, "grad_norm": 0.5926013859546968, "learning_rate": 2.0938949594544949e-07, "loss": 0.0085, "step": 16077 }, { "epoch": 3.6582480091012517, "grad_norm": 0.816636996242464, "learning_rate": 2.0932276690268815e-07, "loss": 0.0108, "step": 16078 }, { "epoch": 3.658475540386803, "grad_norm": 0.6974235452619704, "learning_rate": 2.0925604635560821e-07, "loss": 0.0034, "step": 16079 }, { "epoch": 3.658703071672355, "grad_norm": 0.09031948445350393, "learning_rate": 2.091893343055735e-07, "loss": 0.0003, "step": 16080 }, { "epoch": 3.6589306029579065, "grad_norm": 1.625912827290012, "learning_rate": 2.0912263075394706e-07, "loss": 0.0065, "step": 16081 }, { "epoch": 3.6591581342434587, "grad_norm": 1.026568811892908, "learning_rate": 2.0905593570209258e-07, "loss": 0.0096, "step": 16082 }, { "epoch": 3.65938566552901, "grad_norm": 0.951116447234717, "learning_rate": 2.089892491513728e-07, "loss": 0.0038, "step": 16083 }, { "epoch": 3.659613196814562, "grad_norm": 0.6097036576303992, "learning_rate": 2.089225711031511e-07, "loss": 0.0073, "step": 16084 }, { "epoch": 3.6598407281001135, "grad_norm": 2.4519806584984667, "learning_rate": 2.0885590155878987e-07, "loss": 0.0715, "step": 16085 }, { "epoch": 3.6600682593856657, "grad_norm": 0.29472083928656323, "learning_rate": 2.0878924051965183e-07, "loss": 0.0021, "step": 16086 }, { "epoch": 3.660295790671217, "grad_norm": 0.6019819251601193, "learning_rate": 2.0872258798709974e-07, "loss": 0.0026, "step": 16087 }, { "epoch": 3.660523321956769, "grad_norm": 1.0360777282659663, "learning_rate": 2.0865594396249528e-07, "loss": 0.0136, "step": 16088 }, { "epoch": 3.660750853242321, "grad_norm": 0.8122008755537165, "learning_rate": 2.0858930844720107e-07, "loss": 0.0049, "step": 16089 }, { "epoch": 3.6609783845278727, "grad_norm": 0.59512581602364, "learning_rate": 2.0852268144257858e-07, "loss": 0.0044, "step": 16090 }, { "epoch": 3.6612059158134245, "grad_norm": 0.2977866554362326, "learning_rate": 2.084560629499899e-07, "loss": 0.0017, "step": 16091 }, { "epoch": 3.6614334470989762, "grad_norm": 0.3648618472453446, "learning_rate": 2.0838945297079624e-07, "loss": 0.0017, "step": 16092 }, { "epoch": 3.661660978384528, "grad_norm": 0.34409817686793154, "learning_rate": 2.083228515063592e-07, "loss": 0.0014, "step": 16093 }, { "epoch": 3.6618885096700797, "grad_norm": 0.4965448015120001, "learning_rate": 2.082562585580402e-07, "loss": 0.0017, "step": 16094 }, { "epoch": 3.6621160409556315, "grad_norm": 0.6124158440756778, "learning_rate": 2.0818967412719978e-07, "loss": 0.0042, "step": 16095 }, { "epoch": 3.6623435722411832, "grad_norm": 0.04047242513603744, "learning_rate": 2.081230982151993e-07, "loss": 0.0002, "step": 16096 }, { "epoch": 3.662571103526735, "grad_norm": 0.898846206953982, "learning_rate": 2.080565308233992e-07, "loss": 0.01, "step": 16097 }, { "epoch": 3.6627986348122867, "grad_norm": 0.14634019753409264, "learning_rate": 2.079899719531598e-07, "loss": 0.0004, "step": 16098 }, { "epoch": 3.6630261660978385, "grad_norm": 0.2605072790997857, "learning_rate": 2.079234216058417e-07, "loss": 0.0014, "step": 16099 }, { "epoch": 3.6632536973833902, "grad_norm": 0.32881605886934323, "learning_rate": 2.0785687978280505e-07, "loss": 0.0025, "step": 16100 }, { "epoch": 3.663481228668942, "grad_norm": 0.16544126952405183, "learning_rate": 2.0779034648540996e-07, "loss": 0.0007, "step": 16101 }, { "epoch": 3.6637087599544937, "grad_norm": 0.6456325098360146, "learning_rate": 2.077238217150159e-07, "loss": 0.0022, "step": 16102 }, { "epoch": 3.6639362912400455, "grad_norm": 0.3909608090383968, "learning_rate": 2.0765730547298295e-07, "loss": 0.0015, "step": 16103 }, { "epoch": 3.6641638225255972, "grad_norm": 0.7439177768055636, "learning_rate": 2.075907977606701e-07, "loss": 0.0045, "step": 16104 }, { "epoch": 3.664391353811149, "grad_norm": 0.1693908002998616, "learning_rate": 2.0752429857943692e-07, "loss": 0.0003, "step": 16105 }, { "epoch": 3.6646188850967008, "grad_norm": 0.25789836547115685, "learning_rate": 2.0745780793064274e-07, "loss": 0.0006, "step": 16106 }, { "epoch": 3.6648464163822525, "grad_norm": 0.24226728413781148, "learning_rate": 2.0739132581564603e-07, "loss": 0.0014, "step": 16107 }, { "epoch": 3.6650739476678043, "grad_norm": 0.19945736434825997, "learning_rate": 2.07324852235806e-07, "loss": 0.0007, "step": 16108 }, { "epoch": 3.665301478953356, "grad_norm": 0.17924464133023965, "learning_rate": 2.072583871924809e-07, "loss": 0.0013, "step": 16109 }, { "epoch": 3.6655290102389078, "grad_norm": 0.2608169819324576, "learning_rate": 2.071919306870295e-07, "loss": 0.0057, "step": 16110 }, { "epoch": 3.6657565415244595, "grad_norm": 0.6789927781558401, "learning_rate": 2.071254827208096e-07, "loss": 0.0064, "step": 16111 }, { "epoch": 3.6659840728100113, "grad_norm": 0.34372678263218065, "learning_rate": 2.070590432951796e-07, "loss": 0.0014, "step": 16112 }, { "epoch": 3.666211604095563, "grad_norm": 0.3013272681963957, "learning_rate": 2.069926124114975e-07, "loss": 0.0023, "step": 16113 }, { "epoch": 3.6664391353811148, "grad_norm": 0.6915636859368606, "learning_rate": 2.0692619007112066e-07, "loss": 0.0038, "step": 16114 }, { "epoch": 3.6666666666666665, "grad_norm": 0.742732818985299, "learning_rate": 2.0685977627540704e-07, "loss": 0.0061, "step": 16115 }, { "epoch": 3.6668941979522183, "grad_norm": 0.4956233883819073, "learning_rate": 2.0679337102571382e-07, "loss": 0.0067, "step": 16116 }, { "epoch": 3.6671217292377705, "grad_norm": 0.6043935631499116, "learning_rate": 2.0672697432339795e-07, "loss": 0.0038, "step": 16117 }, { "epoch": 3.6673492605233218, "grad_norm": 0.05254918984688542, "learning_rate": 2.066605861698167e-07, "loss": 0.0001, "step": 16118 }, { "epoch": 3.667576791808874, "grad_norm": 0.5599430723052334, "learning_rate": 2.0659420656632693e-07, "loss": 0.0018, "step": 16119 }, { "epoch": 3.6678043230944253, "grad_norm": 0.6614193351994692, "learning_rate": 2.0652783551428552e-07, "loss": 0.0059, "step": 16120 }, { "epoch": 3.6680318543799775, "grad_norm": 0.806572408636319, "learning_rate": 2.0646147301504855e-07, "loss": 0.0068, "step": 16121 }, { "epoch": 3.668259385665529, "grad_norm": 0.9489371962780209, "learning_rate": 2.063951190699727e-07, "loss": 0.015, "step": 16122 }, { "epoch": 3.668486916951081, "grad_norm": 0.6010300222309805, "learning_rate": 2.0632877368041407e-07, "loss": 0.0027, "step": 16123 }, { "epoch": 3.6687144482366323, "grad_norm": 1.3134548035525462, "learning_rate": 2.0626243684772825e-07, "loss": 0.0045, "step": 16124 }, { "epoch": 3.6689419795221845, "grad_norm": 0.042620263404856806, "learning_rate": 2.061961085732714e-07, "loss": 0.0002, "step": 16125 }, { "epoch": 3.669169510807736, "grad_norm": 0.8176408005078178, "learning_rate": 2.0612978885839908e-07, "loss": 0.0082, "step": 16126 }, { "epoch": 3.669397042093288, "grad_norm": 0.843574092996639, "learning_rate": 2.0606347770446692e-07, "loss": 0.0047, "step": 16127 }, { "epoch": 3.6696245733788397, "grad_norm": 0.4782646102311395, "learning_rate": 2.0599717511282986e-07, "loss": 0.0066, "step": 16128 }, { "epoch": 3.6698521046643915, "grad_norm": 0.13635928452724444, "learning_rate": 2.0593088108484336e-07, "loss": 0.0006, "step": 16129 }, { "epoch": 3.6700796359499432, "grad_norm": 0.7611708233831403, "learning_rate": 2.0586459562186217e-07, "loss": 0.0032, "step": 16130 }, { "epoch": 3.670307167235495, "grad_norm": 0.10185691514964464, "learning_rate": 2.0579831872524082e-07, "loss": 0.0003, "step": 16131 }, { "epoch": 3.6705346985210467, "grad_norm": 0.6914892872906722, "learning_rate": 2.0573205039633413e-07, "loss": 0.0071, "step": 16132 }, { "epoch": 3.6707622298065985, "grad_norm": 0.13305615516235308, "learning_rate": 2.0566579063649644e-07, "loss": 0.0007, "step": 16133 }, { "epoch": 3.6709897610921502, "grad_norm": 0.5000589667379698, "learning_rate": 2.055995394470823e-07, "loss": 0.0023, "step": 16134 }, { "epoch": 3.671217292377702, "grad_norm": 0.39670570908428066, "learning_rate": 2.0553329682944537e-07, "loss": 0.0041, "step": 16135 }, { "epoch": 3.6714448236632538, "grad_norm": 0.16668933903373126, "learning_rate": 2.0546706278493949e-07, "loss": 0.0016, "step": 16136 }, { "epoch": 3.6716723549488055, "grad_norm": 1.6005548570227635, "learning_rate": 2.054008373149187e-07, "loss": 0.0098, "step": 16137 }, { "epoch": 3.6718998862343573, "grad_norm": 0.697195110130113, "learning_rate": 2.0533462042073612e-07, "loss": 0.0047, "step": 16138 }, { "epoch": 3.672127417519909, "grad_norm": 0.4547740018140975, "learning_rate": 2.0526841210374536e-07, "loss": 0.0057, "step": 16139 }, { "epoch": 3.6723549488054608, "grad_norm": 0.4836773876631081, "learning_rate": 2.0520221236529954e-07, "loss": 0.0016, "step": 16140 }, { "epoch": 3.6725824800910125, "grad_norm": 0.3173018487695531, "learning_rate": 2.0513602120675186e-07, "loss": 0.0008, "step": 16141 }, { "epoch": 3.6728100113765643, "grad_norm": 0.27995711239692656, "learning_rate": 2.0506983862945495e-07, "loss": 0.0007, "step": 16142 }, { "epoch": 3.673037542662116, "grad_norm": 0.7626639169623578, "learning_rate": 2.0500366463476133e-07, "loss": 0.0072, "step": 16143 }, { "epoch": 3.6732650739476678, "grad_norm": 0.22586681098834052, "learning_rate": 2.0493749922402376e-07, "loss": 0.0021, "step": 16144 }, { "epoch": 3.6734926052332195, "grad_norm": 0.27570818852736745, "learning_rate": 2.0487134239859423e-07, "loss": 0.0009, "step": 16145 }, { "epoch": 3.6737201365187713, "grad_norm": 0.6946034104083247, "learning_rate": 2.0480519415982495e-07, "loss": 0.0048, "step": 16146 }, { "epoch": 3.673947667804323, "grad_norm": 0.3707373972610715, "learning_rate": 2.0473905450906807e-07, "loss": 0.0014, "step": 16147 }, { "epoch": 3.6741751990898748, "grad_norm": 1.0644632317608747, "learning_rate": 2.046729234476754e-07, "loss": 0.008, "step": 16148 }, { "epoch": 3.6744027303754265, "grad_norm": 0.6617972640590679, "learning_rate": 2.046068009769984e-07, "loss": 0.0038, "step": 16149 }, { "epoch": 3.6746302616609783, "grad_norm": 0.43415775354156727, "learning_rate": 2.045406870983882e-07, "loss": 0.0014, "step": 16150 }, { "epoch": 3.67485779294653, "grad_norm": 0.5382214519210162, "learning_rate": 2.0447458181319657e-07, "loss": 0.005, "step": 16151 }, { "epoch": 3.675085324232082, "grad_norm": 0.08598054335790996, "learning_rate": 2.0440848512277414e-07, "loss": 0.0002, "step": 16152 }, { "epoch": 3.6753128555176335, "grad_norm": 0.9049317526930466, "learning_rate": 2.0434239702847198e-07, "loss": 0.0062, "step": 16153 }, { "epoch": 3.6755403868031853, "grad_norm": 0.2643362761506031, "learning_rate": 2.0427631753164103e-07, "loss": 0.003, "step": 16154 }, { "epoch": 3.675767918088737, "grad_norm": 0.5411053488178102, "learning_rate": 2.0421024663363146e-07, "loss": 0.0032, "step": 16155 }, { "epoch": 3.6759954493742892, "grad_norm": 0.715607359908165, "learning_rate": 2.04144184335794e-07, "loss": 0.0036, "step": 16156 }, { "epoch": 3.6762229806598405, "grad_norm": 1.2283598319399516, "learning_rate": 2.0407813063947838e-07, "loss": 0.0084, "step": 16157 }, { "epoch": 3.6764505119453927, "grad_norm": 0.4346267274167292, "learning_rate": 2.0401208554603514e-07, "loss": 0.0033, "step": 16158 }, { "epoch": 3.676678043230944, "grad_norm": 0.4593048889819252, "learning_rate": 2.0394604905681348e-07, "loss": 0.0025, "step": 16159 }, { "epoch": 3.6769055745164962, "grad_norm": 1.4288538988492792, "learning_rate": 2.038800211731639e-07, "loss": 0.0169, "step": 16160 }, { "epoch": 3.6771331058020476, "grad_norm": 0.41099483537455284, "learning_rate": 2.0381400189643533e-07, "loss": 0.0018, "step": 16161 }, { "epoch": 3.6773606370875997, "grad_norm": 0.31778139223567237, "learning_rate": 2.037479912279771e-07, "loss": 0.0006, "step": 16162 }, { "epoch": 3.677588168373151, "grad_norm": 0.2391702681590672, "learning_rate": 2.0368198916913855e-07, "loss": 0.0021, "step": 16163 }, { "epoch": 3.6778156996587033, "grad_norm": 1.0385947933657462, "learning_rate": 2.036159957212684e-07, "loss": 0.0061, "step": 16164 }, { "epoch": 3.6780432309442546, "grad_norm": 0.2785287485997368, "learning_rate": 2.0355001088571552e-07, "loss": 0.0026, "step": 16165 }, { "epoch": 3.6782707622298068, "grad_norm": 0.5842690189695026, "learning_rate": 2.034840346638288e-07, "loss": 0.0026, "step": 16166 }, { "epoch": 3.6784982935153585, "grad_norm": 0.5954350863880173, "learning_rate": 2.0341806705695625e-07, "loss": 0.0029, "step": 16167 }, { "epoch": 3.6787258248009103, "grad_norm": 0.6210702788031086, "learning_rate": 2.0335210806644656e-07, "loss": 0.004, "step": 16168 }, { "epoch": 3.678953356086462, "grad_norm": 0.6132990796695723, "learning_rate": 2.0328615769364727e-07, "loss": 0.0044, "step": 16169 }, { "epoch": 3.6791808873720138, "grad_norm": 0.2836474415736097, "learning_rate": 2.0322021593990688e-07, "loss": 0.0015, "step": 16170 }, { "epoch": 3.6794084186575655, "grad_norm": 0.1441788484601078, "learning_rate": 2.0315428280657252e-07, "loss": 0.0009, "step": 16171 }, { "epoch": 3.6796359499431173, "grad_norm": 0.40333595130039995, "learning_rate": 2.0308835829499211e-07, "loss": 0.0023, "step": 16172 }, { "epoch": 3.679863481228669, "grad_norm": 1.127681995585631, "learning_rate": 2.0302244240651318e-07, "loss": 0.0072, "step": 16173 }, { "epoch": 3.6800910125142208, "grad_norm": 0.49465494970468055, "learning_rate": 2.0295653514248247e-07, "loss": 0.0026, "step": 16174 }, { "epoch": 3.6803185437997725, "grad_norm": 0.8582980570481413, "learning_rate": 2.0289063650424746e-07, "loss": 0.0033, "step": 16175 }, { "epoch": 3.6805460750853243, "grad_norm": 0.182104718936936, "learning_rate": 2.028247464931546e-07, "loss": 0.0005, "step": 16176 }, { "epoch": 3.680773606370876, "grad_norm": 0.3116212897136677, "learning_rate": 2.0275886511055088e-07, "loss": 0.0014, "step": 16177 }, { "epoch": 3.681001137656428, "grad_norm": 0.40736368119822264, "learning_rate": 2.026929923577825e-07, "loss": 0.0021, "step": 16178 }, { "epoch": 3.6812286689419795, "grad_norm": 0.06582831052314606, "learning_rate": 2.0262712823619592e-07, "loss": 0.0002, "step": 16179 }, { "epoch": 3.6814562002275313, "grad_norm": 2.1960201102974186, "learning_rate": 2.025612727471376e-07, "loss": 0.0572, "step": 16180 }, { "epoch": 3.681683731513083, "grad_norm": 1.3618563041111835, "learning_rate": 2.0249542589195287e-07, "loss": 0.0158, "step": 16181 }, { "epoch": 3.681911262798635, "grad_norm": 0.07452589464090796, "learning_rate": 2.0242958767198811e-07, "loss": 0.0002, "step": 16182 }, { "epoch": 3.6821387940841865, "grad_norm": 0.6034749426410841, "learning_rate": 2.023637580885885e-07, "loss": 0.0043, "step": 16183 }, { "epoch": 3.6823663253697383, "grad_norm": 1.9261903296300629, "learning_rate": 2.0229793714309985e-07, "loss": 0.01, "step": 16184 }, { "epoch": 3.68259385665529, "grad_norm": 0.5356703809927634, "learning_rate": 2.02232124836867e-07, "loss": 0.0033, "step": 16185 }, { "epoch": 3.682821387940842, "grad_norm": 0.6975947513719645, "learning_rate": 2.0216632117123527e-07, "loss": 0.0026, "step": 16186 }, { "epoch": 3.6830489192263935, "grad_norm": 1.1911973870801116, "learning_rate": 2.021005261475497e-07, "loss": 0.0035, "step": 16187 }, { "epoch": 3.6832764505119453, "grad_norm": 0.936902364306303, "learning_rate": 2.0203473976715472e-07, "loss": 0.0024, "step": 16188 }, { "epoch": 3.683503981797497, "grad_norm": 0.48982836212367437, "learning_rate": 2.0196896203139527e-07, "loss": 0.002, "step": 16189 }, { "epoch": 3.683731513083049, "grad_norm": 0.44832739370367997, "learning_rate": 2.0190319294161523e-07, "loss": 0.0033, "step": 16190 }, { "epoch": 3.6839590443686006, "grad_norm": 0.4772558919937618, "learning_rate": 2.0183743249915926e-07, "loss": 0.0037, "step": 16191 }, { "epoch": 3.6841865756541523, "grad_norm": 0.7184671224694151, "learning_rate": 2.0177168070537102e-07, "loss": 0.0113, "step": 16192 }, { "epoch": 3.684414106939704, "grad_norm": 0.5091996330550319, "learning_rate": 2.017059375615945e-07, "loss": 0.0028, "step": 16193 }, { "epoch": 3.684641638225256, "grad_norm": 0.9539884929041427, "learning_rate": 2.016402030691736e-07, "loss": 0.0123, "step": 16194 }, { "epoch": 3.684869169510808, "grad_norm": 0.8274348590413181, "learning_rate": 2.015744772294514e-07, "loss": 0.0036, "step": 16195 }, { "epoch": 3.6850967007963593, "grad_norm": 0.49099384878103924, "learning_rate": 2.0150876004377158e-07, "loss": 0.0034, "step": 16196 }, { "epoch": 3.6853242320819115, "grad_norm": 0.9627636167985937, "learning_rate": 2.0144305151347694e-07, "loss": 0.004, "step": 16197 }, { "epoch": 3.685551763367463, "grad_norm": 0.8804939471148758, "learning_rate": 2.0137735163991083e-07, "loss": 0.0091, "step": 16198 }, { "epoch": 3.685779294653015, "grad_norm": 0.5006041296421109, "learning_rate": 2.0131166042441566e-07, "loss": 0.0021, "step": 16199 }, { "epoch": 3.6860068259385663, "grad_norm": 0.5549114517716768, "learning_rate": 2.0124597786833413e-07, "loss": 0.0094, "step": 16200 }, { "epoch": 3.6862343572241185, "grad_norm": 0.6404128694582321, "learning_rate": 2.0118030397300905e-07, "loss": 0.0028, "step": 16201 }, { "epoch": 3.68646188850967, "grad_norm": 0.4576952362765389, "learning_rate": 2.0111463873978208e-07, "loss": 0.0039, "step": 16202 }, { "epoch": 3.686689419795222, "grad_norm": 0.08089592732980205, "learning_rate": 2.0104898216999576e-07, "loss": 0.0004, "step": 16203 }, { "epoch": 3.6869169510807733, "grad_norm": 0.725747242815336, "learning_rate": 2.009833342649919e-07, "loss": 0.0053, "step": 16204 }, { "epoch": 3.6871444823663255, "grad_norm": 0.246881732466736, "learning_rate": 2.0091769502611186e-07, "loss": 0.0013, "step": 16205 }, { "epoch": 3.6873720136518773, "grad_norm": 0.3083646156459193, "learning_rate": 2.0085206445469755e-07, "loss": 0.0019, "step": 16206 }, { "epoch": 3.687599544937429, "grad_norm": 0.06956859289707837, "learning_rate": 2.0078644255209014e-07, "loss": 0.0002, "step": 16207 }, { "epoch": 3.687827076222981, "grad_norm": 0.7280798475514392, "learning_rate": 2.0072082931963125e-07, "loss": 0.0045, "step": 16208 }, { "epoch": 3.6880546075085325, "grad_norm": 0.34227333497278895, "learning_rate": 2.0065522475866127e-07, "loss": 0.0018, "step": 16209 }, { "epoch": 3.6882821387940843, "grad_norm": 0.3578123147909932, "learning_rate": 2.0058962887052162e-07, "loss": 0.0019, "step": 16210 }, { "epoch": 3.688509670079636, "grad_norm": 0.8043316671072145, "learning_rate": 2.0052404165655265e-07, "loss": 0.005, "step": 16211 }, { "epoch": 3.688737201365188, "grad_norm": 0.8530674310097458, "learning_rate": 2.0045846311809476e-07, "loss": 0.0068, "step": 16212 }, { "epoch": 3.6889647326507395, "grad_norm": 0.9343901970375926, "learning_rate": 2.0039289325648824e-07, "loss": 0.0079, "step": 16213 }, { "epoch": 3.6891922639362913, "grad_norm": 1.3795436362002766, "learning_rate": 2.0032733207307343e-07, "loss": 0.0044, "step": 16214 }, { "epoch": 3.689419795221843, "grad_norm": 0.7502330650584466, "learning_rate": 2.0026177956919038e-07, "loss": 0.0046, "step": 16215 }, { "epoch": 3.689647326507395, "grad_norm": 0.4901990278233929, "learning_rate": 2.001962357461785e-07, "loss": 0.0031, "step": 16216 }, { "epoch": 3.6898748577929465, "grad_norm": 3.4444917196926936, "learning_rate": 2.0013070060537779e-07, "loss": 0.0173, "step": 16217 }, { "epoch": 3.6901023890784983, "grad_norm": 0.4227103465655961, "learning_rate": 2.0006517414812744e-07, "loss": 0.0033, "step": 16218 }, { "epoch": 3.69032992036405, "grad_norm": 0.08143122841458673, "learning_rate": 1.999996563757664e-07, "loss": 0.0003, "step": 16219 }, { "epoch": 3.690557451649602, "grad_norm": 0.04839971616629905, "learning_rate": 1.9993414728963413e-07, "loss": 0.0001, "step": 16220 }, { "epoch": 3.6907849829351536, "grad_norm": 1.6308412112116284, "learning_rate": 1.998686468910694e-07, "loss": 0.0062, "step": 16221 }, { "epoch": 3.6910125142207053, "grad_norm": 0.44490975822322665, "learning_rate": 1.9980315518141112e-07, "loss": 0.0042, "step": 16222 }, { "epoch": 3.691240045506257, "grad_norm": 1.1249670367762883, "learning_rate": 1.997376721619977e-07, "loss": 0.0038, "step": 16223 }, { "epoch": 3.691467576791809, "grad_norm": 0.558019879413147, "learning_rate": 1.9967219783416717e-07, "loss": 0.0031, "step": 16224 }, { "epoch": 3.6916951080773606, "grad_norm": 0.6280266399618277, "learning_rate": 1.99606732199258e-07, "loss": 0.0048, "step": 16225 }, { "epoch": 3.6919226393629123, "grad_norm": 0.39623582235771454, "learning_rate": 1.995412752586081e-07, "loss": 0.0045, "step": 16226 }, { "epoch": 3.692150170648464, "grad_norm": 0.7064971812547154, "learning_rate": 1.9947582701355556e-07, "loss": 0.0058, "step": 16227 }, { "epoch": 3.692377701934016, "grad_norm": 0.3404800895709063, "learning_rate": 1.994103874654376e-07, "loss": 0.0019, "step": 16228 }, { "epoch": 3.6926052332195676, "grad_norm": 1.5881686858078676, "learning_rate": 1.9934495661559214e-07, "loss": 0.0124, "step": 16229 }, { "epoch": 3.6928327645051193, "grad_norm": 0.6763074579955631, "learning_rate": 1.9927953446535622e-07, "loss": 0.0046, "step": 16230 }, { "epoch": 3.693060295790671, "grad_norm": 0.35301718648422203, "learning_rate": 1.992141210160667e-07, "loss": 0.0027, "step": 16231 }, { "epoch": 3.693287827076223, "grad_norm": 0.34173779781600255, "learning_rate": 1.991487162690607e-07, "loss": 0.0024, "step": 16232 }, { "epoch": 3.6935153583617746, "grad_norm": 0.2671427213638045, "learning_rate": 1.9908332022567513e-07, "loss": 0.0038, "step": 16233 }, { "epoch": 3.6937428896473268, "grad_norm": 1.3517206463024085, "learning_rate": 1.9901793288724664e-07, "loss": 0.0108, "step": 16234 }, { "epoch": 3.693970420932878, "grad_norm": 0.6325840662346304, "learning_rate": 1.9895255425511123e-07, "loss": 0.003, "step": 16235 }, { "epoch": 3.6941979522184303, "grad_norm": 0.3597429976531521, "learning_rate": 1.9888718433060554e-07, "loss": 0.0011, "step": 16236 }, { "epoch": 3.6944254835039816, "grad_norm": 0.5037713747399808, "learning_rate": 1.9882182311506543e-07, "loss": 0.0037, "step": 16237 }, { "epoch": 3.694653014789534, "grad_norm": 0.740127015484561, "learning_rate": 1.9875647060982655e-07, "loss": 0.0041, "step": 16238 }, { "epoch": 3.694880546075085, "grad_norm": 0.19127277104449636, "learning_rate": 1.9869112681622477e-07, "loss": 0.0006, "step": 16239 }, { "epoch": 3.6951080773606373, "grad_norm": 0.9251996107135978, "learning_rate": 1.9862579173559563e-07, "loss": 0.0044, "step": 16240 }, { "epoch": 3.6953356086461886, "grad_norm": 0.9017236878150293, "learning_rate": 1.9856046536927465e-07, "loss": 0.01, "step": 16241 }, { "epoch": 3.695563139931741, "grad_norm": 0.37382411368826535, "learning_rate": 1.9849514771859674e-07, "loss": 0.0034, "step": 16242 }, { "epoch": 3.695790671217292, "grad_norm": 0.6740353115176447, "learning_rate": 1.9842983878489673e-07, "loss": 0.0054, "step": 16243 }, { "epoch": 3.6960182025028443, "grad_norm": 0.3300805759228008, "learning_rate": 1.9836453856950973e-07, "loss": 0.0007, "step": 16244 }, { "epoch": 3.696245733788396, "grad_norm": 0.30713541875231354, "learning_rate": 1.9829924707377005e-07, "loss": 0.0008, "step": 16245 }, { "epoch": 3.696473265073948, "grad_norm": 1.017590773065058, "learning_rate": 1.9823396429901235e-07, "loss": 0.0071, "step": 16246 }, { "epoch": 3.6967007963594996, "grad_norm": 0.5514659002535648, "learning_rate": 1.9816869024657078e-07, "loss": 0.004, "step": 16247 }, { "epoch": 3.6969283276450513, "grad_norm": 0.8265503646219358, "learning_rate": 1.981034249177797e-07, "loss": 0.005, "step": 16248 }, { "epoch": 3.697155858930603, "grad_norm": 0.623931580133533, "learning_rate": 1.9803816831397274e-07, "loss": 0.0033, "step": 16249 }, { "epoch": 3.697383390216155, "grad_norm": 0.27729312958192714, "learning_rate": 1.979729204364835e-07, "loss": 0.0014, "step": 16250 }, { "epoch": 3.6976109215017066, "grad_norm": 0.3308080939379749, "learning_rate": 1.9790768128664588e-07, "loss": 0.0031, "step": 16251 }, { "epoch": 3.6978384527872583, "grad_norm": 1.0368358238940627, "learning_rate": 1.9784245086579292e-07, "loss": 0.007, "step": 16252 }, { "epoch": 3.69806598407281, "grad_norm": 0.25795619426049926, "learning_rate": 1.9777722917525797e-07, "loss": 0.0018, "step": 16253 }, { "epoch": 3.698293515358362, "grad_norm": 0.11790672650428616, "learning_rate": 1.9771201621637398e-07, "loss": 0.0007, "step": 16254 }, { "epoch": 3.6985210466439136, "grad_norm": 1.0263268209424679, "learning_rate": 1.976468119904741e-07, "loss": 0.0049, "step": 16255 }, { "epoch": 3.6987485779294653, "grad_norm": 0.26476095420837964, "learning_rate": 1.9758161649889072e-07, "loss": 0.0024, "step": 16256 }, { "epoch": 3.698976109215017, "grad_norm": 0.4228840021279599, "learning_rate": 1.9751642974295605e-07, "loss": 0.0035, "step": 16257 }, { "epoch": 3.699203640500569, "grad_norm": 0.08780587789433573, "learning_rate": 1.9745125172400292e-07, "loss": 0.0005, "step": 16258 }, { "epoch": 3.6994311717861206, "grad_norm": 0.9604020212947209, "learning_rate": 1.9738608244336293e-07, "loss": 0.0021, "step": 16259 }, { "epoch": 3.6996587030716723, "grad_norm": 0.11907223070218226, "learning_rate": 1.973209219023683e-07, "loss": 0.0006, "step": 16260 }, { "epoch": 3.699886234357224, "grad_norm": 0.4044570899432615, "learning_rate": 1.97255770102351e-07, "loss": 0.0018, "step": 16261 }, { "epoch": 3.700113765642776, "grad_norm": 0.9720970430926694, "learning_rate": 1.9719062704464214e-07, "loss": 0.008, "step": 16262 }, { "epoch": 3.7003412969283276, "grad_norm": 1.6443019605437497, "learning_rate": 1.971254927305736e-07, "loss": 0.0387, "step": 16263 }, { "epoch": 3.7005688282138793, "grad_norm": 0.23431802728467818, "learning_rate": 1.970603671614762e-07, "loss": 0.0003, "step": 16264 }, { "epoch": 3.700796359499431, "grad_norm": 0.2934573147153461, "learning_rate": 1.9699525033868129e-07, "loss": 0.0012, "step": 16265 }, { "epoch": 3.701023890784983, "grad_norm": 0.5708900554359818, "learning_rate": 1.969301422635194e-07, "loss": 0.0064, "step": 16266 }, { "epoch": 3.7012514220705346, "grad_norm": 0.4145589372621083, "learning_rate": 1.9686504293732153e-07, "loss": 0.0034, "step": 16267 }, { "epoch": 3.7014789533560863, "grad_norm": 1.6552027088378882, "learning_rate": 1.967999523614182e-07, "loss": 0.0041, "step": 16268 }, { "epoch": 3.701706484641638, "grad_norm": 0.19239335288458043, "learning_rate": 1.9673487053713948e-07, "loss": 0.0007, "step": 16269 }, { "epoch": 3.70193401592719, "grad_norm": 0.618043458725372, "learning_rate": 1.9666979746581585e-07, "loss": 0.0133, "step": 16270 }, { "epoch": 3.7021615472127416, "grad_norm": 0.4020400091843672, "learning_rate": 1.9660473314877693e-07, "loss": 0.0023, "step": 16271 }, { "epoch": 3.7023890784982934, "grad_norm": 0.27927460505935425, "learning_rate": 1.9653967758735285e-07, "loss": 0.0015, "step": 16272 }, { "epoch": 3.7026166097838455, "grad_norm": 1.2237942286450834, "learning_rate": 1.9647463078287293e-07, "loss": 0.0046, "step": 16273 }, { "epoch": 3.702844141069397, "grad_norm": 0.11732487846243456, "learning_rate": 1.9640959273666675e-07, "loss": 0.0007, "step": 16274 }, { "epoch": 3.703071672354949, "grad_norm": 1.031321968906973, "learning_rate": 1.9634456345006374e-07, "loss": 0.0095, "step": 16275 }, { "epoch": 3.7032992036405004, "grad_norm": 0.3888254366235766, "learning_rate": 1.9627954292439259e-07, "loss": 0.003, "step": 16276 }, { "epoch": 3.7035267349260526, "grad_norm": 1.0134623101595566, "learning_rate": 1.9621453116098261e-07, "loss": 0.0113, "step": 16277 }, { "epoch": 3.703754266211604, "grad_norm": 0.7867441733761206, "learning_rate": 1.9614952816116215e-07, "loss": 0.0095, "step": 16278 }, { "epoch": 3.703981797497156, "grad_norm": 0.5300507370437206, "learning_rate": 1.9608453392626023e-07, "loss": 0.0031, "step": 16279 }, { "epoch": 3.7042093287827074, "grad_norm": 0.6198661337821229, "learning_rate": 1.960195484576046e-07, "loss": 0.0028, "step": 16280 }, { "epoch": 3.7044368600682596, "grad_norm": 0.9363212213152803, "learning_rate": 1.9595457175652379e-07, "loss": 0.0069, "step": 16281 }, { "epoch": 3.7046643913538113, "grad_norm": 0.2901125445271768, "learning_rate": 1.9588960382434594e-07, "loss": 0.0018, "step": 16282 }, { "epoch": 3.704891922639363, "grad_norm": 0.190824632281742, "learning_rate": 1.9582464466239856e-07, "loss": 0.0007, "step": 16283 }, { "epoch": 3.705119453924915, "grad_norm": 0.8405570637630166, "learning_rate": 1.9575969427200962e-07, "loss": 0.0017, "step": 16284 }, { "epoch": 3.7053469852104666, "grad_norm": 0.037519864167983676, "learning_rate": 1.9569475265450618e-07, "loss": 0.0001, "step": 16285 }, { "epoch": 3.7055745164960183, "grad_norm": 0.6127808121163069, "learning_rate": 1.956298198112158e-07, "loss": 0.0016, "step": 16286 }, { "epoch": 3.70580204778157, "grad_norm": 1.477910149299737, "learning_rate": 1.9556489574346576e-07, "loss": 0.0065, "step": 16287 }, { "epoch": 3.706029579067122, "grad_norm": 0.6833517846890925, "learning_rate": 1.9549998045258257e-07, "loss": 0.0026, "step": 16288 }, { "epoch": 3.7062571103526736, "grad_norm": 1.8150226532341798, "learning_rate": 1.9543507393989338e-07, "loss": 0.0209, "step": 16289 }, { "epoch": 3.7064846416382253, "grad_norm": 0.5365995223859027, "learning_rate": 1.9537017620672436e-07, "loss": 0.0048, "step": 16290 }, { "epoch": 3.706712172923777, "grad_norm": 0.948275293820392, "learning_rate": 1.9530528725440235e-07, "loss": 0.0034, "step": 16291 }, { "epoch": 3.706939704209329, "grad_norm": 2.1859273358861415, "learning_rate": 1.9524040708425306e-07, "loss": 0.0205, "step": 16292 }, { "epoch": 3.7071672354948806, "grad_norm": 0.5537803414184064, "learning_rate": 1.9517553569760282e-07, "loss": 0.0023, "step": 16293 }, { "epoch": 3.7073947667804323, "grad_norm": 0.277011595372693, "learning_rate": 1.951106730957776e-07, "loss": 0.0035, "step": 16294 }, { "epoch": 3.707622298065984, "grad_norm": 0.4849563286714679, "learning_rate": 1.950458192801028e-07, "loss": 0.0027, "step": 16295 }, { "epoch": 3.707849829351536, "grad_norm": 0.4418630672361451, "learning_rate": 1.9498097425190419e-07, "loss": 0.0024, "step": 16296 }, { "epoch": 3.7080773606370876, "grad_norm": 0.10872665943054197, "learning_rate": 1.949161380125067e-07, "loss": 0.0007, "step": 16297 }, { "epoch": 3.7083048919226393, "grad_norm": 1.1644805031880854, "learning_rate": 1.948513105632359e-07, "loss": 0.0149, "step": 16298 }, { "epoch": 3.708532423208191, "grad_norm": 0.78627207171935, "learning_rate": 1.9478649190541632e-07, "loss": 0.0044, "step": 16299 }, { "epoch": 3.708759954493743, "grad_norm": 0.7020014013406592, "learning_rate": 1.9472168204037292e-07, "loss": 0.0025, "step": 16300 }, { "epoch": 3.7089874857792946, "grad_norm": 1.2919570507662357, "learning_rate": 1.9465688096943062e-07, "loss": 0.0089, "step": 16301 }, { "epoch": 3.7092150170648464, "grad_norm": 0.19584538006897403, "learning_rate": 1.9459208869391324e-07, "loss": 0.0006, "step": 16302 }, { "epoch": 3.709442548350398, "grad_norm": 0.31134511212870436, "learning_rate": 1.9452730521514557e-07, "loss": 0.0022, "step": 16303 }, { "epoch": 3.70967007963595, "grad_norm": 0.21040665706716064, "learning_rate": 1.9446253053445122e-07, "loss": 0.0003, "step": 16304 }, { "epoch": 3.7098976109215016, "grad_norm": 0.6414184152191026, "learning_rate": 1.943977646531544e-07, "loss": 0.0061, "step": 16305 }, { "epoch": 3.7101251422070534, "grad_norm": 1.1561684862820039, "learning_rate": 1.943330075725785e-07, "loss": 0.0045, "step": 16306 }, { "epoch": 3.710352673492605, "grad_norm": 0.6722669157947968, "learning_rate": 1.9426825929404716e-07, "loss": 0.0027, "step": 16307 }, { "epoch": 3.710580204778157, "grad_norm": 0.5750391656903585, "learning_rate": 1.9420351981888394e-07, "loss": 0.0054, "step": 16308 }, { "epoch": 3.7108077360637086, "grad_norm": 1.8702943528044962, "learning_rate": 1.9413878914841165e-07, "loss": 0.0098, "step": 16309 }, { "epoch": 3.7110352673492604, "grad_norm": 0.3737304626857106, "learning_rate": 1.940740672839536e-07, "loss": 0.0016, "step": 16310 }, { "epoch": 3.711262798634812, "grad_norm": 1.1334621086207124, "learning_rate": 1.9400935422683245e-07, "loss": 0.0063, "step": 16311 }, { "epoch": 3.7114903299203643, "grad_norm": 0.5937359044578264, "learning_rate": 1.9394464997837057e-07, "loss": 0.0038, "step": 16312 }, { "epoch": 3.7117178612059156, "grad_norm": 0.4740726207545072, "learning_rate": 1.9387995453989055e-07, "loss": 0.0047, "step": 16313 }, { "epoch": 3.711945392491468, "grad_norm": 0.3326804354847959, "learning_rate": 1.9381526791271477e-07, "loss": 0.0009, "step": 16314 }, { "epoch": 3.712172923777019, "grad_norm": 0.6556607970503016, "learning_rate": 1.9375059009816537e-07, "loss": 0.0047, "step": 16315 }, { "epoch": 3.7124004550625713, "grad_norm": 0.13670241034618877, "learning_rate": 1.9368592109756396e-07, "loss": 0.0005, "step": 16316 }, { "epoch": 3.7126279863481226, "grad_norm": 0.7053879854678695, "learning_rate": 1.936212609122326e-07, "loss": 0.0055, "step": 16317 }, { "epoch": 3.712855517633675, "grad_norm": 0.36404626018608355, "learning_rate": 1.9355660954349258e-07, "loss": 0.0022, "step": 16318 }, { "epoch": 3.713083048919226, "grad_norm": 0.9784682582507198, "learning_rate": 1.934919669926652e-07, "loss": 0.0117, "step": 16319 }, { "epoch": 3.7133105802047783, "grad_norm": 0.46756861235943437, "learning_rate": 1.9342733326107172e-07, "loss": 0.0019, "step": 16320 }, { "epoch": 3.71353811149033, "grad_norm": 0.6445879795512865, "learning_rate": 1.9336270835003314e-07, "loss": 0.0087, "step": 16321 }, { "epoch": 3.713765642775882, "grad_norm": 1.74924093536784, "learning_rate": 1.932980922608705e-07, "loss": 0.0133, "step": 16322 }, { "epoch": 3.7139931740614336, "grad_norm": 0.12682987279363345, "learning_rate": 1.93233484994904e-07, "loss": 0.0003, "step": 16323 }, { "epoch": 3.7142207053469853, "grad_norm": 1.3828284277279355, "learning_rate": 1.9316888655345457e-07, "loss": 0.0108, "step": 16324 }, { "epoch": 3.714448236632537, "grad_norm": 0.500489898475227, "learning_rate": 1.9310429693784215e-07, "loss": 0.0013, "step": 16325 }, { "epoch": 3.714675767918089, "grad_norm": 1.3663382729624511, "learning_rate": 1.9303971614938682e-07, "loss": 0.0056, "step": 16326 }, { "epoch": 3.7149032992036406, "grad_norm": 0.08043301948462725, "learning_rate": 1.9297514418940857e-07, "loss": 0.0008, "step": 16327 }, { "epoch": 3.7151308304891923, "grad_norm": 0.19085043341547747, "learning_rate": 1.9291058105922715e-07, "loss": 0.001, "step": 16328 }, { "epoch": 3.715358361774744, "grad_norm": 0.4791427399536599, "learning_rate": 1.9284602676016227e-07, "loss": 0.0039, "step": 16329 }, { "epoch": 3.715585893060296, "grad_norm": 0.584669238916493, "learning_rate": 1.9278148129353324e-07, "loss": 0.003, "step": 16330 }, { "epoch": 3.7158134243458476, "grad_norm": 1.2091922729311861, "learning_rate": 1.9271694466065894e-07, "loss": 0.0099, "step": 16331 }, { "epoch": 3.7160409556313994, "grad_norm": 0.3998841993251996, "learning_rate": 1.926524168628588e-07, "loss": 0.0029, "step": 16332 }, { "epoch": 3.716268486916951, "grad_norm": 0.2169254235834115, "learning_rate": 1.9258789790145124e-07, "loss": 0.0005, "step": 16333 }, { "epoch": 3.716496018202503, "grad_norm": 1.2634320740182043, "learning_rate": 1.9252338777775506e-07, "loss": 0.0047, "step": 16334 }, { "epoch": 3.7167235494880546, "grad_norm": 0.8187887770920541, "learning_rate": 1.9245888649308887e-07, "loss": 0.0094, "step": 16335 }, { "epoch": 3.7169510807736064, "grad_norm": 1.1006454689229088, "learning_rate": 1.9239439404877101e-07, "loss": 0.0051, "step": 16336 }, { "epoch": 3.717178612059158, "grad_norm": 0.42369533422781835, "learning_rate": 1.9232991044611951e-07, "loss": 0.0018, "step": 16337 }, { "epoch": 3.71740614334471, "grad_norm": 0.4642949464940096, "learning_rate": 1.9226543568645207e-07, "loss": 0.0026, "step": 16338 }, { "epoch": 3.7176336746302616, "grad_norm": 0.31647497001782804, "learning_rate": 1.922009697710867e-07, "loss": 0.0016, "step": 16339 }, { "epoch": 3.7178612059158134, "grad_norm": 1.4267016589935353, "learning_rate": 1.9213651270134082e-07, "loss": 0.0146, "step": 16340 }, { "epoch": 3.718088737201365, "grad_norm": 0.15592030689901748, "learning_rate": 1.9207206447853176e-07, "loss": 0.0005, "step": 16341 }, { "epoch": 3.718316268486917, "grad_norm": 0.8410897314383107, "learning_rate": 1.9200762510397687e-07, "loss": 0.0075, "step": 16342 }, { "epoch": 3.7185437997724686, "grad_norm": 0.8133878941297098, "learning_rate": 1.9194319457899332e-07, "loss": 0.0027, "step": 16343 }, { "epoch": 3.7187713310580204, "grad_norm": 0.8077758148345052, "learning_rate": 1.9187877290489778e-07, "loss": 0.0047, "step": 16344 }, { "epoch": 3.718998862343572, "grad_norm": 0.3703249595689772, "learning_rate": 1.918143600830067e-07, "loss": 0.0033, "step": 16345 }, { "epoch": 3.719226393629124, "grad_norm": 1.6166620563883691, "learning_rate": 1.9174995611463694e-07, "loss": 0.0217, "step": 16346 }, { "epoch": 3.7194539249146756, "grad_norm": 0.4540408068214576, "learning_rate": 1.9168556100110425e-07, "loss": 0.0035, "step": 16347 }, { "epoch": 3.7196814562002274, "grad_norm": 0.5289186741805665, "learning_rate": 1.916211747437255e-07, "loss": 0.0023, "step": 16348 }, { "epoch": 3.719908987485779, "grad_norm": 0.7742612292290716, "learning_rate": 1.915567973438162e-07, "loss": 0.0063, "step": 16349 }, { "epoch": 3.720136518771331, "grad_norm": 0.4745661891024623, "learning_rate": 1.9149242880269204e-07, "loss": 0.0045, "step": 16350 }, { "epoch": 3.720364050056883, "grad_norm": 0.6621145421650152, "learning_rate": 1.9142806912166884e-07, "loss": 0.0056, "step": 16351 }, { "epoch": 3.7205915813424344, "grad_norm": 1.6325821272098666, "learning_rate": 1.913637183020617e-07, "loss": 0.0091, "step": 16352 }, { "epoch": 3.7208191126279866, "grad_norm": 1.2631975579612416, "learning_rate": 1.9129937634518592e-07, "loss": 0.0072, "step": 16353 }, { "epoch": 3.721046643913538, "grad_norm": 0.543463562076628, "learning_rate": 1.9123504325235666e-07, "loss": 0.0029, "step": 16354 }, { "epoch": 3.72127417519909, "grad_norm": 0.2542871124518192, "learning_rate": 1.911707190248889e-07, "loss": 0.0023, "step": 16355 }, { "epoch": 3.7215017064846414, "grad_norm": 0.08762703599092808, "learning_rate": 1.9110640366409707e-07, "loss": 0.0004, "step": 16356 }, { "epoch": 3.7217292377701936, "grad_norm": 0.9608993780003808, "learning_rate": 1.9104209717129556e-07, "loss": 0.007, "step": 16357 }, { "epoch": 3.721956769055745, "grad_norm": 0.948076215748066, "learning_rate": 1.9097779954779904e-07, "loss": 0.0088, "step": 16358 }, { "epoch": 3.722184300341297, "grad_norm": 0.1092187741819407, "learning_rate": 1.909135107949212e-07, "loss": 0.0003, "step": 16359 }, { "epoch": 3.722411831626849, "grad_norm": 0.584426207996749, "learning_rate": 1.9084923091397624e-07, "loss": 0.0045, "step": 16360 }, { "epoch": 3.7226393629124006, "grad_norm": 0.42258471007068216, "learning_rate": 1.9078495990627783e-07, "loss": 0.003, "step": 16361 }, { "epoch": 3.7228668941979524, "grad_norm": 0.8522844624594672, "learning_rate": 1.9072069777313982e-07, "loss": 0.0078, "step": 16362 }, { "epoch": 3.723094425483504, "grad_norm": 0.6570773975949395, "learning_rate": 1.9065644451587547e-07, "loss": 0.0081, "step": 16363 }, { "epoch": 3.723321956769056, "grad_norm": 1.2884479914017195, "learning_rate": 1.9059220013579765e-07, "loss": 0.0096, "step": 16364 }, { "epoch": 3.7235494880546076, "grad_norm": 0.5699477304550773, "learning_rate": 1.9052796463421994e-07, "loss": 0.0012, "step": 16365 }, { "epoch": 3.7237770193401594, "grad_norm": 0.17969102257231234, "learning_rate": 1.9046373801245473e-07, "loss": 0.0007, "step": 16366 }, { "epoch": 3.724004550625711, "grad_norm": 0.8568083059413839, "learning_rate": 1.9039952027181487e-07, "loss": 0.0083, "step": 16367 }, { "epoch": 3.724232081911263, "grad_norm": 1.204376113750361, "learning_rate": 1.9033531141361313e-07, "loss": 0.0098, "step": 16368 }, { "epoch": 3.7244596131968146, "grad_norm": 0.19898100812034702, "learning_rate": 1.902711114391613e-07, "loss": 0.0006, "step": 16369 }, { "epoch": 3.7246871444823664, "grad_norm": 0.4796834445794444, "learning_rate": 1.9020692034977202e-07, "loss": 0.0025, "step": 16370 }, { "epoch": 3.724914675767918, "grad_norm": 1.0952187315704507, "learning_rate": 1.901427381467568e-07, "loss": 0.0088, "step": 16371 }, { "epoch": 3.72514220705347, "grad_norm": 0.24925040880004426, "learning_rate": 1.9007856483142787e-07, "loss": 0.001, "step": 16372 }, { "epoch": 3.7253697383390216, "grad_norm": 0.09407317786236172, "learning_rate": 1.9001440040509625e-07, "loss": 0.0005, "step": 16373 }, { "epoch": 3.7255972696245734, "grad_norm": 0.480708042868899, "learning_rate": 1.8995024486907369e-07, "loss": 0.0018, "step": 16374 }, { "epoch": 3.725824800910125, "grad_norm": 0.5867370971734932, "learning_rate": 1.8988609822467162e-07, "loss": 0.0035, "step": 16375 }, { "epoch": 3.726052332195677, "grad_norm": 0.3396978348224254, "learning_rate": 1.898219604732006e-07, "loss": 0.0021, "step": 16376 }, { "epoch": 3.7262798634812286, "grad_norm": 0.9314150302676958, "learning_rate": 1.8975783161597186e-07, "loss": 0.0066, "step": 16377 }, { "epoch": 3.7265073947667804, "grad_norm": 0.7326037044571321, "learning_rate": 1.896937116542958e-07, "loss": 0.0035, "step": 16378 }, { "epoch": 3.726734926052332, "grad_norm": 1.7907490957454648, "learning_rate": 1.896296005894832e-07, "loss": 0.0198, "step": 16379 }, { "epoch": 3.726962457337884, "grad_norm": 1.5032048870884913, "learning_rate": 1.8956549842284406e-07, "loss": 0.016, "step": 16380 }, { "epoch": 3.7271899886234356, "grad_norm": 0.9277417446767187, "learning_rate": 1.8950140515568864e-07, "loss": 0.0032, "step": 16381 }, { "epoch": 3.7274175199089874, "grad_norm": 0.12969725937004625, "learning_rate": 1.8943732078932713e-07, "loss": 0.0004, "step": 16382 }, { "epoch": 3.727645051194539, "grad_norm": 0.5787652813494476, "learning_rate": 1.8937324532506893e-07, "loss": 0.0031, "step": 16383 }, { "epoch": 3.727872582480091, "grad_norm": 0.33851351933465096, "learning_rate": 1.8930917876422394e-07, "loss": 0.0014, "step": 16384 }, { "epoch": 3.7281001137656427, "grad_norm": 0.022226186633153237, "learning_rate": 1.8924512110810116e-07, "loss": 0.0001, "step": 16385 }, { "epoch": 3.7283276450511944, "grad_norm": 0.20550242888316012, "learning_rate": 1.8918107235801036e-07, "loss": 0.0018, "step": 16386 }, { "epoch": 3.728555176336746, "grad_norm": 0.3817639957057186, "learning_rate": 1.8911703251525998e-07, "loss": 0.0016, "step": 16387 }, { "epoch": 3.728782707622298, "grad_norm": 0.34264598246117134, "learning_rate": 1.8905300158115925e-07, "loss": 0.0024, "step": 16388 }, { "epoch": 3.7290102389078497, "grad_norm": 0.20681973161721476, "learning_rate": 1.8898897955701692e-07, "loss": 0.0012, "step": 16389 }, { "epoch": 3.729237770193402, "grad_norm": 0.43242699415679503, "learning_rate": 1.8892496644414116e-07, "loss": 0.0045, "step": 16390 }, { "epoch": 3.729465301478953, "grad_norm": 0.1449082677059615, "learning_rate": 1.8886096224384068e-07, "loss": 0.0002, "step": 16391 }, { "epoch": 3.7296928327645054, "grad_norm": 0.7040765568678506, "learning_rate": 1.8879696695742313e-07, "loss": 0.0081, "step": 16392 }, { "epoch": 3.7299203640500567, "grad_norm": 2.2217848594245715, "learning_rate": 1.8873298058619691e-07, "loss": 0.0078, "step": 16393 }, { "epoch": 3.730147895335609, "grad_norm": 0.168252052367586, "learning_rate": 1.886690031314694e-07, "loss": 0.0006, "step": 16394 }, { "epoch": 3.73037542662116, "grad_norm": 0.38482990270373235, "learning_rate": 1.886050345945483e-07, "loss": 0.0026, "step": 16395 }, { "epoch": 3.7306029579067124, "grad_norm": 0.6804331723014292, "learning_rate": 1.8854107497674135e-07, "loss": 0.0042, "step": 16396 }, { "epoch": 3.7308304891922637, "grad_norm": 0.7369487614346194, "learning_rate": 1.8847712427935525e-07, "loss": 0.0048, "step": 16397 }, { "epoch": 3.731058020477816, "grad_norm": 0.1739374873316932, "learning_rate": 1.884131825036975e-07, "loss": 0.0005, "step": 16398 }, { "epoch": 3.7312855517633676, "grad_norm": 0.15987578783219558, "learning_rate": 1.8834924965107478e-07, "loss": 0.0011, "step": 16399 }, { "epoch": 3.7315130830489194, "grad_norm": 0.6376516713147172, "learning_rate": 1.882853257227934e-07, "loss": 0.0018, "step": 16400 }, { "epoch": 3.731740614334471, "grad_norm": 0.5801453054073863, "learning_rate": 1.882214107201602e-07, "loss": 0.0041, "step": 16401 }, { "epoch": 3.731968145620023, "grad_norm": 1.4956900419553574, "learning_rate": 1.8815750464448147e-07, "loss": 0.0087, "step": 16402 }, { "epoch": 3.7321956769055746, "grad_norm": 0.7827363843623445, "learning_rate": 1.8809360749706352e-07, "loss": 0.0023, "step": 16403 }, { "epoch": 3.7324232081911264, "grad_norm": 0.7269569784356222, "learning_rate": 1.880297192792118e-07, "loss": 0.0065, "step": 16404 }, { "epoch": 3.732650739476678, "grad_norm": 0.43934534843662965, "learning_rate": 1.879658399922326e-07, "loss": 0.0027, "step": 16405 }, { "epoch": 3.73287827076223, "grad_norm": 1.8360159277464836, "learning_rate": 1.8790196963743123e-07, "loss": 0.0201, "step": 16406 }, { "epoch": 3.7331058020477816, "grad_norm": 1.0288700115812681, "learning_rate": 1.878381082161127e-07, "loss": 0.012, "step": 16407 }, { "epoch": 3.7333333333333334, "grad_norm": 0.14062569218489873, "learning_rate": 1.87774255729583e-07, "loss": 0.0005, "step": 16408 }, { "epoch": 3.733560864618885, "grad_norm": 1.0247654998388136, "learning_rate": 1.8771041217914656e-07, "loss": 0.0052, "step": 16409 }, { "epoch": 3.733788395904437, "grad_norm": 1.3052985434872857, "learning_rate": 1.876465775661087e-07, "loss": 0.0132, "step": 16410 }, { "epoch": 3.7340159271899886, "grad_norm": 0.6126302062390181, "learning_rate": 1.8758275189177353e-07, "loss": 0.0035, "step": 16411 }, { "epoch": 3.7342434584755404, "grad_norm": 0.667318514602759, "learning_rate": 1.8751893515744606e-07, "loss": 0.0025, "step": 16412 }, { "epoch": 3.734470989761092, "grad_norm": 0.024350903864675373, "learning_rate": 1.8745512736443017e-07, "loss": 0.0001, "step": 16413 }, { "epoch": 3.734698521046644, "grad_norm": 0.544677695935372, "learning_rate": 1.873913285140301e-07, "loss": 0.0052, "step": 16414 }, { "epoch": 3.7349260523321957, "grad_norm": 0.40344516924434626, "learning_rate": 1.873275386075501e-07, "loss": 0.0022, "step": 16415 }, { "epoch": 3.7351535836177474, "grad_norm": 0.8059846215961112, "learning_rate": 1.8726375764629344e-07, "loss": 0.0086, "step": 16416 }, { "epoch": 3.735381114903299, "grad_norm": 0.955755808990874, "learning_rate": 1.8719998563156408e-07, "loss": 0.0126, "step": 16417 }, { "epoch": 3.735608646188851, "grad_norm": 0.10561743252185911, "learning_rate": 1.8713622256466514e-07, "loss": 0.0005, "step": 16418 }, { "epoch": 3.7358361774744027, "grad_norm": 0.7452046292875252, "learning_rate": 1.8707246844689982e-07, "loss": 0.0044, "step": 16419 }, { "epoch": 3.7360637087599544, "grad_norm": 0.3576114641411884, "learning_rate": 1.8700872327957114e-07, "loss": 0.0038, "step": 16420 }, { "epoch": 3.736291240045506, "grad_norm": 0.6826267083707596, "learning_rate": 1.8694498706398208e-07, "loss": 0.0039, "step": 16421 }, { "epoch": 3.736518771331058, "grad_norm": 0.3732887742723227, "learning_rate": 1.8688125980143537e-07, "loss": 0.0012, "step": 16422 }, { "epoch": 3.7367463026166097, "grad_norm": 0.522844433191012, "learning_rate": 1.8681754149323322e-07, "loss": 0.0037, "step": 16423 }, { "epoch": 3.7369738339021614, "grad_norm": 0.5522141292688595, "learning_rate": 1.867538321406781e-07, "loss": 0.0071, "step": 16424 }, { "epoch": 3.737201365187713, "grad_norm": 0.8363143325202305, "learning_rate": 1.8669013174507214e-07, "loss": 0.0064, "step": 16425 }, { "epoch": 3.737428896473265, "grad_norm": 0.9012302944005206, "learning_rate": 1.866264403077169e-07, "loss": 0.0154, "step": 16426 }, { "epoch": 3.7376564277588167, "grad_norm": 0.5189143247480504, "learning_rate": 1.8656275782991443e-07, "loss": 0.0047, "step": 16427 }, { "epoch": 3.7378839590443684, "grad_norm": 0.7134864121486453, "learning_rate": 1.8649908431296618e-07, "loss": 0.0028, "step": 16428 }, { "epoch": 3.7381114903299206, "grad_norm": 0.2167166361106245, "learning_rate": 1.864354197581738e-07, "loss": 0.0013, "step": 16429 }, { "epoch": 3.738339021615472, "grad_norm": 1.1631549870414029, "learning_rate": 1.8637176416683806e-07, "loss": 0.0067, "step": 16430 }, { "epoch": 3.738566552901024, "grad_norm": 1.1926390563062872, "learning_rate": 1.8630811754026028e-07, "loss": 0.0063, "step": 16431 }, { "epoch": 3.7387940841865754, "grad_norm": 0.4584768815598459, "learning_rate": 1.8624447987974123e-07, "loss": 0.0028, "step": 16432 }, { "epoch": 3.7390216154721276, "grad_norm": 0.7725017046905691, "learning_rate": 1.8618085118658125e-07, "loss": 0.0066, "step": 16433 }, { "epoch": 3.739249146757679, "grad_norm": 1.558357409299369, "learning_rate": 1.86117231462081e-07, "loss": 0.0483, "step": 16434 }, { "epoch": 3.739476678043231, "grad_norm": 0.584641472756705, "learning_rate": 1.8605362070754079e-07, "loss": 0.0033, "step": 16435 }, { "epoch": 3.7397042093287824, "grad_norm": 0.19693933919649914, "learning_rate": 1.8599001892426083e-07, "loss": 0.0016, "step": 16436 }, { "epoch": 3.7399317406143346, "grad_norm": 1.0752684361929674, "learning_rate": 1.8592642611354085e-07, "loss": 0.0063, "step": 16437 }, { "epoch": 3.7401592718998864, "grad_norm": 0.29901914337652385, "learning_rate": 1.8586284227668046e-07, "loss": 0.0018, "step": 16438 }, { "epoch": 3.740386803185438, "grad_norm": 0.7694943766456259, "learning_rate": 1.8579926741497952e-07, "loss": 0.0089, "step": 16439 }, { "epoch": 3.74061433447099, "grad_norm": 0.18699499691264274, "learning_rate": 1.8573570152973702e-07, "loss": 0.0008, "step": 16440 }, { "epoch": 3.7408418657565417, "grad_norm": 0.011517418773784281, "learning_rate": 1.8567214462225223e-07, "loss": 0.0, "step": 16441 }, { "epoch": 3.7410693970420934, "grad_norm": 0.9614426578126792, "learning_rate": 1.8560859669382432e-07, "loss": 0.0058, "step": 16442 }, { "epoch": 3.741296928327645, "grad_norm": 0.5303592075245223, "learning_rate": 1.855450577457521e-07, "loss": 0.0072, "step": 16443 }, { "epoch": 3.741524459613197, "grad_norm": 0.7553880844063348, "learning_rate": 1.8548152777933413e-07, "loss": 0.011, "step": 16444 }, { "epoch": 3.7417519908987487, "grad_norm": 0.16763099578668592, "learning_rate": 1.8541800679586858e-07, "loss": 0.0007, "step": 16445 }, { "epoch": 3.7419795221843004, "grad_norm": 0.17506386070622462, "learning_rate": 1.8535449479665407e-07, "loss": 0.0014, "step": 16446 }, { "epoch": 3.742207053469852, "grad_norm": 0.454581554280454, "learning_rate": 1.8529099178298837e-07, "loss": 0.0026, "step": 16447 }, { "epoch": 3.742434584755404, "grad_norm": 0.18240676573659778, "learning_rate": 1.8522749775616944e-07, "loss": 0.0004, "step": 16448 }, { "epoch": 3.7426621160409557, "grad_norm": 0.47224721401013325, "learning_rate": 1.8516401271749508e-07, "loss": 0.0019, "step": 16449 }, { "epoch": 3.7428896473265074, "grad_norm": 0.27593648422418293, "learning_rate": 1.85100536668263e-07, "loss": 0.0018, "step": 16450 }, { "epoch": 3.743117178612059, "grad_norm": 1.0715219588200622, "learning_rate": 1.8503706960977031e-07, "loss": 0.0083, "step": 16451 }, { "epoch": 3.743344709897611, "grad_norm": 0.5050250936082404, "learning_rate": 1.84973611543314e-07, "loss": 0.0017, "step": 16452 }, { "epoch": 3.7435722411831627, "grad_norm": 0.622261896212338, "learning_rate": 1.8491016247019134e-07, "loss": 0.0075, "step": 16453 }, { "epoch": 3.7437997724687144, "grad_norm": 0.95439284447033, "learning_rate": 1.848467223916988e-07, "loss": 0.0057, "step": 16454 }, { "epoch": 3.744027303754266, "grad_norm": 1.034103282451848, "learning_rate": 1.847832913091331e-07, "loss": 0.0075, "step": 16455 }, { "epoch": 3.744254835039818, "grad_norm": 0.9682446922994277, "learning_rate": 1.84719869223791e-07, "loss": 0.006, "step": 16456 }, { "epoch": 3.7444823663253697, "grad_norm": 0.38970322111783345, "learning_rate": 1.846564561369682e-07, "loss": 0.0034, "step": 16457 }, { "epoch": 3.7447098976109214, "grad_norm": 0.49464592230961424, "learning_rate": 1.8459305204996115e-07, "loss": 0.0038, "step": 16458 }, { "epoch": 3.744937428896473, "grad_norm": 0.42771023639173633, "learning_rate": 1.8452965696406534e-07, "loss": 0.0042, "step": 16459 }, { "epoch": 3.745164960182025, "grad_norm": 0.9475480289973693, "learning_rate": 1.844662708805769e-07, "loss": 0.0065, "step": 16460 }, { "epoch": 3.7453924914675767, "grad_norm": 1.2441517424610704, "learning_rate": 1.8440289380079087e-07, "loss": 0.0232, "step": 16461 }, { "epoch": 3.7456200227531284, "grad_norm": 0.2955896299066583, "learning_rate": 1.8433952572600278e-07, "loss": 0.0018, "step": 16462 }, { "epoch": 3.74584755403868, "grad_norm": 0.2015512807127812, "learning_rate": 1.8427616665750797e-07, "loss": 0.0006, "step": 16463 }, { "epoch": 3.746075085324232, "grad_norm": 0.4950174523020124, "learning_rate": 1.8421281659660096e-07, "loss": 0.007, "step": 16464 }, { "epoch": 3.7463026166097837, "grad_norm": 0.7259473713401172, "learning_rate": 1.841494755445769e-07, "loss": 0.0036, "step": 16465 }, { "epoch": 3.7465301478953354, "grad_norm": 0.622685204594292, "learning_rate": 1.8408614350272998e-07, "loss": 0.0127, "step": 16466 }, { "epoch": 3.746757679180887, "grad_norm": 0.8406637270303291, "learning_rate": 1.84022820472355e-07, "loss": 0.0062, "step": 16467 }, { "epoch": 3.7469852104664394, "grad_norm": 1.14406319014235, "learning_rate": 1.8395950645474552e-07, "loss": 0.0078, "step": 16468 }, { "epoch": 3.7472127417519907, "grad_norm": 0.7135297013815267, "learning_rate": 1.8389620145119643e-07, "loss": 0.0036, "step": 16469 }, { "epoch": 3.747440273037543, "grad_norm": 0.8113533893645555, "learning_rate": 1.8383290546300115e-07, "loss": 0.003, "step": 16470 }, { "epoch": 3.747667804323094, "grad_norm": 0.6922061543830134, "learning_rate": 1.8376961849145315e-07, "loss": 0.006, "step": 16471 }, { "epoch": 3.7478953356086464, "grad_norm": 6.1567630754263405, "learning_rate": 1.837063405378462e-07, "loss": 0.0035, "step": 16472 }, { "epoch": 3.7481228668941977, "grad_norm": 0.6063316362496092, "learning_rate": 1.836430716034733e-07, "loss": 0.0046, "step": 16473 }, { "epoch": 3.74835039817975, "grad_norm": 1.5355454721363637, "learning_rate": 1.835798116896277e-07, "loss": 0.0123, "step": 16474 }, { "epoch": 3.748577929465301, "grad_norm": 0.46894679063529787, "learning_rate": 1.8351656079760257e-07, "loss": 0.0025, "step": 16475 }, { "epoch": 3.7488054607508534, "grad_norm": 0.8000297437169458, "learning_rate": 1.8345331892869016e-07, "loss": 0.0105, "step": 16476 }, { "epoch": 3.749032992036405, "grad_norm": 0.563091321012615, "learning_rate": 1.8339008608418343e-07, "loss": 0.003, "step": 16477 }, { "epoch": 3.749260523321957, "grad_norm": 0.5985171219729276, "learning_rate": 1.8332686226537438e-07, "loss": 0.005, "step": 16478 }, { "epoch": 3.7494880546075087, "grad_norm": 0.4836991913473239, "learning_rate": 1.832636474735556e-07, "loss": 0.0047, "step": 16479 }, { "epoch": 3.7497155858930604, "grad_norm": 1.6793300657916084, "learning_rate": 1.8320044171001868e-07, "loss": 0.0179, "step": 16480 }, { "epoch": 3.749943117178612, "grad_norm": 0.39503645634723555, "learning_rate": 1.831372449760556e-07, "loss": 0.0035, "step": 16481 }, { "epoch": 3.750170648464164, "grad_norm": 0.49080730711680304, "learning_rate": 1.830740572729582e-07, "loss": 0.0049, "step": 16482 }, { "epoch": 3.7503981797497157, "grad_norm": 0.27201406878864187, "learning_rate": 1.8301087860201753e-07, "loss": 0.0011, "step": 16483 }, { "epoch": 3.7506257110352674, "grad_norm": 0.5902836911978433, "learning_rate": 1.829477089645252e-07, "loss": 0.0079, "step": 16484 }, { "epoch": 3.750853242320819, "grad_norm": 1.381649884632532, "learning_rate": 1.8288454836177194e-07, "loss": 0.0126, "step": 16485 }, { "epoch": 3.751080773606371, "grad_norm": 0.08851876376607042, "learning_rate": 1.82821396795049e-07, "loss": 0.0003, "step": 16486 }, { "epoch": 3.7513083048919227, "grad_norm": 0.19258410931485137, "learning_rate": 1.827582542656467e-07, "loss": 0.001, "step": 16487 }, { "epoch": 3.7515358361774744, "grad_norm": 0.7174358251841292, "learning_rate": 1.8269512077485576e-07, "loss": 0.0029, "step": 16488 }, { "epoch": 3.751763367463026, "grad_norm": 0.5629179442202908, "learning_rate": 1.8263199632396673e-07, "loss": 0.0037, "step": 16489 }, { "epoch": 3.751990898748578, "grad_norm": 0.34963124238755017, "learning_rate": 1.825688809142693e-07, "loss": 0.0018, "step": 16490 }, { "epoch": 3.7522184300341297, "grad_norm": 0.4641517136469869, "learning_rate": 1.825057745470539e-07, "loss": 0.0033, "step": 16491 }, { "epoch": 3.7524459613196814, "grad_norm": 0.736274992012617, "learning_rate": 1.8244267722360988e-07, "loss": 0.0112, "step": 16492 }, { "epoch": 3.752673492605233, "grad_norm": 0.563414986350119, "learning_rate": 1.823795889452272e-07, "loss": 0.0045, "step": 16493 }, { "epoch": 3.752901023890785, "grad_norm": 0.10314953300488625, "learning_rate": 1.8231650971319494e-07, "loss": 0.0003, "step": 16494 }, { "epoch": 3.7531285551763367, "grad_norm": 0.8873280434037419, "learning_rate": 1.8225343952880247e-07, "loss": 0.0034, "step": 16495 }, { "epoch": 3.7533560864618885, "grad_norm": 0.03870980233024924, "learning_rate": 1.8219037839333903e-07, "loss": 0.0001, "step": 16496 }, { "epoch": 3.75358361774744, "grad_norm": 0.7175824883347391, "learning_rate": 1.821273263080931e-07, "loss": 0.0046, "step": 16497 }, { "epoch": 3.753811149032992, "grad_norm": 0.25100608684204145, "learning_rate": 1.8206428327435376e-07, "loss": 0.0016, "step": 16498 }, { "epoch": 3.7540386803185437, "grad_norm": 1.1402141914227768, "learning_rate": 1.8200124929340903e-07, "loss": 0.0091, "step": 16499 }, { "epoch": 3.7542662116040955, "grad_norm": 0.356519437429629, "learning_rate": 1.8193822436654767e-07, "loss": 0.001, "step": 16500 }, { "epoch": 3.754493742889647, "grad_norm": 0.6120828325984445, "learning_rate": 1.8187520849505737e-07, "loss": 0.0044, "step": 16501 }, { "epoch": 3.754721274175199, "grad_norm": 0.3384417843310643, "learning_rate": 1.8181220168022622e-07, "loss": 0.0029, "step": 16502 }, { "epoch": 3.7549488054607507, "grad_norm": 0.32058033213077913, "learning_rate": 1.8174920392334227e-07, "loss": 0.0025, "step": 16503 }, { "epoch": 3.7551763367463025, "grad_norm": 0.18419160054800712, "learning_rate": 1.8168621522569263e-07, "loss": 0.0007, "step": 16504 }, { "epoch": 3.755403868031854, "grad_norm": 0.6861460127936898, "learning_rate": 1.8162323558856504e-07, "loss": 0.0018, "step": 16505 }, { "epoch": 3.755631399317406, "grad_norm": 0.26315310190745417, "learning_rate": 1.8156026501324648e-07, "loss": 0.0018, "step": 16506 }, { "epoch": 3.755858930602958, "grad_norm": 0.9644341335007433, "learning_rate": 1.8149730350102381e-07, "loss": 0.0079, "step": 16507 }, { "epoch": 3.7560864618885095, "grad_norm": 0.387851625932115, "learning_rate": 1.8143435105318402e-07, "loss": 0.0029, "step": 16508 }, { "epoch": 3.7563139931740617, "grad_norm": 1.5150204024929343, "learning_rate": 1.8137140767101374e-07, "loss": 0.0119, "step": 16509 }, { "epoch": 3.756541524459613, "grad_norm": 0.5542603164568901, "learning_rate": 1.813084733557996e-07, "loss": 0.0024, "step": 16510 }, { "epoch": 3.756769055745165, "grad_norm": 0.3678437711199008, "learning_rate": 1.8124554810882746e-07, "loss": 0.0019, "step": 16511 }, { "epoch": 3.7569965870307165, "grad_norm": 0.5047670896395048, "learning_rate": 1.8118263193138385e-07, "loss": 0.0024, "step": 16512 }, { "epoch": 3.7572241183162687, "grad_norm": 2.8261502592305443, "learning_rate": 1.8111972482475447e-07, "loss": 0.0333, "step": 16513 }, { "epoch": 3.75745164960182, "grad_norm": 0.665410152087374, "learning_rate": 1.810568267902247e-07, "loss": 0.003, "step": 16514 }, { "epoch": 3.757679180887372, "grad_norm": 0.7417863529294301, "learning_rate": 1.8099393782908045e-07, "loss": 0.0006, "step": 16515 }, { "epoch": 3.757906712172924, "grad_norm": 0.24498858791557093, "learning_rate": 1.8093105794260684e-07, "loss": 0.0019, "step": 16516 }, { "epoch": 3.7581342434584757, "grad_norm": 0.5442163137613143, "learning_rate": 1.8086818713208943e-07, "loss": 0.0076, "step": 16517 }, { "epoch": 3.7583617747440274, "grad_norm": 0.6555850345746679, "learning_rate": 1.8080532539881262e-07, "loss": 0.0029, "step": 16518 }, { "epoch": 3.758589306029579, "grad_norm": 0.48726198062133047, "learning_rate": 1.8074247274406168e-07, "loss": 0.0104, "step": 16519 }, { "epoch": 3.758816837315131, "grad_norm": 0.5994904771606403, "learning_rate": 1.8067962916912096e-07, "loss": 0.0012, "step": 16520 }, { "epoch": 3.7590443686006827, "grad_norm": 0.38105260735961927, "learning_rate": 1.8061679467527473e-07, "loss": 0.0017, "step": 16521 }, { "epoch": 3.7592718998862344, "grad_norm": 0.42160826968337634, "learning_rate": 1.805539692638073e-07, "loss": 0.0013, "step": 16522 }, { "epoch": 3.759499431171786, "grad_norm": 0.3941959480170118, "learning_rate": 1.804911529360029e-07, "loss": 0.0018, "step": 16523 }, { "epoch": 3.759726962457338, "grad_norm": 0.6704739325031847, "learning_rate": 1.8042834569314534e-07, "loss": 0.0031, "step": 16524 }, { "epoch": 3.7599544937428897, "grad_norm": 0.228189346728719, "learning_rate": 1.803655475365182e-07, "loss": 0.001, "step": 16525 }, { "epoch": 3.7601820250284415, "grad_norm": 1.023098988695765, "learning_rate": 1.8030275846740484e-07, "loss": 0.0102, "step": 16526 }, { "epoch": 3.760409556313993, "grad_norm": 0.527678106197925, "learning_rate": 1.8023997848708882e-07, "loss": 0.0036, "step": 16527 }, { "epoch": 3.760637087599545, "grad_norm": 0.8780794434039524, "learning_rate": 1.8017720759685268e-07, "loss": 0.0053, "step": 16528 }, { "epoch": 3.7608646188850967, "grad_norm": 0.7134843952367896, "learning_rate": 1.8011444579798018e-07, "loss": 0.0047, "step": 16529 }, { "epoch": 3.7610921501706485, "grad_norm": 0.4981091017998324, "learning_rate": 1.8005169309175342e-07, "loss": 0.0051, "step": 16530 }, { "epoch": 3.7613196814562, "grad_norm": 0.4116631148982807, "learning_rate": 1.7998894947945536e-07, "loss": 0.0015, "step": 16531 }, { "epoch": 3.761547212741752, "grad_norm": 0.9382427644382783, "learning_rate": 1.7992621496236824e-07, "loss": 0.0135, "step": 16532 }, { "epoch": 3.7617747440273037, "grad_norm": 0.8021222068060124, "learning_rate": 1.7986348954177397e-07, "loss": 0.0037, "step": 16533 }, { "epoch": 3.7620022753128555, "grad_norm": 0.44958954982190463, "learning_rate": 1.7980077321895466e-07, "loss": 0.0013, "step": 16534 }, { "epoch": 3.7622298065984072, "grad_norm": 0.8360244595774642, "learning_rate": 1.7973806599519229e-07, "loss": 0.005, "step": 16535 }, { "epoch": 3.762457337883959, "grad_norm": 1.751461318199935, "learning_rate": 1.7967536787176851e-07, "loss": 0.0058, "step": 16536 }, { "epoch": 3.7626848691695107, "grad_norm": 1.2989976193531745, "learning_rate": 1.7961267884996448e-07, "loss": 0.0066, "step": 16537 }, { "epoch": 3.7629124004550625, "grad_norm": 0.6377070953102826, "learning_rate": 1.7954999893106188e-07, "loss": 0.0034, "step": 16538 }, { "epoch": 3.7631399317406142, "grad_norm": 1.2816131700838886, "learning_rate": 1.7948732811634137e-07, "loss": 0.0102, "step": 16539 }, { "epoch": 3.763367463026166, "grad_norm": 1.0443295597513707, "learning_rate": 1.794246664070838e-07, "loss": 0.0051, "step": 16540 }, { "epoch": 3.7635949943117177, "grad_norm": 0.9233256434191525, "learning_rate": 1.7936201380457006e-07, "loss": 0.0063, "step": 16541 }, { "epoch": 3.7638225255972695, "grad_norm": 0.48868977744270253, "learning_rate": 1.792993703100806e-07, "loss": 0.0093, "step": 16542 }, { "epoch": 3.7640500568828212, "grad_norm": 0.7757453094870906, "learning_rate": 1.7923673592489587e-07, "loss": 0.0042, "step": 16543 }, { "epoch": 3.764277588168373, "grad_norm": 0.4064737622015737, "learning_rate": 1.7917411065029596e-07, "loss": 0.0026, "step": 16544 }, { "epoch": 3.7645051194539247, "grad_norm": 0.17765328385620308, "learning_rate": 1.791114944875605e-07, "loss": 0.0003, "step": 16545 }, { "epoch": 3.764732650739477, "grad_norm": 0.8238812283920371, "learning_rate": 1.790488874379697e-07, "loss": 0.0096, "step": 16546 }, { "epoch": 3.7649601820250282, "grad_norm": 0.1385066897033583, "learning_rate": 1.7898628950280273e-07, "loss": 0.0007, "step": 16547 }, { "epoch": 3.7651877133105804, "grad_norm": 1.7173794462033165, "learning_rate": 1.7892370068333914e-07, "loss": 0.016, "step": 16548 }, { "epoch": 3.7654152445961317, "grad_norm": 1.3017627915445151, "learning_rate": 1.788611209808582e-07, "loss": 0.0056, "step": 16549 }, { "epoch": 3.765642775881684, "grad_norm": 0.7107078174754847, "learning_rate": 1.7879855039663905e-07, "loss": 0.0065, "step": 16550 }, { "epoch": 3.7658703071672353, "grad_norm": 1.2333909455695082, "learning_rate": 1.7873598893196036e-07, "loss": 0.0071, "step": 16551 }, { "epoch": 3.7660978384527874, "grad_norm": 0.3382470353458453, "learning_rate": 1.7867343658810058e-07, "loss": 0.0015, "step": 16552 }, { "epoch": 3.7663253697383388, "grad_norm": 1.29089069250987, "learning_rate": 1.786108933663385e-07, "loss": 0.0073, "step": 16553 }, { "epoch": 3.766552901023891, "grad_norm": 0.5825837615927791, "learning_rate": 1.7854835926795212e-07, "loss": 0.0027, "step": 16554 }, { "epoch": 3.7667804323094427, "grad_norm": 1.0023326056539892, "learning_rate": 1.7848583429421952e-07, "loss": 0.011, "step": 16555 }, { "epoch": 3.7670079635949945, "grad_norm": 0.9908912802900016, "learning_rate": 1.784233184464188e-07, "loss": 0.01, "step": 16556 }, { "epoch": 3.767235494880546, "grad_norm": 0.25457541114155097, "learning_rate": 1.783608117258278e-07, "loss": 0.0011, "step": 16557 }, { "epoch": 3.767463026166098, "grad_norm": 0.5535477282086406, "learning_rate": 1.7829831413372377e-07, "loss": 0.0014, "step": 16558 }, { "epoch": 3.7676905574516497, "grad_norm": 0.11435259922576756, "learning_rate": 1.7823582567138392e-07, "loss": 0.0003, "step": 16559 }, { "epoch": 3.7679180887372015, "grad_norm": 0.9214513032525679, "learning_rate": 1.781733463400858e-07, "loss": 0.0035, "step": 16560 }, { "epoch": 3.768145620022753, "grad_norm": 0.8200234736574264, "learning_rate": 1.7811087614110595e-07, "loss": 0.0071, "step": 16561 }, { "epoch": 3.768373151308305, "grad_norm": 2.409857614529504, "learning_rate": 1.7804841507572133e-07, "loss": 0.0201, "step": 16562 }, { "epoch": 3.7686006825938567, "grad_norm": 0.5754664053124049, "learning_rate": 1.7798596314520875e-07, "loss": 0.0037, "step": 16563 }, { "epoch": 3.7688282138794085, "grad_norm": 0.8730937740200918, "learning_rate": 1.7792352035084428e-07, "loss": 0.0107, "step": 16564 }, { "epoch": 3.7690557451649602, "grad_norm": 0.30404714160327634, "learning_rate": 1.7786108669390443e-07, "loss": 0.0022, "step": 16565 }, { "epoch": 3.769283276450512, "grad_norm": 0.614215163807336, "learning_rate": 1.7779866217566485e-07, "loss": 0.0022, "step": 16566 }, { "epoch": 3.7695108077360637, "grad_norm": 0.3726234404637176, "learning_rate": 1.7773624679740178e-07, "loss": 0.0096, "step": 16567 }, { "epoch": 3.7697383390216155, "grad_norm": 0.6012242035669836, "learning_rate": 1.7767384056039055e-07, "loss": 0.0055, "step": 16568 }, { "epoch": 3.7699658703071672, "grad_norm": 0.9136508304354973, "learning_rate": 1.776114434659068e-07, "loss": 0.0077, "step": 16569 }, { "epoch": 3.770193401592719, "grad_norm": 0.1300217798782697, "learning_rate": 1.7754905551522593e-07, "loss": 0.0004, "step": 16570 }, { "epoch": 3.7704209328782707, "grad_norm": 0.027802194129363068, "learning_rate": 1.7748667670962276e-07, "loss": 0.0001, "step": 16571 }, { "epoch": 3.7706484641638225, "grad_norm": 0.6935262885278702, "learning_rate": 1.7742430705037248e-07, "loss": 0.0032, "step": 16572 }, { "epoch": 3.7708759954493742, "grad_norm": 0.20296228714139772, "learning_rate": 1.7736194653874952e-07, "loss": 0.0005, "step": 16573 }, { "epoch": 3.771103526734926, "grad_norm": 0.334533883949165, "learning_rate": 1.7729959517602877e-07, "loss": 0.0015, "step": 16574 }, { "epoch": 3.7713310580204777, "grad_norm": 0.6928072096472933, "learning_rate": 1.7723725296348415e-07, "loss": 0.0039, "step": 16575 }, { "epoch": 3.7715585893060295, "grad_norm": 1.3622886528329832, "learning_rate": 1.7717491990238999e-07, "loss": 0.011, "step": 16576 }, { "epoch": 3.7717861205915812, "grad_norm": 0.28224969565097685, "learning_rate": 1.7711259599402058e-07, "loss": 0.001, "step": 16577 }, { "epoch": 3.772013651877133, "grad_norm": 0.27898740506156344, "learning_rate": 1.770502812396492e-07, "loss": 0.0035, "step": 16578 }, { "epoch": 3.7722411831626848, "grad_norm": 0.32620594692051663, "learning_rate": 1.7698797564054994e-07, "loss": 0.0028, "step": 16579 }, { "epoch": 3.7724687144482365, "grad_norm": 1.205969526776394, "learning_rate": 1.769256791979957e-07, "loss": 0.0113, "step": 16580 }, { "epoch": 3.7726962457337883, "grad_norm": 0.04984079595147604, "learning_rate": 1.768633919132602e-07, "loss": 0.0001, "step": 16581 }, { "epoch": 3.77292377701934, "grad_norm": 1.186772304463266, "learning_rate": 1.7680111378761606e-07, "loss": 0.0188, "step": 16582 }, { "epoch": 3.7731513083048918, "grad_norm": 0.6248211902666234, "learning_rate": 1.767388448223363e-07, "loss": 0.0056, "step": 16583 }, { "epoch": 3.7733788395904435, "grad_norm": 0.056382850585520554, "learning_rate": 1.7667658501869377e-07, "loss": 0.0003, "step": 16584 }, { "epoch": 3.7736063708759957, "grad_norm": 0.26515605438840095, "learning_rate": 1.7661433437796062e-07, "loss": 0.0009, "step": 16585 }, { "epoch": 3.773833902161547, "grad_norm": 0.3135821639031892, "learning_rate": 1.7655209290140947e-07, "loss": 0.0013, "step": 16586 }, { "epoch": 3.774061433447099, "grad_norm": 0.35656275568713464, "learning_rate": 1.7648986059031204e-07, "loss": 0.0018, "step": 16587 }, { "epoch": 3.7742889647326505, "grad_norm": 0.3454902356020429, "learning_rate": 1.7642763744594067e-07, "loss": 0.0019, "step": 16588 }, { "epoch": 3.7745164960182027, "grad_norm": 0.8881475054127576, "learning_rate": 1.7636542346956672e-07, "loss": 0.0057, "step": 16589 }, { "epoch": 3.774744027303754, "grad_norm": 0.3729623243478376, "learning_rate": 1.7630321866246178e-07, "loss": 0.0044, "step": 16590 }, { "epoch": 3.774971558589306, "grad_norm": 0.5672407581243751, "learning_rate": 1.7624102302589756e-07, "loss": 0.0065, "step": 16591 }, { "epoch": 3.7751990898748575, "grad_norm": 0.201635229881675, "learning_rate": 1.761788365611448e-07, "loss": 0.0007, "step": 16592 }, { "epoch": 3.7754266211604097, "grad_norm": 0.47877496850359863, "learning_rate": 1.7611665926947477e-07, "loss": 0.0009, "step": 16593 }, { "epoch": 3.7756541524459615, "grad_norm": 0.37657400263976093, "learning_rate": 1.7605449115215798e-07, "loss": 0.0016, "step": 16594 }, { "epoch": 3.7758816837315132, "grad_norm": 0.7473818687274975, "learning_rate": 1.7599233221046515e-07, "loss": 0.0065, "step": 16595 }, { "epoch": 3.776109215017065, "grad_norm": 1.1527604449251858, "learning_rate": 1.7593018244566695e-07, "loss": 0.0083, "step": 16596 }, { "epoch": 3.7763367463026167, "grad_norm": 0.1807209294391202, "learning_rate": 1.758680418590332e-07, "loss": 0.001, "step": 16597 }, { "epoch": 3.7765642775881685, "grad_norm": 0.7623228961412802, "learning_rate": 1.7580591045183426e-07, "loss": 0.0065, "step": 16598 }, { "epoch": 3.7767918088737202, "grad_norm": 1.091462978665979, "learning_rate": 1.7574378822533974e-07, "loss": 0.0133, "step": 16599 }, { "epoch": 3.777019340159272, "grad_norm": 0.4968639147243467, "learning_rate": 1.7568167518081957e-07, "loss": 0.0082, "step": 16600 }, { "epoch": 3.7772468714448237, "grad_norm": 0.9021201031848165, "learning_rate": 1.7561957131954293e-07, "loss": 0.0051, "step": 16601 }, { "epoch": 3.7774744027303755, "grad_norm": 0.17480391033066042, "learning_rate": 1.7555747664277923e-07, "loss": 0.0009, "step": 16602 }, { "epoch": 3.7777019340159272, "grad_norm": 0.7663906933307957, "learning_rate": 1.7549539115179774e-07, "loss": 0.0069, "step": 16603 }, { "epoch": 3.777929465301479, "grad_norm": 0.4006077703908464, "learning_rate": 1.7543331484786702e-07, "loss": 0.0031, "step": 16604 }, { "epoch": 3.7781569965870307, "grad_norm": 0.295498158976028, "learning_rate": 1.7537124773225626e-07, "loss": 0.0018, "step": 16605 }, { "epoch": 3.7783845278725825, "grad_norm": 0.5667074798569519, "learning_rate": 1.7530918980623348e-07, "loss": 0.0057, "step": 16606 }, { "epoch": 3.7786120591581343, "grad_norm": 0.8844768712607707, "learning_rate": 1.752471410710675e-07, "loss": 0.0089, "step": 16607 }, { "epoch": 3.778839590443686, "grad_norm": 0.6504922731773561, "learning_rate": 1.7518510152802614e-07, "loss": 0.0043, "step": 16608 }, { "epoch": 3.7790671217292378, "grad_norm": 0.10495664819658256, "learning_rate": 1.7512307117837745e-07, "loss": 0.0003, "step": 16609 }, { "epoch": 3.7792946530147895, "grad_norm": 1.2515771247693581, "learning_rate": 1.750610500233895e-07, "loss": 0.0136, "step": 16610 }, { "epoch": 3.7795221843003413, "grad_norm": 0.21664171317922784, "learning_rate": 1.7499903806432948e-07, "loss": 0.001, "step": 16611 }, { "epoch": 3.779749715585893, "grad_norm": 0.33187395029561606, "learning_rate": 1.7493703530246514e-07, "loss": 0.0018, "step": 16612 }, { "epoch": 3.7799772468714448, "grad_norm": 0.5875253231743671, "learning_rate": 1.7487504173906356e-07, "loss": 0.0044, "step": 16613 }, { "epoch": 3.7802047781569965, "grad_norm": 0.8180415044899745, "learning_rate": 1.748130573753916e-07, "loss": 0.0175, "step": 16614 }, { "epoch": 3.7804323094425483, "grad_norm": 0.16369534186025037, "learning_rate": 1.7475108221271624e-07, "loss": 0.0008, "step": 16615 }, { "epoch": 3.7806598407281, "grad_norm": 0.45855743538475546, "learning_rate": 1.7468911625230415e-07, "loss": 0.0051, "step": 16616 }, { "epoch": 3.7808873720136518, "grad_norm": 0.9562330843687183, "learning_rate": 1.74627159495422e-07, "loss": 0.0023, "step": 16617 }, { "epoch": 3.7811149032992035, "grad_norm": 0.28208627643285583, "learning_rate": 1.7456521194333575e-07, "loss": 0.0025, "step": 16618 }, { "epoch": 3.7813424345847553, "grad_norm": 0.3470934322232059, "learning_rate": 1.7450327359731177e-07, "loss": 0.0026, "step": 16619 }, { "epoch": 3.781569965870307, "grad_norm": 0.39267902053611653, "learning_rate": 1.7444134445861593e-07, "loss": 0.0011, "step": 16620 }, { "epoch": 3.781797497155859, "grad_norm": 0.41195270239809056, "learning_rate": 1.7437942452851352e-07, "loss": 0.003, "step": 16621 }, { "epoch": 3.7820250284414105, "grad_norm": 1.5649553388390272, "learning_rate": 1.7431751380827054e-07, "loss": 0.0086, "step": 16622 }, { "epoch": 3.7822525597269623, "grad_norm": 0.18700698382358633, "learning_rate": 1.7425561229915212e-07, "loss": 0.0009, "step": 16623 }, { "epoch": 3.7824800910125145, "grad_norm": 0.5271993107338003, "learning_rate": 1.7419372000242365e-07, "loss": 0.0033, "step": 16624 }, { "epoch": 3.782707622298066, "grad_norm": 0.04295474704409443, "learning_rate": 1.7413183691934982e-07, "loss": 0.0002, "step": 16625 }, { "epoch": 3.782935153583618, "grad_norm": 0.17402576060394176, "learning_rate": 1.7406996305119565e-07, "loss": 0.0008, "step": 16626 }, { "epoch": 3.7831626848691693, "grad_norm": 0.6855898217683324, "learning_rate": 1.740080983992256e-07, "loss": 0.0041, "step": 16627 }, { "epoch": 3.7833902161547215, "grad_norm": 0.5107670761148817, "learning_rate": 1.739462429647039e-07, "loss": 0.003, "step": 16628 }, { "epoch": 3.783617747440273, "grad_norm": 0.5835512433751112, "learning_rate": 1.7388439674889484e-07, "loss": 0.0056, "step": 16629 }, { "epoch": 3.783845278725825, "grad_norm": 0.3405530919600017, "learning_rate": 1.738225597530626e-07, "loss": 0.0023, "step": 16630 }, { "epoch": 3.7840728100113763, "grad_norm": 0.8341276078310383, "learning_rate": 1.7376073197847115e-07, "loss": 0.0064, "step": 16631 }, { "epoch": 3.7843003412969285, "grad_norm": 0.3070523517040243, "learning_rate": 1.7369891342638397e-07, "loss": 0.0018, "step": 16632 }, { "epoch": 3.7845278725824802, "grad_norm": 0.13077507724181275, "learning_rate": 1.7363710409806425e-07, "loss": 0.0005, "step": 16633 }, { "epoch": 3.784755403868032, "grad_norm": 0.5167975156087946, "learning_rate": 1.7357530399477567e-07, "loss": 0.0024, "step": 16634 }, { "epoch": 3.7849829351535837, "grad_norm": 0.5850600283951195, "learning_rate": 1.7351351311778092e-07, "loss": 0.0058, "step": 16635 }, { "epoch": 3.7852104664391355, "grad_norm": 0.5460869294811699, "learning_rate": 1.7345173146834309e-07, "loss": 0.0043, "step": 16636 }, { "epoch": 3.7854379977246873, "grad_norm": 0.42364890735818084, "learning_rate": 1.7338995904772496e-07, "loss": 0.0035, "step": 16637 }, { "epoch": 3.785665529010239, "grad_norm": 0.44061332003945286, "learning_rate": 1.733281958571891e-07, "loss": 0.0029, "step": 16638 }, { "epoch": 3.7858930602957908, "grad_norm": 0.5535418848421353, "learning_rate": 1.7326644189799778e-07, "loss": 0.0047, "step": 16639 }, { "epoch": 3.7861205915813425, "grad_norm": 0.6281921780933729, "learning_rate": 1.732046971714129e-07, "loss": 0.0046, "step": 16640 }, { "epoch": 3.7863481228668943, "grad_norm": 0.4502041653148942, "learning_rate": 1.731429616786967e-07, "loss": 0.0037, "step": 16641 }, { "epoch": 3.786575654152446, "grad_norm": 0.4593332328443398, "learning_rate": 1.7308123542111073e-07, "loss": 0.0019, "step": 16642 }, { "epoch": 3.7868031854379978, "grad_norm": 0.10117601429571504, "learning_rate": 1.7301951839991657e-07, "loss": 0.0005, "step": 16643 }, { "epoch": 3.7870307167235495, "grad_norm": 1.0260395199328107, "learning_rate": 1.7295781061637574e-07, "loss": 0.0048, "step": 16644 }, { "epoch": 3.7872582480091013, "grad_norm": 0.39949201367377923, "learning_rate": 1.7289611207174956e-07, "loss": 0.0021, "step": 16645 }, { "epoch": 3.787485779294653, "grad_norm": 0.274616474559912, "learning_rate": 1.7283442276729887e-07, "loss": 0.0027, "step": 16646 }, { "epoch": 3.7877133105802048, "grad_norm": 1.7523952507482354, "learning_rate": 1.7277274270428434e-07, "loss": 0.006, "step": 16647 }, { "epoch": 3.7879408418657565, "grad_norm": 0.24417851006851501, "learning_rate": 1.7271107188396682e-07, "loss": 0.0011, "step": 16648 }, { "epoch": 3.7881683731513083, "grad_norm": 0.1973395859220131, "learning_rate": 1.726494103076063e-07, "loss": 0.0007, "step": 16649 }, { "epoch": 3.78839590443686, "grad_norm": 0.080362821662003, "learning_rate": 1.7258775797646385e-07, "loss": 0.0002, "step": 16650 }, { "epoch": 3.788623435722412, "grad_norm": 0.5730745415883615, "learning_rate": 1.72526114891799e-07, "loss": 0.0047, "step": 16651 }, { "epoch": 3.7888509670079635, "grad_norm": 0.8915656890206042, "learning_rate": 1.7246448105487153e-07, "loss": 0.0069, "step": 16652 }, { "epoch": 3.7890784982935153, "grad_norm": 1.0637267929099374, "learning_rate": 1.7240285646694146e-07, "loss": 0.0113, "step": 16653 }, { "epoch": 3.789306029579067, "grad_norm": 0.4840919125238157, "learning_rate": 1.7234124112926797e-07, "loss": 0.0031, "step": 16654 }, { "epoch": 3.789533560864619, "grad_norm": 0.6568809894696287, "learning_rate": 1.7227963504311043e-07, "loss": 0.0057, "step": 16655 }, { "epoch": 3.7897610921501705, "grad_norm": 0.4621782052284176, "learning_rate": 1.7221803820972813e-07, "loss": 0.0033, "step": 16656 }, { "epoch": 3.7899886234357223, "grad_norm": 0.6531836804894785, "learning_rate": 1.7215645063038e-07, "loss": 0.0088, "step": 16657 }, { "epoch": 3.790216154721274, "grad_norm": 0.9114559876311716, "learning_rate": 1.7209487230632475e-07, "loss": 0.0104, "step": 16658 }, { "epoch": 3.790443686006826, "grad_norm": 0.8907773600997843, "learning_rate": 1.7203330323882057e-07, "loss": 0.0089, "step": 16659 }, { "epoch": 3.7906712172923775, "grad_norm": 0.2595040137384129, "learning_rate": 1.719717434291264e-07, "loss": 0.0017, "step": 16660 }, { "epoch": 3.7908987485779293, "grad_norm": 0.7826428370306431, "learning_rate": 1.7191019287849984e-07, "loss": 0.0062, "step": 16661 }, { "epoch": 3.791126279863481, "grad_norm": 0.5498375760487232, "learning_rate": 1.7184865158819918e-07, "loss": 0.0021, "step": 16662 }, { "epoch": 3.7913538111490332, "grad_norm": 1.0354464348727315, "learning_rate": 1.7178711955948216e-07, "loss": 0.021, "step": 16663 }, { "epoch": 3.7915813424345846, "grad_norm": 0.2666465452685679, "learning_rate": 1.7172559679360656e-07, "loss": 0.0007, "step": 16664 }, { "epoch": 3.7918088737201368, "grad_norm": 0.31327557516203836, "learning_rate": 1.7166408329182967e-07, "loss": 0.0018, "step": 16665 }, { "epoch": 3.792036405005688, "grad_norm": 0.6775076171736775, "learning_rate": 1.7160257905540851e-07, "loss": 0.0053, "step": 16666 }, { "epoch": 3.7922639362912403, "grad_norm": 0.38183527635986414, "learning_rate": 1.7154108408560044e-07, "loss": 0.0018, "step": 16667 }, { "epoch": 3.7924914675767916, "grad_norm": 0.3675860016857003, "learning_rate": 1.7147959838366187e-07, "loss": 0.0022, "step": 16668 }, { "epoch": 3.7927189988623438, "grad_norm": 0.2640110324938606, "learning_rate": 1.7141812195084983e-07, "loss": 0.0015, "step": 16669 }, { "epoch": 3.792946530147895, "grad_norm": 0.246511500059273, "learning_rate": 1.713566547884208e-07, "loss": 0.001, "step": 16670 }, { "epoch": 3.7931740614334473, "grad_norm": 0.542547399985025, "learning_rate": 1.7129519689763077e-07, "loss": 0.0117, "step": 16671 }, { "epoch": 3.793401592718999, "grad_norm": 0.8869943197419153, "learning_rate": 1.7123374827973625e-07, "loss": 0.0064, "step": 16672 }, { "epoch": 3.7936291240045508, "grad_norm": 0.015612171581524092, "learning_rate": 1.7117230893599263e-07, "loss": 0.0, "step": 16673 }, { "epoch": 3.7938566552901025, "grad_norm": 0.32126866113187696, "learning_rate": 1.7111087886765604e-07, "loss": 0.0025, "step": 16674 }, { "epoch": 3.7940841865756543, "grad_norm": 0.22725923170782789, "learning_rate": 1.7104945807598167e-07, "loss": 0.0009, "step": 16675 }, { "epoch": 3.794311717861206, "grad_norm": 0.87433952777107, "learning_rate": 1.7098804656222498e-07, "loss": 0.0114, "step": 16676 }, { "epoch": 3.7945392491467578, "grad_norm": 0.0588646605752852, "learning_rate": 1.7092664432764138e-07, "loss": 0.0002, "step": 16677 }, { "epoch": 3.7947667804323095, "grad_norm": 0.4537422236897651, "learning_rate": 1.7086525137348536e-07, "loss": 0.0045, "step": 16678 }, { "epoch": 3.7949943117178613, "grad_norm": 1.0278213348001437, "learning_rate": 1.7080386770101203e-07, "loss": 0.0076, "step": 16679 }, { "epoch": 3.795221843003413, "grad_norm": 0.49020936079160154, "learning_rate": 1.707424933114757e-07, "loss": 0.0047, "step": 16680 }, { "epoch": 3.795449374288965, "grad_norm": 0.483424172334496, "learning_rate": 1.7068112820613103e-07, "loss": 0.0059, "step": 16681 }, { "epoch": 3.7956769055745165, "grad_norm": 0.618145049704191, "learning_rate": 1.7061977238623185e-07, "loss": 0.0017, "step": 16682 }, { "epoch": 3.7959044368600683, "grad_norm": 0.1355796149162624, "learning_rate": 1.705584258530324e-07, "loss": 0.0005, "step": 16683 }, { "epoch": 3.79613196814562, "grad_norm": 0.5366042019532917, "learning_rate": 1.704970886077866e-07, "loss": 0.0036, "step": 16684 }, { "epoch": 3.796359499431172, "grad_norm": 0.7615968442785938, "learning_rate": 1.7043576065174782e-07, "loss": 0.0035, "step": 16685 }, { "epoch": 3.7965870307167235, "grad_norm": 1.1167846687677685, "learning_rate": 1.703744419861697e-07, "loss": 0.0062, "step": 16686 }, { "epoch": 3.7968145620022753, "grad_norm": 0.4508092902135068, "learning_rate": 1.7031313261230524e-07, "loss": 0.0033, "step": 16687 }, { "epoch": 3.797042093287827, "grad_norm": 0.8431344289903133, "learning_rate": 1.702518325314078e-07, "loss": 0.0033, "step": 16688 }, { "epoch": 3.797269624573379, "grad_norm": 0.27948408169876343, "learning_rate": 1.7019054174472992e-07, "loss": 0.0014, "step": 16689 }, { "epoch": 3.7974971558589306, "grad_norm": 0.2720524045270991, "learning_rate": 1.7012926025352434e-07, "loss": 0.0011, "step": 16690 }, { "epoch": 3.7977246871444823, "grad_norm": 1.1983344166317285, "learning_rate": 1.700679880590439e-07, "loss": 0.0106, "step": 16691 }, { "epoch": 3.797952218430034, "grad_norm": 0.7339026413385052, "learning_rate": 1.7000672516254036e-07, "loss": 0.0086, "step": 16692 }, { "epoch": 3.798179749715586, "grad_norm": 1.0092155093504325, "learning_rate": 1.6994547156526632e-07, "loss": 0.0027, "step": 16693 }, { "epoch": 3.7984072810011376, "grad_norm": 0.35645746382172677, "learning_rate": 1.6988422726847322e-07, "loss": 0.0014, "step": 16694 }, { "epoch": 3.7986348122866893, "grad_norm": 0.9191990974431556, "learning_rate": 1.698229922734132e-07, "loss": 0.0058, "step": 16695 }, { "epoch": 3.798862343572241, "grad_norm": 0.364113692571558, "learning_rate": 1.6976176658133738e-07, "loss": 0.0027, "step": 16696 }, { "epoch": 3.799089874857793, "grad_norm": 1.1378193649519879, "learning_rate": 1.6970055019349737e-07, "loss": 0.0116, "step": 16697 }, { "epoch": 3.7993174061433446, "grad_norm": 0.6427549631607249, "learning_rate": 1.696393431111445e-07, "loss": 0.0044, "step": 16698 }, { "epoch": 3.7995449374288963, "grad_norm": 1.0582863205663529, "learning_rate": 1.6957814533552927e-07, "loss": 0.0082, "step": 16699 }, { "epoch": 3.799772468714448, "grad_norm": 0.09512410536899586, "learning_rate": 1.6951695686790288e-07, "loss": 0.0005, "step": 16700 }, { "epoch": 3.8, "grad_norm": 1.7153598615414751, "learning_rate": 1.694557777095157e-07, "loss": 0.0308, "step": 16701 }, { "epoch": 3.800227531285552, "grad_norm": 0.32761650629641925, "learning_rate": 1.6939460786161792e-07, "loss": 0.0023, "step": 16702 }, { "epoch": 3.8004550625711033, "grad_norm": 0.08537677088137398, "learning_rate": 1.6933344732546e-07, "loss": 0.0004, "step": 16703 }, { "epoch": 3.8006825938566555, "grad_norm": 0.25213767449854824, "learning_rate": 1.692722961022919e-07, "loss": 0.0016, "step": 16704 }, { "epoch": 3.800910125142207, "grad_norm": 1.557540532446151, "learning_rate": 1.692111541933636e-07, "loss": 0.0056, "step": 16705 }, { "epoch": 3.801137656427759, "grad_norm": 0.7595471211771433, "learning_rate": 1.6915002159992435e-07, "loss": 0.0074, "step": 16706 }, { "epoch": 3.8013651877133103, "grad_norm": 0.24904332944520227, "learning_rate": 1.690888983232241e-07, "loss": 0.0008, "step": 16707 }, { "epoch": 3.8015927189988625, "grad_norm": 0.5333290920692068, "learning_rate": 1.6902778436451174e-07, "loss": 0.0036, "step": 16708 }, { "epoch": 3.801820250284414, "grad_norm": 0.4587318079616776, "learning_rate": 1.6896667972503625e-07, "loss": 0.0023, "step": 16709 }, { "epoch": 3.802047781569966, "grad_norm": 0.054048639064541736, "learning_rate": 1.689055844060466e-07, "loss": 0.0003, "step": 16710 }, { "epoch": 3.802275312855518, "grad_norm": 0.7798182313461365, "learning_rate": 1.6884449840879147e-07, "loss": 0.0049, "step": 16711 }, { "epoch": 3.8025028441410695, "grad_norm": 0.1329823481949017, "learning_rate": 1.6878342173451968e-07, "loss": 0.0004, "step": 16712 }, { "epoch": 3.8027303754266213, "grad_norm": 0.829821078923882, "learning_rate": 1.6872235438447896e-07, "loss": 0.007, "step": 16713 }, { "epoch": 3.802957906712173, "grad_norm": 0.44150142826287175, "learning_rate": 1.686612963599179e-07, "loss": 0.0024, "step": 16714 }, { "epoch": 3.803185437997725, "grad_norm": 0.624841613889394, "learning_rate": 1.686002476620842e-07, "loss": 0.0021, "step": 16715 }, { "epoch": 3.8034129692832765, "grad_norm": 0.7955701793543392, "learning_rate": 1.6853920829222513e-07, "loss": 0.0028, "step": 16716 }, { "epoch": 3.8036405005688283, "grad_norm": 0.5965218685833825, "learning_rate": 1.6847817825158916e-07, "loss": 0.0078, "step": 16717 }, { "epoch": 3.80386803185438, "grad_norm": 1.1701848711900873, "learning_rate": 1.6841715754142286e-07, "loss": 0.011, "step": 16718 }, { "epoch": 3.804095563139932, "grad_norm": 0.39405885774128235, "learning_rate": 1.6835614616297396e-07, "loss": 0.0022, "step": 16719 }, { "epoch": 3.8043230944254836, "grad_norm": 0.3624903631526128, "learning_rate": 1.6829514411748903e-07, "loss": 0.0013, "step": 16720 }, { "epoch": 3.8045506257110353, "grad_norm": 0.9398872372900016, "learning_rate": 1.6823415140621483e-07, "loss": 0.0078, "step": 16721 }, { "epoch": 3.804778156996587, "grad_norm": 0.7424464710658297, "learning_rate": 1.68173168030398e-07, "loss": 0.0105, "step": 16722 }, { "epoch": 3.805005688282139, "grad_norm": 0.3439820138580133, "learning_rate": 1.68112193991285e-07, "loss": 0.0027, "step": 16723 }, { "epoch": 3.8052332195676906, "grad_norm": 0.6735398556302679, "learning_rate": 1.6805122929012214e-07, "loss": 0.0014, "step": 16724 }, { "epoch": 3.8054607508532423, "grad_norm": 0.4073458508669048, "learning_rate": 1.679902739281552e-07, "loss": 0.0017, "step": 16725 }, { "epoch": 3.805688282138794, "grad_norm": 0.34606453951911614, "learning_rate": 1.679293279066302e-07, "loss": 0.0008, "step": 16726 }, { "epoch": 3.805915813424346, "grad_norm": 0.9179487392111364, "learning_rate": 1.678683912267927e-07, "loss": 0.004, "step": 16727 }, { "epoch": 3.8061433447098976, "grad_norm": 0.2958176380877072, "learning_rate": 1.6780746388988786e-07, "loss": 0.0013, "step": 16728 }, { "epoch": 3.8063708759954493, "grad_norm": 0.2996070841801572, "learning_rate": 1.6774654589716112e-07, "loss": 0.0009, "step": 16729 }, { "epoch": 3.806598407281001, "grad_norm": 0.4830673718244052, "learning_rate": 1.676856372498576e-07, "loss": 0.0053, "step": 16730 }, { "epoch": 3.806825938566553, "grad_norm": 0.27635717207143135, "learning_rate": 1.6762473794922233e-07, "loss": 0.0008, "step": 16731 }, { "epoch": 3.8070534698521046, "grad_norm": 0.07492439701049532, "learning_rate": 1.675638479964996e-07, "loss": 0.0003, "step": 16732 }, { "epoch": 3.8072810011376563, "grad_norm": 0.2578337848442815, "learning_rate": 1.6750296739293416e-07, "loss": 0.0027, "step": 16733 }, { "epoch": 3.807508532423208, "grad_norm": 0.18192001275217898, "learning_rate": 1.6744209613977026e-07, "loss": 0.0014, "step": 16734 }, { "epoch": 3.80773606370876, "grad_norm": 0.6458236807230772, "learning_rate": 1.6738123423825177e-07, "loss": 0.0077, "step": 16735 }, { "epoch": 3.8079635949943116, "grad_norm": 0.3179806468143995, "learning_rate": 1.6732038168962275e-07, "loss": 0.0038, "step": 16736 }, { "epoch": 3.8081911262798633, "grad_norm": 0.45681678335351705, "learning_rate": 1.6725953849512694e-07, "loss": 0.0037, "step": 16737 }, { "epoch": 3.808418657565415, "grad_norm": 0.7462803516808769, "learning_rate": 1.67198704656008e-07, "loss": 0.0105, "step": 16738 }, { "epoch": 3.8086461888509673, "grad_norm": 1.2378840743022468, "learning_rate": 1.6713788017350915e-07, "loss": 0.0137, "step": 16739 }, { "epoch": 3.8088737201365186, "grad_norm": 0.42867219265038, "learning_rate": 1.6707706504887322e-07, "loss": 0.0025, "step": 16740 }, { "epoch": 3.809101251422071, "grad_norm": 2.1588992386427157, "learning_rate": 1.6701625928334375e-07, "loss": 0.0161, "step": 16741 }, { "epoch": 3.809328782707622, "grad_norm": 0.043375215552502455, "learning_rate": 1.6695546287816293e-07, "loss": 0.0001, "step": 16742 }, { "epoch": 3.8095563139931743, "grad_norm": 0.6396873617546216, "learning_rate": 1.668946758345736e-07, "loss": 0.0084, "step": 16743 }, { "epoch": 3.8097838452787256, "grad_norm": 0.5783208619712075, "learning_rate": 1.6683389815381806e-07, "loss": 0.0059, "step": 16744 }, { "epoch": 3.810011376564278, "grad_norm": 1.7915350655986277, "learning_rate": 1.6677312983713883e-07, "loss": 0.0231, "step": 16745 }, { "epoch": 3.810238907849829, "grad_norm": 0.6241176528236604, "learning_rate": 1.6671237088577757e-07, "loss": 0.0049, "step": 16746 }, { "epoch": 3.8104664391353813, "grad_norm": 0.272584372228635, "learning_rate": 1.666516213009759e-07, "loss": 0.002, "step": 16747 }, { "epoch": 3.8106939704209326, "grad_norm": 0.5626998681587205, "learning_rate": 1.6659088108397594e-07, "loss": 0.0062, "step": 16748 }, { "epoch": 3.810921501706485, "grad_norm": 0.31196589324102303, "learning_rate": 1.6653015023601857e-07, "loss": 0.0025, "step": 16749 }, { "epoch": 3.8111490329920366, "grad_norm": 0.2647640245779722, "learning_rate": 1.6646942875834537e-07, "loss": 0.0011, "step": 16750 }, { "epoch": 3.8113765642775883, "grad_norm": 0.2095521750886411, "learning_rate": 1.6640871665219723e-07, "loss": 0.0012, "step": 16751 }, { "epoch": 3.81160409556314, "grad_norm": 0.4981093095108921, "learning_rate": 1.663480139188152e-07, "loss": 0.0034, "step": 16752 }, { "epoch": 3.811831626848692, "grad_norm": 0.5778021605029855, "learning_rate": 1.6628732055943986e-07, "loss": 0.0051, "step": 16753 }, { "epoch": 3.8120591581342436, "grad_norm": 0.6657524941965537, "learning_rate": 1.6622663657531137e-07, "loss": 0.0021, "step": 16754 }, { "epoch": 3.8122866894197953, "grad_norm": 0.7792516046569308, "learning_rate": 1.661659619676704e-07, "loss": 0.0027, "step": 16755 }, { "epoch": 3.812514220705347, "grad_norm": 0.2955966842641288, "learning_rate": 1.6610529673775668e-07, "loss": 0.002, "step": 16756 }, { "epoch": 3.812741751990899, "grad_norm": 0.6854861835937988, "learning_rate": 1.660446408868103e-07, "loss": 0.0066, "step": 16757 }, { "epoch": 3.8129692832764506, "grad_norm": 0.9157565466397513, "learning_rate": 1.6598399441607107e-07, "loss": 0.0051, "step": 16758 }, { "epoch": 3.8131968145620023, "grad_norm": 0.5520488155798747, "learning_rate": 1.6592335732677816e-07, "loss": 0.0028, "step": 16759 }, { "epoch": 3.813424345847554, "grad_norm": 0.34096141030240473, "learning_rate": 1.6586272962017127e-07, "loss": 0.0023, "step": 16760 }, { "epoch": 3.813651877133106, "grad_norm": 0.4992548754655109, "learning_rate": 1.658021112974891e-07, "loss": 0.0047, "step": 16761 }, { "epoch": 3.8138794084186576, "grad_norm": 0.8554345741488348, "learning_rate": 1.65741502359971e-07, "loss": 0.0048, "step": 16762 }, { "epoch": 3.8141069397042093, "grad_norm": 0.17446548736301867, "learning_rate": 1.656809028088554e-07, "loss": 0.0007, "step": 16763 }, { "epoch": 3.814334470989761, "grad_norm": 0.11301779863245943, "learning_rate": 1.6562031264538086e-07, "loss": 0.0004, "step": 16764 }, { "epoch": 3.814562002275313, "grad_norm": 0.4628489900997983, "learning_rate": 1.6555973187078599e-07, "loss": 0.0059, "step": 16765 }, { "epoch": 3.8147895335608646, "grad_norm": 0.18050259257531334, "learning_rate": 1.6549916048630855e-07, "loss": 0.0007, "step": 16766 }, { "epoch": 3.8150170648464163, "grad_norm": 0.5115235041826213, "learning_rate": 1.65438598493187e-07, "loss": 0.0035, "step": 16767 }, { "epoch": 3.815244596131968, "grad_norm": 0.5518328774678974, "learning_rate": 1.6537804589265863e-07, "loss": 0.003, "step": 16768 }, { "epoch": 3.81547212741752, "grad_norm": 0.2777508768578347, "learning_rate": 1.6531750268596145e-07, "loss": 0.0027, "step": 16769 }, { "epoch": 3.8156996587030716, "grad_norm": 0.35828610483469514, "learning_rate": 1.6525696887433226e-07, "loss": 0.0022, "step": 16770 }, { "epoch": 3.8159271899886233, "grad_norm": 0.3308441932421407, "learning_rate": 1.6519644445900902e-07, "loss": 0.0022, "step": 16771 }, { "epoch": 3.816154721274175, "grad_norm": 0.5274586303236144, "learning_rate": 1.6513592944122837e-07, "loss": 0.0023, "step": 16772 }, { "epoch": 3.816382252559727, "grad_norm": 0.7449666685088807, "learning_rate": 1.650754238222269e-07, "loss": 0.0033, "step": 16773 }, { "epoch": 3.8166097838452786, "grad_norm": 0.3223766228320383, "learning_rate": 1.6501492760324172e-07, "loss": 0.0017, "step": 16774 }, { "epoch": 3.8168373151308304, "grad_norm": 0.38855544477434706, "learning_rate": 1.6495444078550873e-07, "loss": 0.0019, "step": 16775 }, { "epoch": 3.817064846416382, "grad_norm": 0.3803693055458548, "learning_rate": 1.6489396337026446e-07, "loss": 0.0037, "step": 16776 }, { "epoch": 3.817292377701934, "grad_norm": 0.3236202299498375, "learning_rate": 1.6483349535874513e-07, "loss": 0.0015, "step": 16777 }, { "epoch": 3.817519908987486, "grad_norm": 0.42331182799517136, "learning_rate": 1.6477303675218624e-07, "loss": 0.0027, "step": 16778 }, { "epoch": 3.8177474402730374, "grad_norm": 3.018487024081588, "learning_rate": 1.647125875518238e-07, "loss": 0.0195, "step": 16779 }, { "epoch": 3.8179749715585896, "grad_norm": 0.43377751645508794, "learning_rate": 1.6465214775889283e-07, "loss": 0.0029, "step": 16780 }, { "epoch": 3.818202502844141, "grad_norm": 0.1801651448716068, "learning_rate": 1.6459171737462912e-07, "loss": 0.0014, "step": 16781 }, { "epoch": 3.818430034129693, "grad_norm": 0.18951002668559933, "learning_rate": 1.6453129640026732e-07, "loss": 0.0014, "step": 16782 }, { "epoch": 3.8186575654152444, "grad_norm": 0.2570987379915543, "learning_rate": 1.644708848370425e-07, "loss": 0.0019, "step": 16783 }, { "epoch": 3.8188850967007966, "grad_norm": 0.8364553476316607, "learning_rate": 1.6441048268618955e-07, "loss": 0.0085, "step": 16784 }, { "epoch": 3.819112627986348, "grad_norm": 0.914102914638604, "learning_rate": 1.6435008994894268e-07, "loss": 0.0128, "step": 16785 }, { "epoch": 3.8193401592719, "grad_norm": 1.0125357936510893, "learning_rate": 1.6428970662653642e-07, "loss": 0.0063, "step": 16786 }, { "epoch": 3.8195676905574514, "grad_norm": 0.8665472187345046, "learning_rate": 1.642293327202047e-07, "loss": 0.0084, "step": 16787 }, { "epoch": 3.8197952218430036, "grad_norm": 0.3168061961489377, "learning_rate": 1.6416896823118172e-07, "loss": 0.002, "step": 16788 }, { "epoch": 3.8200227531285553, "grad_norm": 0.4342861272517504, "learning_rate": 1.6410861316070087e-07, "loss": 0.0016, "step": 16789 }, { "epoch": 3.820250284414107, "grad_norm": 0.8806177279159171, "learning_rate": 1.6404826750999587e-07, "loss": 0.0052, "step": 16790 }, { "epoch": 3.820477815699659, "grad_norm": 1.8467375167205509, "learning_rate": 1.6398793128030026e-07, "loss": 0.011, "step": 16791 }, { "epoch": 3.8207053469852106, "grad_norm": 0.5530761669697178, "learning_rate": 1.6392760447284688e-07, "loss": 0.0067, "step": 16792 }, { "epoch": 3.8209328782707623, "grad_norm": 0.8325661311259083, "learning_rate": 1.6386728708886906e-07, "loss": 0.0022, "step": 16793 }, { "epoch": 3.821160409556314, "grad_norm": 0.25388875046896164, "learning_rate": 1.638069791295991e-07, "loss": 0.0009, "step": 16794 }, { "epoch": 3.821387940841866, "grad_norm": 0.2256495552328879, "learning_rate": 1.6374668059627008e-07, "loss": 0.0009, "step": 16795 }, { "epoch": 3.8216154721274176, "grad_norm": 0.5437706187270541, "learning_rate": 1.6368639149011398e-07, "loss": 0.0051, "step": 16796 }, { "epoch": 3.8218430034129693, "grad_norm": 1.9929981972047948, "learning_rate": 1.636261118123632e-07, "loss": 0.0128, "step": 16797 }, { "epoch": 3.822070534698521, "grad_norm": 0.2865896469342788, "learning_rate": 1.6356584156424986e-07, "loss": 0.0011, "step": 16798 }, { "epoch": 3.822298065984073, "grad_norm": 0.11549103226542547, "learning_rate": 1.6350558074700555e-07, "loss": 0.0006, "step": 16799 }, { "epoch": 3.8225255972696246, "grad_norm": 0.3289137799091815, "learning_rate": 1.6344532936186208e-07, "loss": 0.0033, "step": 16800 }, { "epoch": 3.8227531285551763, "grad_norm": 0.8349809079708583, "learning_rate": 1.6338508741005058e-07, "loss": 0.0043, "step": 16801 }, { "epoch": 3.822980659840728, "grad_norm": 0.06498835617060199, "learning_rate": 1.6332485489280278e-07, "loss": 0.0001, "step": 16802 }, { "epoch": 3.82320819112628, "grad_norm": 0.5204182018568788, "learning_rate": 1.6326463181134913e-07, "loss": 0.0023, "step": 16803 }, { "epoch": 3.8234357224118316, "grad_norm": 0.7106250785613768, "learning_rate": 1.6320441816692088e-07, "loss": 0.0084, "step": 16804 }, { "epoch": 3.8236632536973834, "grad_norm": 0.616617273141252, "learning_rate": 1.631442139607487e-07, "loss": 0.0022, "step": 16805 }, { "epoch": 3.823890784982935, "grad_norm": 0.5214648063845814, "learning_rate": 1.6308401919406274e-07, "loss": 0.005, "step": 16806 }, { "epoch": 3.824118316268487, "grad_norm": 0.764141462990121, "learning_rate": 1.6302383386809367e-07, "loss": 0.0092, "step": 16807 }, { "epoch": 3.8243458475540386, "grad_norm": 0.01920445952487196, "learning_rate": 1.629636579840712e-07, "loss": 0.0001, "step": 16808 }, { "epoch": 3.8245733788395904, "grad_norm": 0.15146137023920997, "learning_rate": 1.629034915432256e-07, "loss": 0.0007, "step": 16809 }, { "epoch": 3.824800910125142, "grad_norm": 0.38209739819669836, "learning_rate": 1.6284333454678607e-07, "loss": 0.0015, "step": 16810 }, { "epoch": 3.825028441410694, "grad_norm": 0.08161889644876713, "learning_rate": 1.627831869959825e-07, "loss": 0.0003, "step": 16811 }, { "epoch": 3.8252559726962456, "grad_norm": 0.8066935428212242, "learning_rate": 1.6272304889204413e-07, "loss": 0.0051, "step": 16812 }, { "epoch": 3.8254835039817974, "grad_norm": 0.15201117534368871, "learning_rate": 1.626629202361999e-07, "loss": 0.0007, "step": 16813 }, { "epoch": 3.825711035267349, "grad_norm": 0.5401085593532895, "learning_rate": 1.626028010296791e-07, "loss": 0.0031, "step": 16814 }, { "epoch": 3.825938566552901, "grad_norm": 0.21277937915382758, "learning_rate": 1.6254269127371006e-07, "loss": 0.0012, "step": 16815 }, { "epoch": 3.8261660978384526, "grad_norm": 0.48888166522341225, "learning_rate": 1.6248259096952136e-07, "loss": 0.004, "step": 16816 }, { "epoch": 3.826393629124005, "grad_norm": 0.5180562202429277, "learning_rate": 1.6242250011834145e-07, "loss": 0.0045, "step": 16817 }, { "epoch": 3.826621160409556, "grad_norm": 0.35903379119783435, "learning_rate": 1.6236241872139847e-07, "loss": 0.0018, "step": 16818 }, { "epoch": 3.8268486916951083, "grad_norm": 1.1667862336971935, "learning_rate": 1.623023467799205e-07, "loss": 0.0108, "step": 16819 }, { "epoch": 3.8270762229806596, "grad_norm": 0.4211223733577921, "learning_rate": 1.6224228429513503e-07, "loss": 0.0023, "step": 16820 }, { "epoch": 3.827303754266212, "grad_norm": 0.8632803027238131, "learning_rate": 1.6218223126826998e-07, "loss": 0.0081, "step": 16821 }, { "epoch": 3.827531285551763, "grad_norm": 0.6953261452669198, "learning_rate": 1.6212218770055243e-07, "loss": 0.0033, "step": 16822 }, { "epoch": 3.8277588168373153, "grad_norm": 0.8187200556259736, "learning_rate": 1.6206215359320953e-07, "loss": 0.0083, "step": 16823 }, { "epoch": 3.8279863481228666, "grad_norm": 0.5071313535971201, "learning_rate": 1.6200212894746838e-07, "loss": 0.003, "step": 16824 }, { "epoch": 3.828213879408419, "grad_norm": 0.7701602139840437, "learning_rate": 1.619421137645557e-07, "loss": 0.0101, "step": 16825 }, { "epoch": 3.82844141069397, "grad_norm": 2.09678400088328, "learning_rate": 1.6188210804569845e-07, "loss": 0.0032, "step": 16826 }, { "epoch": 3.8286689419795223, "grad_norm": 0.5083984915570591, "learning_rate": 1.6182211179212267e-07, "loss": 0.0028, "step": 16827 }, { "epoch": 3.828896473265074, "grad_norm": 0.33123088071889883, "learning_rate": 1.6176212500505453e-07, "loss": 0.0017, "step": 16828 }, { "epoch": 3.829124004550626, "grad_norm": 1.103835309149317, "learning_rate": 1.617021476857203e-07, "loss": 0.0074, "step": 16829 }, { "epoch": 3.8293515358361776, "grad_norm": 0.1713947192971949, "learning_rate": 1.6164217983534556e-07, "loss": 0.001, "step": 16830 }, { "epoch": 3.8295790671217294, "grad_norm": 1.9223384231239113, "learning_rate": 1.6158222145515602e-07, "loss": 0.0177, "step": 16831 }, { "epoch": 3.829806598407281, "grad_norm": 0.5612944863136886, "learning_rate": 1.6152227254637714e-07, "loss": 0.005, "step": 16832 }, { "epoch": 3.830034129692833, "grad_norm": 1.0964850665333221, "learning_rate": 1.6146233311023441e-07, "loss": 0.0034, "step": 16833 }, { "epoch": 3.8302616609783846, "grad_norm": 1.0057974360851354, "learning_rate": 1.6140240314795269e-07, "loss": 0.0066, "step": 16834 }, { "epoch": 3.8304891922639364, "grad_norm": 0.9272970782174216, "learning_rate": 1.613424826607566e-07, "loss": 0.0126, "step": 16835 }, { "epoch": 3.830716723549488, "grad_norm": 1.0905797497487935, "learning_rate": 1.6128257164987116e-07, "loss": 0.0093, "step": 16836 }, { "epoch": 3.83094425483504, "grad_norm": 1.028603993493667, "learning_rate": 1.6122267011652037e-07, "loss": 0.0058, "step": 16837 }, { "epoch": 3.8311717861205916, "grad_norm": 0.3959672995229559, "learning_rate": 1.6116277806192919e-07, "loss": 0.0016, "step": 16838 }, { "epoch": 3.8313993174061434, "grad_norm": 0.18060839464746664, "learning_rate": 1.6110289548732116e-07, "loss": 0.0011, "step": 16839 }, { "epoch": 3.831626848691695, "grad_norm": 0.7615253130871101, "learning_rate": 1.6104302239392058e-07, "loss": 0.0081, "step": 16840 }, { "epoch": 3.831854379977247, "grad_norm": 0.09959858670924816, "learning_rate": 1.6098315878295093e-07, "loss": 0.0005, "step": 16841 }, { "epoch": 3.8320819112627986, "grad_norm": 0.1578064208173859, "learning_rate": 1.6092330465563549e-07, "loss": 0.0007, "step": 16842 }, { "epoch": 3.8323094425483504, "grad_norm": 0.8470104270425967, "learning_rate": 1.608634600131978e-07, "loss": 0.0025, "step": 16843 }, { "epoch": 3.832536973833902, "grad_norm": 1.1978856865334406, "learning_rate": 1.608036248568609e-07, "loss": 0.0082, "step": 16844 }, { "epoch": 3.832764505119454, "grad_norm": 0.2369962700742985, "learning_rate": 1.6074379918784807e-07, "loss": 0.0009, "step": 16845 }, { "epoch": 3.8329920364050056, "grad_norm": 0.6499556027109351, "learning_rate": 1.6068398300738163e-07, "loss": 0.0013, "step": 16846 }, { "epoch": 3.8332195676905574, "grad_norm": 0.6371814174934086, "learning_rate": 1.606241763166841e-07, "loss": 0.006, "step": 16847 }, { "epoch": 3.833447098976109, "grad_norm": 0.25503220155248185, "learning_rate": 1.605643791169781e-07, "loss": 0.0026, "step": 16848 }, { "epoch": 3.833674630261661, "grad_norm": 0.4630999560268229, "learning_rate": 1.605045914094855e-07, "loss": 0.0048, "step": 16849 }, { "epoch": 3.8339021615472126, "grad_norm": 0.18623366139823672, "learning_rate": 1.604448131954283e-07, "loss": 0.0005, "step": 16850 }, { "epoch": 3.8341296928327644, "grad_norm": 0.3037901863340146, "learning_rate": 1.6038504447602832e-07, "loss": 0.0016, "step": 16851 }, { "epoch": 3.834357224118316, "grad_norm": 0.4179506459076835, "learning_rate": 1.603252852525073e-07, "loss": 0.0019, "step": 16852 }, { "epoch": 3.834584755403868, "grad_norm": 0.8065900225750303, "learning_rate": 1.6026553552608647e-07, "loss": 0.0034, "step": 16853 }, { "epoch": 3.8348122866894196, "grad_norm": 0.43064827784015186, "learning_rate": 1.602057952979868e-07, "loss": 0.0053, "step": 16854 }, { "epoch": 3.8350398179749714, "grad_norm": 0.9998218102910267, "learning_rate": 1.6014606456942966e-07, "loss": 0.0064, "step": 16855 }, { "epoch": 3.8352673492605236, "grad_norm": 0.4228497345792989, "learning_rate": 1.6008634334163538e-07, "loss": 0.0038, "step": 16856 }, { "epoch": 3.835494880546075, "grad_norm": 0.36632580757895417, "learning_rate": 1.600266316158248e-07, "loss": 0.0022, "step": 16857 }, { "epoch": 3.835722411831627, "grad_norm": 0.9118317300490076, "learning_rate": 1.599669293932183e-07, "loss": 0.0054, "step": 16858 }, { "epoch": 3.8359499431171784, "grad_norm": 0.9579605843905982, "learning_rate": 1.5990723667503628e-07, "loss": 0.0055, "step": 16859 }, { "epoch": 3.8361774744027306, "grad_norm": 0.9164369350646642, "learning_rate": 1.598475534624986e-07, "loss": 0.0049, "step": 16860 }, { "epoch": 3.836405005688282, "grad_norm": 0.35070856402784323, "learning_rate": 1.5978787975682488e-07, "loss": 0.002, "step": 16861 }, { "epoch": 3.836632536973834, "grad_norm": 0.7108997488740807, "learning_rate": 1.59728215559235e-07, "loss": 0.0024, "step": 16862 }, { "epoch": 3.8368600682593854, "grad_norm": 1.0529747465816228, "learning_rate": 1.596685608709482e-07, "loss": 0.0119, "step": 16863 }, { "epoch": 3.8370875995449376, "grad_norm": 0.6509459273174626, "learning_rate": 1.596089156931837e-07, "loss": 0.0036, "step": 16864 }, { "epoch": 3.837315130830489, "grad_norm": 0.4182538691226863, "learning_rate": 1.5954928002716094e-07, "loss": 0.005, "step": 16865 }, { "epoch": 3.837542662116041, "grad_norm": 0.6566304575400562, "learning_rate": 1.5948965387409816e-07, "loss": 0.0019, "step": 16866 }, { "epoch": 3.837770193401593, "grad_norm": 0.4957799379811155, "learning_rate": 1.5943003723521456e-07, "loss": 0.002, "step": 16867 }, { "epoch": 3.8379977246871446, "grad_norm": 0.4797072426216952, "learning_rate": 1.593704301117282e-07, "loss": 0.0042, "step": 16868 }, { "epoch": 3.8382252559726964, "grad_norm": 1.7970712205083335, "learning_rate": 1.5931083250485757e-07, "loss": 0.0042, "step": 16869 }, { "epoch": 3.838452787258248, "grad_norm": 0.6608396962077949, "learning_rate": 1.5925124441582056e-07, "loss": 0.0089, "step": 16870 }, { "epoch": 3.8386803185438, "grad_norm": 0.2872774526990334, "learning_rate": 1.5919166584583507e-07, "loss": 0.0014, "step": 16871 }, { "epoch": 3.8389078498293516, "grad_norm": 0.384394549997669, "learning_rate": 1.5913209679611907e-07, "loss": 0.0043, "step": 16872 }, { "epoch": 3.8391353811149034, "grad_norm": 0.35019006870033315, "learning_rate": 1.5907253726788968e-07, "loss": 0.0025, "step": 16873 }, { "epoch": 3.839362912400455, "grad_norm": 0.9604660193952281, "learning_rate": 1.590129872623644e-07, "loss": 0.0058, "step": 16874 }, { "epoch": 3.839590443686007, "grad_norm": 0.14724215273550711, "learning_rate": 1.5895344678076012e-07, "loss": 0.0007, "step": 16875 }, { "epoch": 3.8398179749715586, "grad_norm": 0.2678854931789884, "learning_rate": 1.58893915824294e-07, "loss": 0.0021, "step": 16876 }, { "epoch": 3.8400455062571104, "grad_norm": 2.1137598578571093, "learning_rate": 1.588343943941825e-07, "loss": 0.0072, "step": 16877 }, { "epoch": 3.840273037542662, "grad_norm": 0.506334694538974, "learning_rate": 1.587748824916422e-07, "loss": 0.005, "step": 16878 }, { "epoch": 3.840500568828214, "grad_norm": 0.6732820768693313, "learning_rate": 1.5871538011788965e-07, "loss": 0.0037, "step": 16879 }, { "epoch": 3.8407281001137656, "grad_norm": 0.6053863161704803, "learning_rate": 1.5865588727414055e-07, "loss": 0.0047, "step": 16880 }, { "epoch": 3.8409556313993174, "grad_norm": 0.42329926623708697, "learning_rate": 1.585964039616112e-07, "loss": 0.0036, "step": 16881 }, { "epoch": 3.841183162684869, "grad_norm": 0.40000151319616795, "learning_rate": 1.5853693018151707e-07, "loss": 0.0042, "step": 16882 }, { "epoch": 3.841410693970421, "grad_norm": 0.4685586346456131, "learning_rate": 1.5847746593507394e-07, "loss": 0.0047, "step": 16883 }, { "epoch": 3.8416382252559726, "grad_norm": 0.3050176294082396, "learning_rate": 1.5841801122349684e-07, "loss": 0.0011, "step": 16884 }, { "epoch": 3.8418657565415244, "grad_norm": 0.0881908706441635, "learning_rate": 1.583585660480011e-07, "loss": 0.0005, "step": 16885 }, { "epoch": 3.842093287827076, "grad_norm": 0.7418441665621712, "learning_rate": 1.582991304098018e-07, "loss": 0.0053, "step": 16886 }, { "epoch": 3.842320819112628, "grad_norm": 0.46796001347549704, "learning_rate": 1.5823970431011346e-07, "loss": 0.0041, "step": 16887 }, { "epoch": 3.8425483503981797, "grad_norm": 0.0730559981146748, "learning_rate": 1.581802877501508e-07, "loss": 0.0003, "step": 16888 }, { "epoch": 3.8427758816837314, "grad_norm": 0.5239646546821153, "learning_rate": 1.5812088073112803e-07, "loss": 0.0053, "step": 16889 }, { "epoch": 3.843003412969283, "grad_norm": 0.724706657021792, "learning_rate": 1.580614832542595e-07, "loss": 0.0111, "step": 16890 }, { "epoch": 3.843230944254835, "grad_norm": 0.40109581016902995, "learning_rate": 1.58002095320759e-07, "loss": 0.002, "step": 16891 }, { "epoch": 3.8434584755403867, "grad_norm": 0.6484699192138994, "learning_rate": 1.5794271693184038e-07, "loss": 0.0065, "step": 16892 }, { "epoch": 3.8436860068259384, "grad_norm": 0.46423694385926917, "learning_rate": 1.5788334808871736e-07, "loss": 0.0033, "step": 16893 }, { "epoch": 3.84391353811149, "grad_norm": 0.13403188109784747, "learning_rate": 1.5782398879260315e-07, "loss": 0.0006, "step": 16894 }, { "epoch": 3.8441410693970424, "grad_norm": 0.2752934484280615, "learning_rate": 1.5776463904471116e-07, "loss": 0.0025, "step": 16895 }, { "epoch": 3.8443686006825937, "grad_norm": 0.8124043420767253, "learning_rate": 1.57705298846254e-07, "loss": 0.0056, "step": 16896 }, { "epoch": 3.844596131968146, "grad_norm": 0.7886824796410642, "learning_rate": 1.5764596819844478e-07, "loss": 0.0121, "step": 16897 }, { "epoch": 3.844823663253697, "grad_norm": 0.9939639622315245, "learning_rate": 1.5758664710249624e-07, "loss": 0.0095, "step": 16898 }, { "epoch": 3.8450511945392494, "grad_norm": 1.9229512923050929, "learning_rate": 1.575273355596204e-07, "loss": 0.0096, "step": 16899 }, { "epoch": 3.8452787258248007, "grad_norm": 0.3959754400624733, "learning_rate": 1.574680335710299e-07, "loss": 0.0042, "step": 16900 }, { "epoch": 3.845506257110353, "grad_norm": 0.3829971304967338, "learning_rate": 1.5740874113793631e-07, "loss": 0.0031, "step": 16901 }, { "epoch": 3.845733788395904, "grad_norm": 0.31808177290083567, "learning_rate": 1.5734945826155195e-07, "loss": 0.0017, "step": 16902 }, { "epoch": 3.8459613196814564, "grad_norm": 0.5327503685496835, "learning_rate": 1.57290184943088e-07, "loss": 0.0037, "step": 16903 }, { "epoch": 3.8461888509670077, "grad_norm": 0.4351231893535353, "learning_rate": 1.5723092118375603e-07, "loss": 0.0033, "step": 16904 }, { "epoch": 3.84641638225256, "grad_norm": 0.5296939927672909, "learning_rate": 1.571716669847676e-07, "loss": 0.0052, "step": 16905 }, { "epoch": 3.8466439135381116, "grad_norm": 0.18030307960965586, "learning_rate": 1.571124223473333e-07, "loss": 0.001, "step": 16906 }, { "epoch": 3.8468714448236634, "grad_norm": 0.6726187394998571, "learning_rate": 1.5705318727266445e-07, "loss": 0.0067, "step": 16907 }, { "epoch": 3.847098976109215, "grad_norm": 0.7243966340580594, "learning_rate": 1.5699396176197117e-07, "loss": 0.0028, "step": 16908 }, { "epoch": 3.847326507394767, "grad_norm": 0.05834836212271533, "learning_rate": 1.5693474581646448e-07, "loss": 0.0002, "step": 16909 }, { "epoch": 3.8475540386803186, "grad_norm": 0.20274681380696408, "learning_rate": 1.568755394373541e-07, "loss": 0.0009, "step": 16910 }, { "epoch": 3.8477815699658704, "grad_norm": 0.4252127191608554, "learning_rate": 1.5681634262585037e-07, "loss": 0.0033, "step": 16911 }, { "epoch": 3.848009101251422, "grad_norm": 0.1326019192773769, "learning_rate": 1.5675715538316336e-07, "loss": 0.0007, "step": 16912 }, { "epoch": 3.848236632536974, "grad_norm": 1.0562399200406656, "learning_rate": 1.5669797771050237e-07, "loss": 0.0097, "step": 16913 }, { "epoch": 3.8484641638225257, "grad_norm": 0.2439049827944369, "learning_rate": 1.566388096090772e-07, "loss": 0.0009, "step": 16914 }, { "epoch": 3.8486916951080774, "grad_norm": 0.36792276132797735, "learning_rate": 1.5657965108009689e-07, "loss": 0.0039, "step": 16915 }, { "epoch": 3.848919226393629, "grad_norm": 0.4452146169941255, "learning_rate": 1.5652050212477073e-07, "loss": 0.004, "step": 16916 }, { "epoch": 3.849146757679181, "grad_norm": 0.3292817285479272, "learning_rate": 1.5646136274430742e-07, "loss": 0.0023, "step": 16917 }, { "epoch": 3.8493742889647327, "grad_norm": 0.5580181577739637, "learning_rate": 1.5640223293991577e-07, "loss": 0.0015, "step": 16918 }, { "epoch": 3.8496018202502844, "grad_norm": 0.7812809164903384, "learning_rate": 1.5634311271280443e-07, "loss": 0.0026, "step": 16919 }, { "epoch": 3.849829351535836, "grad_norm": 0.311459762405543, "learning_rate": 1.562840020641814e-07, "loss": 0.0025, "step": 16920 }, { "epoch": 3.850056882821388, "grad_norm": 1.0010143217654905, "learning_rate": 1.562249009952551e-07, "loss": 0.0022, "step": 16921 }, { "epoch": 3.8502844141069397, "grad_norm": 0.7848759439719352, "learning_rate": 1.5616580950723334e-07, "loss": 0.0064, "step": 16922 }, { "epoch": 3.8505119453924914, "grad_norm": 0.6949072204335264, "learning_rate": 1.561067276013236e-07, "loss": 0.0041, "step": 16923 }, { "epoch": 3.850739476678043, "grad_norm": 1.2453107844036484, "learning_rate": 1.5604765527873365e-07, "loss": 0.0055, "step": 16924 }, { "epoch": 3.850967007963595, "grad_norm": 0.5113446103092606, "learning_rate": 1.5598859254067075e-07, "loss": 0.0013, "step": 16925 }, { "epoch": 3.8511945392491467, "grad_norm": 0.0685837821462116, "learning_rate": 1.5592953938834226e-07, "loss": 0.0003, "step": 16926 }, { "epoch": 3.8514220705346984, "grad_norm": 1.0585322422198113, "learning_rate": 1.558704958229547e-07, "loss": 0.0132, "step": 16927 }, { "epoch": 3.85164960182025, "grad_norm": 0.999464576238213, "learning_rate": 1.5581146184571522e-07, "loss": 0.0036, "step": 16928 }, { "epoch": 3.851877133105802, "grad_norm": 1.3694258512342967, "learning_rate": 1.5575243745783023e-07, "loss": 0.0042, "step": 16929 }, { "epoch": 3.8521046643913537, "grad_norm": 0.5775961496767859, "learning_rate": 1.5569342266050585e-07, "loss": 0.0051, "step": 16930 }, { "epoch": 3.8523321956769054, "grad_norm": 0.5749422729563475, "learning_rate": 1.556344174549484e-07, "loss": 0.0045, "step": 16931 }, { "epoch": 3.852559726962457, "grad_norm": 0.6949065017326892, "learning_rate": 1.5557542184236384e-07, "loss": 0.0059, "step": 16932 }, { "epoch": 3.852787258248009, "grad_norm": 0.34829962795396474, "learning_rate": 1.5551643582395817e-07, "loss": 0.002, "step": 16933 }, { "epoch": 3.853014789533561, "grad_norm": 0.050210940027348494, "learning_rate": 1.554574594009367e-07, "loss": 0.0002, "step": 16934 }, { "epoch": 3.8532423208191124, "grad_norm": 0.9672168898220899, "learning_rate": 1.5539849257450466e-07, "loss": 0.0096, "step": 16935 }, { "epoch": 3.8534698521046646, "grad_norm": 0.4620653772121255, "learning_rate": 1.5533953534586755e-07, "loss": 0.0033, "step": 16936 }, { "epoch": 3.853697383390216, "grad_norm": 0.7918792882006143, "learning_rate": 1.5528058771623e-07, "loss": 0.0031, "step": 16937 }, { "epoch": 3.853924914675768, "grad_norm": 1.0097881245538192, "learning_rate": 1.5522164968679706e-07, "loss": 0.0065, "step": 16938 }, { "epoch": 3.8541524459613195, "grad_norm": 0.3633396310266989, "learning_rate": 1.5516272125877322e-07, "loss": 0.0015, "step": 16939 }, { "epoch": 3.8543799772468716, "grad_norm": 1.6352101486115957, "learning_rate": 1.55103802433363e-07, "loss": 0.0043, "step": 16940 }, { "epoch": 3.854607508532423, "grad_norm": 0.7604738862092468, "learning_rate": 1.5504489321177063e-07, "loss": 0.0068, "step": 16941 }, { "epoch": 3.854835039817975, "grad_norm": 0.39493211360446717, "learning_rate": 1.5498599359519966e-07, "loss": 0.0026, "step": 16942 }, { "epoch": 3.8550625711035265, "grad_norm": 1.1498829837344544, "learning_rate": 1.5492710358485436e-07, "loss": 0.0038, "step": 16943 }, { "epoch": 3.8552901023890787, "grad_norm": 0.5368751872247529, "learning_rate": 1.5486822318193804e-07, "loss": 0.0028, "step": 16944 }, { "epoch": 3.8555176336746304, "grad_norm": 0.566998424483084, "learning_rate": 1.5480935238765426e-07, "loss": 0.0028, "step": 16945 }, { "epoch": 3.855745164960182, "grad_norm": 0.16942761718242558, "learning_rate": 1.5475049120320613e-07, "loss": 0.0009, "step": 16946 }, { "epoch": 3.855972696245734, "grad_norm": 0.4503393155630643, "learning_rate": 1.5469163962979698e-07, "loss": 0.0069, "step": 16947 }, { "epoch": 3.8562002275312857, "grad_norm": 0.47001407465268213, "learning_rate": 1.5463279766862932e-07, "loss": 0.0016, "step": 16948 }, { "epoch": 3.8564277588168374, "grad_norm": 0.12639708479423353, "learning_rate": 1.5457396532090573e-07, "loss": 0.0002, "step": 16949 }, { "epoch": 3.856655290102389, "grad_norm": 0.1486902399362336, "learning_rate": 1.5451514258782892e-07, "loss": 0.0007, "step": 16950 }, { "epoch": 3.856882821387941, "grad_norm": 0.5144152147502185, "learning_rate": 1.5445632947060072e-07, "loss": 0.0019, "step": 16951 }, { "epoch": 3.8571103526734927, "grad_norm": 0.6126548675232136, "learning_rate": 1.5439752597042341e-07, "loss": 0.0024, "step": 16952 }, { "epoch": 3.8573378839590444, "grad_norm": 0.3111985810255361, "learning_rate": 1.5433873208849898e-07, "loss": 0.0024, "step": 16953 }, { "epoch": 3.857565415244596, "grad_norm": 1.3243219057402635, "learning_rate": 1.5427994782602867e-07, "loss": 0.0093, "step": 16954 }, { "epoch": 3.857792946530148, "grad_norm": 0.6102269401555269, "learning_rate": 1.5422117318421435e-07, "loss": 0.0051, "step": 16955 }, { "epoch": 3.8580204778156997, "grad_norm": 0.20368981118813614, "learning_rate": 1.541624081642569e-07, "loss": 0.0021, "step": 16956 }, { "epoch": 3.8582480091012514, "grad_norm": 0.44515784688629095, "learning_rate": 1.541036527673577e-07, "loss": 0.0048, "step": 16957 }, { "epoch": 3.858475540386803, "grad_norm": 0.7252651811237851, "learning_rate": 1.5404490699471704e-07, "loss": 0.0023, "step": 16958 }, { "epoch": 3.858703071672355, "grad_norm": 0.4833605443436002, "learning_rate": 1.539861708475364e-07, "loss": 0.0028, "step": 16959 }, { "epoch": 3.8589306029579067, "grad_norm": 0.3228004221991399, "learning_rate": 1.539274443270157e-07, "loss": 0.0018, "step": 16960 }, { "epoch": 3.8591581342434584, "grad_norm": 0.3604993122956425, "learning_rate": 1.538687274343552e-07, "loss": 0.0017, "step": 16961 }, { "epoch": 3.85938566552901, "grad_norm": 0.31813000383863665, "learning_rate": 1.5381002017075527e-07, "loss": 0.0016, "step": 16962 }, { "epoch": 3.859613196814562, "grad_norm": 0.4450296863495843, "learning_rate": 1.5375132253741537e-07, "loss": 0.002, "step": 16963 }, { "epoch": 3.8598407281001137, "grad_norm": 0.8453459954801428, "learning_rate": 1.5369263453553538e-07, "loss": 0.012, "step": 16964 }, { "epoch": 3.8600682593856654, "grad_norm": 2.3832279930222744, "learning_rate": 1.5363395616631477e-07, "loss": 0.0168, "step": 16965 }, { "epoch": 3.860295790671217, "grad_norm": 0.33969832308258, "learning_rate": 1.535752874309531e-07, "loss": 0.0009, "step": 16966 }, { "epoch": 3.860523321956769, "grad_norm": 0.4525207341803307, "learning_rate": 1.5351662833064912e-07, "loss": 0.0019, "step": 16967 }, { "epoch": 3.8607508532423207, "grad_norm": 0.6182470818987651, "learning_rate": 1.5345797886660158e-07, "loss": 0.0038, "step": 16968 }, { "epoch": 3.8609783845278725, "grad_norm": 0.04943374684665951, "learning_rate": 1.533993390400096e-07, "loss": 0.0001, "step": 16969 }, { "epoch": 3.861205915813424, "grad_norm": 0.1419730030286609, "learning_rate": 1.5334070885207115e-07, "loss": 0.0005, "step": 16970 }, { "epoch": 3.861433447098976, "grad_norm": 0.3831474312478219, "learning_rate": 1.5328208830398492e-07, "loss": 0.0019, "step": 16971 }, { "epoch": 3.8616609783845277, "grad_norm": 1.0272018587153893, "learning_rate": 1.5322347739694897e-07, "loss": 0.0119, "step": 16972 }, { "epoch": 3.86188850967008, "grad_norm": 0.2060109764173635, "learning_rate": 1.53164876132161e-07, "loss": 0.0009, "step": 16973 }, { "epoch": 3.862116040955631, "grad_norm": 0.7132443280196674, "learning_rate": 1.5310628451081901e-07, "loss": 0.0081, "step": 16974 }, { "epoch": 3.8623435722411834, "grad_norm": 0.4600618848827366, "learning_rate": 1.5304770253412017e-07, "loss": 0.0029, "step": 16975 }, { "epoch": 3.8625711035267347, "grad_norm": 0.16482514181384147, "learning_rate": 1.5298913020326212e-07, "loss": 0.0005, "step": 16976 }, { "epoch": 3.862798634812287, "grad_norm": 0.6387191258864412, "learning_rate": 1.529305675194416e-07, "loss": 0.0073, "step": 16977 }, { "epoch": 3.863026166097838, "grad_norm": 0.8550801748379108, "learning_rate": 1.528720144838558e-07, "loss": 0.0128, "step": 16978 }, { "epoch": 3.8632536973833904, "grad_norm": 1.1908180867982554, "learning_rate": 1.5281347109770146e-07, "loss": 0.0144, "step": 16979 }, { "epoch": 3.8634812286689417, "grad_norm": 1.2260909964022664, "learning_rate": 1.5275493736217485e-07, "loss": 0.0089, "step": 16980 }, { "epoch": 3.863708759954494, "grad_norm": 0.7817541391271449, "learning_rate": 1.5269641327847264e-07, "loss": 0.0116, "step": 16981 }, { "epoch": 3.8639362912400452, "grad_norm": 1.1695380536821998, "learning_rate": 1.5263789884779056e-07, "loss": 0.0228, "step": 16982 }, { "epoch": 3.8641638225255974, "grad_norm": 0.5985478179090155, "learning_rate": 1.5257939407132487e-07, "loss": 0.0063, "step": 16983 }, { "epoch": 3.864391353811149, "grad_norm": 1.3618715445659753, "learning_rate": 1.5252089895027106e-07, "loss": 0.0131, "step": 16984 }, { "epoch": 3.864618885096701, "grad_norm": 1.068042097392438, "learning_rate": 1.5246241348582477e-07, "loss": 0.0098, "step": 16985 }, { "epoch": 3.8648464163822527, "grad_norm": 0.36988658468337515, "learning_rate": 1.5240393767918146e-07, "loss": 0.0034, "step": 16986 }, { "epoch": 3.8650739476678044, "grad_norm": 0.3385449607312852, "learning_rate": 1.5234547153153604e-07, "loss": 0.0018, "step": 16987 }, { "epoch": 3.865301478953356, "grad_norm": 1.2137558796233676, "learning_rate": 1.5228701504408366e-07, "loss": 0.0138, "step": 16988 }, { "epoch": 3.865529010238908, "grad_norm": 0.27880973929627395, "learning_rate": 1.5222856821801884e-07, "loss": 0.0025, "step": 16989 }, { "epoch": 3.8657565415244597, "grad_norm": 1.38154826318727, "learning_rate": 1.5217013105453642e-07, "loss": 0.0126, "step": 16990 }, { "epoch": 3.8659840728100114, "grad_norm": 1.1702298397438091, "learning_rate": 1.5211170355483036e-07, "loss": 0.0155, "step": 16991 }, { "epoch": 3.866211604095563, "grad_norm": 0.10002621698736247, "learning_rate": 1.52053285720095e-07, "loss": 0.0002, "step": 16992 }, { "epoch": 3.866439135381115, "grad_norm": 0.5348091561353979, "learning_rate": 1.519948775515246e-07, "loss": 0.0049, "step": 16993 }, { "epoch": 3.8666666666666667, "grad_norm": 0.6845085227022939, "learning_rate": 1.5193647905031236e-07, "loss": 0.0009, "step": 16994 }, { "epoch": 3.8668941979522184, "grad_norm": 0.5017643621510169, "learning_rate": 1.5187809021765233e-07, "loss": 0.004, "step": 16995 }, { "epoch": 3.86712172923777, "grad_norm": 0.44124533744364686, "learning_rate": 1.5181971105473744e-07, "loss": 0.0026, "step": 16996 }, { "epoch": 3.867349260523322, "grad_norm": 1.1938912996976299, "learning_rate": 1.5176134156276133e-07, "loss": 0.012, "step": 16997 }, { "epoch": 3.8675767918088737, "grad_norm": 0.7532191330715259, "learning_rate": 1.5170298174291643e-07, "loss": 0.0049, "step": 16998 }, { "epoch": 3.8678043230944255, "grad_norm": 0.8039976870107202, "learning_rate": 1.5164463159639584e-07, "loss": 0.0034, "step": 16999 }, { "epoch": 3.868031854379977, "grad_norm": 0.43392052095174205, "learning_rate": 1.5158629112439226e-07, "loss": 0.0037, "step": 17000 }, { "epoch": 3.868259385665529, "grad_norm": 0.5077122019689284, "learning_rate": 1.5152796032809765e-07, "loss": 0.0029, "step": 17001 }, { "epoch": 3.8684869169510807, "grad_norm": 1.1782707171580684, "learning_rate": 1.5146963920870464e-07, "loss": 0.0173, "step": 17002 }, { "epoch": 3.8687144482366325, "grad_norm": 0.48497235139840966, "learning_rate": 1.5141132776740473e-07, "loss": 0.0024, "step": 17003 }, { "epoch": 3.868941979522184, "grad_norm": 0.19788658481082233, "learning_rate": 1.5135302600539013e-07, "loss": 0.0007, "step": 17004 }, { "epoch": 3.869169510807736, "grad_norm": 0.5130321249388303, "learning_rate": 1.5129473392385208e-07, "loss": 0.0037, "step": 17005 }, { "epoch": 3.8693970420932877, "grad_norm": 0.7651821154118225, "learning_rate": 1.512364515239821e-07, "loss": 0.006, "step": 17006 }, { "epoch": 3.8696245733788395, "grad_norm": 0.9098263256217121, "learning_rate": 1.5117817880697161e-07, "loss": 0.0055, "step": 17007 }, { "epoch": 3.8698521046643912, "grad_norm": 0.6471036310679762, "learning_rate": 1.511199157740112e-07, "loss": 0.005, "step": 17008 }, { "epoch": 3.870079635949943, "grad_norm": 0.3613935873796824, "learning_rate": 1.5106166242629199e-07, "loss": 0.0031, "step": 17009 }, { "epoch": 3.8703071672354947, "grad_norm": 0.30937568648182523, "learning_rate": 1.5100341876500445e-07, "loss": 0.0017, "step": 17010 }, { "epoch": 3.8705346985210465, "grad_norm": 0.7390808618790672, "learning_rate": 1.5094518479133874e-07, "loss": 0.0071, "step": 17011 }, { "epoch": 3.8707622298065987, "grad_norm": 0.3652566210455038, "learning_rate": 1.5088696050648526e-07, "loss": 0.002, "step": 17012 }, { "epoch": 3.87098976109215, "grad_norm": 0.5333012462293215, "learning_rate": 1.5082874591163407e-07, "loss": 0.005, "step": 17013 }, { "epoch": 3.871217292377702, "grad_norm": 0.5493744580478492, "learning_rate": 1.5077054100797502e-07, "loss": 0.0048, "step": 17014 }, { "epoch": 3.8714448236632535, "grad_norm": 1.7950457421939727, "learning_rate": 1.5071234579669747e-07, "loss": 0.0164, "step": 17015 }, { "epoch": 3.8716723549488057, "grad_norm": 0.6301610529809328, "learning_rate": 1.5065416027899107e-07, "loss": 0.0041, "step": 17016 }, { "epoch": 3.871899886234357, "grad_norm": 0.4734711266857881, "learning_rate": 1.50595984456045e-07, "loss": 0.0026, "step": 17017 }, { "epoch": 3.872127417519909, "grad_norm": 1.0789488512519838, "learning_rate": 1.5053781832904776e-07, "loss": 0.0101, "step": 17018 }, { "epoch": 3.8723549488054605, "grad_norm": 0.3749852739724309, "learning_rate": 1.504796618991889e-07, "loss": 0.0034, "step": 17019 }, { "epoch": 3.8725824800910127, "grad_norm": 0.7991326529512776, "learning_rate": 1.5042151516765663e-07, "loss": 0.0032, "step": 17020 }, { "epoch": 3.8728100113765644, "grad_norm": 0.7738583349916772, "learning_rate": 1.503633781356395e-07, "loss": 0.0056, "step": 17021 }, { "epoch": 3.873037542662116, "grad_norm": 1.408364417439089, "learning_rate": 1.503052508043256e-07, "loss": 0.006, "step": 17022 }, { "epoch": 3.873265073947668, "grad_norm": 0.21469611238585715, "learning_rate": 1.5024713317490316e-07, "loss": 0.0015, "step": 17023 }, { "epoch": 3.8734926052332197, "grad_norm": 0.3609631662416894, "learning_rate": 1.501890252485596e-07, "loss": 0.0036, "step": 17024 }, { "epoch": 3.8737201365187715, "grad_norm": 0.8434913427530878, "learning_rate": 1.5013092702648286e-07, "loss": 0.0068, "step": 17025 }, { "epoch": 3.873947667804323, "grad_norm": 0.8293124498756271, "learning_rate": 1.5007283850986044e-07, "loss": 0.0095, "step": 17026 }, { "epoch": 3.874175199089875, "grad_norm": 0.7302489619481866, "learning_rate": 1.5001475969987925e-07, "loss": 0.0046, "step": 17027 }, { "epoch": 3.8744027303754267, "grad_norm": 0.7070645353736155, "learning_rate": 1.4995669059772662e-07, "loss": 0.0028, "step": 17028 }, { "epoch": 3.8746302616609785, "grad_norm": 0.4193719926790912, "learning_rate": 1.498986312045893e-07, "loss": 0.0014, "step": 17029 }, { "epoch": 3.87485779294653, "grad_norm": 0.808107094778887, "learning_rate": 1.498405815216536e-07, "loss": 0.0177, "step": 17030 }, { "epoch": 3.875085324232082, "grad_norm": 0.3357028620508706, "learning_rate": 1.4978254155010628e-07, "loss": 0.0017, "step": 17031 }, { "epoch": 3.8753128555176337, "grad_norm": 1.0086641402343401, "learning_rate": 1.4972451129113343e-07, "loss": 0.0091, "step": 17032 }, { "epoch": 3.8755403868031855, "grad_norm": 0.21148212444891984, "learning_rate": 1.4966649074592132e-07, "loss": 0.0013, "step": 17033 }, { "epoch": 3.875767918088737, "grad_norm": 15.713334435390871, "learning_rate": 1.4960847991565544e-07, "loss": 0.0327, "step": 17034 }, { "epoch": 3.875995449374289, "grad_norm": 1.1468982939494399, "learning_rate": 1.4955047880152181e-07, "loss": 0.019, "step": 17035 }, { "epoch": 3.8762229806598407, "grad_norm": 0.4436064270913324, "learning_rate": 1.4949248740470559e-07, "loss": 0.0027, "step": 17036 }, { "epoch": 3.8764505119453925, "grad_norm": 0.08387308189752085, "learning_rate": 1.4943450572639192e-07, "loss": 0.0003, "step": 17037 }, { "epoch": 3.8766780432309442, "grad_norm": 0.9117219509012379, "learning_rate": 1.49376533767766e-07, "loss": 0.0046, "step": 17038 }, { "epoch": 3.876905574516496, "grad_norm": 0.05446184536037763, "learning_rate": 1.4931857153001265e-07, "loss": 0.0003, "step": 17039 }, { "epoch": 3.8771331058020477, "grad_norm": 0.34780656491620315, "learning_rate": 1.4926061901431679e-07, "loss": 0.0029, "step": 17040 }, { "epoch": 3.8773606370875995, "grad_norm": 0.2617191525618556, "learning_rate": 1.492026762218625e-07, "loss": 0.0014, "step": 17041 }, { "epoch": 3.8775881683731512, "grad_norm": 1.4664798908883614, "learning_rate": 1.49144743153834e-07, "loss": 0.0086, "step": 17042 }, { "epoch": 3.877815699658703, "grad_norm": 0.40273095721213964, "learning_rate": 1.4908681981141558e-07, "loss": 0.0016, "step": 17043 }, { "epoch": 3.8780432309442547, "grad_norm": 0.5476219758071847, "learning_rate": 1.4902890619579085e-07, "loss": 0.0053, "step": 17044 }, { "epoch": 3.8782707622298065, "grad_norm": 0.4326942323257985, "learning_rate": 1.489710023081436e-07, "loss": 0.0036, "step": 17045 }, { "epoch": 3.8784982935153582, "grad_norm": 0.5519516293418284, "learning_rate": 1.489131081496572e-07, "loss": 0.0019, "step": 17046 }, { "epoch": 3.87872582480091, "grad_norm": 0.16242965240470036, "learning_rate": 1.4885522372151516e-07, "loss": 0.0006, "step": 17047 }, { "epoch": 3.8789533560864617, "grad_norm": 1.6692931140283074, "learning_rate": 1.4879734902490033e-07, "loss": 0.0092, "step": 17048 }, { "epoch": 3.8791808873720135, "grad_norm": 0.13698209949587606, "learning_rate": 1.4873948406099535e-07, "loss": 0.0003, "step": 17049 }, { "epoch": 3.8794084186575652, "grad_norm": 1.6955802443689156, "learning_rate": 1.486816288309833e-07, "loss": 0.0114, "step": 17050 }, { "epoch": 3.8796359499431174, "grad_norm": 0.4099624348681985, "learning_rate": 1.4862378333604627e-07, "loss": 0.0015, "step": 17051 }, { "epoch": 3.8798634812286688, "grad_norm": 0.8613079543272576, "learning_rate": 1.485659475773666e-07, "loss": 0.0069, "step": 17052 }, { "epoch": 3.880091012514221, "grad_norm": 0.9391809294831228, "learning_rate": 1.4850812155612648e-07, "loss": 0.0078, "step": 17053 }, { "epoch": 3.8803185437997723, "grad_norm": 1.6210391333309073, "learning_rate": 1.4845030527350776e-07, "loss": 0.0102, "step": 17054 }, { "epoch": 3.8805460750853245, "grad_norm": 0.2493961666755816, "learning_rate": 1.4839249873069215e-07, "loss": 0.0012, "step": 17055 }, { "epoch": 3.8807736063708758, "grad_norm": 0.3911921343809678, "learning_rate": 1.4833470192886084e-07, "loss": 0.004, "step": 17056 }, { "epoch": 3.881001137656428, "grad_norm": 0.26645431376741574, "learning_rate": 1.4827691486919538e-07, "loss": 0.0012, "step": 17057 }, { "epoch": 3.8812286689419793, "grad_norm": 0.7270607580213679, "learning_rate": 1.4821913755287652e-07, "loss": 0.0056, "step": 17058 }, { "epoch": 3.8814562002275315, "grad_norm": 0.5566210953192199, "learning_rate": 1.4816136998108527e-07, "loss": 0.0032, "step": 17059 }, { "epoch": 3.881683731513083, "grad_norm": 1.1790431893646827, "learning_rate": 1.481036121550026e-07, "loss": 0.0117, "step": 17060 }, { "epoch": 3.881911262798635, "grad_norm": 0.8337622965887181, "learning_rate": 1.4804586407580848e-07, "loss": 0.0124, "step": 17061 }, { "epoch": 3.8821387940841867, "grad_norm": 0.5140127827775495, "learning_rate": 1.4798812574468357e-07, "loss": 0.0058, "step": 17062 }, { "epoch": 3.8823663253697385, "grad_norm": 0.40888608007861976, "learning_rate": 1.4793039716280758e-07, "loss": 0.0015, "step": 17063 }, { "epoch": 3.88259385665529, "grad_norm": 0.18527624284577648, "learning_rate": 1.4787267833136076e-07, "loss": 0.0005, "step": 17064 }, { "epoch": 3.882821387940842, "grad_norm": 0.34635940279292665, "learning_rate": 1.4781496925152238e-07, "loss": 0.0057, "step": 17065 }, { "epoch": 3.8830489192263937, "grad_norm": 1.4674982403665862, "learning_rate": 1.4775726992447213e-07, "loss": 0.0051, "step": 17066 }, { "epoch": 3.8832764505119455, "grad_norm": 0.322981171659949, "learning_rate": 1.476995803513894e-07, "loss": 0.0013, "step": 17067 }, { "epoch": 3.8835039817974972, "grad_norm": 0.5090433326821726, "learning_rate": 1.4764190053345299e-07, "loss": 0.0026, "step": 17068 }, { "epoch": 3.883731513083049, "grad_norm": 0.9526094302186928, "learning_rate": 1.4758423047184203e-07, "loss": 0.0053, "step": 17069 }, { "epoch": 3.8839590443686007, "grad_norm": 1.4298120574662212, "learning_rate": 1.475265701677349e-07, "loss": 0.0186, "step": 17070 }, { "epoch": 3.8841865756541525, "grad_norm": 0.17717241188074476, "learning_rate": 1.4746891962231037e-07, "loss": 0.001, "step": 17071 }, { "epoch": 3.8844141069397042, "grad_norm": 0.9994997712029996, "learning_rate": 1.4741127883674634e-07, "loss": 0.01, "step": 17072 }, { "epoch": 3.884641638225256, "grad_norm": 0.7638053867118226, "learning_rate": 1.4735364781222116e-07, "loss": 0.0154, "step": 17073 }, { "epoch": 3.8848691695108077, "grad_norm": 0.9607480387358901, "learning_rate": 1.4729602654991286e-07, "loss": 0.0118, "step": 17074 }, { "epoch": 3.8850967007963595, "grad_norm": 0.36922306012127576, "learning_rate": 1.4723841505099875e-07, "loss": 0.0026, "step": 17075 }, { "epoch": 3.8853242320819112, "grad_norm": 1.143259054451986, "learning_rate": 1.4718081331665655e-07, "loss": 0.0112, "step": 17076 }, { "epoch": 3.885551763367463, "grad_norm": 1.503213925128421, "learning_rate": 1.4712322134806328e-07, "loss": 0.0106, "step": 17077 }, { "epoch": 3.8857792946530147, "grad_norm": 0.3033471280318581, "learning_rate": 1.4706563914639643e-07, "loss": 0.0013, "step": 17078 }, { "epoch": 3.8860068259385665, "grad_norm": 0.356352280001378, "learning_rate": 1.4700806671283235e-07, "loss": 0.0031, "step": 17079 }, { "epoch": 3.8862343572241183, "grad_norm": 1.0469099563919528, "learning_rate": 1.4695050404854804e-07, "loss": 0.0102, "step": 17080 }, { "epoch": 3.88646188850967, "grad_norm": 0.8150141822746856, "learning_rate": 1.4689295115472007e-07, "loss": 0.0092, "step": 17081 }, { "epoch": 3.8866894197952218, "grad_norm": 1.1542361113599657, "learning_rate": 1.468354080325244e-07, "loss": 0.0115, "step": 17082 }, { "epoch": 3.8869169510807735, "grad_norm": 0.6932574299663986, "learning_rate": 1.467778746831374e-07, "loss": 0.0047, "step": 17083 }, { "epoch": 3.8871444823663253, "grad_norm": 0.6553720727726599, "learning_rate": 1.4672035110773474e-07, "loss": 0.0025, "step": 17084 }, { "epoch": 3.887372013651877, "grad_norm": 0.10414505210262633, "learning_rate": 1.4666283730749214e-07, "loss": 0.0005, "step": 17085 }, { "epoch": 3.8875995449374288, "grad_norm": 0.41588259321110516, "learning_rate": 1.4660533328358525e-07, "loss": 0.0028, "step": 17086 }, { "epoch": 3.8878270762229805, "grad_norm": 0.8155441998909254, "learning_rate": 1.4654783903718903e-07, "loss": 0.0042, "step": 17087 }, { "epoch": 3.8880546075085323, "grad_norm": 0.45570686973451713, "learning_rate": 1.4649035456947896e-07, "loss": 0.0031, "step": 17088 }, { "epoch": 3.888282138794084, "grad_norm": 0.5222775274501, "learning_rate": 1.4643287988162954e-07, "loss": 0.0037, "step": 17089 }, { "epoch": 3.888509670079636, "grad_norm": 0.466473550070277, "learning_rate": 1.4637541497481568e-07, "loss": 0.0042, "step": 17090 }, { "epoch": 3.8887372013651875, "grad_norm": 0.3974576921796071, "learning_rate": 1.4631795985021166e-07, "loss": 0.003, "step": 17091 }, { "epoch": 3.8889647326507397, "grad_norm": 1.1774643024343403, "learning_rate": 1.462605145089919e-07, "loss": 0.0053, "step": 17092 }, { "epoch": 3.889192263936291, "grad_norm": 0.47949839072707734, "learning_rate": 1.4620307895233062e-07, "loss": 0.0042, "step": 17093 }, { "epoch": 3.8894197952218432, "grad_norm": 0.2488879072588362, "learning_rate": 1.461456531814013e-07, "loss": 0.0009, "step": 17094 }, { "epoch": 3.8896473265073945, "grad_norm": 0.1934066190014067, "learning_rate": 1.4608823719737812e-07, "loss": 0.0006, "step": 17095 }, { "epoch": 3.8898748577929467, "grad_norm": 1.3500677236112448, "learning_rate": 1.4603083100143404e-07, "loss": 0.0038, "step": 17096 }, { "epoch": 3.890102389078498, "grad_norm": 0.39962707794078495, "learning_rate": 1.4597343459474277e-07, "loss": 0.0032, "step": 17097 }, { "epoch": 3.8903299203640502, "grad_norm": 0.7142126063452486, "learning_rate": 1.45916047978477e-07, "loss": 0.0024, "step": 17098 }, { "epoch": 3.890557451649602, "grad_norm": 0.2978658317821677, "learning_rate": 1.4585867115380986e-07, "loss": 0.0009, "step": 17099 }, { "epoch": 3.8907849829351537, "grad_norm": 0.7815040466079948, "learning_rate": 1.458013041219141e-07, "loss": 0.003, "step": 17100 }, { "epoch": 3.8910125142207055, "grad_norm": 0.11821487023875457, "learning_rate": 1.4574394688396192e-07, "loss": 0.0005, "step": 17101 }, { "epoch": 3.8912400455062572, "grad_norm": 0.1861529590348994, "learning_rate": 1.4568659944112592e-07, "loss": 0.0015, "step": 17102 }, { "epoch": 3.891467576791809, "grad_norm": 0.34190735897804936, "learning_rate": 1.4562926179457787e-07, "loss": 0.0025, "step": 17103 }, { "epoch": 3.8916951080773607, "grad_norm": 0.42973712814561726, "learning_rate": 1.4557193394548994e-07, "loss": 0.0019, "step": 17104 }, { "epoch": 3.8919226393629125, "grad_norm": 0.8748053659888104, "learning_rate": 1.4551461589503345e-07, "loss": 0.0128, "step": 17105 }, { "epoch": 3.8921501706484642, "grad_norm": 0.11883320344597129, "learning_rate": 1.4545730764438008e-07, "loss": 0.0004, "step": 17106 }, { "epoch": 3.892377701934016, "grad_norm": 0.9573550619609807, "learning_rate": 1.454000091947013e-07, "loss": 0.0116, "step": 17107 }, { "epoch": 3.8926052332195678, "grad_norm": 0.5760419460254376, "learning_rate": 1.4534272054716782e-07, "loss": 0.0017, "step": 17108 }, { "epoch": 3.8928327645051195, "grad_norm": 1.2417867585217839, "learning_rate": 1.4528544170295082e-07, "loss": 0.0091, "step": 17109 }, { "epoch": 3.8930602957906713, "grad_norm": 0.47522117193831304, "learning_rate": 1.4522817266322063e-07, "loss": 0.0052, "step": 17110 }, { "epoch": 3.893287827076223, "grad_norm": 0.43882658702841953, "learning_rate": 1.4517091342914818e-07, "loss": 0.0037, "step": 17111 }, { "epoch": 3.8935153583617748, "grad_norm": 1.8934335203444088, "learning_rate": 1.4511366400190332e-07, "loss": 0.0128, "step": 17112 }, { "epoch": 3.8937428896473265, "grad_norm": 0.5554395903643583, "learning_rate": 1.4505642438265627e-07, "loss": 0.003, "step": 17113 }, { "epoch": 3.8939704209328783, "grad_norm": 0.41117457838138777, "learning_rate": 1.4499919457257704e-07, "loss": 0.0023, "step": 17114 }, { "epoch": 3.89419795221843, "grad_norm": 0.1546141637478484, "learning_rate": 1.449419745728351e-07, "loss": 0.0006, "step": 17115 }, { "epoch": 3.8944254835039818, "grad_norm": 0.45385884723740705, "learning_rate": 1.448847643846002e-07, "loss": 0.0033, "step": 17116 }, { "epoch": 3.8946530147895335, "grad_norm": 0.2839086839376236, "learning_rate": 1.4482756400904137e-07, "loss": 0.0013, "step": 17117 }, { "epoch": 3.8948805460750853, "grad_norm": 0.3289381665374979, "learning_rate": 1.4477037344732754e-07, "loss": 0.0019, "step": 17118 }, { "epoch": 3.895108077360637, "grad_norm": 1.2911816297692167, "learning_rate": 1.447131927006279e-07, "loss": 0.013, "step": 17119 }, { "epoch": 3.8953356086461888, "grad_norm": 0.9084821841828569, "learning_rate": 1.4465602177011089e-07, "loss": 0.0042, "step": 17120 }, { "epoch": 3.8955631399317405, "grad_norm": 1.4202160887016704, "learning_rate": 1.445988606569453e-07, "loss": 0.0075, "step": 17121 }, { "epoch": 3.8957906712172923, "grad_norm": 0.6151984510713, "learning_rate": 1.4454170936229902e-07, "loss": 0.0068, "step": 17122 }, { "epoch": 3.896018202502844, "grad_norm": 0.23127141354500674, "learning_rate": 1.4448456788734042e-07, "loss": 0.0027, "step": 17123 }, { "epoch": 3.896245733788396, "grad_norm": 0.3987102036259934, "learning_rate": 1.444274362332373e-07, "loss": 0.0039, "step": 17124 }, { "epoch": 3.8964732650739475, "grad_norm": 0.46041206267984647, "learning_rate": 1.44370314401157e-07, "loss": 0.0046, "step": 17125 }, { "epoch": 3.8967007963594993, "grad_norm": 0.49349757480535905, "learning_rate": 1.4431320239226728e-07, "loss": 0.0057, "step": 17126 }, { "epoch": 3.896928327645051, "grad_norm": 0.4013481951778398, "learning_rate": 1.4425610020773534e-07, "loss": 0.0033, "step": 17127 }, { "epoch": 3.897155858930603, "grad_norm": 0.2014463242770089, "learning_rate": 1.4419900784872847e-07, "loss": 0.001, "step": 17128 }, { "epoch": 3.897383390216155, "grad_norm": 1.09666199259683, "learning_rate": 1.4414192531641314e-07, "loss": 0.0052, "step": 17129 }, { "epoch": 3.8976109215017063, "grad_norm": 1.140906836980715, "learning_rate": 1.4408485261195642e-07, "loss": 0.0149, "step": 17130 }, { "epoch": 3.8978384527872585, "grad_norm": 0.18601340963145374, "learning_rate": 1.4402778973652456e-07, "loss": 0.0007, "step": 17131 }, { "epoch": 3.89806598407281, "grad_norm": 0.40503785418926047, "learning_rate": 1.439707366912836e-07, "loss": 0.0024, "step": 17132 }, { "epoch": 3.898293515358362, "grad_norm": 1.532209189005807, "learning_rate": 1.4391369347739984e-07, "loss": 0.0224, "step": 17133 }, { "epoch": 3.8985210466439133, "grad_norm": 0.36137682469728355, "learning_rate": 1.4385666009603908e-07, "loss": 0.0013, "step": 17134 }, { "epoch": 3.8987485779294655, "grad_norm": 0.2115250271743465, "learning_rate": 1.4379963654836723e-07, "loss": 0.0012, "step": 17135 }, { "epoch": 3.898976109215017, "grad_norm": 0.3322926846049266, "learning_rate": 1.4374262283554956e-07, "loss": 0.0023, "step": 17136 }, { "epoch": 3.899203640500569, "grad_norm": 0.23056605688145798, "learning_rate": 1.4368561895875106e-07, "loss": 0.0009, "step": 17137 }, { "epoch": 3.8994311717861208, "grad_norm": 0.27180384089277854, "learning_rate": 1.4362862491913724e-07, "loss": 0.0015, "step": 17138 }, { "epoch": 3.8996587030716725, "grad_norm": 1.1971983399447448, "learning_rate": 1.4357164071787236e-07, "loss": 0.0086, "step": 17139 }, { "epoch": 3.8998862343572243, "grad_norm": 0.1955512868552366, "learning_rate": 1.4351466635612183e-07, "loss": 0.0008, "step": 17140 }, { "epoch": 3.900113765642776, "grad_norm": 0.699621977038393, "learning_rate": 1.4345770183504956e-07, "loss": 0.0033, "step": 17141 }, { "epoch": 3.9003412969283278, "grad_norm": 1.9209096862484887, "learning_rate": 1.4340074715582014e-07, "loss": 0.0198, "step": 17142 }, { "epoch": 3.9005688282138795, "grad_norm": 0.37245172807135074, "learning_rate": 1.4334380231959738e-07, "loss": 0.0039, "step": 17143 }, { "epoch": 3.9007963594994313, "grad_norm": 0.7058413629651439, "learning_rate": 1.4328686732754505e-07, "loss": 0.0042, "step": 17144 }, { "epoch": 3.901023890784983, "grad_norm": 1.1202714726564704, "learning_rate": 1.4322994218082692e-07, "loss": 0.0048, "step": 17145 }, { "epoch": 3.9012514220705348, "grad_norm": 0.8501862945433257, "learning_rate": 1.4317302688060642e-07, "loss": 0.0088, "step": 17146 }, { "epoch": 3.9014789533560865, "grad_norm": 0.6814866545566, "learning_rate": 1.4311612142804708e-07, "loss": 0.0172, "step": 17147 }, { "epoch": 3.9017064846416383, "grad_norm": 0.18481396207403986, "learning_rate": 1.4305922582431167e-07, "loss": 0.0008, "step": 17148 }, { "epoch": 3.90193401592719, "grad_norm": 0.4807207928139088, "learning_rate": 1.4300234007056284e-07, "loss": 0.0065, "step": 17149 }, { "epoch": 3.9021615472127418, "grad_norm": 0.22745254350033567, "learning_rate": 1.4294546416796362e-07, "loss": 0.0012, "step": 17150 }, { "epoch": 3.9023890784982935, "grad_norm": 0.45474371311056494, "learning_rate": 1.4288859811767614e-07, "loss": 0.0037, "step": 17151 }, { "epoch": 3.9026166097838453, "grad_norm": 0.8497331432989912, "learning_rate": 1.4283174192086267e-07, "loss": 0.0023, "step": 17152 }, { "epoch": 3.902844141069397, "grad_norm": 3.1666341298903715, "learning_rate": 1.4277489557868541e-07, "loss": 0.0193, "step": 17153 }, { "epoch": 3.903071672354949, "grad_norm": 2.7341451684345093, "learning_rate": 1.4271805909230634e-07, "loss": 0.0039, "step": 17154 }, { "epoch": 3.9032992036405005, "grad_norm": 0.08751105487632053, "learning_rate": 1.4266123246288683e-07, "loss": 0.0002, "step": 17155 }, { "epoch": 3.9035267349260523, "grad_norm": 0.06072393281627931, "learning_rate": 1.4260441569158823e-07, "loss": 0.0002, "step": 17156 }, { "epoch": 3.903754266211604, "grad_norm": 0.5223913055253167, "learning_rate": 1.4254760877957206e-07, "loss": 0.0062, "step": 17157 }, { "epoch": 3.903981797497156, "grad_norm": 0.2494460563154975, "learning_rate": 1.4249081172799904e-07, "loss": 0.0016, "step": 17158 }, { "epoch": 3.9042093287827075, "grad_norm": 0.3824025481496855, "learning_rate": 1.424340245380302e-07, "loss": 0.0022, "step": 17159 }, { "epoch": 3.9044368600682593, "grad_norm": 0.31011361470741267, "learning_rate": 1.4237724721082605e-07, "loss": 0.0026, "step": 17160 }, { "epoch": 3.904664391353811, "grad_norm": 1.1378372359888425, "learning_rate": 1.4232047974754727e-07, "loss": 0.0079, "step": 17161 }, { "epoch": 3.904891922639363, "grad_norm": 1.2477226825083878, "learning_rate": 1.422637221493539e-07, "loss": 0.0082, "step": 17162 }, { "epoch": 3.9051194539249146, "grad_norm": 0.43234697408540057, "learning_rate": 1.422069744174058e-07, "loss": 0.0038, "step": 17163 }, { "epoch": 3.9053469852104663, "grad_norm": 2.8686354790902557, "learning_rate": 1.4215023655286314e-07, "loss": 0.045, "step": 17164 }, { "epoch": 3.905574516496018, "grad_norm": 0.2684334836695274, "learning_rate": 1.420935085568851e-07, "loss": 0.0009, "step": 17165 }, { "epoch": 3.90580204778157, "grad_norm": 0.40873637283434083, "learning_rate": 1.4203679043063141e-07, "loss": 0.0027, "step": 17166 }, { "epoch": 3.9060295790671216, "grad_norm": 0.44567472454499274, "learning_rate": 1.4198008217526135e-07, "loss": 0.0028, "step": 17167 }, { "epoch": 3.9062571103526738, "grad_norm": 0.8979408711144318, "learning_rate": 1.4192338379193365e-07, "loss": 0.0095, "step": 17168 }, { "epoch": 3.906484641638225, "grad_norm": 0.5928536163191949, "learning_rate": 1.4186669528180743e-07, "loss": 0.0089, "step": 17169 }, { "epoch": 3.9067121729237773, "grad_norm": 1.812645708796703, "learning_rate": 1.41810016646041e-07, "loss": 0.0204, "step": 17170 }, { "epoch": 3.9069397042093286, "grad_norm": 0.5054111608158732, "learning_rate": 1.4175334788579304e-07, "loss": 0.0066, "step": 17171 }, { "epoch": 3.9071672354948808, "grad_norm": 0.44326791359571616, "learning_rate": 1.4169668900222151e-07, "loss": 0.0037, "step": 17172 }, { "epoch": 3.907394766780432, "grad_norm": 0.8511517796555004, "learning_rate": 1.416400399964845e-07, "loss": 0.0075, "step": 17173 }, { "epoch": 3.9076222980659843, "grad_norm": 1.2534661965812575, "learning_rate": 1.4158340086973997e-07, "loss": 0.0029, "step": 17174 }, { "epoch": 3.9078498293515356, "grad_norm": 0.629548222336314, "learning_rate": 1.4152677162314526e-07, "loss": 0.0046, "step": 17175 }, { "epoch": 3.9080773606370878, "grad_norm": 0.8656837991286396, "learning_rate": 1.4147015225785806e-07, "loss": 0.0077, "step": 17176 }, { "epoch": 3.9083048919226395, "grad_norm": 0.5485224268868981, "learning_rate": 1.414135427750353e-07, "loss": 0.0067, "step": 17177 }, { "epoch": 3.9085324232081913, "grad_norm": 0.25620084728681797, "learning_rate": 1.4135694317583416e-07, "loss": 0.0021, "step": 17178 }, { "epoch": 3.908759954493743, "grad_norm": 0.30716650400305234, "learning_rate": 1.4130035346141123e-07, "loss": 0.0031, "step": 17179 }, { "epoch": 3.908987485779295, "grad_norm": 0.8874328674579252, "learning_rate": 1.412437736329232e-07, "loss": 0.011, "step": 17180 }, { "epoch": 3.9092150170648465, "grad_norm": 2.0787224538119946, "learning_rate": 1.4118720369152662e-07, "loss": 0.0179, "step": 17181 }, { "epoch": 3.9094425483503983, "grad_norm": 1.0200428588031243, "learning_rate": 1.4113064363837742e-07, "loss": 0.0095, "step": 17182 }, { "epoch": 3.90967007963595, "grad_norm": 0.3774171374176519, "learning_rate": 1.4107409347463184e-07, "loss": 0.0022, "step": 17183 }, { "epoch": 3.909897610921502, "grad_norm": 0.4597132546967579, "learning_rate": 1.4101755320144536e-07, "loss": 0.0037, "step": 17184 }, { "epoch": 3.9101251422070535, "grad_norm": 0.5747290503291973, "learning_rate": 1.409610228199739e-07, "loss": 0.0044, "step": 17185 }, { "epoch": 3.9103526734926053, "grad_norm": 1.3269670238109144, "learning_rate": 1.4090450233137244e-07, "loss": 0.0144, "step": 17186 }, { "epoch": 3.910580204778157, "grad_norm": 0.14756825228100964, "learning_rate": 1.4084799173679644e-07, "loss": 0.0011, "step": 17187 }, { "epoch": 3.910807736063709, "grad_norm": 0.631220775340965, "learning_rate": 1.4079149103740094e-07, "loss": 0.0068, "step": 17188 }, { "epoch": 3.9110352673492605, "grad_norm": 0.3281626605452535, "learning_rate": 1.4073500023434042e-07, "loss": 0.0024, "step": 17189 }, { "epoch": 3.9112627986348123, "grad_norm": 0.523112992150817, "learning_rate": 1.4067851932876976e-07, "loss": 0.0047, "step": 17190 }, { "epoch": 3.911490329920364, "grad_norm": 0.3461934261708197, "learning_rate": 1.4062204832184305e-07, "loss": 0.0016, "step": 17191 }, { "epoch": 3.911717861205916, "grad_norm": 0.566820594671785, "learning_rate": 1.4056558721471465e-07, "loss": 0.0052, "step": 17192 }, { "epoch": 3.9119453924914676, "grad_norm": 0.8533902276041477, "learning_rate": 1.4050913600853838e-07, "loss": 0.0045, "step": 17193 }, { "epoch": 3.9121729237770193, "grad_norm": 0.7445398458334063, "learning_rate": 1.40452694704468e-07, "loss": 0.0058, "step": 17194 }, { "epoch": 3.912400455062571, "grad_norm": 2.3060790577998627, "learning_rate": 1.403962633036573e-07, "loss": 0.0134, "step": 17195 }, { "epoch": 3.912627986348123, "grad_norm": 0.5420155527675762, "learning_rate": 1.4033984180725935e-07, "loss": 0.0067, "step": 17196 }, { "epoch": 3.9128555176336746, "grad_norm": 0.16401495085511233, "learning_rate": 1.4028343021642757e-07, "loss": 0.0004, "step": 17197 }, { "epoch": 3.9130830489192263, "grad_norm": 0.4558153467328559, "learning_rate": 1.4022702853231457e-07, "loss": 0.003, "step": 17198 }, { "epoch": 3.913310580204778, "grad_norm": 0.3763509313663115, "learning_rate": 1.4017063675607346e-07, "loss": 0.0021, "step": 17199 }, { "epoch": 3.91353811149033, "grad_norm": 0.3245770963224784, "learning_rate": 1.4011425488885642e-07, "loss": 0.0025, "step": 17200 }, { "epoch": 3.9137656427758816, "grad_norm": 0.6748157478240069, "learning_rate": 1.4005788293181604e-07, "loss": 0.0015, "step": 17201 }, { "epoch": 3.9139931740614333, "grad_norm": 0.6338132770512551, "learning_rate": 1.4000152088610456e-07, "loss": 0.0087, "step": 17202 }, { "epoch": 3.914220705346985, "grad_norm": 0.48150062531836985, "learning_rate": 1.3994516875287356e-07, "loss": 0.0065, "step": 17203 }, { "epoch": 3.914448236632537, "grad_norm": 1.6287922779579087, "learning_rate": 1.3988882653327518e-07, "loss": 0.0336, "step": 17204 }, { "epoch": 3.9146757679180886, "grad_norm": 1.1659385161726241, "learning_rate": 1.3983249422846074e-07, "loss": 0.0129, "step": 17205 }, { "epoch": 3.9149032992036403, "grad_norm": 0.7208562451368202, "learning_rate": 1.3977617183958126e-07, "loss": 0.0081, "step": 17206 }, { "epoch": 3.9151308304891925, "grad_norm": 0.2113091695471806, "learning_rate": 1.3971985936778843e-07, "loss": 0.0009, "step": 17207 }, { "epoch": 3.915358361774744, "grad_norm": 0.3423435033616369, "learning_rate": 1.396635568142328e-07, "loss": 0.0022, "step": 17208 }, { "epoch": 3.915585893060296, "grad_norm": 0.1345319329780559, "learning_rate": 1.3960726418006546e-07, "loss": 0.0004, "step": 17209 }, { "epoch": 3.9158134243458473, "grad_norm": 0.5630361001050433, "learning_rate": 1.3955098146643641e-07, "loss": 0.0041, "step": 17210 }, { "epoch": 3.9160409556313995, "grad_norm": 0.11562878781148661, "learning_rate": 1.3949470867449646e-07, "loss": 0.0005, "step": 17211 }, { "epoch": 3.916268486916951, "grad_norm": 0.6423203425707092, "learning_rate": 1.3943844580539538e-07, "loss": 0.0047, "step": 17212 }, { "epoch": 3.916496018202503, "grad_norm": 0.7100191729199217, "learning_rate": 1.3938219286028314e-07, "loss": 0.0067, "step": 17213 }, { "epoch": 3.9167235494880543, "grad_norm": 1.4552578322172163, "learning_rate": 1.393259498403097e-07, "loss": 0.0089, "step": 17214 }, { "epoch": 3.9169510807736065, "grad_norm": 0.5083116580241499, "learning_rate": 1.392697167466242e-07, "loss": 0.0027, "step": 17215 }, { "epoch": 3.9171786120591583, "grad_norm": 0.5152257858854304, "learning_rate": 1.392134935803763e-07, "loss": 0.0059, "step": 17216 }, { "epoch": 3.91740614334471, "grad_norm": 0.46374588440300585, "learning_rate": 1.391572803427147e-07, "loss": 0.0017, "step": 17217 }, { "epoch": 3.917633674630262, "grad_norm": 0.3180825913178631, "learning_rate": 1.391010770347887e-07, "loss": 0.0028, "step": 17218 }, { "epoch": 3.9178612059158135, "grad_norm": 0.20387359623488244, "learning_rate": 1.390448836577467e-07, "loss": 0.0012, "step": 17219 }, { "epoch": 3.9180887372013653, "grad_norm": 0.4732708836201442, "learning_rate": 1.3898870021273722e-07, "loss": 0.003, "step": 17220 }, { "epoch": 3.918316268486917, "grad_norm": 0.5842503551233342, "learning_rate": 1.3893252670090882e-07, "loss": 0.0043, "step": 17221 }, { "epoch": 3.918543799772469, "grad_norm": 0.26024055874937496, "learning_rate": 1.388763631234092e-07, "loss": 0.003, "step": 17222 }, { "epoch": 3.9187713310580206, "grad_norm": 0.23605466398095226, "learning_rate": 1.388202094813866e-07, "loss": 0.0017, "step": 17223 }, { "epoch": 3.9189988623435723, "grad_norm": 0.8028389425190772, "learning_rate": 1.3876406577598852e-07, "loss": 0.0053, "step": 17224 }, { "epoch": 3.919226393629124, "grad_norm": 0.372103642845388, "learning_rate": 1.3870793200836222e-07, "loss": 0.0017, "step": 17225 }, { "epoch": 3.919453924914676, "grad_norm": 0.44141402034272403, "learning_rate": 1.3865180817965527e-07, "loss": 0.0024, "step": 17226 }, { "epoch": 3.9196814562002276, "grad_norm": 0.3602161689544912, "learning_rate": 1.385956942910146e-07, "loss": 0.0023, "step": 17227 }, { "epoch": 3.9199089874857793, "grad_norm": 0.5705827050651353, "learning_rate": 1.385395903435873e-07, "loss": 0.0048, "step": 17228 }, { "epoch": 3.920136518771331, "grad_norm": 1.2932907808969443, "learning_rate": 1.3848349633851974e-07, "loss": 0.0066, "step": 17229 }, { "epoch": 3.920364050056883, "grad_norm": 0.26148974346505494, "learning_rate": 1.384274122769587e-07, "loss": 0.0005, "step": 17230 }, { "epoch": 3.9205915813424346, "grad_norm": 2.0423783505417674, "learning_rate": 1.3837133816005015e-07, "loss": 0.0123, "step": 17231 }, { "epoch": 3.9208191126279863, "grad_norm": 0.1217170579430949, "learning_rate": 1.3831527398894012e-07, "loss": 0.0012, "step": 17232 }, { "epoch": 3.921046643913538, "grad_norm": 0.4198822978352745, "learning_rate": 1.3825921976477453e-07, "loss": 0.0032, "step": 17233 }, { "epoch": 3.92127417519909, "grad_norm": 0.7441855460441302, "learning_rate": 1.3820317548869908e-07, "loss": 0.0016, "step": 17234 }, { "epoch": 3.9215017064846416, "grad_norm": 0.985059709301201, "learning_rate": 1.3814714116185935e-07, "loss": 0.0079, "step": 17235 }, { "epoch": 3.9217292377701933, "grad_norm": 0.914169483720997, "learning_rate": 1.380911167854003e-07, "loss": 0.0062, "step": 17236 }, { "epoch": 3.921956769055745, "grad_norm": 0.4627627244990152, "learning_rate": 1.3803510236046724e-07, "loss": 0.0026, "step": 17237 }, { "epoch": 3.922184300341297, "grad_norm": 1.310065818917203, "learning_rate": 1.3797909788820479e-07, "loss": 0.0263, "step": 17238 }, { "epoch": 3.9224118316268486, "grad_norm": 0.28062849090991804, "learning_rate": 1.3792310336975756e-07, "loss": 0.0019, "step": 17239 }, { "epoch": 3.9226393629124003, "grad_norm": 0.47904094088003296, "learning_rate": 1.3786711880627002e-07, "loss": 0.0035, "step": 17240 }, { "epoch": 3.922866894197952, "grad_norm": 0.2607981000570005, "learning_rate": 1.3781114419888644e-07, "loss": 0.0024, "step": 17241 }, { "epoch": 3.923094425483504, "grad_norm": 0.5357832554582845, "learning_rate": 1.3775517954875098e-07, "loss": 0.0033, "step": 17242 }, { "epoch": 3.9233219567690556, "grad_norm": 0.058211690009723746, "learning_rate": 1.3769922485700736e-07, "loss": 0.0002, "step": 17243 }, { "epoch": 3.9235494880546073, "grad_norm": 0.705472491667214, "learning_rate": 1.3764328012479886e-07, "loss": 0.0035, "step": 17244 }, { "epoch": 3.923777019340159, "grad_norm": 0.8442848483452486, "learning_rate": 1.3758734535326944e-07, "loss": 0.0128, "step": 17245 }, { "epoch": 3.9240045506257113, "grad_norm": 1.5161508894607871, "learning_rate": 1.3753142054356177e-07, "loss": 0.0095, "step": 17246 }, { "epoch": 3.9242320819112626, "grad_norm": 1.1684611627912698, "learning_rate": 1.3747550569681918e-07, "loss": 0.0165, "step": 17247 }, { "epoch": 3.924459613196815, "grad_norm": 0.6536701831161214, "learning_rate": 1.3741960081418432e-07, "loss": 0.0046, "step": 17248 }, { "epoch": 3.924687144482366, "grad_norm": 0.36521542317241884, "learning_rate": 1.3736370589680013e-07, "loss": 0.0028, "step": 17249 }, { "epoch": 3.9249146757679183, "grad_norm": 0.08194896655175425, "learning_rate": 1.3730782094580862e-07, "loss": 0.0003, "step": 17250 }, { "epoch": 3.9251422070534696, "grad_norm": 1.256449006245758, "learning_rate": 1.3725194596235199e-07, "loss": 0.0014, "step": 17251 }, { "epoch": 3.925369738339022, "grad_norm": 0.15135552603498562, "learning_rate": 1.3719608094757245e-07, "loss": 0.0006, "step": 17252 }, { "epoch": 3.925597269624573, "grad_norm": 0.40888011953248315, "learning_rate": 1.3714022590261153e-07, "loss": 0.0018, "step": 17253 }, { "epoch": 3.9258248009101253, "grad_norm": 0.30456562762238953, "learning_rate": 1.3708438082861084e-07, "loss": 0.0019, "step": 17254 }, { "epoch": 3.926052332195677, "grad_norm": 0.48007104950572715, "learning_rate": 1.3702854572671204e-07, "loss": 0.0069, "step": 17255 }, { "epoch": 3.926279863481229, "grad_norm": 0.7404255488571307, "learning_rate": 1.3697272059805588e-07, "loss": 0.004, "step": 17256 }, { "epoch": 3.9265073947667806, "grad_norm": 0.5287626301890797, "learning_rate": 1.3691690544378376e-07, "loss": 0.0009, "step": 17257 }, { "epoch": 3.9267349260523323, "grad_norm": 0.8256481368249127, "learning_rate": 1.3686110026503602e-07, "loss": 0.0057, "step": 17258 }, { "epoch": 3.926962457337884, "grad_norm": 0.5492702919007928, "learning_rate": 1.3680530506295356e-07, "loss": 0.006, "step": 17259 }, { "epoch": 3.927189988623436, "grad_norm": 0.7710561432939682, "learning_rate": 1.3674951983867626e-07, "loss": 0.0054, "step": 17260 }, { "epoch": 3.9274175199089876, "grad_norm": 0.8137483384723464, "learning_rate": 1.3669374459334493e-07, "loss": 0.004, "step": 17261 }, { "epoch": 3.9276450511945393, "grad_norm": 1.1160917729601805, "learning_rate": 1.3663797932809912e-07, "loss": 0.0165, "step": 17262 }, { "epoch": 3.927872582480091, "grad_norm": 0.5022813126816756, "learning_rate": 1.3658222404407853e-07, "loss": 0.0057, "step": 17263 }, { "epoch": 3.928100113765643, "grad_norm": 0.044872300336151534, "learning_rate": 1.3652647874242287e-07, "loss": 0.0002, "step": 17264 }, { "epoch": 3.9283276450511946, "grad_norm": 0.8219346366423124, "learning_rate": 1.3647074342427126e-07, "loss": 0.006, "step": 17265 }, { "epoch": 3.9285551763367463, "grad_norm": 0.14406229747845167, "learning_rate": 1.3641501809076292e-07, "loss": 0.0008, "step": 17266 }, { "epoch": 3.928782707622298, "grad_norm": 0.3160806956708214, "learning_rate": 1.3635930274303688e-07, "loss": 0.0019, "step": 17267 }, { "epoch": 3.92901023890785, "grad_norm": 0.11718115199164512, "learning_rate": 1.3630359738223192e-07, "loss": 0.0004, "step": 17268 }, { "epoch": 3.9292377701934016, "grad_norm": 0.35904864138491704, "learning_rate": 1.3624790200948646e-07, "loss": 0.0017, "step": 17269 }, { "epoch": 3.9294653014789533, "grad_norm": 0.4281216377667139, "learning_rate": 1.3619221662593858e-07, "loss": 0.0041, "step": 17270 }, { "epoch": 3.929692832764505, "grad_norm": 0.22711537861173756, "learning_rate": 1.3613654123272675e-07, "loss": 0.0009, "step": 17271 }, { "epoch": 3.929920364050057, "grad_norm": 0.6825024121799544, "learning_rate": 1.3608087583098846e-07, "loss": 0.0022, "step": 17272 }, { "epoch": 3.9301478953356086, "grad_norm": 0.20664786210735572, "learning_rate": 1.3602522042186177e-07, "loss": 0.0023, "step": 17273 }, { "epoch": 3.9303754266211604, "grad_norm": 1.2344587514853895, "learning_rate": 1.3596957500648418e-07, "loss": 0.0017, "step": 17274 }, { "epoch": 3.930602957906712, "grad_norm": 0.24269226587740703, "learning_rate": 1.3591393958599272e-07, "loss": 0.0011, "step": 17275 }, { "epoch": 3.930830489192264, "grad_norm": 0.6753645713109122, "learning_rate": 1.3585831416152473e-07, "loss": 0.0082, "step": 17276 }, { "epoch": 3.9310580204778156, "grad_norm": 0.17825459442039132, "learning_rate": 1.3580269873421682e-07, "loss": 0.0011, "step": 17277 }, { "epoch": 3.9312855517633674, "grad_norm": 0.4321544797190526, "learning_rate": 1.3574709330520603e-07, "loss": 0.0045, "step": 17278 }, { "epoch": 3.931513083048919, "grad_norm": 0.18526711051589795, "learning_rate": 1.3569149787562846e-07, "loss": 0.0007, "step": 17279 }, { "epoch": 3.931740614334471, "grad_norm": 0.4704930014728533, "learning_rate": 1.356359124466205e-07, "loss": 0.0014, "step": 17280 }, { "epoch": 3.9319681456200226, "grad_norm": 1.7464560910875926, "learning_rate": 1.3558033701931844e-07, "loss": 0.0229, "step": 17281 }, { "epoch": 3.9321956769055744, "grad_norm": 0.37336009014295807, "learning_rate": 1.3552477159485788e-07, "loss": 0.0026, "step": 17282 }, { "epoch": 3.932423208191126, "grad_norm": 0.7296637729997769, "learning_rate": 1.354692161743746e-07, "loss": 0.0028, "step": 17283 }, { "epoch": 3.932650739476678, "grad_norm": 0.32823976850947983, "learning_rate": 1.354136707590039e-07, "loss": 0.0051, "step": 17284 }, { "epoch": 3.93287827076223, "grad_norm": 0.48087056020348334, "learning_rate": 1.3535813534988133e-07, "loss": 0.0026, "step": 17285 }, { "epoch": 3.9331058020477814, "grad_norm": 1.177621896880831, "learning_rate": 1.353026099481415e-07, "loss": 0.0029, "step": 17286 }, { "epoch": 3.9333333333333336, "grad_norm": 0.2403552872095478, "learning_rate": 1.3524709455491954e-07, "loss": 0.0008, "step": 17287 }, { "epoch": 3.933560864618885, "grad_norm": 2.556737008283267, "learning_rate": 1.351915891713502e-07, "loss": 0.0107, "step": 17288 }, { "epoch": 3.933788395904437, "grad_norm": 0.10705899063496453, "learning_rate": 1.3513609379856754e-07, "loss": 0.0003, "step": 17289 }, { "epoch": 3.9340159271899884, "grad_norm": 0.25222290699449645, "learning_rate": 1.350806084377062e-07, "loss": 0.0019, "step": 17290 }, { "epoch": 3.9342434584755406, "grad_norm": 0.13885721087510425, "learning_rate": 1.3502513308989975e-07, "loss": 0.0004, "step": 17291 }, { "epoch": 3.934470989761092, "grad_norm": 0.26629874316330143, "learning_rate": 1.3496966775628243e-07, "loss": 0.0019, "step": 17292 }, { "epoch": 3.934698521046644, "grad_norm": 0.3959750122122506, "learning_rate": 1.3491421243798744e-07, "loss": 0.0045, "step": 17293 }, { "epoch": 3.934926052332196, "grad_norm": 0.31750748004546486, "learning_rate": 1.3485876713614843e-07, "loss": 0.0021, "step": 17294 }, { "epoch": 3.9351535836177476, "grad_norm": 0.15125224472048565, "learning_rate": 1.3480333185189876e-07, "loss": 0.0004, "step": 17295 }, { "epoch": 3.9353811149032993, "grad_norm": 0.5967489932243222, "learning_rate": 1.3474790658637104e-07, "loss": 0.0036, "step": 17296 }, { "epoch": 3.935608646188851, "grad_norm": 0.4855438664586379, "learning_rate": 1.3469249134069842e-07, "loss": 0.001, "step": 17297 }, { "epoch": 3.935836177474403, "grad_norm": 0.5286088741011805, "learning_rate": 1.3463708611601316e-07, "loss": 0.0026, "step": 17298 }, { "epoch": 3.9360637087599546, "grad_norm": 1.247303561920834, "learning_rate": 1.3458169091344792e-07, "loss": 0.0094, "step": 17299 }, { "epoch": 3.9362912400455063, "grad_norm": 0.9040331808114922, "learning_rate": 1.3452630573413464e-07, "loss": 0.0102, "step": 17300 }, { "epoch": 3.936518771331058, "grad_norm": 1.5041578684774843, "learning_rate": 1.3447093057920544e-07, "loss": 0.0167, "step": 17301 }, { "epoch": 3.93674630261661, "grad_norm": 0.8169422277343484, "learning_rate": 1.3441556544979216e-07, "loss": 0.0097, "step": 17302 }, { "epoch": 3.9369738339021616, "grad_norm": 0.9772858529344867, "learning_rate": 1.3436021034702616e-07, "loss": 0.0157, "step": 17303 }, { "epoch": 3.9372013651877134, "grad_norm": 1.7616477183597348, "learning_rate": 1.3430486527203905e-07, "loss": 0.0044, "step": 17304 }, { "epoch": 3.937428896473265, "grad_norm": 0.8953612775019986, "learning_rate": 1.3424953022596165e-07, "loss": 0.0059, "step": 17305 }, { "epoch": 3.937656427758817, "grad_norm": 0.16554387529777004, "learning_rate": 1.3419420520992522e-07, "loss": 0.0008, "step": 17306 }, { "epoch": 3.9378839590443686, "grad_norm": 0.3798599969488338, "learning_rate": 1.3413889022506023e-07, "loss": 0.0039, "step": 17307 }, { "epoch": 3.9381114903299204, "grad_norm": 0.9769722529038835, "learning_rate": 1.3408358527249733e-07, "loss": 0.0091, "step": 17308 }, { "epoch": 3.938339021615472, "grad_norm": 0.4683109626372034, "learning_rate": 1.3402829035336704e-07, "loss": 0.0022, "step": 17309 }, { "epoch": 3.938566552901024, "grad_norm": 0.6736537168502115, "learning_rate": 1.339730054687992e-07, "loss": 0.0035, "step": 17310 }, { "epoch": 3.9387940841865756, "grad_norm": 0.8661484487165616, "learning_rate": 1.33917730619924e-07, "loss": 0.0106, "step": 17311 }, { "epoch": 3.9390216154721274, "grad_norm": 0.5889230502897723, "learning_rate": 1.3386246580787104e-07, "loss": 0.0077, "step": 17312 }, { "epoch": 3.939249146757679, "grad_norm": 0.6679386603057896, "learning_rate": 1.338072110337696e-07, "loss": 0.0044, "step": 17313 }, { "epoch": 3.939476678043231, "grad_norm": 1.1723224588215948, "learning_rate": 1.3375196629874916e-07, "loss": 0.0115, "step": 17314 }, { "epoch": 3.9397042093287826, "grad_norm": 0.20013517778357287, "learning_rate": 1.3369673160393892e-07, "loss": 0.0009, "step": 17315 }, { "epoch": 3.9399317406143344, "grad_norm": 0.3367920766369418, "learning_rate": 1.3364150695046783e-07, "loss": 0.0055, "step": 17316 }, { "epoch": 3.940159271899886, "grad_norm": 0.40545163251169813, "learning_rate": 1.335862923394643e-07, "loss": 0.0025, "step": 17317 }, { "epoch": 3.940386803185438, "grad_norm": 1.09371493195008, "learning_rate": 1.3353108777205714e-07, "loss": 0.0065, "step": 17318 }, { "epoch": 3.9406143344709896, "grad_norm": 0.14680091061064143, "learning_rate": 1.3347589324937447e-07, "loss": 0.0007, "step": 17319 }, { "epoch": 3.9408418657565414, "grad_norm": 0.5036468467561872, "learning_rate": 1.334207087725442e-07, "loss": 0.0032, "step": 17320 }, { "epoch": 3.941069397042093, "grad_norm": 0.5515194857046493, "learning_rate": 1.3336553434269434e-07, "loss": 0.0062, "step": 17321 }, { "epoch": 3.941296928327645, "grad_norm": 0.5227857317075686, "learning_rate": 1.3331036996095253e-07, "loss": 0.0038, "step": 17322 }, { "epoch": 3.9415244596131966, "grad_norm": 0.6686011950506637, "learning_rate": 1.3325521562844654e-07, "loss": 0.006, "step": 17323 }, { "epoch": 3.941751990898749, "grad_norm": 0.2480923351520417, "learning_rate": 1.332000713463031e-07, "loss": 0.0054, "step": 17324 }, { "epoch": 3.9419795221843, "grad_norm": 0.13431445373603923, "learning_rate": 1.3314493711564972e-07, "loss": 0.0007, "step": 17325 }, { "epoch": 3.9422070534698523, "grad_norm": 0.2705069811033585, "learning_rate": 1.33089812937613e-07, "loss": 0.0021, "step": 17326 }, { "epoch": 3.9424345847554036, "grad_norm": 0.7009722148895995, "learning_rate": 1.330346988133193e-07, "loss": 0.0015, "step": 17327 }, { "epoch": 3.942662116040956, "grad_norm": 1.7624514231638502, "learning_rate": 1.3297959474389567e-07, "loss": 0.0224, "step": 17328 }, { "epoch": 3.942889647326507, "grad_norm": 0.19739209436868027, "learning_rate": 1.3292450073046797e-07, "loss": 0.0011, "step": 17329 }, { "epoch": 3.9431171786120593, "grad_norm": 1.0987787103934548, "learning_rate": 1.3286941677416233e-07, "loss": 0.007, "step": 17330 }, { "epoch": 3.9433447098976107, "grad_norm": 0.5486740453457041, "learning_rate": 1.3281434287610458e-07, "loss": 0.0057, "step": 17331 }, { "epoch": 3.943572241183163, "grad_norm": 0.05564524355116932, "learning_rate": 1.3275927903742005e-07, "loss": 0.0002, "step": 17332 }, { "epoch": 3.9437997724687146, "grad_norm": 0.1358696841852597, "learning_rate": 1.3270422525923442e-07, "loss": 0.001, "step": 17333 }, { "epoch": 3.9440273037542664, "grad_norm": 0.15941835487883854, "learning_rate": 1.3264918154267281e-07, "loss": 0.0007, "step": 17334 }, { "epoch": 3.944254835039818, "grad_norm": 1.4956831159924415, "learning_rate": 1.325941478888604e-07, "loss": 0.0215, "step": 17335 }, { "epoch": 3.94448236632537, "grad_norm": 0.4387648068896323, "learning_rate": 1.3253912429892157e-07, "loss": 0.002, "step": 17336 }, { "epoch": 3.9447098976109216, "grad_norm": 0.17507562022345868, "learning_rate": 1.324841107739814e-07, "loss": 0.0003, "step": 17337 }, { "epoch": 3.9449374288964734, "grad_norm": 0.6287290311124165, "learning_rate": 1.3242910731516393e-07, "loss": 0.0054, "step": 17338 }, { "epoch": 3.945164960182025, "grad_norm": 0.7079559347686298, "learning_rate": 1.3237411392359332e-07, "loss": 0.0049, "step": 17339 }, { "epoch": 3.945392491467577, "grad_norm": 0.027927604182700863, "learning_rate": 1.3231913060039355e-07, "loss": 0.0001, "step": 17340 }, { "epoch": 3.9456200227531286, "grad_norm": 0.3338530252187427, "learning_rate": 1.3226415734668845e-07, "loss": 0.0012, "step": 17341 }, { "epoch": 3.9458475540386804, "grad_norm": 0.35287666022068304, "learning_rate": 1.3220919416360182e-07, "loss": 0.0021, "step": 17342 }, { "epoch": 3.946075085324232, "grad_norm": 1.693983274009833, "learning_rate": 1.3215424105225651e-07, "loss": 0.0085, "step": 17343 }, { "epoch": 3.946302616609784, "grad_norm": 0.9238561114644596, "learning_rate": 1.3209929801377614e-07, "loss": 0.0048, "step": 17344 }, { "epoch": 3.9465301478953356, "grad_norm": 1.0491504074121276, "learning_rate": 1.3204436504928336e-07, "loss": 0.0077, "step": 17345 }, { "epoch": 3.9467576791808874, "grad_norm": 0.2971904554097263, "learning_rate": 1.3198944215990078e-07, "loss": 0.0013, "step": 17346 }, { "epoch": 3.946985210466439, "grad_norm": 0.21351502520936663, "learning_rate": 1.319345293467511e-07, "loss": 0.0017, "step": 17347 }, { "epoch": 3.947212741751991, "grad_norm": 0.2931553863289455, "learning_rate": 1.318796266109567e-07, "loss": 0.0019, "step": 17348 }, { "epoch": 3.9474402730375426, "grad_norm": 0.6480152982458155, "learning_rate": 1.318247339536397e-07, "loss": 0.0059, "step": 17349 }, { "epoch": 3.9476678043230944, "grad_norm": 0.9290221927289312, "learning_rate": 1.3176985137592197e-07, "loss": 0.0037, "step": 17350 }, { "epoch": 3.947895335608646, "grad_norm": 0.8375294368906704, "learning_rate": 1.3171497887892502e-07, "loss": 0.0033, "step": 17351 }, { "epoch": 3.948122866894198, "grad_norm": 0.589273179692043, "learning_rate": 1.3166011646377063e-07, "loss": 0.002, "step": 17352 }, { "epoch": 3.9483503981797496, "grad_norm": 0.5268988076977266, "learning_rate": 1.316052641315798e-07, "loss": 0.0035, "step": 17353 }, { "epoch": 3.9485779294653014, "grad_norm": 0.6422313999524466, "learning_rate": 1.315504218834738e-07, "loss": 0.0043, "step": 17354 }, { "epoch": 3.948805460750853, "grad_norm": 0.15790931256459517, "learning_rate": 1.3149558972057338e-07, "loss": 0.0005, "step": 17355 }, { "epoch": 3.949032992036405, "grad_norm": 1.184376293144616, "learning_rate": 1.3144076764399951e-07, "loss": 0.0101, "step": 17356 }, { "epoch": 3.9492605233219567, "grad_norm": 0.5292165116221901, "learning_rate": 1.3138595565487244e-07, "loss": 0.0033, "step": 17357 }, { "epoch": 3.9494880546075084, "grad_norm": 0.12206123916486297, "learning_rate": 1.3133115375431222e-07, "loss": 0.0007, "step": 17358 }, { "epoch": 3.94971558589306, "grad_norm": 0.5193995718314467, "learning_rate": 1.3127636194343936e-07, "loss": 0.0039, "step": 17359 }, { "epoch": 3.949943117178612, "grad_norm": 1.4995123155353254, "learning_rate": 1.3122158022337322e-07, "loss": 0.0028, "step": 17360 }, { "epoch": 3.9501706484641637, "grad_norm": 0.03986046206076346, "learning_rate": 1.3116680859523368e-07, "loss": 0.0001, "step": 17361 }, { "epoch": 3.9503981797497154, "grad_norm": 0.3346464244236965, "learning_rate": 1.3111204706014035e-07, "loss": 0.0022, "step": 17362 }, { "epoch": 3.9506257110352676, "grad_norm": 1.2742313841027222, "learning_rate": 1.3105729561921202e-07, "loss": 0.0162, "step": 17363 }, { "epoch": 3.950853242320819, "grad_norm": 1.6675890952810157, "learning_rate": 1.3100255427356816e-07, "loss": 0.01, "step": 17364 }, { "epoch": 3.951080773606371, "grad_norm": 0.30180344491235467, "learning_rate": 1.3094782302432725e-07, "loss": 0.002, "step": 17365 }, { "epoch": 3.9513083048919224, "grad_norm": 1.2038777761471977, "learning_rate": 1.3089310187260818e-07, "loss": 0.0107, "step": 17366 }, { "epoch": 3.9515358361774746, "grad_norm": 0.8611639609287419, "learning_rate": 1.3083839081952898e-07, "loss": 0.0073, "step": 17367 }, { "epoch": 3.951763367463026, "grad_norm": 0.6267966535427179, "learning_rate": 1.3078368986620808e-07, "loss": 0.0043, "step": 17368 }, { "epoch": 3.951990898748578, "grad_norm": 0.5768002857092874, "learning_rate": 1.3072899901376363e-07, "loss": 0.0059, "step": 17369 }, { "epoch": 3.9522184300341294, "grad_norm": 0.8548056544853179, "learning_rate": 1.3067431826331307e-07, "loss": 0.0051, "step": 17370 }, { "epoch": 3.9524459613196816, "grad_norm": 0.587538481021345, "learning_rate": 1.3061964761597427e-07, "loss": 0.0021, "step": 17371 }, { "epoch": 3.9526734926052334, "grad_norm": 0.6347083094632282, "learning_rate": 1.3056498707286433e-07, "loss": 0.0042, "step": 17372 }, { "epoch": 3.952901023890785, "grad_norm": 0.9885894867007102, "learning_rate": 1.3051033663510064e-07, "loss": 0.0102, "step": 17373 }, { "epoch": 3.953128555176337, "grad_norm": 0.130146161481278, "learning_rate": 1.3045569630379993e-07, "loss": 0.0003, "step": 17374 }, { "epoch": 3.9533560864618886, "grad_norm": 0.4429385246478639, "learning_rate": 1.3040106608007906e-07, "loss": 0.0044, "step": 17375 }, { "epoch": 3.9535836177474404, "grad_norm": 0.2111279295617767, "learning_rate": 1.3034644596505475e-07, "loss": 0.0004, "step": 17376 }, { "epoch": 3.953811149032992, "grad_norm": 1.1997805091859528, "learning_rate": 1.3029183595984296e-07, "loss": 0.007, "step": 17377 }, { "epoch": 3.954038680318544, "grad_norm": 1.4889157885380742, "learning_rate": 1.3023723606556028e-07, "loss": 0.0086, "step": 17378 }, { "epoch": 3.9542662116040956, "grad_norm": 0.8419377175788363, "learning_rate": 1.3018264628332215e-07, "loss": 0.0053, "step": 17379 }, { "epoch": 3.9544937428896474, "grad_norm": 1.0737248760883398, "learning_rate": 1.3012806661424475e-07, "loss": 0.0075, "step": 17380 }, { "epoch": 3.954721274175199, "grad_norm": 0.30776947519682274, "learning_rate": 1.3007349705944314e-07, "loss": 0.0063, "step": 17381 }, { "epoch": 3.954948805460751, "grad_norm": 0.9093064333816764, "learning_rate": 1.300189376200328e-07, "loss": 0.0037, "step": 17382 }, { "epoch": 3.9551763367463026, "grad_norm": 0.15241638839307464, "learning_rate": 1.2996438829712904e-07, "loss": 0.0004, "step": 17383 }, { "epoch": 3.9554038680318544, "grad_norm": 0.3550032156981145, "learning_rate": 1.2990984909184641e-07, "loss": 0.002, "step": 17384 }, { "epoch": 3.955631399317406, "grad_norm": 1.1103688681032893, "learning_rate": 1.2985532000529995e-07, "loss": 0.0128, "step": 17385 }, { "epoch": 3.955858930602958, "grad_norm": 0.2716058401440481, "learning_rate": 1.2980080103860377e-07, "loss": 0.0013, "step": 17386 }, { "epoch": 3.9560864618885097, "grad_norm": 0.41484672130210287, "learning_rate": 1.297462921928723e-07, "loss": 0.0029, "step": 17387 }, { "epoch": 3.9563139931740614, "grad_norm": 0.9063623172888129, "learning_rate": 1.2969179346921968e-07, "loss": 0.0113, "step": 17388 }, { "epoch": 3.956541524459613, "grad_norm": 0.4164205730126769, "learning_rate": 1.2963730486875964e-07, "loss": 0.0035, "step": 17389 }, { "epoch": 3.956769055745165, "grad_norm": 0.4725788517925031, "learning_rate": 1.2958282639260597e-07, "loss": 0.0026, "step": 17390 }, { "epoch": 3.9569965870307167, "grad_norm": 1.0877283024703572, "learning_rate": 1.2952835804187185e-07, "loss": 0.0056, "step": 17391 }, { "epoch": 3.9572241183162684, "grad_norm": 0.33231016793331286, "learning_rate": 1.2947389981767088e-07, "loss": 0.001, "step": 17392 }, { "epoch": 3.95745164960182, "grad_norm": 1.2459676663311274, "learning_rate": 1.2941945172111573e-07, "loss": 0.0084, "step": 17393 }, { "epoch": 3.957679180887372, "grad_norm": 0.6541089206750429, "learning_rate": 1.2936501375331935e-07, "loss": 0.0035, "step": 17394 }, { "epoch": 3.9579067121729237, "grad_norm": 0.8298956312365057, "learning_rate": 1.2931058591539453e-07, "loss": 0.0043, "step": 17395 }, { "epoch": 3.9581342434584754, "grad_norm": 1.0701816309381331, "learning_rate": 1.2925616820845338e-07, "loss": 0.0096, "step": 17396 }, { "epoch": 3.958361774744027, "grad_norm": 0.6883616922651702, "learning_rate": 1.2920176063360838e-07, "loss": 0.0099, "step": 17397 }, { "epoch": 3.958589306029579, "grad_norm": 0.3535878991342341, "learning_rate": 1.2914736319197123e-07, "loss": 0.0018, "step": 17398 }, { "epoch": 3.9588168373151307, "grad_norm": 0.37106793211297023, "learning_rate": 1.2909297588465401e-07, "loss": 0.0019, "step": 17399 }, { "epoch": 3.9590443686006824, "grad_norm": 0.49901096971990616, "learning_rate": 1.2903859871276797e-07, "loss": 0.0027, "step": 17400 }, { "epoch": 3.959271899886234, "grad_norm": 0.3139626517784712, "learning_rate": 1.2898423167742474e-07, "loss": 0.0022, "step": 17401 }, { "epoch": 3.9594994311717864, "grad_norm": 0.5254324331302198, "learning_rate": 1.2892987477973551e-07, "loss": 0.0024, "step": 17402 }, { "epoch": 3.9597269624573377, "grad_norm": 0.6073099461603018, "learning_rate": 1.28875528020811e-07, "loss": 0.0045, "step": 17403 }, { "epoch": 3.95995449374289, "grad_norm": 0.5633721630969555, "learning_rate": 1.2882119140176222e-07, "loss": 0.0062, "step": 17404 }, { "epoch": 3.960182025028441, "grad_norm": 0.35381568067651353, "learning_rate": 1.2876686492369942e-07, "loss": 0.0015, "step": 17405 }, { "epoch": 3.9604095563139934, "grad_norm": 0.770532218767027, "learning_rate": 1.287125485877333e-07, "loss": 0.003, "step": 17406 }, { "epoch": 3.9606370875995447, "grad_norm": 0.5435494594673098, "learning_rate": 1.2865824239497352e-07, "loss": 0.0025, "step": 17407 }, { "epoch": 3.960864618885097, "grad_norm": 0.8660472829596113, "learning_rate": 1.2860394634653036e-07, "loss": 0.0075, "step": 17408 }, { "epoch": 3.961092150170648, "grad_norm": 0.6564587130449389, "learning_rate": 1.2854966044351353e-07, "loss": 0.0047, "step": 17409 }, { "epoch": 3.9613196814562004, "grad_norm": 0.45840214149160186, "learning_rate": 1.2849538468703232e-07, "loss": 0.0036, "step": 17410 }, { "epoch": 3.961547212741752, "grad_norm": 0.7248423199797303, "learning_rate": 1.2844111907819624e-07, "loss": 0.0053, "step": 17411 }, { "epoch": 3.961774744027304, "grad_norm": 0.8129276716225406, "learning_rate": 1.2838686361811417e-07, "loss": 0.0128, "step": 17412 }, { "epoch": 3.9620022753128556, "grad_norm": 0.2281140146820924, "learning_rate": 1.2833261830789526e-07, "loss": 0.001, "step": 17413 }, { "epoch": 3.9622298065984074, "grad_norm": 0.1982691304621184, "learning_rate": 1.282783831486479e-07, "loss": 0.0007, "step": 17414 }, { "epoch": 3.962457337883959, "grad_norm": 1.0152502289999796, "learning_rate": 1.282241581414806e-07, "loss": 0.0047, "step": 17415 }, { "epoch": 3.962684869169511, "grad_norm": 1.255293236313774, "learning_rate": 1.2816994328750193e-07, "loss": 0.0102, "step": 17416 }, { "epoch": 3.9629124004550627, "grad_norm": 0.43258018062145326, "learning_rate": 1.281157385878195e-07, "loss": 0.0011, "step": 17417 }, { "epoch": 3.9631399317406144, "grad_norm": 0.31388592875178134, "learning_rate": 1.2806154404354164e-07, "loss": 0.0035, "step": 17418 }, { "epoch": 3.963367463026166, "grad_norm": 1.6690650741392696, "learning_rate": 1.2800735965577563e-07, "loss": 0.0176, "step": 17419 }, { "epoch": 3.963594994311718, "grad_norm": 0.23800639840585608, "learning_rate": 1.2795318542562886e-07, "loss": 0.0015, "step": 17420 }, { "epoch": 3.9638225255972697, "grad_norm": 0.23706632802753697, "learning_rate": 1.278990213542086e-07, "loss": 0.0011, "step": 17421 }, { "epoch": 3.9640500568828214, "grad_norm": 0.5553899861111716, "learning_rate": 1.2784486744262205e-07, "loss": 0.0053, "step": 17422 }, { "epoch": 3.964277588168373, "grad_norm": 0.4890442615449546, "learning_rate": 1.2779072369197606e-07, "loss": 0.0011, "step": 17423 }, { "epoch": 3.964505119453925, "grad_norm": 0.9208733179059824, "learning_rate": 1.277365901033769e-07, "loss": 0.0067, "step": 17424 }, { "epoch": 3.9647326507394767, "grad_norm": 0.8408810818125733, "learning_rate": 1.276824666779313e-07, "loss": 0.0073, "step": 17425 }, { "epoch": 3.9649601820250284, "grad_norm": 0.7799195005748943, "learning_rate": 1.276283534167453e-07, "loss": 0.0079, "step": 17426 }, { "epoch": 3.96518771331058, "grad_norm": 0.5796346040547853, "learning_rate": 1.2757425032092472e-07, "loss": 0.0027, "step": 17427 }, { "epoch": 3.965415244596132, "grad_norm": 0.04445487201655401, "learning_rate": 1.275201573915754e-07, "loss": 0.0001, "step": 17428 }, { "epoch": 3.9656427758816837, "grad_norm": 0.1716767210941301, "learning_rate": 1.2746607462980308e-07, "loss": 0.0008, "step": 17429 }, { "epoch": 3.9658703071672354, "grad_norm": 0.45621177049817785, "learning_rate": 1.2741200203671312e-07, "loss": 0.0034, "step": 17430 }, { "epoch": 3.966097838452787, "grad_norm": 0.27130382675018594, "learning_rate": 1.2735793961341031e-07, "loss": 0.0014, "step": 17431 }, { "epoch": 3.966325369738339, "grad_norm": 0.2650702131418536, "learning_rate": 1.273038873610001e-07, "loss": 0.0013, "step": 17432 }, { "epoch": 3.9665529010238907, "grad_norm": 1.4913394270521916, "learning_rate": 1.272498452805869e-07, "loss": 0.0143, "step": 17433 }, { "epoch": 3.9667804323094424, "grad_norm": 0.8047647512228696, "learning_rate": 1.271958133732751e-07, "loss": 0.0047, "step": 17434 }, { "epoch": 3.967007963594994, "grad_norm": 0.24135175263941927, "learning_rate": 1.2714179164016918e-07, "loss": 0.0012, "step": 17435 }, { "epoch": 3.967235494880546, "grad_norm": 0.5230621773207512, "learning_rate": 1.2708778008237327e-07, "loss": 0.0046, "step": 17436 }, { "epoch": 3.9674630261660977, "grad_norm": 0.2801102668107403, "learning_rate": 1.270337787009914e-07, "loss": 0.0009, "step": 17437 }, { "epoch": 3.9676905574516494, "grad_norm": 0.2792176297284085, "learning_rate": 1.2697978749712714e-07, "loss": 0.0018, "step": 17438 }, { "epoch": 3.967918088737201, "grad_norm": 1.2215207521361648, "learning_rate": 1.2692580647188374e-07, "loss": 0.0134, "step": 17439 }, { "epoch": 3.968145620022753, "grad_norm": 0.4854027755332585, "learning_rate": 1.268718356263649e-07, "loss": 0.0033, "step": 17440 }, { "epoch": 3.968373151308305, "grad_norm": 0.2914720260689757, "learning_rate": 1.268178749616733e-07, "loss": 0.0015, "step": 17441 }, { "epoch": 3.9686006825938565, "grad_norm": 0.7210781014813473, "learning_rate": 1.2676392447891192e-07, "loss": 0.0075, "step": 17442 }, { "epoch": 3.9688282138794087, "grad_norm": 0.312152522326317, "learning_rate": 1.2670998417918344e-07, "loss": 0.0018, "step": 17443 }, { "epoch": 3.96905574516496, "grad_norm": 0.12833233597198707, "learning_rate": 1.2665605406359047e-07, "loss": 0.0005, "step": 17444 }, { "epoch": 3.969283276450512, "grad_norm": 0.396589424019899, "learning_rate": 1.2660213413323513e-07, "loss": 0.0014, "step": 17445 }, { "epoch": 3.9695108077360635, "grad_norm": 0.13630631018474987, "learning_rate": 1.2654822438921918e-07, "loss": 0.0003, "step": 17446 }, { "epoch": 3.9697383390216157, "grad_norm": 0.5369596034449677, "learning_rate": 1.264943248326448e-07, "loss": 0.0061, "step": 17447 }, { "epoch": 3.969965870307167, "grad_norm": 0.8642869028268628, "learning_rate": 1.2644043546461316e-07, "loss": 0.0083, "step": 17448 }, { "epoch": 3.970193401592719, "grad_norm": 0.7052606768695154, "learning_rate": 1.2638655628622626e-07, "loss": 0.0054, "step": 17449 }, { "epoch": 3.970420932878271, "grad_norm": 0.47428977276765627, "learning_rate": 1.2633268729858483e-07, "loss": 0.0055, "step": 17450 }, { "epoch": 3.9706484641638227, "grad_norm": 0.9816374353825604, "learning_rate": 1.262788285027902e-07, "loss": 0.0053, "step": 17451 }, { "epoch": 3.9708759954493744, "grad_norm": 0.7074304451218899, "learning_rate": 1.2622497989994292e-07, "loss": 0.0045, "step": 17452 }, { "epoch": 3.971103526734926, "grad_norm": 0.5731869367552336, "learning_rate": 1.2617114149114346e-07, "loss": 0.0054, "step": 17453 }, { "epoch": 3.971331058020478, "grad_norm": 0.5969108479500499, "learning_rate": 1.2611731327749227e-07, "loss": 0.0036, "step": 17454 }, { "epoch": 3.9715585893060297, "grad_norm": 0.6742105610850074, "learning_rate": 1.260634952600895e-07, "loss": 0.0028, "step": 17455 }, { "epoch": 3.9717861205915814, "grad_norm": 0.3618730199452773, "learning_rate": 1.2600968744003531e-07, "loss": 0.002, "step": 17456 }, { "epoch": 3.972013651877133, "grad_norm": 0.780243028694876, "learning_rate": 1.2595588981842932e-07, "loss": 0.0048, "step": 17457 }, { "epoch": 3.972241183162685, "grad_norm": 0.2742107077115172, "learning_rate": 1.2590210239637072e-07, "loss": 0.0012, "step": 17458 }, { "epoch": 3.9724687144482367, "grad_norm": 0.35193227356601514, "learning_rate": 1.2584832517495933e-07, "loss": 0.0018, "step": 17459 }, { "epoch": 3.9726962457337884, "grad_norm": 0.40765655668535905, "learning_rate": 1.2579455815529384e-07, "loss": 0.0026, "step": 17460 }, { "epoch": 3.97292377701934, "grad_norm": 0.43009560578101397, "learning_rate": 1.2574080133847331e-07, "loss": 0.0022, "step": 17461 }, { "epoch": 3.973151308304892, "grad_norm": 0.23861181318290656, "learning_rate": 1.2568705472559652e-07, "loss": 0.0011, "step": 17462 }, { "epoch": 3.9733788395904437, "grad_norm": 0.41045645217962484, "learning_rate": 1.2563331831776193e-07, "loss": 0.0029, "step": 17463 }, { "epoch": 3.9736063708759954, "grad_norm": 0.6030662982222028, "learning_rate": 1.2557959211606789e-07, "loss": 0.0069, "step": 17464 }, { "epoch": 3.973833902161547, "grad_norm": 0.5152732458896165, "learning_rate": 1.2552587612161208e-07, "loss": 0.0048, "step": 17465 }, { "epoch": 3.974061433447099, "grad_norm": 0.16209326712380676, "learning_rate": 1.2547217033549282e-07, "loss": 0.0005, "step": 17466 }, { "epoch": 3.9742889647326507, "grad_norm": 1.0345853563278713, "learning_rate": 1.2541847475880742e-07, "loss": 0.0056, "step": 17467 }, { "epoch": 3.9745164960182024, "grad_norm": 0.37478964550817606, "learning_rate": 1.2536478939265344e-07, "loss": 0.0027, "step": 17468 }, { "epoch": 3.974744027303754, "grad_norm": 0.3720642439906031, "learning_rate": 1.2531111423812824e-07, "loss": 0.0015, "step": 17469 }, { "epoch": 3.974971558589306, "grad_norm": 0.41126076698167735, "learning_rate": 1.2525744929632864e-07, "loss": 0.0014, "step": 17470 }, { "epoch": 3.9751990898748577, "grad_norm": 0.6676701874012441, "learning_rate": 1.2520379456835165e-07, "loss": 0.0026, "step": 17471 }, { "epoch": 3.9754266211604095, "grad_norm": 0.13049062002758804, "learning_rate": 1.251501500552936e-07, "loss": 0.0004, "step": 17472 }, { "epoch": 3.975654152445961, "grad_norm": 0.4854694348528609, "learning_rate": 1.250965157582512e-07, "loss": 0.0027, "step": 17473 }, { "epoch": 3.975881683731513, "grad_norm": 0.269772178061348, "learning_rate": 1.2504289167832044e-07, "loss": 0.0023, "step": 17474 }, { "epoch": 3.9761092150170647, "grad_norm": 0.5794077499941425, "learning_rate": 1.2498927781659723e-07, "loss": 0.0058, "step": 17475 }, { "epoch": 3.9763367463026165, "grad_norm": 0.41806426999189505, "learning_rate": 1.2493567417417764e-07, "loss": 0.0025, "step": 17476 }, { "epoch": 3.976564277588168, "grad_norm": 0.9116076167128014, "learning_rate": 1.2488208075215688e-07, "loss": 0.0075, "step": 17477 }, { "epoch": 3.9767918088737204, "grad_norm": 0.2855753565345155, "learning_rate": 1.248284975516307e-07, "loss": 0.0011, "step": 17478 }, { "epoch": 3.9770193401592717, "grad_norm": 0.33073119618849656, "learning_rate": 1.2477492457369371e-07, "loss": 0.0021, "step": 17479 }, { "epoch": 3.977246871444824, "grad_norm": 0.22732675521020132, "learning_rate": 1.2472136181944144e-07, "loss": 0.0011, "step": 17480 }, { "epoch": 3.9774744027303752, "grad_norm": 0.8470419377975855, "learning_rate": 1.2466780928996808e-07, "loss": 0.0009, "step": 17481 }, { "epoch": 3.9777019340159274, "grad_norm": 1.020988280117928, "learning_rate": 1.246142669863684e-07, "loss": 0.0103, "step": 17482 }, { "epoch": 3.9779294653014787, "grad_norm": 0.852941202426274, "learning_rate": 1.2456073490973685e-07, "loss": 0.0056, "step": 17483 }, { "epoch": 3.978156996587031, "grad_norm": 0.4637809677025121, "learning_rate": 1.245072130611672e-07, "loss": 0.0033, "step": 17484 }, { "epoch": 3.9783845278725822, "grad_norm": 0.34269639049951195, "learning_rate": 1.244537014417537e-07, "loss": 0.0024, "step": 17485 }, { "epoch": 3.9786120591581344, "grad_norm": 0.22874864125962377, "learning_rate": 1.2440020005258964e-07, "loss": 0.0013, "step": 17486 }, { "epoch": 3.9788395904436857, "grad_norm": 0.6496798634705766, "learning_rate": 1.243467088947689e-07, "loss": 0.011, "step": 17487 }, { "epoch": 3.979067121729238, "grad_norm": 0.11533311557191195, "learning_rate": 1.2429322796938433e-07, "loss": 0.0004, "step": 17488 }, { "epoch": 3.9792946530147897, "grad_norm": 5.70128021940206, "learning_rate": 1.2423975727752913e-07, "loss": 0.0834, "step": 17489 }, { "epoch": 3.9795221843003414, "grad_norm": 1.4747625413415022, "learning_rate": 1.2418629682029642e-07, "loss": 0.009, "step": 17490 }, { "epoch": 3.979749715585893, "grad_norm": 1.2347171218960202, "learning_rate": 1.2413284659877846e-07, "loss": 0.0073, "step": 17491 }, { "epoch": 3.979977246871445, "grad_norm": 1.6654886546391656, "learning_rate": 1.2407940661406792e-07, "loss": 0.0049, "step": 17492 }, { "epoch": 3.9802047781569967, "grad_norm": 0.5240183716627871, "learning_rate": 1.2402597686725678e-07, "loss": 0.0016, "step": 17493 }, { "epoch": 3.9804323094425484, "grad_norm": 0.48011122995605526, "learning_rate": 1.2397255735943732e-07, "loss": 0.0043, "step": 17494 }, { "epoch": 3.9806598407281, "grad_norm": 0.4093950279431728, "learning_rate": 1.2391914809170105e-07, "loss": 0.0089, "step": 17495 }, { "epoch": 3.980887372013652, "grad_norm": 0.4126442259078763, "learning_rate": 1.2386574906513974e-07, "loss": 0.0039, "step": 17496 }, { "epoch": 3.9811149032992037, "grad_norm": 0.4156984487629721, "learning_rate": 1.2381236028084484e-07, "loss": 0.0036, "step": 17497 }, { "epoch": 3.9813424345847555, "grad_norm": 0.6028087960673134, "learning_rate": 1.237589817399073e-07, "loss": 0.0065, "step": 17498 }, { "epoch": 3.981569965870307, "grad_norm": 0.5929235680596575, "learning_rate": 1.2370561344341837e-07, "loss": 0.0021, "step": 17499 }, { "epoch": 3.981797497155859, "grad_norm": 0.7015450963689267, "learning_rate": 1.2365225539246843e-07, "loss": 0.0047, "step": 17500 }, { "epoch": 3.9820250284414107, "grad_norm": 0.6748036202022992, "learning_rate": 1.2359890758814837e-07, "loss": 0.0086, "step": 17501 }, { "epoch": 3.9822525597269625, "grad_norm": 1.0840057536625178, "learning_rate": 1.235455700315482e-07, "loss": 0.0065, "step": 17502 }, { "epoch": 3.982480091012514, "grad_norm": 3.6051017643070202, "learning_rate": 1.2349224272375824e-07, "loss": 0.0156, "step": 17503 }, { "epoch": 3.982707622298066, "grad_norm": 0.3115475505844364, "learning_rate": 1.234389256658685e-07, "loss": 0.0007, "step": 17504 }, { "epoch": 3.9829351535836177, "grad_norm": 0.2073389094899671, "learning_rate": 1.2338561885896842e-07, "loss": 0.0011, "step": 17505 }, { "epoch": 3.9831626848691695, "grad_norm": 0.24026935000986113, "learning_rate": 1.2333232230414779e-07, "loss": 0.001, "step": 17506 }, { "epoch": 3.983390216154721, "grad_norm": 0.3431734568034838, "learning_rate": 1.2327903600249568e-07, "loss": 0.0011, "step": 17507 }, { "epoch": 3.983617747440273, "grad_norm": 0.3473737438824344, "learning_rate": 1.2322575995510095e-07, "loss": 0.0016, "step": 17508 }, { "epoch": 3.9838452787258247, "grad_norm": 0.6861698641665197, "learning_rate": 1.2317249416305305e-07, "loss": 0.0043, "step": 17509 }, { "epoch": 3.9840728100113765, "grad_norm": 0.5456234281186906, "learning_rate": 1.2311923862744013e-07, "loss": 0.0028, "step": 17510 }, { "epoch": 3.9843003412969282, "grad_norm": 0.5740353208548274, "learning_rate": 1.2306599334935096e-07, "loss": 0.0018, "step": 17511 }, { "epoch": 3.98452787258248, "grad_norm": 0.7622789985246816, "learning_rate": 1.2301275832987355e-07, "loss": 0.0044, "step": 17512 }, { "epoch": 3.9847554038680317, "grad_norm": 0.12641720882179103, "learning_rate": 1.229595335700961e-07, "loss": 0.0006, "step": 17513 }, { "epoch": 3.9849829351535835, "grad_norm": 1.293598650078218, "learning_rate": 1.2290631907110621e-07, "loss": 0.0196, "step": 17514 }, { "epoch": 3.9852104664391352, "grad_norm": 0.3616435837984693, "learning_rate": 1.2285311483399162e-07, "loss": 0.0031, "step": 17515 }, { "epoch": 3.985437997724687, "grad_norm": 0.2819649940216434, "learning_rate": 1.227999208598399e-07, "loss": 0.0013, "step": 17516 }, { "epoch": 3.985665529010239, "grad_norm": 0.17970787794485496, "learning_rate": 1.2274673714973788e-07, "loss": 0.0007, "step": 17517 }, { "epoch": 3.9858930602957905, "grad_norm": 0.7968233791694826, "learning_rate": 1.2269356370477284e-07, "loss": 0.0023, "step": 17518 }, { "epoch": 3.9861205915813427, "grad_norm": 0.820206075218965, "learning_rate": 1.2264040052603135e-07, "loss": 0.005, "step": 17519 }, { "epoch": 3.986348122866894, "grad_norm": 0.4913021073190142, "learning_rate": 1.2258724761460013e-07, "loss": 0.005, "step": 17520 }, { "epoch": 3.986575654152446, "grad_norm": 0.611580188475774, "learning_rate": 1.225341049715653e-07, "loss": 0.0021, "step": 17521 }, { "epoch": 3.9868031854379975, "grad_norm": 0.1414452908326386, "learning_rate": 1.224809725980132e-07, "loss": 0.0002, "step": 17522 }, { "epoch": 3.9870307167235497, "grad_norm": 0.2163589935762649, "learning_rate": 1.2242785049502977e-07, "loss": 0.0013, "step": 17523 }, { "epoch": 3.987258248009101, "grad_norm": 0.2232979418651783, "learning_rate": 1.223747386637006e-07, "loss": 0.0005, "step": 17524 }, { "epoch": 3.987485779294653, "grad_norm": 0.1890432328754421, "learning_rate": 1.2232163710511134e-07, "loss": 0.0005, "step": 17525 }, { "epoch": 3.9877133105802045, "grad_norm": 0.36560189889371, "learning_rate": 1.222685458203472e-07, "loss": 0.002, "step": 17526 }, { "epoch": 3.9879408418657567, "grad_norm": 0.6033124966619684, "learning_rate": 1.222154648104932e-07, "loss": 0.0053, "step": 17527 }, { "epoch": 3.9881683731513085, "grad_norm": 0.2909141633011455, "learning_rate": 1.2216239407663423e-07, "loss": 0.0015, "step": 17528 }, { "epoch": 3.98839590443686, "grad_norm": 0.611402246294342, "learning_rate": 1.2210933361985505e-07, "loss": 0.0029, "step": 17529 }, { "epoch": 3.988623435722412, "grad_norm": 0.47745487688787214, "learning_rate": 1.2205628344124017e-07, "loss": 0.0007, "step": 17530 }, { "epoch": 3.9888509670079637, "grad_norm": 0.9658621934541279, "learning_rate": 1.2200324354187368e-07, "loss": 0.0019, "step": 17531 }, { "epoch": 3.9890784982935155, "grad_norm": 0.09423776997832746, "learning_rate": 1.2195021392283984e-07, "loss": 0.0003, "step": 17532 }, { "epoch": 3.989306029579067, "grad_norm": 1.064752010323322, "learning_rate": 1.2189719458522235e-07, "loss": 0.0015, "step": 17533 }, { "epoch": 3.989533560864619, "grad_norm": 0.38576958217850915, "learning_rate": 1.218441855301046e-07, "loss": 0.0031, "step": 17534 }, { "epoch": 3.9897610921501707, "grad_norm": 0.8957054233351369, "learning_rate": 1.2179118675857028e-07, "loss": 0.0096, "step": 17535 }, { "epoch": 3.9899886234357225, "grad_norm": 1.088245493826366, "learning_rate": 1.2173819827170245e-07, "loss": 0.0042, "step": 17536 }, { "epoch": 3.9902161547212742, "grad_norm": 0.16571935653249167, "learning_rate": 1.216852200705844e-07, "loss": 0.0003, "step": 17537 }, { "epoch": 3.990443686006826, "grad_norm": 0.8007658729923323, "learning_rate": 1.216322521562985e-07, "loss": 0.0047, "step": 17538 }, { "epoch": 3.9906712172923777, "grad_norm": 0.11266309069573179, "learning_rate": 1.215792945299276e-07, "loss": 0.0005, "step": 17539 }, { "epoch": 3.9908987485779295, "grad_norm": 1.68090694143008, "learning_rate": 1.2152634719255395e-07, "loss": 0.009, "step": 17540 }, { "epoch": 3.9911262798634812, "grad_norm": 0.19820447439425923, "learning_rate": 1.2147341014525956e-07, "loss": 0.0006, "step": 17541 }, { "epoch": 3.991353811149033, "grad_norm": 0.5036323396095254, "learning_rate": 1.214204833891265e-07, "loss": 0.0041, "step": 17542 }, { "epoch": 3.9915813424345847, "grad_norm": 0.029543867258822028, "learning_rate": 1.213675669252365e-07, "loss": 0.0001, "step": 17543 }, { "epoch": 3.9918088737201365, "grad_norm": 0.49918983033458597, "learning_rate": 1.213146607546712e-07, "loss": 0.0035, "step": 17544 }, { "epoch": 3.9920364050056882, "grad_norm": 0.6233330097073315, "learning_rate": 1.2126176487851188e-07, "loss": 0.0053, "step": 17545 }, { "epoch": 3.99226393629124, "grad_norm": 0.08243907459095452, "learning_rate": 1.2120887929783928e-07, "loss": 0.0003, "step": 17546 }, { "epoch": 3.9924914675767917, "grad_norm": 0.3815636324762859, "learning_rate": 1.2115600401373468e-07, "loss": 0.0031, "step": 17547 }, { "epoch": 3.9927189988623435, "grad_norm": 0.6825594441576062, "learning_rate": 1.2110313902727848e-07, "loss": 0.0026, "step": 17548 }, { "epoch": 3.9929465301478952, "grad_norm": 0.9462195802592438, "learning_rate": 1.2105028433955133e-07, "loss": 0.008, "step": 17549 }, { "epoch": 3.993174061433447, "grad_norm": 0.2550549720280645, "learning_rate": 1.2099743995163338e-07, "loss": 0.0018, "step": 17550 }, { "epoch": 3.9934015927189987, "grad_norm": 0.7824616182214014, "learning_rate": 1.2094460586460486e-07, "loss": 0.0053, "step": 17551 }, { "epoch": 3.9936291240045505, "grad_norm": 2.0269844984205334, "learning_rate": 1.208917820795455e-07, "loss": 0.0039, "step": 17552 }, { "epoch": 3.9938566552901023, "grad_norm": 0.4486923510876766, "learning_rate": 1.2083896859753469e-07, "loss": 0.004, "step": 17553 }, { "epoch": 3.994084186575654, "grad_norm": 1.7701928844488581, "learning_rate": 1.2078616541965225e-07, "loss": 0.0175, "step": 17554 }, { "epoch": 3.9943117178612058, "grad_norm": 0.09766776104293418, "learning_rate": 1.2073337254697698e-07, "loss": 0.0006, "step": 17555 }, { "epoch": 3.994539249146758, "grad_norm": 0.8371493073523643, "learning_rate": 1.2068058998058803e-07, "loss": 0.0081, "step": 17556 }, { "epoch": 3.9947667804323093, "grad_norm": 0.7097963171688468, "learning_rate": 1.2062781772156416e-07, "loss": 0.0085, "step": 17557 }, { "epoch": 3.9949943117178615, "grad_norm": 0.5136915821766495, "learning_rate": 1.205750557709842e-07, "loss": 0.0032, "step": 17558 }, { "epoch": 3.9952218430034128, "grad_norm": 0.5430744369698255, "learning_rate": 1.2052230412992632e-07, "loss": 0.0014, "step": 17559 }, { "epoch": 3.995449374288965, "grad_norm": 0.152188438853009, "learning_rate": 1.204695627994684e-07, "loss": 0.0006, "step": 17560 }, { "epoch": 3.9956769055745163, "grad_norm": 0.2953532854784422, "learning_rate": 1.2041683178068883e-07, "loss": 0.0025, "step": 17561 }, { "epoch": 3.9959044368600685, "grad_norm": 0.2111674640755319, "learning_rate": 1.2036411107466499e-07, "loss": 0.0008, "step": 17562 }, { "epoch": 3.9961319681456198, "grad_norm": 0.3922537075609454, "learning_rate": 1.2031140068247445e-07, "loss": 0.0019, "step": 17563 }, { "epoch": 3.996359499431172, "grad_norm": 0.669562396921504, "learning_rate": 1.202587006051948e-07, "loss": 0.0025, "step": 17564 }, { "epoch": 3.9965870307167233, "grad_norm": 0.4788241761623242, "learning_rate": 1.2020601084390273e-07, "loss": 0.0013, "step": 17565 }, { "epoch": 3.9968145620022755, "grad_norm": 0.44435583443986015, "learning_rate": 1.2015333139967547e-07, "loss": 0.0041, "step": 17566 }, { "epoch": 3.9970420932878272, "grad_norm": 0.9186679657167788, "learning_rate": 1.2010066227358938e-07, "loss": 0.0033, "step": 17567 }, { "epoch": 3.997269624573379, "grad_norm": 0.24084952138817592, "learning_rate": 1.2004800346672124e-07, "loss": 0.0016, "step": 17568 }, { "epoch": 3.9974971558589307, "grad_norm": 0.42401402255293197, "learning_rate": 1.1999535498014677e-07, "loss": 0.003, "step": 17569 }, { "epoch": 3.9977246871444825, "grad_norm": 0.07275033221187638, "learning_rate": 1.199427168149427e-07, "loss": 0.0003, "step": 17570 }, { "epoch": 3.9979522184300342, "grad_norm": 1.0097884805981612, "learning_rate": 1.1989008897218463e-07, "loss": 0.0089, "step": 17571 }, { "epoch": 3.998179749715586, "grad_norm": 0.9142801542473936, "learning_rate": 1.198374714529478e-07, "loss": 0.0114, "step": 17572 }, { "epoch": 3.9984072810011377, "grad_norm": 0.4143992971999338, "learning_rate": 1.1978486425830812e-07, "loss": 0.002, "step": 17573 }, { "epoch": 3.9986348122866895, "grad_norm": 1.3285190815610926, "learning_rate": 1.1973226738934035e-07, "loss": 0.0091, "step": 17574 }, { "epoch": 3.9988623435722412, "grad_norm": 0.07800866564073976, "learning_rate": 1.196796808471197e-07, "loss": 0.0002, "step": 17575 }, { "epoch": 3.999089874857793, "grad_norm": 0.16329479164285413, "learning_rate": 1.1962710463272111e-07, "loss": 0.0007, "step": 17576 }, { "epoch": 3.9993174061433447, "grad_norm": 0.6218061283003363, "learning_rate": 1.195745387472188e-07, "loss": 0.0063, "step": 17577 }, { "epoch": 3.9995449374288965, "grad_norm": 0.6081496580522621, "learning_rate": 1.195219831916874e-07, "loss": 0.0024, "step": 17578 }, { "epoch": 3.9997724687144482, "grad_norm": 0.46672326148386695, "learning_rate": 1.1946943796720074e-07, "loss": 0.0047, "step": 17579 }, { "epoch": 4.0, "grad_norm": 1.5878792525806298, "learning_rate": 1.194169030748331e-07, "loss": 0.0059, "step": 17580 }, { "epoch": 4.000227531285552, "grad_norm": 0.5113638866619918, "learning_rate": 1.193643785156579e-07, "loss": 0.0012, "step": 17581 }, { "epoch": 4.0004550625711035, "grad_norm": 0.16174634125684484, "learning_rate": 1.1931186429074878e-07, "loss": 0.0011, "step": 17582 }, { "epoch": 4.000682593856656, "grad_norm": 0.3478924154516286, "learning_rate": 1.1925936040117916e-07, "loss": 0.002, "step": 17583 }, { "epoch": 4.000910125142207, "grad_norm": 0.09331260539220379, "learning_rate": 1.1920686684802188e-07, "loss": 0.0004, "step": 17584 }, { "epoch": 4.001137656427759, "grad_norm": 0.1460990760482538, "learning_rate": 1.1915438363235006e-07, "loss": 0.0008, "step": 17585 }, { "epoch": 4.0013651877133105, "grad_norm": 0.11553262977700615, "learning_rate": 1.1910191075523602e-07, "loss": 0.0005, "step": 17586 }, { "epoch": 4.001592718998863, "grad_norm": 0.4194701646964688, "learning_rate": 1.190494482177526e-07, "loss": 0.0016, "step": 17587 }, { "epoch": 4.001820250284414, "grad_norm": 0.19099830895533706, "learning_rate": 1.1899699602097172e-07, "loss": 0.0013, "step": 17588 }, { "epoch": 4.002047781569966, "grad_norm": 0.06976670575037486, "learning_rate": 1.1894455416596555e-07, "loss": 0.0002, "step": 17589 }, { "epoch": 4.0022753128555175, "grad_norm": 0.43658249020280127, "learning_rate": 1.1889212265380604e-07, "loss": 0.007, "step": 17590 }, { "epoch": 4.00250284414107, "grad_norm": 0.3913026314651342, "learning_rate": 1.1883970148556447e-07, "loss": 0.0013, "step": 17591 }, { "epoch": 4.002730375426621, "grad_norm": 0.254393117751463, "learning_rate": 1.1878729066231262e-07, "loss": 0.0015, "step": 17592 }, { "epoch": 4.002957906712173, "grad_norm": 0.20294278079975875, "learning_rate": 1.1873489018512124e-07, "loss": 0.001, "step": 17593 }, { "epoch": 4.0031854379977245, "grad_norm": 0.09180593487327696, "learning_rate": 1.186825000550617e-07, "loss": 0.0003, "step": 17594 }, { "epoch": 4.003412969283277, "grad_norm": 0.30393666688204224, "learning_rate": 1.1863012027320445e-07, "loss": 0.0019, "step": 17595 }, { "epoch": 4.003640500568828, "grad_norm": 0.17469967322737548, "learning_rate": 1.1857775084062016e-07, "loss": 0.001, "step": 17596 }, { "epoch": 4.00386803185438, "grad_norm": 0.26533655305298154, "learning_rate": 1.1852539175837938e-07, "loss": 0.0023, "step": 17597 }, { "epoch": 4.0040955631399315, "grad_norm": 0.0974025654181668, "learning_rate": 1.1847304302755179e-07, "loss": 0.0004, "step": 17598 }, { "epoch": 4.004323094425484, "grad_norm": 0.2796731490204897, "learning_rate": 1.1842070464920772e-07, "loss": 0.0015, "step": 17599 }, { "epoch": 4.004550625711035, "grad_norm": 0.378771965852966, "learning_rate": 1.1836837662441653e-07, "loss": 0.0075, "step": 17600 }, { "epoch": 4.004778156996587, "grad_norm": 0.16754517190113521, "learning_rate": 1.1831605895424807e-07, "loss": 0.0013, "step": 17601 }, { "epoch": 4.0050056882821385, "grad_norm": 0.27743723814170707, "learning_rate": 1.1826375163977124e-07, "loss": 0.0022, "step": 17602 }, { "epoch": 4.005233219567691, "grad_norm": 0.2971109293711673, "learning_rate": 1.1821145468205522e-07, "loss": 0.0018, "step": 17603 }, { "epoch": 4.005460750853242, "grad_norm": 0.1981479426984606, "learning_rate": 1.181591680821692e-07, "loss": 0.0012, "step": 17604 }, { "epoch": 4.005688282138794, "grad_norm": 0.3180149487710303, "learning_rate": 1.181068918411813e-07, "loss": 0.0024, "step": 17605 }, { "epoch": 4.0059158134243456, "grad_norm": 0.35592543422041534, "learning_rate": 1.1805462596016039e-07, "loss": 0.0032, "step": 17606 }, { "epoch": 4.006143344709898, "grad_norm": 0.23635232378637236, "learning_rate": 1.1800237044017435e-07, "loss": 0.0015, "step": 17607 }, { "epoch": 4.006370875995449, "grad_norm": 0.11097234319303148, "learning_rate": 1.1795012528229149e-07, "loss": 0.0002, "step": 17608 }, { "epoch": 4.006598407281001, "grad_norm": 0.32978875816128117, "learning_rate": 1.1789789048757924e-07, "loss": 0.0053, "step": 17609 }, { "epoch": 4.006825938566553, "grad_norm": 0.9846887616702944, "learning_rate": 1.1784566605710548e-07, "loss": 0.0041, "step": 17610 }, { "epoch": 4.007053469852105, "grad_norm": 0.342038030381719, "learning_rate": 1.1779345199193756e-07, "loss": 0.0021, "step": 17611 }, { "epoch": 4.007281001137656, "grad_norm": 0.2888238439951332, "learning_rate": 1.1774124829314255e-07, "loss": 0.0022, "step": 17612 }, { "epoch": 4.007508532423208, "grad_norm": 0.25398344311809995, "learning_rate": 1.1768905496178748e-07, "loss": 0.0015, "step": 17613 }, { "epoch": 4.00773606370876, "grad_norm": 0.13820568376242923, "learning_rate": 1.1763687199893903e-07, "loss": 0.0008, "step": 17614 }, { "epoch": 4.007963594994312, "grad_norm": 0.7462464205510049, "learning_rate": 1.1758469940566362e-07, "loss": 0.0045, "step": 17615 }, { "epoch": 4.008191126279863, "grad_norm": 0.5247693215220444, "learning_rate": 1.1753253718302763e-07, "loss": 0.0016, "step": 17616 }, { "epoch": 4.008418657565415, "grad_norm": 0.1645736545521432, "learning_rate": 1.1748038533209716e-07, "loss": 0.0009, "step": 17617 }, { "epoch": 4.008646188850967, "grad_norm": 0.08018807210799372, "learning_rate": 1.1742824385393835e-07, "loss": 0.0004, "step": 17618 }, { "epoch": 4.008873720136519, "grad_norm": 0.2544146887992277, "learning_rate": 1.1737611274961654e-07, "loss": 0.0024, "step": 17619 }, { "epoch": 4.009101251422071, "grad_norm": 0.3076635148712501, "learning_rate": 1.1732399202019739e-07, "loss": 0.0023, "step": 17620 }, { "epoch": 4.009328782707622, "grad_norm": 0.021800704183773452, "learning_rate": 1.1727188166674612e-07, "loss": 0.0, "step": 17621 }, { "epoch": 4.0095563139931745, "grad_norm": 0.40548404482300404, "learning_rate": 1.1721978169032755e-07, "loss": 0.0026, "step": 17622 }, { "epoch": 4.009783845278726, "grad_norm": 0.29624117364697744, "learning_rate": 1.1716769209200673e-07, "loss": 0.0022, "step": 17623 }, { "epoch": 4.010011376564278, "grad_norm": 0.1581516124441281, "learning_rate": 1.1711561287284818e-07, "loss": 0.0007, "step": 17624 }, { "epoch": 4.010238907849829, "grad_norm": 0.2666943805108751, "learning_rate": 1.1706354403391657e-07, "loss": 0.0016, "step": 17625 }, { "epoch": 4.0104664391353815, "grad_norm": 0.04659037737624291, "learning_rate": 1.1701148557627572e-07, "loss": 0.0001, "step": 17626 }, { "epoch": 4.010693970420933, "grad_norm": 0.29248776207194477, "learning_rate": 1.1695943750098984e-07, "loss": 0.001, "step": 17627 }, { "epoch": 4.010921501706485, "grad_norm": 0.3776523179661419, "learning_rate": 1.1690739980912267e-07, "loss": 0.0013, "step": 17628 }, { "epoch": 4.011149032992036, "grad_norm": 0.422244647938743, "learning_rate": 1.1685537250173746e-07, "loss": 0.0024, "step": 17629 }, { "epoch": 4.0113765642775885, "grad_norm": 0.2802752987918904, "learning_rate": 1.1680335557989809e-07, "loss": 0.0018, "step": 17630 }, { "epoch": 4.01160409556314, "grad_norm": 0.49354313388299714, "learning_rate": 1.1675134904466722e-07, "loss": 0.0022, "step": 17631 }, { "epoch": 4.011831626848692, "grad_norm": 0.10588945485649634, "learning_rate": 1.1669935289710818e-07, "loss": 0.0003, "step": 17632 }, { "epoch": 4.012059158134243, "grad_norm": 0.05948617838141287, "learning_rate": 1.1664736713828339e-07, "loss": 0.0002, "step": 17633 }, { "epoch": 4.0122866894197955, "grad_norm": 0.19734411065909227, "learning_rate": 1.1659539176925535e-07, "loss": 0.0002, "step": 17634 }, { "epoch": 4.012514220705347, "grad_norm": 0.46825386823927995, "learning_rate": 1.165434267910863e-07, "loss": 0.0019, "step": 17635 }, { "epoch": 4.012741751990899, "grad_norm": 0.12723119381011166, "learning_rate": 1.1649147220483847e-07, "loss": 0.0007, "step": 17636 }, { "epoch": 4.01296928327645, "grad_norm": 0.05805373845360542, "learning_rate": 1.1643952801157383e-07, "loss": 0.0003, "step": 17637 }, { "epoch": 4.0131968145620025, "grad_norm": 0.22618558552352858, "learning_rate": 1.1638759421235365e-07, "loss": 0.0018, "step": 17638 }, { "epoch": 4.013424345847554, "grad_norm": 0.9525899541923333, "learning_rate": 1.1633567080823971e-07, "loss": 0.0082, "step": 17639 }, { "epoch": 4.013651877133106, "grad_norm": 0.3512675780940863, "learning_rate": 1.1628375780029316e-07, "loss": 0.0012, "step": 17640 }, { "epoch": 4.013879408418657, "grad_norm": 0.8870120844185929, "learning_rate": 1.1623185518957469e-07, "loss": 0.0059, "step": 17641 }, { "epoch": 4.0141069397042095, "grad_norm": 0.38444592492226765, "learning_rate": 1.161799629771454e-07, "loss": 0.002, "step": 17642 }, { "epoch": 4.014334470989761, "grad_norm": 0.15581165583955134, "learning_rate": 1.1612808116406575e-07, "loss": 0.0008, "step": 17643 }, { "epoch": 4.014562002275313, "grad_norm": 0.2700948507446802, "learning_rate": 1.1607620975139635e-07, "loss": 0.0015, "step": 17644 }, { "epoch": 4.014789533560864, "grad_norm": 1.0108207135728189, "learning_rate": 1.1602434874019706e-07, "loss": 0.0053, "step": 17645 }, { "epoch": 4.0150170648464165, "grad_norm": 0.4706421613161336, "learning_rate": 1.1597249813152806e-07, "loss": 0.0016, "step": 17646 }, { "epoch": 4.015244596131968, "grad_norm": 0.22325991216591923, "learning_rate": 1.1592065792644894e-07, "loss": 0.001, "step": 17647 }, { "epoch": 4.01547212741752, "grad_norm": 0.21247638434638458, "learning_rate": 1.158688281260191e-07, "loss": 0.0017, "step": 17648 }, { "epoch": 4.015699658703071, "grad_norm": 0.6715121325484813, "learning_rate": 1.1581700873129799e-07, "loss": 0.0065, "step": 17649 }, { "epoch": 4.0159271899886235, "grad_norm": 0.8468256158908188, "learning_rate": 1.1576519974334469e-07, "loss": 0.0125, "step": 17650 }, { "epoch": 4.016154721274175, "grad_norm": 0.12125319347447604, "learning_rate": 1.157134011632182e-07, "loss": 0.0005, "step": 17651 }, { "epoch": 4.016382252559727, "grad_norm": 0.3594844722313566, "learning_rate": 1.1566161299197715e-07, "loss": 0.0044, "step": 17652 }, { "epoch": 4.016609783845278, "grad_norm": 0.2607241531298887, "learning_rate": 1.156098352306797e-07, "loss": 0.001, "step": 17653 }, { "epoch": 4.0168373151308305, "grad_norm": 0.12391200159564056, "learning_rate": 1.155580678803845e-07, "loss": 0.0004, "step": 17654 }, { "epoch": 4.017064846416382, "grad_norm": 0.13981975182312592, "learning_rate": 1.1550631094214927e-07, "loss": 0.0005, "step": 17655 }, { "epoch": 4.017292377701934, "grad_norm": 0.11788704667820925, "learning_rate": 1.1545456441703191e-07, "loss": 0.0006, "step": 17656 }, { "epoch": 4.017519908987485, "grad_norm": 0.48683850070415124, "learning_rate": 1.1540282830609005e-07, "loss": 0.0053, "step": 17657 }, { "epoch": 4.0177474402730375, "grad_norm": 0.21278758488572527, "learning_rate": 1.1535110261038124e-07, "loss": 0.0009, "step": 17658 }, { "epoch": 4.01797497155859, "grad_norm": 0.21966399243874837, "learning_rate": 1.1529938733096255e-07, "loss": 0.001, "step": 17659 }, { "epoch": 4.018202502844141, "grad_norm": 0.3261088499857686, "learning_rate": 1.1524768246889077e-07, "loss": 0.0018, "step": 17660 }, { "epoch": 4.018430034129693, "grad_norm": 0.036017353697012824, "learning_rate": 1.1519598802522291e-07, "loss": 0.0001, "step": 17661 }, { "epoch": 4.0186575654152445, "grad_norm": 0.025905360180801165, "learning_rate": 1.1514430400101528e-07, "loss": 0.0001, "step": 17662 }, { "epoch": 4.018885096700797, "grad_norm": 0.1583884401884447, "learning_rate": 1.1509263039732433e-07, "loss": 0.0008, "step": 17663 }, { "epoch": 4.019112627986348, "grad_norm": 0.09571323129997593, "learning_rate": 1.1504096721520613e-07, "loss": 0.0005, "step": 17664 }, { "epoch": 4.0193401592719, "grad_norm": 0.13152987923283055, "learning_rate": 1.1498931445571675e-07, "loss": 0.0007, "step": 17665 }, { "epoch": 4.0195676905574516, "grad_norm": 0.16350065941941214, "learning_rate": 1.1493767211991178e-07, "loss": 0.0013, "step": 17666 }, { "epoch": 4.019795221843004, "grad_norm": 0.36729626148706085, "learning_rate": 1.1488604020884648e-07, "loss": 0.0029, "step": 17667 }, { "epoch": 4.020022753128555, "grad_norm": 0.32813579772496465, "learning_rate": 1.1483441872357643e-07, "loss": 0.0011, "step": 17668 }, { "epoch": 4.020250284414107, "grad_norm": 0.18319711409690786, "learning_rate": 1.1478280766515644e-07, "loss": 0.0017, "step": 17669 }, { "epoch": 4.020477815699659, "grad_norm": 1.0294581951990012, "learning_rate": 1.1473120703464138e-07, "loss": 0.0156, "step": 17670 }, { "epoch": 4.020705346985211, "grad_norm": 2.6853496096137706, "learning_rate": 1.146796168330861e-07, "loss": 0.0073, "step": 17671 }, { "epoch": 4.020932878270762, "grad_norm": 0.36123022936555504, "learning_rate": 1.146280370615447e-07, "loss": 0.0025, "step": 17672 }, { "epoch": 4.021160409556314, "grad_norm": 0.11909915438588797, "learning_rate": 1.1457646772107173e-07, "loss": 0.0004, "step": 17673 }, { "epoch": 4.021387940841866, "grad_norm": 0.1856889282766462, "learning_rate": 1.1452490881272073e-07, "loss": 0.0014, "step": 17674 }, { "epoch": 4.021615472127418, "grad_norm": 0.24106097138315577, "learning_rate": 1.1447336033754586e-07, "loss": 0.0015, "step": 17675 }, { "epoch": 4.021843003412969, "grad_norm": 0.07952715421483504, "learning_rate": 1.1442182229660044e-07, "loss": 0.0003, "step": 17676 }, { "epoch": 4.022070534698521, "grad_norm": 0.15092658735961953, "learning_rate": 1.143702946909378e-07, "loss": 0.0004, "step": 17677 }, { "epoch": 4.022298065984073, "grad_norm": 0.03969027415827998, "learning_rate": 1.143187775216114e-07, "loss": 0.0001, "step": 17678 }, { "epoch": 4.022525597269625, "grad_norm": 0.366054064482111, "learning_rate": 1.1426727078967366e-07, "loss": 0.0039, "step": 17679 }, { "epoch": 4.022753128555176, "grad_norm": 0.4370202608849622, "learning_rate": 1.1421577449617776e-07, "loss": 0.0007, "step": 17680 }, { "epoch": 4.022980659840728, "grad_norm": 0.15210100407168883, "learning_rate": 1.1416428864217586e-07, "loss": 0.0005, "step": 17681 }, { "epoch": 4.02320819112628, "grad_norm": 0.3269079990649512, "learning_rate": 1.1411281322872045e-07, "loss": 0.0035, "step": 17682 }, { "epoch": 4.023435722411832, "grad_norm": 0.06638418909337179, "learning_rate": 1.1406134825686339e-07, "loss": 0.0002, "step": 17683 }, { "epoch": 4.023663253697383, "grad_norm": 0.16784421398938038, "learning_rate": 1.1400989372765653e-07, "loss": 0.001, "step": 17684 }, { "epoch": 4.023890784982935, "grad_norm": 0.3276075544529414, "learning_rate": 1.1395844964215183e-07, "loss": 0.0013, "step": 17685 }, { "epoch": 4.024118316268487, "grad_norm": 0.5170235924545492, "learning_rate": 1.1390701600140032e-07, "loss": 0.0019, "step": 17686 }, { "epoch": 4.024345847554039, "grad_norm": 0.73339637726579, "learning_rate": 1.1385559280645358e-07, "loss": 0.0075, "step": 17687 }, { "epoch": 4.02457337883959, "grad_norm": 0.54555313885013, "learning_rate": 1.1380418005836218e-07, "loss": 0.0038, "step": 17688 }, { "epoch": 4.024800910125142, "grad_norm": 0.23417008809122575, "learning_rate": 1.1375277775817734e-07, "loss": 0.0023, "step": 17689 }, { "epoch": 4.025028441410694, "grad_norm": 0.047219310126454354, "learning_rate": 1.1370138590694927e-07, "loss": 0.0001, "step": 17690 }, { "epoch": 4.025255972696246, "grad_norm": 0.22580719940966784, "learning_rate": 1.1365000450572844e-07, "loss": 0.0016, "step": 17691 }, { "epoch": 4.025483503981797, "grad_norm": 0.16085508084236574, "learning_rate": 1.1359863355556514e-07, "loss": 0.0006, "step": 17692 }, { "epoch": 4.025711035267349, "grad_norm": 0.14298362166130757, "learning_rate": 1.1354727305750907e-07, "loss": 0.0004, "step": 17693 }, { "epoch": 4.025938566552901, "grad_norm": 0.28123310083815517, "learning_rate": 1.1349592301261023e-07, "loss": 0.0007, "step": 17694 }, { "epoch": 4.026166097838453, "grad_norm": 0.22296442182115692, "learning_rate": 1.1344458342191777e-07, "loss": 0.0008, "step": 17695 }, { "epoch": 4.026393629124004, "grad_norm": 0.3106352176768505, "learning_rate": 1.1339325428648115e-07, "loss": 0.0038, "step": 17696 }, { "epoch": 4.026621160409556, "grad_norm": 0.3489464247804447, "learning_rate": 1.1334193560734966e-07, "loss": 0.0021, "step": 17697 }, { "epoch": 4.0268486916951085, "grad_norm": 0.18596450173420975, "learning_rate": 1.1329062738557176e-07, "loss": 0.0009, "step": 17698 }, { "epoch": 4.02707622298066, "grad_norm": 0.31378690867327175, "learning_rate": 1.132393296221964e-07, "loss": 0.0017, "step": 17699 }, { "epoch": 4.027303754266212, "grad_norm": 0.2243238187778062, "learning_rate": 1.1318804231827178e-07, "loss": 0.0023, "step": 17700 }, { "epoch": 4.027531285551763, "grad_norm": 0.26085640047177333, "learning_rate": 1.1313676547484636e-07, "loss": 0.001, "step": 17701 }, { "epoch": 4.0277588168373155, "grad_norm": 0.09473396384169978, "learning_rate": 1.1308549909296792e-07, "loss": 0.0003, "step": 17702 }, { "epoch": 4.027986348122867, "grad_norm": 0.17956520257486336, "learning_rate": 1.130342431736843e-07, "loss": 0.0012, "step": 17703 }, { "epoch": 4.028213879408419, "grad_norm": 0.4093021282284313, "learning_rate": 1.1298299771804334e-07, "loss": 0.003, "step": 17704 }, { "epoch": 4.02844141069397, "grad_norm": 0.06991874864672375, "learning_rate": 1.1293176272709199e-07, "loss": 0.0002, "step": 17705 }, { "epoch": 4.0286689419795225, "grad_norm": 0.3471937027545475, "learning_rate": 1.1288053820187772e-07, "loss": 0.0014, "step": 17706 }, { "epoch": 4.028896473265074, "grad_norm": 0.4404065785320551, "learning_rate": 1.1282932414344729e-07, "loss": 0.0031, "step": 17707 }, { "epoch": 4.029124004550626, "grad_norm": 0.2821392450765282, "learning_rate": 1.1277812055284756e-07, "loss": 0.0015, "step": 17708 }, { "epoch": 4.029351535836177, "grad_norm": 0.027665844034832774, "learning_rate": 1.1272692743112483e-07, "loss": 0.0001, "step": 17709 }, { "epoch": 4.0295790671217295, "grad_norm": 0.2606907488804659, "learning_rate": 1.1267574477932551e-07, "loss": 0.0012, "step": 17710 }, { "epoch": 4.029806598407281, "grad_norm": 0.19913683038137708, "learning_rate": 1.1262457259849584e-07, "loss": 0.0011, "step": 17711 }, { "epoch": 4.030034129692833, "grad_norm": 0.2500837496362678, "learning_rate": 1.1257341088968136e-07, "loss": 0.0007, "step": 17712 }, { "epoch": 4.030261660978384, "grad_norm": 1.3218362202645433, "learning_rate": 1.1252225965392813e-07, "loss": 0.004, "step": 17713 }, { "epoch": 4.0304891922639365, "grad_norm": 0.12919367975329826, "learning_rate": 1.1247111889228112e-07, "loss": 0.0006, "step": 17714 }, { "epoch": 4.030716723549488, "grad_norm": 0.4523133121149643, "learning_rate": 1.1241998860578598e-07, "loss": 0.0029, "step": 17715 }, { "epoch": 4.03094425483504, "grad_norm": 0.12420624965714705, "learning_rate": 1.1236886879548734e-07, "loss": 0.0008, "step": 17716 }, { "epoch": 4.031171786120591, "grad_norm": 0.06885358701278119, "learning_rate": 1.1231775946243021e-07, "loss": 0.0002, "step": 17717 }, { "epoch": 4.0313993174061435, "grad_norm": 0.046819594802207425, "learning_rate": 1.1226666060765931e-07, "loss": 0.0002, "step": 17718 }, { "epoch": 4.031626848691695, "grad_norm": 0.14418930185802645, "learning_rate": 1.1221557223221866e-07, "loss": 0.0006, "step": 17719 }, { "epoch": 4.031854379977247, "grad_norm": 0.38069388781415403, "learning_rate": 1.1216449433715276e-07, "loss": 0.0031, "step": 17720 }, { "epoch": 4.032081911262798, "grad_norm": 0.33975710535458425, "learning_rate": 1.1211342692350537e-07, "loss": 0.0029, "step": 17721 }, { "epoch": 4.0323094425483506, "grad_norm": 0.11851754497223158, "learning_rate": 1.1206236999232005e-07, "loss": 0.0006, "step": 17722 }, { "epoch": 4.032536973833902, "grad_norm": 1.0366743753722376, "learning_rate": 1.1201132354464052e-07, "loss": 0.0066, "step": 17723 }, { "epoch": 4.032764505119454, "grad_norm": 0.31096152855073184, "learning_rate": 1.1196028758151005e-07, "loss": 0.0047, "step": 17724 }, { "epoch": 4.032992036405005, "grad_norm": 0.20485995177511018, "learning_rate": 1.1190926210397184e-07, "loss": 0.002, "step": 17725 }, { "epoch": 4.033219567690558, "grad_norm": 0.19355013062379137, "learning_rate": 1.1185824711306851e-07, "loss": 0.0011, "step": 17726 }, { "epoch": 4.033447098976109, "grad_norm": 0.06758224109183253, "learning_rate": 1.1180724260984294e-07, "loss": 0.0002, "step": 17727 }, { "epoch": 4.033674630261661, "grad_norm": 0.43503240497630524, "learning_rate": 1.1175624859533753e-07, "loss": 0.0031, "step": 17728 }, { "epoch": 4.033902161547212, "grad_norm": 0.11032558729432346, "learning_rate": 1.1170526507059423e-07, "loss": 0.0004, "step": 17729 }, { "epoch": 4.034129692832765, "grad_norm": 0.36109720994346506, "learning_rate": 1.1165429203665527e-07, "loss": 0.0026, "step": 17730 }, { "epoch": 4.034357224118316, "grad_norm": 0.028349494980135143, "learning_rate": 1.1160332949456246e-07, "loss": 0.0001, "step": 17731 }, { "epoch": 4.034584755403868, "grad_norm": 0.10241850992082649, "learning_rate": 1.1155237744535745e-07, "loss": 0.0003, "step": 17732 }, { "epoch": 4.034812286689419, "grad_norm": 0.0935468634257417, "learning_rate": 1.1150143589008143e-07, "loss": 0.0003, "step": 17733 }, { "epoch": 4.035039817974972, "grad_norm": 0.2399783559013261, "learning_rate": 1.1145050482977572e-07, "loss": 0.0018, "step": 17734 }, { "epoch": 4.035267349260523, "grad_norm": 0.12734331984353273, "learning_rate": 1.1139958426548123e-07, "loss": 0.0005, "step": 17735 }, { "epoch": 4.035494880546075, "grad_norm": 0.0475798245651899, "learning_rate": 1.1134867419823843e-07, "loss": 0.0002, "step": 17736 }, { "epoch": 4.035722411831627, "grad_norm": 0.2856277354607917, "learning_rate": 1.1129777462908803e-07, "loss": 0.0022, "step": 17737 }, { "epoch": 4.035949943117179, "grad_norm": 0.11083960666227406, "learning_rate": 1.112468855590703e-07, "loss": 0.0003, "step": 17738 }, { "epoch": 4.036177474402731, "grad_norm": 0.13645480992391845, "learning_rate": 1.1119600698922546e-07, "loss": 0.0007, "step": 17739 }, { "epoch": 4.036405005688282, "grad_norm": 0.14298716677837026, "learning_rate": 1.1114513892059329e-07, "loss": 0.0007, "step": 17740 }, { "epoch": 4.036632536973834, "grad_norm": 0.7899662080327738, "learning_rate": 1.1109428135421326e-07, "loss": 0.0053, "step": 17741 }, { "epoch": 4.036860068259386, "grad_norm": 0.04835746490275776, "learning_rate": 1.1104343429112513e-07, "loss": 0.0001, "step": 17742 }, { "epoch": 4.037087599544938, "grad_norm": 0.2870115033270855, "learning_rate": 1.1099259773236768e-07, "loss": 0.0019, "step": 17743 }, { "epoch": 4.037315130830489, "grad_norm": 0.02185644413782124, "learning_rate": 1.1094177167898027e-07, "loss": 0.0001, "step": 17744 }, { "epoch": 4.037542662116041, "grad_norm": 0.2986682594604268, "learning_rate": 1.1089095613200152e-07, "loss": 0.001, "step": 17745 }, { "epoch": 4.037770193401593, "grad_norm": 0.06381617392049219, "learning_rate": 1.1084015109247032e-07, "loss": 0.0004, "step": 17746 }, { "epoch": 4.037997724687145, "grad_norm": 0.24810266776763215, "learning_rate": 1.1078935656142472e-07, "loss": 0.0016, "step": 17747 }, { "epoch": 4.038225255972696, "grad_norm": 0.919781275366089, "learning_rate": 1.107385725399028e-07, "loss": 0.0032, "step": 17748 }, { "epoch": 4.038452787258248, "grad_norm": 0.25382979374220854, "learning_rate": 1.1068779902894282e-07, "loss": 0.0016, "step": 17749 }, { "epoch": 4.0386803185438, "grad_norm": 0.18526861395408098, "learning_rate": 1.1063703602958192e-07, "loss": 0.001, "step": 17750 }, { "epoch": 4.038907849829352, "grad_norm": 0.13602315387920313, "learning_rate": 1.1058628354285839e-07, "loss": 0.0007, "step": 17751 }, { "epoch": 4.039135381114903, "grad_norm": 0.6149922595155506, "learning_rate": 1.1053554156980888e-07, "loss": 0.0026, "step": 17752 }, { "epoch": 4.039362912400455, "grad_norm": 0.32108613200391845, "learning_rate": 1.1048481011147096e-07, "loss": 0.0039, "step": 17753 }, { "epoch": 4.039590443686007, "grad_norm": 0.11032120631747101, "learning_rate": 1.1043408916888112e-07, "loss": 0.0008, "step": 17754 }, { "epoch": 4.039817974971559, "grad_norm": 0.12202196852921209, "learning_rate": 1.1038337874307609e-07, "loss": 0.0008, "step": 17755 }, { "epoch": 4.04004550625711, "grad_norm": 0.2568320938569289, "learning_rate": 1.1033267883509224e-07, "loss": 0.001, "step": 17756 }, { "epoch": 4.040273037542662, "grad_norm": 0.1552844786491983, "learning_rate": 1.1028198944596586e-07, "loss": 0.0005, "step": 17757 }, { "epoch": 4.040500568828214, "grad_norm": 0.08658572611078363, "learning_rate": 1.102313105767331e-07, "loss": 0.0002, "step": 17758 }, { "epoch": 4.040728100113766, "grad_norm": 0.5319232089872342, "learning_rate": 1.1018064222842962e-07, "loss": 0.0021, "step": 17759 }, { "epoch": 4.040955631399317, "grad_norm": 0.22485479674403144, "learning_rate": 1.1012998440209082e-07, "loss": 0.0021, "step": 17760 }, { "epoch": 4.041183162684869, "grad_norm": 0.67854369077151, "learning_rate": 1.1007933709875234e-07, "loss": 0.005, "step": 17761 }, { "epoch": 4.041410693970421, "grad_norm": 0.40863147119778553, "learning_rate": 1.1002870031944904e-07, "loss": 0.003, "step": 17762 }, { "epoch": 4.041638225255973, "grad_norm": 0.07516831604647876, "learning_rate": 1.09978074065216e-07, "loss": 0.0004, "step": 17763 }, { "epoch": 4.041865756541524, "grad_norm": 0.4127100805116341, "learning_rate": 1.0992745833708789e-07, "loss": 0.0025, "step": 17764 }, { "epoch": 4.042093287827076, "grad_norm": 0.6227357771422495, "learning_rate": 1.0987685313609934e-07, "loss": 0.0059, "step": 17765 }, { "epoch": 4.042320819112628, "grad_norm": 0.31719366370899227, "learning_rate": 1.0982625846328455e-07, "loss": 0.0019, "step": 17766 }, { "epoch": 4.04254835039818, "grad_norm": 0.7139464021141576, "learning_rate": 1.0977567431967734e-07, "loss": 0.0042, "step": 17767 }, { "epoch": 4.042775881683731, "grad_norm": 0.3141629668624551, "learning_rate": 1.0972510070631194e-07, "loss": 0.0028, "step": 17768 }, { "epoch": 4.043003412969283, "grad_norm": 0.3730114194969132, "learning_rate": 1.0967453762422163e-07, "loss": 0.0029, "step": 17769 }, { "epoch": 4.043230944254835, "grad_norm": 0.17187071067877427, "learning_rate": 1.0962398507444002e-07, "loss": 0.001, "step": 17770 }, { "epoch": 4.043458475540387, "grad_norm": 0.05873470146430497, "learning_rate": 1.0957344305800033e-07, "loss": 0.0002, "step": 17771 }, { "epoch": 4.043686006825938, "grad_norm": 0.3497979534172812, "learning_rate": 1.095229115759356e-07, "loss": 0.0009, "step": 17772 }, { "epoch": 4.04391353811149, "grad_norm": 0.29452037775198214, "learning_rate": 1.0947239062927849e-07, "loss": 0.0009, "step": 17773 }, { "epoch": 4.044141069397042, "grad_norm": 0.06445895785302272, "learning_rate": 1.0942188021906143e-07, "loss": 0.0002, "step": 17774 }, { "epoch": 4.044368600682594, "grad_norm": 0.1699274288927349, "learning_rate": 1.0937138034631707e-07, "loss": 0.0004, "step": 17775 }, { "epoch": 4.044596131968146, "grad_norm": 0.24869053802332725, "learning_rate": 1.0932089101207723e-07, "loss": 0.0026, "step": 17776 }, { "epoch": 4.044823663253697, "grad_norm": 0.4662941406908599, "learning_rate": 1.0927041221737392e-07, "loss": 0.0026, "step": 17777 }, { "epoch": 4.0450511945392496, "grad_norm": 0.30784749010249624, "learning_rate": 1.0921994396323909e-07, "loss": 0.001, "step": 17778 }, { "epoch": 4.045278725824801, "grad_norm": 0.3316991909379558, "learning_rate": 1.0916948625070375e-07, "loss": 0.0027, "step": 17779 }, { "epoch": 4.045506257110353, "grad_norm": 0.3707092408987465, "learning_rate": 1.0911903908079967e-07, "loss": 0.0012, "step": 17780 }, { "epoch": 4.045733788395904, "grad_norm": 0.12427628952450007, "learning_rate": 1.0906860245455738e-07, "loss": 0.0003, "step": 17781 }, { "epoch": 4.045961319681457, "grad_norm": 0.365126846284451, "learning_rate": 1.0901817637300818e-07, "loss": 0.003, "step": 17782 }, { "epoch": 4.046188850967008, "grad_norm": 0.33120238317793366, "learning_rate": 1.0896776083718231e-07, "loss": 0.0022, "step": 17783 }, { "epoch": 4.04641638225256, "grad_norm": 0.3170669819983062, "learning_rate": 1.089173558481104e-07, "loss": 0.0012, "step": 17784 }, { "epoch": 4.046643913538111, "grad_norm": 0.24356912492623356, "learning_rate": 1.0886696140682267e-07, "loss": 0.0021, "step": 17785 }, { "epoch": 4.046871444823664, "grad_norm": 0.04152055573309771, "learning_rate": 1.0881657751434883e-07, "loss": 0.0001, "step": 17786 }, { "epoch": 4.047098976109215, "grad_norm": 0.24294475658218018, "learning_rate": 1.0876620417171898e-07, "loss": 0.0019, "step": 17787 }, { "epoch": 4.047326507394767, "grad_norm": 0.5177559080365952, "learning_rate": 1.087158413799623e-07, "loss": 0.0081, "step": 17788 }, { "epoch": 4.047554038680318, "grad_norm": 0.4730833548143579, "learning_rate": 1.086654891401085e-07, "loss": 0.007, "step": 17789 }, { "epoch": 4.047781569965871, "grad_norm": 0.18603674227679642, "learning_rate": 1.086151474531863e-07, "loss": 0.0007, "step": 17790 }, { "epoch": 4.048009101251422, "grad_norm": 0.07039285395054085, "learning_rate": 1.0856481632022478e-07, "loss": 0.0002, "step": 17791 }, { "epoch": 4.048236632536974, "grad_norm": 0.38788047554414995, "learning_rate": 1.0851449574225272e-07, "loss": 0.0018, "step": 17792 }, { "epoch": 4.048464163822525, "grad_norm": 0.16427440580760508, "learning_rate": 1.0846418572029831e-07, "loss": 0.0007, "step": 17793 }, { "epoch": 4.048691695108078, "grad_norm": 0.14281356495210246, "learning_rate": 1.0841388625539018e-07, "loss": 0.0004, "step": 17794 }, { "epoch": 4.048919226393629, "grad_norm": 0.029145354798181266, "learning_rate": 1.0836359734855591e-07, "loss": 0.0001, "step": 17795 }, { "epoch": 4.049146757679181, "grad_norm": 0.3633314011012963, "learning_rate": 1.083133190008237e-07, "loss": 0.001, "step": 17796 }, { "epoch": 4.049374288964732, "grad_norm": 0.15820951629849436, "learning_rate": 1.0826305121322087e-07, "loss": 0.0002, "step": 17797 }, { "epoch": 4.049601820250285, "grad_norm": 0.0646488545966698, "learning_rate": 1.082127939867749e-07, "loss": 0.0002, "step": 17798 }, { "epoch": 4.049829351535836, "grad_norm": 0.17293369984133622, "learning_rate": 1.0816254732251314e-07, "loss": 0.0009, "step": 17799 }, { "epoch": 4.050056882821388, "grad_norm": 0.4167275147796335, "learning_rate": 1.0811231122146215e-07, "loss": 0.0054, "step": 17800 }, { "epoch": 4.050284414106939, "grad_norm": 0.4982605573554587, "learning_rate": 1.0806208568464905e-07, "loss": 0.0028, "step": 17801 }, { "epoch": 4.050511945392492, "grad_norm": 0.5178059577534736, "learning_rate": 1.0801187071310012e-07, "loss": 0.0063, "step": 17802 }, { "epoch": 4.050739476678043, "grad_norm": 0.7781589169241004, "learning_rate": 1.0796166630784178e-07, "loss": 0.0065, "step": 17803 }, { "epoch": 4.050967007963595, "grad_norm": 0.16239259693201755, "learning_rate": 1.079114724699e-07, "loss": 0.0007, "step": 17804 }, { "epoch": 4.051194539249146, "grad_norm": 0.6075252833245268, "learning_rate": 1.0786128920030067e-07, "loss": 0.0087, "step": 17805 }, { "epoch": 4.051422070534699, "grad_norm": 0.08446966777618442, "learning_rate": 1.0781111650006969e-07, "loss": 0.0003, "step": 17806 }, { "epoch": 4.05164960182025, "grad_norm": 0.09095419117086365, "learning_rate": 1.0776095437023209e-07, "loss": 0.0003, "step": 17807 }, { "epoch": 4.051877133105802, "grad_norm": 0.28731323346642806, "learning_rate": 1.0771080281181352e-07, "loss": 0.0009, "step": 17808 }, { "epoch": 4.052104664391353, "grad_norm": 0.18612422364321848, "learning_rate": 1.0766066182583877e-07, "loss": 0.001, "step": 17809 }, { "epoch": 4.052332195676906, "grad_norm": 0.1106900299884439, "learning_rate": 1.0761053141333244e-07, "loss": 0.0005, "step": 17810 }, { "epoch": 4.052559726962457, "grad_norm": 0.4456466318047388, "learning_rate": 1.0756041157531927e-07, "loss": 0.0047, "step": 17811 }, { "epoch": 4.052787258248009, "grad_norm": 0.03763379965925863, "learning_rate": 1.075103023128237e-07, "loss": 0.0002, "step": 17812 }, { "epoch": 4.05301478953356, "grad_norm": 0.19298057315824663, "learning_rate": 1.0746020362686994e-07, "loss": 0.0014, "step": 17813 }, { "epoch": 4.053242320819113, "grad_norm": 0.35241887321087895, "learning_rate": 1.074101155184816e-07, "loss": 0.0024, "step": 17814 }, { "epoch": 4.053469852104665, "grad_norm": 0.43933988747017627, "learning_rate": 1.0736003798868279e-07, "loss": 0.0024, "step": 17815 }, { "epoch": 4.053697383390216, "grad_norm": 0.1260547798634921, "learning_rate": 1.0730997103849679e-07, "loss": 0.0005, "step": 17816 }, { "epoch": 4.053924914675768, "grad_norm": 0.22855006905950767, "learning_rate": 1.0725991466894653e-07, "loss": 0.0016, "step": 17817 }, { "epoch": 4.05415244596132, "grad_norm": 0.11180521079124336, "learning_rate": 1.0720986888105571e-07, "loss": 0.0004, "step": 17818 }, { "epoch": 4.054379977246872, "grad_norm": 0.04446735942642196, "learning_rate": 1.0715983367584677e-07, "loss": 0.0002, "step": 17819 }, { "epoch": 4.054607508532423, "grad_norm": 0.2436758533922848, "learning_rate": 1.0710980905434258e-07, "loss": 0.001, "step": 17820 }, { "epoch": 4.054835039817975, "grad_norm": 0.28276066795578303, "learning_rate": 1.0705979501756531e-07, "loss": 0.0012, "step": 17821 }, { "epoch": 4.055062571103527, "grad_norm": 0.3793121088350941, "learning_rate": 1.0700979156653739e-07, "loss": 0.0025, "step": 17822 }, { "epoch": 4.055290102389079, "grad_norm": 0.14702900536528457, "learning_rate": 1.0695979870228052e-07, "loss": 0.0005, "step": 17823 }, { "epoch": 4.05551763367463, "grad_norm": 0.24792278768253873, "learning_rate": 1.0690981642581665e-07, "loss": 0.0006, "step": 17824 }, { "epoch": 4.055745164960182, "grad_norm": 0.0904616093154429, "learning_rate": 1.068598447381674e-07, "loss": 0.0003, "step": 17825 }, { "epoch": 4.055972696245734, "grad_norm": 0.4681038316766109, "learning_rate": 1.0680988364035388e-07, "loss": 0.004, "step": 17826 }, { "epoch": 4.056200227531286, "grad_norm": 0.28560818747983097, "learning_rate": 1.0675993313339745e-07, "loss": 0.0016, "step": 17827 }, { "epoch": 4.056427758816837, "grad_norm": 0.3959105124687778, "learning_rate": 1.0670999321831889e-07, "loss": 0.0032, "step": 17828 }, { "epoch": 4.056655290102389, "grad_norm": 0.2636919347567209, "learning_rate": 1.0666006389613868e-07, "loss": 0.0019, "step": 17829 }, { "epoch": 4.056882821387941, "grad_norm": 0.24791932475760306, "learning_rate": 1.0661014516787749e-07, "loss": 0.0012, "step": 17830 }, { "epoch": 4.057110352673493, "grad_norm": 0.08434701338474516, "learning_rate": 1.0656023703455559e-07, "loss": 0.0002, "step": 17831 }, { "epoch": 4.057337883959044, "grad_norm": 0.3318942646983862, "learning_rate": 1.0651033949719308e-07, "loss": 0.0031, "step": 17832 }, { "epoch": 4.057565415244596, "grad_norm": 0.36639697931020854, "learning_rate": 1.064604525568095e-07, "loss": 0.0018, "step": 17833 }, { "epoch": 4.057792946530148, "grad_norm": 0.06788559253489163, "learning_rate": 1.0641057621442477e-07, "loss": 0.0003, "step": 17834 }, { "epoch": 4.0580204778157, "grad_norm": 0.27760714414684523, "learning_rate": 1.063607104710581e-07, "loss": 0.0006, "step": 17835 }, { "epoch": 4.058248009101251, "grad_norm": 0.1468016904851076, "learning_rate": 1.0631085532772853e-07, "loss": 0.0007, "step": 17836 }, { "epoch": 4.058475540386803, "grad_norm": 0.10572090271133538, "learning_rate": 1.062610107854551e-07, "loss": 0.0006, "step": 17837 }, { "epoch": 4.058703071672355, "grad_norm": 0.08661540601284227, "learning_rate": 1.0621117684525665e-07, "loss": 0.0002, "step": 17838 }, { "epoch": 4.058930602957907, "grad_norm": 0.30073753275613835, "learning_rate": 1.0616135350815176e-07, "loss": 0.0012, "step": 17839 }, { "epoch": 4.059158134243458, "grad_norm": 0.32353877495430267, "learning_rate": 1.0611154077515839e-07, "loss": 0.0012, "step": 17840 }, { "epoch": 4.05938566552901, "grad_norm": 0.2255335760949572, "learning_rate": 1.0606173864729508e-07, "loss": 0.0022, "step": 17841 }, { "epoch": 4.059613196814562, "grad_norm": 0.3835113297235849, "learning_rate": 1.0601194712557939e-07, "loss": 0.0044, "step": 17842 }, { "epoch": 4.059840728100114, "grad_norm": 0.3944102173696446, "learning_rate": 1.0596216621102884e-07, "loss": 0.0017, "step": 17843 }, { "epoch": 4.060068259385665, "grad_norm": 0.3570165443711586, "learning_rate": 1.0591239590466102e-07, "loss": 0.0026, "step": 17844 }, { "epoch": 4.060295790671217, "grad_norm": 0.6454730508677056, "learning_rate": 1.0586263620749327e-07, "loss": 0.0076, "step": 17845 }, { "epoch": 4.060523321956769, "grad_norm": 0.5124217896813856, "learning_rate": 1.0581288712054251e-07, "loss": 0.0047, "step": 17846 }, { "epoch": 4.060750853242321, "grad_norm": 0.06451910200390867, "learning_rate": 1.0576314864482559e-07, "loss": 0.0002, "step": 17847 }, { "epoch": 4.060978384527872, "grad_norm": 0.5174647388965563, "learning_rate": 1.0571342078135878e-07, "loss": 0.0024, "step": 17848 }, { "epoch": 4.061205915813424, "grad_norm": 0.4405841167513056, "learning_rate": 1.0566370353115876e-07, "loss": 0.0023, "step": 17849 }, { "epoch": 4.061433447098976, "grad_norm": 0.283371346372477, "learning_rate": 1.0561399689524138e-07, "loss": 0.002, "step": 17850 }, { "epoch": 4.061660978384528, "grad_norm": 0.762953159999188, "learning_rate": 1.0556430087462266e-07, "loss": 0.0066, "step": 17851 }, { "epoch": 4.06188850967008, "grad_norm": 0.38150126082803176, "learning_rate": 1.0551461547031843e-07, "loss": 0.0032, "step": 17852 }, { "epoch": 4.062116040955631, "grad_norm": 0.27102187066165456, "learning_rate": 1.0546494068334411e-07, "loss": 0.0004, "step": 17853 }, { "epoch": 4.062343572241184, "grad_norm": 0.3762942978861428, "learning_rate": 1.0541527651471498e-07, "loss": 0.003, "step": 17854 }, { "epoch": 4.062571103526735, "grad_norm": 0.10889082706953644, "learning_rate": 1.0536562296544587e-07, "loss": 0.0005, "step": 17855 }, { "epoch": 4.062798634812287, "grad_norm": 0.682279897702654, "learning_rate": 1.0531598003655192e-07, "loss": 0.0033, "step": 17856 }, { "epoch": 4.063026166097838, "grad_norm": 0.10819383599507122, "learning_rate": 1.0526634772904734e-07, "loss": 0.0005, "step": 17857 }, { "epoch": 4.063253697383391, "grad_norm": 0.45716624338315526, "learning_rate": 1.0521672604394684e-07, "loss": 0.0015, "step": 17858 }, { "epoch": 4.063481228668942, "grad_norm": 0.11237296735872566, "learning_rate": 1.0516711498226452e-07, "loss": 0.0002, "step": 17859 }, { "epoch": 4.063708759954494, "grad_norm": 0.3681821046548583, "learning_rate": 1.0511751454501451e-07, "loss": 0.0028, "step": 17860 }, { "epoch": 4.063936291240045, "grad_norm": 0.06424763606457679, "learning_rate": 1.0506792473321037e-07, "loss": 0.0003, "step": 17861 }, { "epoch": 4.064163822525598, "grad_norm": 0.7993932972346024, "learning_rate": 1.0501834554786545e-07, "loss": 0.0125, "step": 17862 }, { "epoch": 4.064391353811149, "grad_norm": 0.8421430586017475, "learning_rate": 1.0496877698999341e-07, "loss": 0.0026, "step": 17863 }, { "epoch": 4.064618885096701, "grad_norm": 0.18153296248287082, "learning_rate": 1.0491921906060708e-07, "loss": 0.0007, "step": 17864 }, { "epoch": 4.064846416382252, "grad_norm": 0.20353367842042117, "learning_rate": 1.0486967176071936e-07, "loss": 0.0013, "step": 17865 }, { "epoch": 4.065073947667805, "grad_norm": 0.14431796711506661, "learning_rate": 1.0482013509134317e-07, "loss": 0.0004, "step": 17866 }, { "epoch": 4.065301478953356, "grad_norm": 0.40885994540354337, "learning_rate": 1.0477060905349057e-07, "loss": 0.0033, "step": 17867 }, { "epoch": 4.065529010238908, "grad_norm": 0.4872146969844022, "learning_rate": 1.0472109364817413e-07, "loss": 0.0026, "step": 17868 }, { "epoch": 4.065756541524459, "grad_norm": 0.29796831688469905, "learning_rate": 1.0467158887640556e-07, "loss": 0.0021, "step": 17869 }, { "epoch": 4.065984072810012, "grad_norm": 0.5789511733522215, "learning_rate": 1.046220947391969e-07, "loss": 0.0082, "step": 17870 }, { "epoch": 4.066211604095563, "grad_norm": 0.15353846387673595, "learning_rate": 1.0457261123755947e-07, "loss": 0.0005, "step": 17871 }, { "epoch": 4.066439135381115, "grad_norm": 0.36232498034834343, "learning_rate": 1.045231383725047e-07, "loss": 0.002, "step": 17872 }, { "epoch": 4.066666666666666, "grad_norm": 0.12038909306414174, "learning_rate": 1.0447367614504399e-07, "loss": 0.0003, "step": 17873 }, { "epoch": 4.066894197952219, "grad_norm": 0.2597852266440889, "learning_rate": 1.0442422455618787e-07, "loss": 0.0007, "step": 17874 }, { "epoch": 4.06712172923777, "grad_norm": 0.367609550512646, "learning_rate": 1.0437478360694732e-07, "loss": 0.0014, "step": 17875 }, { "epoch": 4.067349260523322, "grad_norm": 0.09903998339152445, "learning_rate": 1.0432535329833259e-07, "loss": 0.0005, "step": 17876 }, { "epoch": 4.067576791808873, "grad_norm": 0.1938271405006643, "learning_rate": 1.0427593363135411e-07, "loss": 0.0014, "step": 17877 }, { "epoch": 4.067804323094426, "grad_norm": 0.28662025441719774, "learning_rate": 1.0422652460702183e-07, "loss": 0.0051, "step": 17878 }, { "epoch": 4.068031854379977, "grad_norm": 0.35003715000051533, "learning_rate": 1.0417712622634579e-07, "loss": 0.0035, "step": 17879 }, { "epoch": 4.068259385665529, "grad_norm": 0.08711182963049016, "learning_rate": 1.0412773849033546e-07, "loss": 0.0003, "step": 17880 }, { "epoch": 4.0684869169510804, "grad_norm": 0.14036395223813675, "learning_rate": 1.0407836140000009e-07, "loss": 0.0003, "step": 17881 }, { "epoch": 4.068714448236633, "grad_norm": 0.6864590235487503, "learning_rate": 1.0402899495634909e-07, "loss": 0.0041, "step": 17882 }, { "epoch": 4.068941979522184, "grad_norm": 0.05817147105684468, "learning_rate": 1.0397963916039123e-07, "loss": 0.0001, "step": 17883 }, { "epoch": 4.069169510807736, "grad_norm": 0.5745983853316723, "learning_rate": 1.039302940131353e-07, "loss": 0.003, "step": 17884 }, { "epoch": 4.0693970420932875, "grad_norm": 0.6531031558845887, "learning_rate": 1.0388095951559007e-07, "loss": 0.0011, "step": 17885 }, { "epoch": 4.06962457337884, "grad_norm": 0.31148133686088825, "learning_rate": 1.0383163566876339e-07, "loss": 0.0019, "step": 17886 }, { "epoch": 4.069852104664391, "grad_norm": 0.051192651409135245, "learning_rate": 1.0378232247366387e-07, "loss": 0.0001, "step": 17887 }, { "epoch": 4.070079635949943, "grad_norm": 0.6061983473781392, "learning_rate": 1.0373301993129886e-07, "loss": 0.0036, "step": 17888 }, { "epoch": 4.0703071672354945, "grad_norm": 0.29933402505498236, "learning_rate": 1.0368372804267647e-07, "loss": 0.0012, "step": 17889 }, { "epoch": 4.070534698521047, "grad_norm": 0.4625067325307398, "learning_rate": 1.036344468088038e-07, "loss": 0.0041, "step": 17890 }, { "epoch": 4.070762229806599, "grad_norm": 0.11342788920934205, "learning_rate": 1.0358517623068817e-07, "loss": 0.0008, "step": 17891 }, { "epoch": 4.07098976109215, "grad_norm": 0.1508805833348189, "learning_rate": 1.0353591630933677e-07, "loss": 0.0006, "step": 17892 }, { "epoch": 4.071217292377702, "grad_norm": 0.18995437804942092, "learning_rate": 1.0348666704575606e-07, "loss": 0.0006, "step": 17893 }, { "epoch": 4.071444823663254, "grad_norm": 0.10859938552818088, "learning_rate": 1.0343742844095297e-07, "loss": 0.0006, "step": 17894 }, { "epoch": 4.071672354948806, "grad_norm": 0.07772586332120732, "learning_rate": 1.0338820049593343e-07, "loss": 0.0003, "step": 17895 }, { "epoch": 4.071899886234357, "grad_norm": 0.06940379963750504, "learning_rate": 1.0333898321170397e-07, "loss": 0.0004, "step": 17896 }, { "epoch": 4.072127417519909, "grad_norm": 0.6617473159897655, "learning_rate": 1.0328977658927014e-07, "loss": 0.0037, "step": 17897 }, { "epoch": 4.072354948805461, "grad_norm": 0.08441569510346787, "learning_rate": 1.0324058062963785e-07, "loss": 0.0002, "step": 17898 }, { "epoch": 4.072582480091013, "grad_norm": 0.14985740670543807, "learning_rate": 1.0319139533381268e-07, "loss": 0.0007, "step": 17899 }, { "epoch": 4.072810011376564, "grad_norm": 0.42539820707304704, "learning_rate": 1.0314222070279961e-07, "loss": 0.0018, "step": 17900 }, { "epoch": 4.073037542662116, "grad_norm": 0.30143353640448367, "learning_rate": 1.0309305673760393e-07, "loss": 0.0007, "step": 17901 }, { "epoch": 4.073265073947668, "grad_norm": 0.10892718230989853, "learning_rate": 1.0304390343923019e-07, "loss": 0.0003, "step": 17902 }, { "epoch": 4.07349260523322, "grad_norm": 0.038838411492656724, "learning_rate": 1.0299476080868329e-07, "loss": 0.0001, "step": 17903 }, { "epoch": 4.073720136518771, "grad_norm": 0.04738470753604168, "learning_rate": 1.0294562884696733e-07, "loss": 0.0001, "step": 17904 }, { "epoch": 4.073947667804323, "grad_norm": 0.6347812209725406, "learning_rate": 1.0289650755508661e-07, "loss": 0.0042, "step": 17905 }, { "epoch": 4.074175199089875, "grad_norm": 0.21949506862416307, "learning_rate": 1.0284739693404523e-07, "loss": 0.0007, "step": 17906 }, { "epoch": 4.074402730375427, "grad_norm": 0.18689248742370232, "learning_rate": 1.0279829698484664e-07, "loss": 0.0009, "step": 17907 }, { "epoch": 4.074630261660978, "grad_norm": 0.5096762794757285, "learning_rate": 1.0274920770849469e-07, "loss": 0.0022, "step": 17908 }, { "epoch": 4.07485779294653, "grad_norm": 0.6954118579723012, "learning_rate": 1.0270012910599224e-07, "loss": 0.0028, "step": 17909 }, { "epoch": 4.075085324232082, "grad_norm": 0.4250505818978632, "learning_rate": 1.0265106117834281e-07, "loss": 0.0033, "step": 17910 }, { "epoch": 4.075312855517634, "grad_norm": 0.4233623533985302, "learning_rate": 1.0260200392654893e-07, "loss": 0.0036, "step": 17911 }, { "epoch": 4.075540386803185, "grad_norm": 0.11196927274728459, "learning_rate": 1.0255295735161332e-07, "loss": 0.0002, "step": 17912 }, { "epoch": 4.075767918088737, "grad_norm": 0.2810802602595047, "learning_rate": 1.0250392145453863e-07, "loss": 0.0016, "step": 17913 }, { "epoch": 4.075995449374289, "grad_norm": 0.04994957763373084, "learning_rate": 1.0245489623632663e-07, "loss": 0.0002, "step": 17914 }, { "epoch": 4.076222980659841, "grad_norm": 0.30981137185977276, "learning_rate": 1.0240588169797983e-07, "loss": 0.0011, "step": 17915 }, { "epoch": 4.076450511945392, "grad_norm": 0.6063896634193496, "learning_rate": 1.0235687784049965e-07, "loss": 0.0028, "step": 17916 }, { "epoch": 4.076678043230944, "grad_norm": 0.5846733394204862, "learning_rate": 1.0230788466488756e-07, "loss": 0.002, "step": 17917 }, { "epoch": 4.076905574516496, "grad_norm": 0.3830386395003629, "learning_rate": 1.0225890217214505e-07, "loss": 0.0025, "step": 17918 }, { "epoch": 4.077133105802048, "grad_norm": 0.16976586056299225, "learning_rate": 1.0220993036327329e-07, "loss": 0.0008, "step": 17919 }, { "epoch": 4.077360637087599, "grad_norm": 0.2534817108642358, "learning_rate": 1.0216096923927318e-07, "loss": 0.0013, "step": 17920 }, { "epoch": 4.077588168373151, "grad_norm": 0.44381110014695097, "learning_rate": 1.021120188011452e-07, "loss": 0.0063, "step": 17921 }, { "epoch": 4.077815699658703, "grad_norm": 0.6287905336426518, "learning_rate": 1.0206307904989006e-07, "loss": 0.0042, "step": 17922 }, { "epoch": 4.078043230944255, "grad_norm": 0.14782439454556986, "learning_rate": 1.0201414998650793e-07, "loss": 0.0009, "step": 17923 }, { "epoch": 4.078270762229806, "grad_norm": 0.047071970605402504, "learning_rate": 1.019652316119986e-07, "loss": 0.0001, "step": 17924 }, { "epoch": 4.078498293515358, "grad_norm": 0.37430760306657546, "learning_rate": 1.0191632392736197e-07, "loss": 0.0015, "step": 17925 }, { "epoch": 4.07872582480091, "grad_norm": 0.4057283901917354, "learning_rate": 1.0186742693359779e-07, "loss": 0.002, "step": 17926 }, { "epoch": 4.078953356086462, "grad_norm": 0.23934686815136044, "learning_rate": 1.0181854063170543e-07, "loss": 0.0009, "step": 17927 }, { "epoch": 4.079180887372013, "grad_norm": 0.1525116799409595, "learning_rate": 1.0176966502268376e-07, "loss": 0.0005, "step": 17928 }, { "epoch": 4.079408418657565, "grad_norm": 0.11019039603813256, "learning_rate": 1.0172080010753212e-07, "loss": 0.0004, "step": 17929 }, { "epoch": 4.079635949943118, "grad_norm": 0.14198988510004376, "learning_rate": 1.0167194588724893e-07, "loss": 0.0009, "step": 17930 }, { "epoch": 4.079863481228669, "grad_norm": 0.3661387360931656, "learning_rate": 1.0162310236283258e-07, "loss": 0.0028, "step": 17931 }, { "epoch": 4.080091012514221, "grad_norm": 0.5639248497230284, "learning_rate": 1.0157426953528151e-07, "loss": 0.0025, "step": 17932 }, { "epoch": 4.080318543799772, "grad_norm": 0.45501177328991493, "learning_rate": 1.0152544740559378e-07, "loss": 0.001, "step": 17933 }, { "epoch": 4.080546075085325, "grad_norm": 0.315329434300969, "learning_rate": 1.0147663597476737e-07, "loss": 0.0014, "step": 17934 }, { "epoch": 4.080773606370876, "grad_norm": 0.19271130592694596, "learning_rate": 1.0142783524379966e-07, "loss": 0.001, "step": 17935 }, { "epoch": 4.081001137656428, "grad_norm": 0.19884078564867397, "learning_rate": 1.0137904521368806e-07, "loss": 0.001, "step": 17936 }, { "epoch": 4.081228668941979, "grad_norm": 0.10323518700581635, "learning_rate": 1.013302658854299e-07, "loss": 0.0003, "step": 17937 }, { "epoch": 4.081456200227532, "grad_norm": 1.1610107649877024, "learning_rate": 1.0128149726002179e-07, "loss": 0.0049, "step": 17938 }, { "epoch": 4.081683731513083, "grad_norm": 0.09217586880280051, "learning_rate": 1.0123273933846101e-07, "loss": 0.0003, "step": 17939 }, { "epoch": 4.081911262798635, "grad_norm": 0.15511050828990225, "learning_rate": 1.0118399212174362e-07, "loss": 0.0004, "step": 17940 }, { "epoch": 4.0821387940841865, "grad_norm": 0.09366770233488732, "learning_rate": 1.0113525561086628e-07, "loss": 0.0003, "step": 17941 }, { "epoch": 4.082366325369739, "grad_norm": 0.36749651974821684, "learning_rate": 1.010865298068249e-07, "loss": 0.0045, "step": 17942 }, { "epoch": 4.08259385665529, "grad_norm": 0.12800601079365068, "learning_rate": 1.0103781471061523e-07, "loss": 0.0006, "step": 17943 }, { "epoch": 4.082821387940842, "grad_norm": 0.48904580019504756, "learning_rate": 1.00989110323233e-07, "loss": 0.0022, "step": 17944 }, { "epoch": 4.0830489192263935, "grad_norm": 0.5638786728937584, "learning_rate": 1.0094041664567372e-07, "loss": 0.0042, "step": 17945 }, { "epoch": 4.083276450511946, "grad_norm": 0.07312516716087866, "learning_rate": 1.0089173367893265e-07, "loss": 0.0002, "step": 17946 }, { "epoch": 4.083503981797497, "grad_norm": 0.17689240727525823, "learning_rate": 1.0084306142400454e-07, "loss": 0.0012, "step": 17947 }, { "epoch": 4.083731513083049, "grad_norm": 0.07352982525203466, "learning_rate": 1.0079439988188452e-07, "loss": 0.0004, "step": 17948 }, { "epoch": 4.0839590443686005, "grad_norm": 0.02786565959128792, "learning_rate": 1.0074574905356689e-07, "loss": 0.0001, "step": 17949 }, { "epoch": 4.084186575654153, "grad_norm": 0.2726306717059214, "learning_rate": 1.0069710894004582e-07, "loss": 0.0016, "step": 17950 }, { "epoch": 4.084414106939704, "grad_norm": 0.17201861080481892, "learning_rate": 1.0064847954231571e-07, "loss": 0.0006, "step": 17951 }, { "epoch": 4.084641638225256, "grad_norm": 0.31676191382382407, "learning_rate": 1.0059986086137035e-07, "loss": 0.0016, "step": 17952 }, { "epoch": 4.0848691695108075, "grad_norm": 0.07086997737789191, "learning_rate": 1.0055125289820354e-07, "loss": 0.0002, "step": 17953 }, { "epoch": 4.08509670079636, "grad_norm": 0.025915521518712816, "learning_rate": 1.0050265565380871e-07, "loss": 0.0001, "step": 17954 }, { "epoch": 4.085324232081911, "grad_norm": 0.12344951808136699, "learning_rate": 1.004540691291788e-07, "loss": 0.0003, "step": 17955 }, { "epoch": 4.085551763367463, "grad_norm": 0.4164892018229078, "learning_rate": 1.0040549332530728e-07, "loss": 0.0026, "step": 17956 }, { "epoch": 4.0857792946530145, "grad_norm": 0.4621017768483328, "learning_rate": 1.0035692824318654e-07, "loss": 0.002, "step": 17957 }, { "epoch": 4.086006825938567, "grad_norm": 0.6252086959851872, "learning_rate": 1.0030837388380933e-07, "loss": 0.006, "step": 17958 }, { "epoch": 4.086234357224118, "grad_norm": 0.16715752698193892, "learning_rate": 1.0025983024816811e-07, "loss": 0.0008, "step": 17959 }, { "epoch": 4.08646188850967, "grad_norm": 0.17679997839728853, "learning_rate": 1.0021129733725501e-07, "loss": 0.0004, "step": 17960 }, { "epoch": 4.0866894197952215, "grad_norm": 0.2704045739179713, "learning_rate": 1.0016277515206195e-07, "loss": 0.0011, "step": 17961 }, { "epoch": 4.086916951080774, "grad_norm": 0.7414339569373607, "learning_rate": 1.0011426369358035e-07, "loss": 0.0015, "step": 17962 }, { "epoch": 4.087144482366325, "grad_norm": 0.05126051051930566, "learning_rate": 1.0006576296280213e-07, "loss": 0.0002, "step": 17963 }, { "epoch": 4.087372013651877, "grad_norm": 0.30460791889938743, "learning_rate": 1.0001727296071816e-07, "loss": 0.0022, "step": 17964 }, { "epoch": 4.0875995449374285, "grad_norm": 0.19874350036573085, "learning_rate": 9.996879368831966e-08, "loss": 0.0011, "step": 17965 }, { "epoch": 4.087827076222981, "grad_norm": 0.140865465044505, "learning_rate": 9.992032514659751e-08, "loss": 0.0004, "step": 17966 }, { "epoch": 4.088054607508532, "grad_norm": 0.16670792905529794, "learning_rate": 9.987186733654238e-08, "loss": 0.0005, "step": 17967 }, { "epoch": 4.088282138794084, "grad_norm": 0.41778183490466864, "learning_rate": 9.982342025914458e-08, "loss": 0.0023, "step": 17968 }, { "epoch": 4.088509670079636, "grad_norm": 0.5810432282857532, "learning_rate": 9.977498391539414e-08, "loss": 0.0033, "step": 17969 }, { "epoch": 4.088737201365188, "grad_norm": 0.4783764650742631, "learning_rate": 9.97265583062812e-08, "loss": 0.0056, "step": 17970 }, { "epoch": 4.08896473265074, "grad_norm": 0.30749085209689825, "learning_rate": 9.967814343279529e-08, "loss": 0.0018, "step": 17971 }, { "epoch": 4.089192263936291, "grad_norm": 0.08190353414234525, "learning_rate": 9.962973929592612e-08, "loss": 0.0004, "step": 17972 }, { "epoch": 4.089419795221843, "grad_norm": 0.5899554366176715, "learning_rate": 9.958134589666297e-08, "loss": 0.0035, "step": 17973 }, { "epoch": 4.089647326507395, "grad_norm": 1.072761411830846, "learning_rate": 9.953296323599474e-08, "loss": 0.0016, "step": 17974 }, { "epoch": 4.089874857792947, "grad_norm": 0.12373044163632546, "learning_rate": 9.948459131491055e-08, "loss": 0.0004, "step": 17975 }, { "epoch": 4.090102389078498, "grad_norm": 0.11626094194147175, "learning_rate": 9.943623013439867e-08, "loss": 0.001, "step": 17976 }, { "epoch": 4.09032992036405, "grad_norm": 0.31300357647790605, "learning_rate": 9.93878796954479e-08, "loss": 0.0019, "step": 17977 }, { "epoch": 4.090557451649602, "grad_norm": 0.12302000829652096, "learning_rate": 9.933953999904614e-08, "loss": 0.0004, "step": 17978 }, { "epoch": 4.090784982935154, "grad_norm": 0.128778566645822, "learning_rate": 9.929121104618148e-08, "loss": 0.0007, "step": 17979 }, { "epoch": 4.091012514220705, "grad_norm": 0.2050685555172462, "learning_rate": 9.924289283784181e-08, "loss": 0.0009, "step": 17980 }, { "epoch": 4.091240045506257, "grad_norm": 0.19712810295109418, "learning_rate": 9.919458537501432e-08, "loss": 0.0006, "step": 17981 }, { "epoch": 4.091467576791809, "grad_norm": 0.22478533344184182, "learning_rate": 9.914628865868677e-08, "loss": 0.0016, "step": 17982 }, { "epoch": 4.091695108077361, "grad_norm": 0.14741533324481587, "learning_rate": 9.909800268984581e-08, "loss": 0.0007, "step": 17983 }, { "epoch": 4.091922639362912, "grad_norm": 0.4656984848993642, "learning_rate": 9.904972746947878e-08, "loss": 0.0032, "step": 17984 }, { "epoch": 4.092150170648464, "grad_norm": 0.2193788962253508, "learning_rate": 9.900146299857188e-08, "loss": 0.0007, "step": 17985 }, { "epoch": 4.092377701934016, "grad_norm": 0.47835149902864654, "learning_rate": 9.895320927811174e-08, "loss": 0.0036, "step": 17986 }, { "epoch": 4.092605233219568, "grad_norm": 0.08203493256601015, "learning_rate": 9.890496630908478e-08, "loss": 0.0002, "step": 17987 }, { "epoch": 4.092832764505119, "grad_norm": 0.13962647981607382, "learning_rate": 9.885673409247662e-08, "loss": 0.0005, "step": 17988 }, { "epoch": 4.093060295790671, "grad_norm": 0.40989669732316314, "learning_rate": 9.880851262927336e-08, "loss": 0.0025, "step": 17989 }, { "epoch": 4.093287827076223, "grad_norm": 0.3797856359233331, "learning_rate": 9.876030192046034e-08, "loss": 0.0015, "step": 17990 }, { "epoch": 4.093515358361775, "grad_norm": 0.12888546846956958, "learning_rate": 9.871210196702313e-08, "loss": 0.0006, "step": 17991 }, { "epoch": 4.093742889647326, "grad_norm": 0.33963679177303435, "learning_rate": 9.866391276994652e-08, "loss": 0.001, "step": 17992 }, { "epoch": 4.093970420932878, "grad_norm": 0.2097387570721717, "learning_rate": 9.861573433021562e-08, "loss": 0.0016, "step": 17993 }, { "epoch": 4.09419795221843, "grad_norm": 0.26546634873160724, "learning_rate": 9.85675666488152e-08, "loss": 0.002, "step": 17994 }, { "epoch": 4.094425483503982, "grad_norm": 0.08270699864005071, "learning_rate": 9.851940972672946e-08, "loss": 0.0002, "step": 17995 }, { "epoch": 4.094653014789533, "grad_norm": 0.16740100878158728, "learning_rate": 9.847126356494298e-08, "loss": 0.0006, "step": 17996 }, { "epoch": 4.0948805460750854, "grad_norm": 0.41064077665674664, "learning_rate": 9.84231281644394e-08, "loss": 0.0024, "step": 17997 }, { "epoch": 4.095108077360637, "grad_norm": 0.01892323608099915, "learning_rate": 9.837500352620267e-08, "loss": 0.0, "step": 17998 }, { "epoch": 4.095335608646189, "grad_norm": 0.050561001415819234, "learning_rate": 9.832688965121656e-08, "loss": 0.0002, "step": 17999 }, { "epoch": 4.09556313993174, "grad_norm": 0.3887526039042907, "learning_rate": 9.827878654046414e-08, "loss": 0.0023, "step": 18000 }, { "epoch": 4.0957906712172925, "grad_norm": 0.2211870990542651, "learning_rate": 9.823069419492882e-08, "loss": 0.001, "step": 18001 }, { "epoch": 4.096018202502844, "grad_norm": 0.37465081060180644, "learning_rate": 9.818261261559326e-08, "loss": 0.0022, "step": 18002 }, { "epoch": 4.096245733788396, "grad_norm": 0.08222776020603374, "learning_rate": 9.813454180344038e-08, "loss": 0.0004, "step": 18003 }, { "epoch": 4.096473265073947, "grad_norm": 1.0617220905985811, "learning_rate": 9.808648175945249e-08, "loss": 0.0039, "step": 18004 }, { "epoch": 4.0967007963594995, "grad_norm": 0.3464290225761955, "learning_rate": 9.803843248461182e-08, "loss": 0.0013, "step": 18005 }, { "epoch": 4.096928327645051, "grad_norm": 0.2539542731093453, "learning_rate": 9.799039397990068e-08, "loss": 0.0015, "step": 18006 }, { "epoch": 4.097155858930603, "grad_norm": 0.30385225459651855, "learning_rate": 9.794236624630062e-08, "loss": 0.0017, "step": 18007 }, { "epoch": 4.097383390216155, "grad_norm": 0.43425946084488215, "learning_rate": 9.789434928479343e-08, "loss": 0.0018, "step": 18008 }, { "epoch": 4.0976109215017065, "grad_norm": 0.6391398881736989, "learning_rate": 9.784634309636019e-08, "loss": 0.0015, "step": 18009 }, { "epoch": 4.097838452787259, "grad_norm": 0.06982135422755005, "learning_rate": 9.77983476819825e-08, "loss": 0.0004, "step": 18010 }, { "epoch": 4.09806598407281, "grad_norm": 0.45679030172802515, "learning_rate": 9.775036304264087e-08, "loss": 0.0024, "step": 18011 }, { "epoch": 4.098293515358362, "grad_norm": 0.6495143357213959, "learning_rate": 9.77023891793162e-08, "loss": 0.0035, "step": 18012 }, { "epoch": 4.0985210466439135, "grad_norm": 0.17227282338484362, "learning_rate": 9.765442609298918e-08, "loss": 0.0005, "step": 18013 }, { "epoch": 4.098748577929466, "grad_norm": 0.16367821805230726, "learning_rate": 9.760647378463974e-08, "loss": 0.0004, "step": 18014 }, { "epoch": 4.098976109215017, "grad_norm": 0.07563197950295093, "learning_rate": 9.755853225524823e-08, "loss": 0.0004, "step": 18015 }, { "epoch": 4.099203640500569, "grad_norm": 0.5000857649036725, "learning_rate": 9.751060150579424e-08, "loss": 0.0023, "step": 18016 }, { "epoch": 4.0994311717861205, "grad_norm": 0.2606915806985077, "learning_rate": 9.746268153725764e-08, "loss": 0.0024, "step": 18017 }, { "epoch": 4.099658703071673, "grad_norm": 0.3566389759972823, "learning_rate": 9.741477235061755e-08, "loss": 0.0029, "step": 18018 }, { "epoch": 4.099886234357224, "grad_norm": 0.3307315468367529, "learning_rate": 9.736687394685328e-08, "loss": 0.0061, "step": 18019 }, { "epoch": 4.100113765642776, "grad_norm": 0.5128116329306712, "learning_rate": 9.731898632694393e-08, "loss": 0.0021, "step": 18020 }, { "epoch": 4.1003412969283275, "grad_norm": 0.33498274659598043, "learning_rate": 9.727110949186797e-08, "loss": 0.0004, "step": 18021 }, { "epoch": 4.10056882821388, "grad_norm": 0.17982596726229677, "learning_rate": 9.722324344260417e-08, "loss": 0.0003, "step": 18022 }, { "epoch": 4.100796359499431, "grad_norm": 0.36503295981168865, "learning_rate": 9.717538818013066e-08, "loss": 0.0038, "step": 18023 }, { "epoch": 4.101023890784983, "grad_norm": 2.328093795984389, "learning_rate": 9.712754370542545e-08, "loss": 0.0058, "step": 18024 }, { "epoch": 4.1012514220705345, "grad_norm": 0.058774025380610884, "learning_rate": 9.707971001946647e-08, "loss": 0.0004, "step": 18025 }, { "epoch": 4.101478953356087, "grad_norm": 0.7128072284607235, "learning_rate": 9.70318871232314e-08, "loss": 0.0014, "step": 18026 }, { "epoch": 4.101706484641638, "grad_norm": 0.056252565339479706, "learning_rate": 9.698407501769778e-08, "loss": 0.0002, "step": 18027 }, { "epoch": 4.10193401592719, "grad_norm": 0.10543989984138666, "learning_rate": 9.693627370384256e-08, "loss": 0.0003, "step": 18028 }, { "epoch": 4.1021615472127415, "grad_norm": 0.22190752706479594, "learning_rate": 9.688848318264283e-08, "loss": 0.0013, "step": 18029 }, { "epoch": 4.102389078498294, "grad_norm": 0.37881695667091925, "learning_rate": 9.684070345507535e-08, "loss": 0.0049, "step": 18030 }, { "epoch": 4.102616609783845, "grad_norm": 0.1728604371359328, "learning_rate": 9.679293452211649e-08, "loss": 0.0002, "step": 18031 }, { "epoch": 4.102844141069397, "grad_norm": 0.1660115238818362, "learning_rate": 9.674517638474268e-08, "loss": 0.001, "step": 18032 }, { "epoch": 4.1030716723549485, "grad_norm": 0.06405728179864949, "learning_rate": 9.669742904393e-08, "loss": 0.0003, "step": 18033 }, { "epoch": 4.103299203640501, "grad_norm": 0.2572507338685943, "learning_rate": 9.664969250065451e-08, "loss": 0.0008, "step": 18034 }, { "epoch": 4.103526734926052, "grad_norm": 0.22729289821244864, "learning_rate": 9.66019667558915e-08, "loss": 0.0005, "step": 18035 }, { "epoch": 4.103754266211604, "grad_norm": 0.688018771323866, "learning_rate": 9.655425181061675e-08, "loss": 0.0018, "step": 18036 }, { "epoch": 4.1039817974971555, "grad_norm": 0.33587138356905666, "learning_rate": 9.650654766580524e-08, "loss": 0.0018, "step": 18037 }, { "epoch": 4.104209328782708, "grad_norm": 0.30914253811189785, "learning_rate": 9.645885432243194e-08, "loss": 0.0025, "step": 18038 }, { "epoch": 4.104436860068259, "grad_norm": 0.016674017354058594, "learning_rate": 9.641117178147162e-08, "loss": 0.0001, "step": 18039 }, { "epoch": 4.104664391353811, "grad_norm": 0.3283300618755998, "learning_rate": 9.636350004389887e-08, "loss": 0.0026, "step": 18040 }, { "epoch": 4.1048919226393625, "grad_norm": 0.19570383142398143, "learning_rate": 9.631583911068817e-08, "loss": 0.001, "step": 18041 }, { "epoch": 4.105119453924915, "grad_norm": 0.16456819480231386, "learning_rate": 9.626818898281355e-08, "loss": 0.0005, "step": 18042 }, { "epoch": 4.105346985210466, "grad_norm": 0.11952033235694039, "learning_rate": 9.62205496612486e-08, "loss": 0.0002, "step": 18043 }, { "epoch": 4.105574516496018, "grad_norm": 0.10384146071926428, "learning_rate": 9.617292114696739e-08, "loss": 0.0005, "step": 18044 }, { "epoch": 4.1058020477815695, "grad_norm": 0.5101888968336503, "learning_rate": 9.612530344094297e-08, "loss": 0.0038, "step": 18045 }, { "epoch": 4.106029579067122, "grad_norm": 0.5314777025794638, "learning_rate": 9.607769654414879e-08, "loss": 0.0034, "step": 18046 }, { "epoch": 4.106257110352674, "grad_norm": 0.3194594881630868, "learning_rate": 9.603010045755784e-08, "loss": 0.0024, "step": 18047 }, { "epoch": 4.106484641638225, "grad_norm": 0.0741831718889311, "learning_rate": 9.598251518214301e-08, "loss": 0.0003, "step": 18048 }, { "epoch": 4.106712172923777, "grad_norm": 0.1465608861359099, "learning_rate": 9.593494071887672e-08, "loss": 0.0005, "step": 18049 }, { "epoch": 4.106939704209329, "grad_norm": 0.7149761103287585, "learning_rate": 9.588737706873116e-08, "loss": 0.0017, "step": 18050 }, { "epoch": 4.107167235494881, "grad_norm": 0.7082629488984201, "learning_rate": 9.583982423267877e-08, "loss": 0.0024, "step": 18051 }, { "epoch": 4.107394766780432, "grad_norm": 0.11068849086164306, "learning_rate": 9.579228221169106e-08, "loss": 0.0005, "step": 18052 }, { "epoch": 4.1076222980659844, "grad_norm": 0.5724028696953904, "learning_rate": 9.574475100673989e-08, "loss": 0.0054, "step": 18053 }, { "epoch": 4.107849829351536, "grad_norm": 0.16134407867298559, "learning_rate": 9.569723061879678e-08, "loss": 0.0011, "step": 18054 }, { "epoch": 4.108077360637088, "grad_norm": 0.3531660736198449, "learning_rate": 9.564972104883302e-08, "loss": 0.0025, "step": 18055 }, { "epoch": 4.108304891922639, "grad_norm": 0.43022515788990007, "learning_rate": 9.560222229781952e-08, "loss": 0.0033, "step": 18056 }, { "epoch": 4.1085324232081915, "grad_norm": 0.14298228365469332, "learning_rate": 9.555473436672687e-08, "loss": 0.0005, "step": 18057 }, { "epoch": 4.108759954493743, "grad_norm": 0.19682706947076628, "learning_rate": 9.550725725652597e-08, "loss": 0.0006, "step": 18058 }, { "epoch": 4.108987485779295, "grad_norm": 0.035892251907675095, "learning_rate": 9.545979096818674e-08, "loss": 0.0001, "step": 18059 }, { "epoch": 4.109215017064846, "grad_norm": 0.24018651335635366, "learning_rate": 9.541233550267987e-08, "loss": 0.0007, "step": 18060 }, { "epoch": 4.1094425483503985, "grad_norm": 0.21328492213175715, "learning_rate": 9.536489086097493e-08, "loss": 0.0008, "step": 18061 }, { "epoch": 4.10967007963595, "grad_norm": 0.32273306961175047, "learning_rate": 9.531745704404155e-08, "loss": 0.0013, "step": 18062 }, { "epoch": 4.109897610921502, "grad_norm": 0.27146276998516555, "learning_rate": 9.527003405284937e-08, "loss": 0.001, "step": 18063 }, { "epoch": 4.110125142207053, "grad_norm": 0.39083132729289893, "learning_rate": 9.522262188836742e-08, "loss": 0.0025, "step": 18064 }, { "epoch": 4.1103526734926055, "grad_norm": 0.4178643555869114, "learning_rate": 9.517522055156485e-08, "loss": 0.0035, "step": 18065 }, { "epoch": 4.110580204778157, "grad_norm": 0.18445965946565945, "learning_rate": 9.512783004341046e-08, "loss": 0.0011, "step": 18066 }, { "epoch": 4.110807736063709, "grad_norm": 0.06059343079047688, "learning_rate": 9.508045036487293e-08, "loss": 0.0001, "step": 18067 }, { "epoch": 4.11103526734926, "grad_norm": 0.2793318834743207, "learning_rate": 9.503308151692044e-08, "loss": 0.0046, "step": 18068 }, { "epoch": 4.1112627986348125, "grad_norm": 0.13215847796122365, "learning_rate": 9.49857235005211e-08, "loss": 0.0007, "step": 18069 }, { "epoch": 4.111490329920364, "grad_norm": 0.09316630641973006, "learning_rate": 9.493837631664303e-08, "loss": 0.0003, "step": 18070 }, { "epoch": 4.111717861205916, "grad_norm": 0.10916620699184175, "learning_rate": 9.489103996625365e-08, "loss": 0.0003, "step": 18071 }, { "epoch": 4.111945392491467, "grad_norm": 0.5786386451967817, "learning_rate": 9.484371445032052e-08, "loss": 0.0104, "step": 18072 }, { "epoch": 4.1121729237770195, "grad_norm": 0.1070533261034048, "learning_rate": 9.4796399769811e-08, "loss": 0.0004, "step": 18073 }, { "epoch": 4.112400455062571, "grad_norm": 1.019365269448424, "learning_rate": 9.474909592569213e-08, "loss": 0.0087, "step": 18074 }, { "epoch": 4.112627986348123, "grad_norm": 0.18928512319818047, "learning_rate": 9.47018029189306e-08, "loss": 0.0012, "step": 18075 }, { "epoch": 4.112855517633674, "grad_norm": 0.47281342710532615, "learning_rate": 9.465452075049293e-08, "loss": 0.0042, "step": 18076 }, { "epoch": 4.1130830489192265, "grad_norm": 0.033023351432343195, "learning_rate": 9.460724942134576e-08, "loss": 0.0001, "step": 18077 }, { "epoch": 4.113310580204778, "grad_norm": 0.2875454122169852, "learning_rate": 9.455998893245486e-08, "loss": 0.0021, "step": 18078 }, { "epoch": 4.11353811149033, "grad_norm": 0.4388052094357349, "learning_rate": 9.451273928478631e-08, "loss": 0.0038, "step": 18079 }, { "epoch": 4.113765642775881, "grad_norm": 0.18544220681843823, "learning_rate": 9.446550047930596e-08, "loss": 0.0013, "step": 18080 }, { "epoch": 4.1139931740614335, "grad_norm": 1.0092874636825158, "learning_rate": 9.441827251697907e-08, "loss": 0.0027, "step": 18081 }, { "epoch": 4.114220705346985, "grad_norm": 0.5670136200517997, "learning_rate": 9.43710553987711e-08, "loss": 0.0023, "step": 18082 }, { "epoch": 4.114448236632537, "grad_norm": 0.7206329941601562, "learning_rate": 9.432384912564679e-08, "loss": 0.009, "step": 18083 }, { "epoch": 4.114675767918088, "grad_norm": 0.6553142419994875, "learning_rate": 9.427665369857126e-08, "loss": 0.008, "step": 18084 }, { "epoch": 4.1149032992036405, "grad_norm": 0.013536238394291529, "learning_rate": 9.422946911850881e-08, "loss": 0.0, "step": 18085 }, { "epoch": 4.115130830489193, "grad_norm": 0.17432853043387309, "learning_rate": 9.418229538642396e-08, "loss": 0.0007, "step": 18086 }, { "epoch": 4.115358361774744, "grad_norm": 0.10386244117368029, "learning_rate": 9.413513250328098e-08, "loss": 0.0003, "step": 18087 }, { "epoch": 4.115585893060296, "grad_norm": 0.11528634320807044, "learning_rate": 9.408798047004355e-08, "loss": 0.0007, "step": 18088 }, { "epoch": 4.1158134243458475, "grad_norm": 0.09563507364337571, "learning_rate": 9.404083928767558e-08, "loss": 0.0006, "step": 18089 }, { "epoch": 4.1160409556314, "grad_norm": 0.05890597508765412, "learning_rate": 9.399370895714036e-08, "loss": 0.0002, "step": 18090 }, { "epoch": 4.116268486916951, "grad_norm": 0.7145584640303302, "learning_rate": 9.39465894794013e-08, "loss": 0.0021, "step": 18091 }, { "epoch": 4.116496018202503, "grad_norm": 0.1770178011489846, "learning_rate": 9.389948085542125e-08, "loss": 0.001, "step": 18092 }, { "epoch": 4.1167235494880545, "grad_norm": 0.18738516745735478, "learning_rate": 9.385238308616318e-08, "loss": 0.0006, "step": 18093 }, { "epoch": 4.116951080773607, "grad_norm": 0.220108905876973, "learning_rate": 9.380529617258979e-08, "loss": 0.0009, "step": 18094 }, { "epoch": 4.117178612059158, "grad_norm": 0.3543551344311772, "learning_rate": 9.375822011566317e-08, "loss": 0.0027, "step": 18095 }, { "epoch": 4.11740614334471, "grad_norm": 0.22445164105316784, "learning_rate": 9.371115491634573e-08, "loss": 0.0007, "step": 18096 }, { "epoch": 4.1176336746302615, "grad_norm": 0.9993666019931126, "learning_rate": 9.366410057559915e-08, "loss": 0.0112, "step": 18097 }, { "epoch": 4.117861205915814, "grad_norm": 0.2562177469409504, "learning_rate": 9.361705709438542e-08, "loss": 0.0011, "step": 18098 }, { "epoch": 4.118088737201365, "grad_norm": 0.33752848570443483, "learning_rate": 9.357002447366565e-08, "loss": 0.001, "step": 18099 }, { "epoch": 4.118316268486917, "grad_norm": 0.44850763880965777, "learning_rate": 9.352300271440136e-08, "loss": 0.0017, "step": 18100 }, { "epoch": 4.1185437997724685, "grad_norm": 0.540649486239775, "learning_rate": 9.347599181755366e-08, "loss": 0.0047, "step": 18101 }, { "epoch": 4.118771331058021, "grad_norm": 0.18640891609572752, "learning_rate": 9.342899178408312e-08, "loss": 0.0013, "step": 18102 }, { "epoch": 4.118998862343572, "grad_norm": 0.6268582629038282, "learning_rate": 9.338200261495061e-08, "loss": 0.0029, "step": 18103 }, { "epoch": 4.119226393629124, "grad_norm": 0.13974843137611834, "learning_rate": 9.33350243111162e-08, "loss": 0.001, "step": 18104 }, { "epoch": 4.1194539249146755, "grad_norm": 0.7718587513882347, "learning_rate": 9.328805687354031e-08, "loss": 0.0104, "step": 18105 }, { "epoch": 4.119681456200228, "grad_norm": 0.041076133575411865, "learning_rate": 9.324110030318266e-08, "loss": 0.0001, "step": 18106 }, { "epoch": 4.119908987485779, "grad_norm": 0.19750575468485834, "learning_rate": 9.319415460100304e-08, "loss": 0.0009, "step": 18107 }, { "epoch": 4.120136518771331, "grad_norm": 0.12543111969483658, "learning_rate": 9.314721976796108e-08, "loss": 0.0004, "step": 18108 }, { "epoch": 4.1203640500568826, "grad_norm": 0.46536890725567276, "learning_rate": 9.310029580501575e-08, "loss": 0.0009, "step": 18109 }, { "epoch": 4.120591581342435, "grad_norm": 0.4292944008530505, "learning_rate": 9.305338271312642e-08, "loss": 0.0017, "step": 18110 }, { "epoch": 4.120819112627986, "grad_norm": 0.27098588583442385, "learning_rate": 9.300648049325157e-08, "loss": 0.0021, "step": 18111 }, { "epoch": 4.121046643913538, "grad_norm": 0.17880187826697472, "learning_rate": 9.295958914635007e-08, "loss": 0.0007, "step": 18112 }, { "epoch": 4.12127417519909, "grad_norm": 0.42997177251324736, "learning_rate": 9.291270867338006e-08, "loss": 0.0058, "step": 18113 }, { "epoch": 4.121501706484642, "grad_norm": 0.4308118157484281, "learning_rate": 9.286583907529983e-08, "loss": 0.0053, "step": 18114 }, { "epoch": 4.121729237770193, "grad_norm": 0.1309569512294006, "learning_rate": 9.28189803530674e-08, "loss": 0.0006, "step": 18115 }, { "epoch": 4.121956769055745, "grad_norm": 0.1412854108149582, "learning_rate": 9.277213250764027e-08, "loss": 0.0007, "step": 18116 }, { "epoch": 4.122184300341297, "grad_norm": 0.17257649030391994, "learning_rate": 9.272529553997613e-08, "loss": 0.0004, "step": 18117 }, { "epoch": 4.122411831626849, "grad_norm": 0.7566360715283496, "learning_rate": 9.267846945103222e-08, "loss": 0.0064, "step": 18118 }, { "epoch": 4.1226393629124, "grad_norm": 0.35435734359812104, "learning_rate": 9.263165424176511e-08, "loss": 0.0033, "step": 18119 }, { "epoch": 4.122866894197952, "grad_norm": 0.1820586802071802, "learning_rate": 9.258484991313231e-08, "loss": 0.0026, "step": 18120 }, { "epoch": 4.123094425483504, "grad_norm": 0.4016980264977618, "learning_rate": 9.253805646609e-08, "loss": 0.0022, "step": 18121 }, { "epoch": 4.123321956769056, "grad_norm": 0.17089100099451632, "learning_rate": 9.249127390159471e-08, "loss": 0.0007, "step": 18122 }, { "epoch": 4.123549488054607, "grad_norm": 0.1483622072656315, "learning_rate": 9.244450222060248e-08, "loss": 0.0005, "step": 18123 }, { "epoch": 4.123777019340159, "grad_norm": 0.02294650750785896, "learning_rate": 9.23977414240694e-08, "loss": 0.0001, "step": 18124 }, { "epoch": 4.1240045506257115, "grad_norm": 0.5454761643251496, "learning_rate": 9.235099151295092e-08, "loss": 0.0008, "step": 18125 }, { "epoch": 4.124232081911263, "grad_norm": 0.3114730946735527, "learning_rate": 9.230425248820272e-08, "loss": 0.0026, "step": 18126 }, { "epoch": 4.124459613196815, "grad_norm": 0.5648606249139165, "learning_rate": 9.225752435078013e-08, "loss": 0.0056, "step": 18127 }, { "epoch": 4.124687144482366, "grad_norm": 0.212909984963526, "learning_rate": 9.22108071016379e-08, "loss": 0.0013, "step": 18128 }, { "epoch": 4.1249146757679185, "grad_norm": 0.16153671040476775, "learning_rate": 9.216410074173121e-08, "loss": 0.0005, "step": 18129 }, { "epoch": 4.12514220705347, "grad_norm": 0.25473975436086266, "learning_rate": 9.211740527201446e-08, "loss": 0.0008, "step": 18130 }, { "epoch": 4.125369738339022, "grad_norm": 0.42791659852378655, "learning_rate": 9.207072069344181e-08, "loss": 0.0011, "step": 18131 }, { "epoch": 4.125597269624573, "grad_norm": 0.5191009795386305, "learning_rate": 9.202404700696763e-08, "loss": 0.003, "step": 18132 }, { "epoch": 4.1258248009101255, "grad_norm": 0.3059255540855714, "learning_rate": 9.197738421354588e-08, "loss": 0.002, "step": 18133 }, { "epoch": 4.126052332195677, "grad_norm": 0.28347072487758823, "learning_rate": 9.193073231413032e-08, "loss": 0.0035, "step": 18134 }, { "epoch": 4.126279863481229, "grad_norm": 0.6748683202109889, "learning_rate": 9.18840913096742e-08, "loss": 0.0031, "step": 18135 }, { "epoch": 4.12650739476678, "grad_norm": 0.0929749093818126, "learning_rate": 9.1837461201131e-08, "loss": 0.0003, "step": 18136 }, { "epoch": 4.1267349260523325, "grad_norm": 0.22670439747089152, "learning_rate": 9.179084198945371e-08, "loss": 0.0007, "step": 18137 }, { "epoch": 4.126962457337884, "grad_norm": 0.7291812619213148, "learning_rate": 9.174423367559491e-08, "loss": 0.0012, "step": 18138 }, { "epoch": 4.127189988623436, "grad_norm": 0.2948053260161233, "learning_rate": 9.169763626050737e-08, "loss": 0.0019, "step": 18139 }, { "epoch": 4.127417519908987, "grad_norm": 0.7281443772431333, "learning_rate": 9.165104974514338e-08, "loss": 0.0021, "step": 18140 }, { "epoch": 4.1276450511945395, "grad_norm": 0.880257563906539, "learning_rate": 9.160447413045539e-08, "loss": 0.0078, "step": 18141 }, { "epoch": 4.127872582480091, "grad_norm": 0.5018158623760046, "learning_rate": 9.155790941739489e-08, "loss": 0.0076, "step": 18142 }, { "epoch": 4.128100113765643, "grad_norm": 0.18512971880728268, "learning_rate": 9.151135560691395e-08, "loss": 0.0005, "step": 18143 }, { "epoch": 4.128327645051194, "grad_norm": 0.26545516341320113, "learning_rate": 9.146481269996378e-08, "loss": 0.0007, "step": 18144 }, { "epoch": 4.1285551763367465, "grad_norm": 0.6443650153266708, "learning_rate": 9.141828069749566e-08, "loss": 0.0048, "step": 18145 }, { "epoch": 4.128782707622298, "grad_norm": 0.11992114896590099, "learning_rate": 9.137175960046062e-08, "loss": 0.0006, "step": 18146 }, { "epoch": 4.12901023890785, "grad_norm": 0.08436388834628263, "learning_rate": 9.132524940980957e-08, "loss": 0.0002, "step": 18147 }, { "epoch": 4.129237770193401, "grad_norm": 0.07884580166446917, "learning_rate": 9.127875012649313e-08, "loss": 0.0002, "step": 18148 }, { "epoch": 4.1294653014789535, "grad_norm": 0.30694799585655275, "learning_rate": 9.12322617514616e-08, "loss": 0.0014, "step": 18149 }, { "epoch": 4.129692832764505, "grad_norm": 0.359141779631538, "learning_rate": 9.118578428566498e-08, "loss": 0.002, "step": 18150 }, { "epoch": 4.129920364050057, "grad_norm": 0.38579004605077083, "learning_rate": 9.11393177300534e-08, "loss": 0.0019, "step": 18151 }, { "epoch": 4.130147895335608, "grad_norm": 0.15412809409773906, "learning_rate": 9.109286208557625e-08, "loss": 0.0006, "step": 18152 }, { "epoch": 4.1303754266211605, "grad_norm": 0.17869663369456013, "learning_rate": 9.104641735318325e-08, "loss": 0.0008, "step": 18153 }, { "epoch": 4.130602957906712, "grad_norm": 0.05404651109014311, "learning_rate": 9.099998353382364e-08, "loss": 0.0002, "step": 18154 }, { "epoch": 4.130830489192264, "grad_norm": 0.2368878162376614, "learning_rate": 9.095356062844646e-08, "loss": 0.0007, "step": 18155 }, { "epoch": 4.131058020477815, "grad_norm": 0.2780681041836184, "learning_rate": 9.090714863800046e-08, "loss": 0.0019, "step": 18156 }, { "epoch": 4.1312855517633675, "grad_norm": 0.24949378962346289, "learning_rate": 9.086074756343405e-08, "loss": 0.0005, "step": 18157 }, { "epoch": 4.131513083048919, "grad_norm": 0.20239591243753954, "learning_rate": 9.081435740569591e-08, "loss": 0.001, "step": 18158 }, { "epoch": 4.131740614334471, "grad_norm": 0.04270850315163773, "learning_rate": 9.076797816573384e-08, "loss": 0.0001, "step": 18159 }, { "epoch": 4.131968145620022, "grad_norm": 0.15720784181481848, "learning_rate": 9.072160984449581e-08, "loss": 0.0003, "step": 18160 }, { "epoch": 4.1321956769055745, "grad_norm": 0.2746352549227087, "learning_rate": 9.067525244292969e-08, "loss": 0.0033, "step": 18161 }, { "epoch": 4.132423208191126, "grad_norm": 0.14550905139532694, "learning_rate": 9.062890596198296e-08, "loss": 0.0008, "step": 18162 }, { "epoch": 4.132650739476678, "grad_norm": 0.10815616971260458, "learning_rate": 9.058257040260269e-08, "loss": 0.0005, "step": 18163 }, { "epoch": 4.13287827076223, "grad_norm": 0.9584393900405944, "learning_rate": 9.053624576573585e-08, "loss": 0.0046, "step": 18164 }, { "epoch": 4.1331058020477816, "grad_norm": 0.50760496499439, "learning_rate": 9.048993205232947e-08, "loss": 0.0038, "step": 18165 }, { "epoch": 4.133333333333334, "grad_norm": 0.3522751607795669, "learning_rate": 9.044362926332976e-08, "loss": 0.0019, "step": 18166 }, { "epoch": 4.133560864618885, "grad_norm": 0.24507657075599257, "learning_rate": 9.039733739968338e-08, "loss": 0.0011, "step": 18167 }, { "epoch": 4.133788395904437, "grad_norm": 0.432273624831647, "learning_rate": 9.035105646233638e-08, "loss": 0.0021, "step": 18168 }, { "epoch": 4.134015927189989, "grad_norm": 0.5846573143464596, "learning_rate": 9.030478645223454e-08, "loss": 0.0027, "step": 18169 }, { "epoch": 4.134243458475541, "grad_norm": 0.3410892036018476, "learning_rate": 9.025852737032374e-08, "loss": 0.003, "step": 18170 }, { "epoch": 4.134470989761092, "grad_norm": 0.16020775142622046, "learning_rate": 9.021227921754922e-08, "loss": 0.001, "step": 18171 }, { "epoch": 4.134698521046644, "grad_norm": 0.4101203046723559, "learning_rate": 9.016604199485635e-08, "loss": 0.0023, "step": 18172 }, { "epoch": 4.134926052332196, "grad_norm": 0.26047027547009716, "learning_rate": 9.011981570319003e-08, "loss": 0.001, "step": 18173 }, { "epoch": 4.135153583617748, "grad_norm": 0.3167114063985958, "learning_rate": 9.007360034349502e-08, "loss": 0.0024, "step": 18174 }, { "epoch": 4.135381114903299, "grad_norm": 0.09994434531470597, "learning_rate": 9.00273959167161e-08, "loss": 0.0003, "step": 18175 }, { "epoch": 4.135608646188851, "grad_norm": 0.3737175225806655, "learning_rate": 8.998120242379734e-08, "loss": 0.003, "step": 18176 }, { "epoch": 4.135836177474403, "grad_norm": 0.6785654413624866, "learning_rate": 8.993501986568308e-08, "loss": 0.0058, "step": 18177 }, { "epoch": 4.136063708759955, "grad_norm": 0.12838381255800577, "learning_rate": 8.988884824331696e-08, "loss": 0.0007, "step": 18178 }, { "epoch": 4.136291240045506, "grad_norm": 0.799017738246292, "learning_rate": 8.984268755764289e-08, "loss": 0.0029, "step": 18179 }, { "epoch": 4.136518771331058, "grad_norm": 0.06872447388259736, "learning_rate": 8.979653780960392e-08, "loss": 0.0001, "step": 18180 }, { "epoch": 4.13674630261661, "grad_norm": 0.02460957363376837, "learning_rate": 8.975039900014381e-08, "loss": 0.0001, "step": 18181 }, { "epoch": 4.136973833902162, "grad_norm": 0.34887401610566465, "learning_rate": 8.970427113020527e-08, "loss": 0.001, "step": 18182 }, { "epoch": 4.137201365187713, "grad_norm": 0.040565639798225854, "learning_rate": 8.965815420073098e-08, "loss": 0.0001, "step": 18183 }, { "epoch": 4.137428896473265, "grad_norm": 0.08534976869910268, "learning_rate": 8.961204821266375e-08, "loss": 0.0002, "step": 18184 }, { "epoch": 4.137656427758817, "grad_norm": 0.3873321817594261, "learning_rate": 8.95659531669455e-08, "loss": 0.0021, "step": 18185 }, { "epoch": 4.137883959044369, "grad_norm": 0.5128404748061252, "learning_rate": 8.951986906451868e-08, "loss": 0.0032, "step": 18186 }, { "epoch": 4.13811149032992, "grad_norm": 0.29710019668705556, "learning_rate": 8.947379590632514e-08, "loss": 0.0017, "step": 18187 }, { "epoch": 4.138339021615472, "grad_norm": 0.2214132478105827, "learning_rate": 8.942773369330623e-08, "loss": 0.001, "step": 18188 }, { "epoch": 4.138566552901024, "grad_norm": 0.38574456637520876, "learning_rate": 8.938168242640383e-08, "loss": 0.007, "step": 18189 }, { "epoch": 4.138794084186576, "grad_norm": 0.5259240332311484, "learning_rate": 8.933564210655873e-08, "loss": 0.0024, "step": 18190 }, { "epoch": 4.139021615472127, "grad_norm": 0.14743191994792024, "learning_rate": 8.928961273471225e-08, "loss": 0.0008, "step": 18191 }, { "epoch": 4.139249146757679, "grad_norm": 0.036668744450386204, "learning_rate": 8.92435943118048e-08, "loss": 0.0001, "step": 18192 }, { "epoch": 4.139476678043231, "grad_norm": 0.10782740324653119, "learning_rate": 8.919758683877705e-08, "loss": 0.0003, "step": 18193 }, { "epoch": 4.139704209328783, "grad_norm": 0.22039782143232942, "learning_rate": 8.915159031656955e-08, "loss": 0.0008, "step": 18194 }, { "epoch": 4.139931740614334, "grad_norm": 0.4633922374578739, "learning_rate": 8.910560474612202e-08, "loss": 0.005, "step": 18195 }, { "epoch": 4.140159271899886, "grad_norm": 0.2069374688530302, "learning_rate": 8.905963012837463e-08, "loss": 0.0003, "step": 18196 }, { "epoch": 4.140386803185438, "grad_norm": 0.45479298208483215, "learning_rate": 8.901366646426678e-08, "loss": 0.0013, "step": 18197 }, { "epoch": 4.14061433447099, "grad_norm": 0.2696073126950873, "learning_rate": 8.89677137547381e-08, "loss": 0.002, "step": 18198 }, { "epoch": 4.140841865756541, "grad_norm": 0.47973786898806414, "learning_rate": 8.892177200072744e-08, "loss": 0.004, "step": 18199 }, { "epoch": 4.141069397042093, "grad_norm": 0.22750186667303376, "learning_rate": 8.887584120317406e-08, "loss": 0.0015, "step": 18200 }, { "epoch": 4.141296928327645, "grad_norm": 0.09534930414972756, "learning_rate": 8.882992136301671e-08, "loss": 0.0002, "step": 18201 }, { "epoch": 4.141524459613197, "grad_norm": 0.6590497599685815, "learning_rate": 8.878401248119374e-08, "loss": 0.0019, "step": 18202 }, { "epoch": 4.141751990898749, "grad_norm": 0.0714848196809912, "learning_rate": 8.873811455864356e-08, "loss": 0.0004, "step": 18203 }, { "epoch": 4.1419795221843, "grad_norm": 0.5981080186651742, "learning_rate": 8.869222759630418e-08, "loss": 0.0049, "step": 18204 }, { "epoch": 4.1422070534698525, "grad_norm": 0.06874771230776242, "learning_rate": 8.864635159511352e-08, "loss": 0.0002, "step": 18205 }, { "epoch": 4.142434584755404, "grad_norm": 0.41617930212037, "learning_rate": 8.860048655600903e-08, "loss": 0.0021, "step": 18206 }, { "epoch": 4.142662116040956, "grad_norm": 0.15554071531977565, "learning_rate": 8.855463247992822e-08, "loss": 0.0008, "step": 18207 }, { "epoch": 4.142889647326507, "grad_norm": 0.38799793637610597, "learning_rate": 8.85087893678084e-08, "loss": 0.004, "step": 18208 }, { "epoch": 4.1431171786120595, "grad_norm": 0.02227928112671817, "learning_rate": 8.846295722058626e-08, "loss": 0.0001, "step": 18209 }, { "epoch": 4.143344709897611, "grad_norm": 0.23898394244400348, "learning_rate": 8.841713603919881e-08, "loss": 0.0004, "step": 18210 }, { "epoch": 4.143572241183163, "grad_norm": 0.5589567151840205, "learning_rate": 8.837132582458225e-08, "loss": 0.0005, "step": 18211 }, { "epoch": 4.143799772468714, "grad_norm": 0.32593754627872795, "learning_rate": 8.832552657767314e-08, "loss": 0.0021, "step": 18212 }, { "epoch": 4.1440273037542665, "grad_norm": 0.16288604945594223, "learning_rate": 8.827973829940724e-08, "loss": 0.0006, "step": 18213 }, { "epoch": 4.144254835039818, "grad_norm": 0.4579215267756103, "learning_rate": 8.823396099072054e-08, "loss": 0.0032, "step": 18214 }, { "epoch": 4.14448236632537, "grad_norm": 0.054560107032900686, "learning_rate": 8.818819465254882e-08, "loss": 0.0001, "step": 18215 }, { "epoch": 4.144709897610921, "grad_norm": 0.1174372485830248, "learning_rate": 8.814243928582711e-08, "loss": 0.0003, "step": 18216 }, { "epoch": 4.1449374288964735, "grad_norm": 0.6513100974290531, "learning_rate": 8.809669489149082e-08, "loss": 0.0027, "step": 18217 }, { "epoch": 4.145164960182025, "grad_norm": 0.09966711848218289, "learning_rate": 8.80509614704747e-08, "loss": 0.0002, "step": 18218 }, { "epoch": 4.145392491467577, "grad_norm": 0.6755803887711025, "learning_rate": 8.80052390237137e-08, "loss": 0.0029, "step": 18219 }, { "epoch": 4.145620022753128, "grad_norm": 0.17916168569613714, "learning_rate": 8.7959527552142e-08, "loss": 0.0008, "step": 18220 }, { "epoch": 4.1458475540386805, "grad_norm": 0.8715122376481955, "learning_rate": 8.791382705669399e-08, "loss": 0.0059, "step": 18221 }, { "epoch": 4.146075085324232, "grad_norm": 0.32374087939733537, "learning_rate": 8.786813753830393e-08, "loss": 0.0022, "step": 18222 }, { "epoch": 4.146302616609784, "grad_norm": 0.09547074160132049, "learning_rate": 8.782245899790522e-08, "loss": 0.0003, "step": 18223 }, { "epoch": 4.146530147895335, "grad_norm": 0.26060507535809213, "learning_rate": 8.777679143643185e-08, "loss": 0.002, "step": 18224 }, { "epoch": 4.146757679180888, "grad_norm": 0.12973014502745367, "learning_rate": 8.773113485481696e-08, "loss": 0.0007, "step": 18225 }, { "epoch": 4.146985210466439, "grad_norm": 0.06336474712811581, "learning_rate": 8.768548925399356e-08, "loss": 0.0003, "step": 18226 }, { "epoch": 4.147212741751991, "grad_norm": 0.32061027452740193, "learning_rate": 8.763985463489472e-08, "loss": 0.0016, "step": 18227 }, { "epoch": 4.147440273037542, "grad_norm": 0.04345970480031657, "learning_rate": 8.759423099845308e-08, "loss": 0.0001, "step": 18228 }, { "epoch": 4.147667804323095, "grad_norm": 0.34620032446739885, "learning_rate": 8.75486183456014e-08, "loss": 0.0023, "step": 18229 }, { "epoch": 4.147895335608646, "grad_norm": 0.16164280564677774, "learning_rate": 8.75030166772714e-08, "loss": 0.0004, "step": 18230 }, { "epoch": 4.148122866894198, "grad_norm": 0.1200666048298598, "learning_rate": 8.745742599439562e-08, "loss": 0.0002, "step": 18231 }, { "epoch": 4.148350398179749, "grad_norm": 0.13535773704808324, "learning_rate": 8.741184629790552e-08, "loss": 0.0002, "step": 18232 }, { "epoch": 4.148577929465302, "grad_norm": 0.15914486671417724, "learning_rate": 8.736627758873261e-08, "loss": 0.0006, "step": 18233 }, { "epoch": 4.148805460750853, "grad_norm": 0.495231167539727, "learning_rate": 8.732071986780833e-08, "loss": 0.0024, "step": 18234 }, { "epoch": 4.149032992036405, "grad_norm": 0.4123601282721431, "learning_rate": 8.727517313606382e-08, "loss": 0.003, "step": 18235 }, { "epoch": 4.149260523321956, "grad_norm": 0.05111113398881201, "learning_rate": 8.722963739443014e-08, "loss": 0.0001, "step": 18236 }, { "epoch": 4.149488054607509, "grad_norm": 0.42088223929138013, "learning_rate": 8.718411264383782e-08, "loss": 0.0027, "step": 18237 }, { "epoch": 4.14971558589306, "grad_norm": 0.09434020478991872, "learning_rate": 8.713859888521705e-08, "loss": 0.0002, "step": 18238 }, { "epoch": 4.149943117178612, "grad_norm": 0.14035685976610918, "learning_rate": 8.709309611949851e-08, "loss": 0.0006, "step": 18239 }, { "epoch": 4.150170648464163, "grad_norm": 0.0506446784611389, "learning_rate": 8.704760434761165e-08, "loss": 0.0002, "step": 18240 }, { "epoch": 4.150398179749716, "grad_norm": 0.30449333088456676, "learning_rate": 8.700212357048682e-08, "loss": 0.0014, "step": 18241 }, { "epoch": 4.150625711035268, "grad_norm": 0.30971070961894354, "learning_rate": 8.695665378905309e-08, "loss": 0.0037, "step": 18242 }, { "epoch": 4.150853242320819, "grad_norm": 0.06247168938511422, "learning_rate": 8.691119500424019e-08, "loss": 0.0002, "step": 18243 }, { "epoch": 4.151080773606371, "grad_norm": 0.2245489017413627, "learning_rate": 8.686574721697698e-08, "loss": 0.0009, "step": 18244 }, { "epoch": 4.151308304891923, "grad_norm": 0.06281753594987802, "learning_rate": 8.682031042819214e-08, "loss": 0.0001, "step": 18245 }, { "epoch": 4.151535836177475, "grad_norm": 0.3426916352580782, "learning_rate": 8.677488463881459e-08, "loss": 0.0022, "step": 18246 }, { "epoch": 4.151763367463026, "grad_norm": 0.3329897661120601, "learning_rate": 8.672946984977268e-08, "loss": 0.0029, "step": 18247 }, { "epoch": 4.151990898748578, "grad_norm": 0.4994635316911408, "learning_rate": 8.668406606199476e-08, "loss": 0.0018, "step": 18248 }, { "epoch": 4.15221843003413, "grad_norm": 0.27909387312764244, "learning_rate": 8.663867327640852e-08, "loss": 0.001, "step": 18249 }, { "epoch": 4.152445961319682, "grad_norm": 0.19377485670588393, "learning_rate": 8.659329149394199e-08, "loss": 0.0002, "step": 18250 }, { "epoch": 4.152673492605233, "grad_norm": 0.44515580442890984, "learning_rate": 8.654792071552246e-08, "loss": 0.0047, "step": 18251 }, { "epoch": 4.152901023890785, "grad_norm": 0.20859212493318685, "learning_rate": 8.650256094207721e-08, "loss": 0.0017, "step": 18252 }, { "epoch": 4.153128555176337, "grad_norm": 1.0550411779250841, "learning_rate": 8.645721217453343e-08, "loss": 0.0022, "step": 18253 }, { "epoch": 4.153356086461889, "grad_norm": 0.12179041514497273, "learning_rate": 8.641187441381788e-08, "loss": 0.0004, "step": 18254 }, { "epoch": 4.15358361774744, "grad_norm": 0.08658153878918305, "learning_rate": 8.636654766085744e-08, "loss": 0.0003, "step": 18255 }, { "epoch": 4.153811149032992, "grad_norm": 0.2975120446283689, "learning_rate": 8.632123191657828e-08, "loss": 0.0019, "step": 18256 }, { "epoch": 4.154038680318544, "grad_norm": 0.5363392578194738, "learning_rate": 8.627592718190642e-08, "loss": 0.0016, "step": 18257 }, { "epoch": 4.154266211604096, "grad_norm": 0.240540940926308, "learning_rate": 8.623063345776818e-08, "loss": 0.0005, "step": 18258 }, { "epoch": 4.154493742889647, "grad_norm": 0.4047177134521964, "learning_rate": 8.618535074508889e-08, "loss": 0.0017, "step": 18259 }, { "epoch": 4.154721274175199, "grad_norm": 2.2538565611685013, "learning_rate": 8.614007904479429e-08, "loss": 0.0129, "step": 18260 }, { "epoch": 4.154948805460751, "grad_norm": 1.137453374160393, "learning_rate": 8.609481835780954e-08, "loss": 0.0021, "step": 18261 }, { "epoch": 4.155176336746303, "grad_norm": 0.0865191183001384, "learning_rate": 8.604956868505993e-08, "loss": 0.0004, "step": 18262 }, { "epoch": 4.155403868031854, "grad_norm": 0.12416871912739975, "learning_rate": 8.60043300274701e-08, "loss": 0.0004, "step": 18263 }, { "epoch": 4.155631399317406, "grad_norm": 0.21446701387826253, "learning_rate": 8.595910238596447e-08, "loss": 0.0007, "step": 18264 }, { "epoch": 4.155858930602958, "grad_norm": 0.38778789582811646, "learning_rate": 8.591388576146775e-08, "loss": 0.0018, "step": 18265 }, { "epoch": 4.15608646188851, "grad_norm": 0.7327726615831087, "learning_rate": 8.586868015490371e-08, "loss": 0.0049, "step": 18266 }, { "epoch": 4.156313993174061, "grad_norm": 0.44639689781287445, "learning_rate": 8.582348556719654e-08, "loss": 0.0072, "step": 18267 }, { "epoch": 4.156541524459613, "grad_norm": 0.2937803496695957, "learning_rate": 8.577830199926987e-08, "loss": 0.0012, "step": 18268 }, { "epoch": 4.156769055745165, "grad_norm": 0.22952226418706437, "learning_rate": 8.573312945204728e-08, "loss": 0.0011, "step": 18269 }, { "epoch": 4.156996587030717, "grad_norm": 0.2461860418429826, "learning_rate": 8.56879679264519e-08, "loss": 0.0022, "step": 18270 }, { "epoch": 4.157224118316268, "grad_norm": 1.1984024903037995, "learning_rate": 8.564281742340668e-08, "loss": 0.0063, "step": 18271 }, { "epoch": 4.15745164960182, "grad_norm": 0.09568162665722618, "learning_rate": 8.559767794383451e-08, "loss": 0.0004, "step": 18272 }, { "epoch": 4.157679180887372, "grad_norm": 0.6026857392224377, "learning_rate": 8.55525494886579e-08, "loss": 0.001, "step": 18273 }, { "epoch": 4.157906712172924, "grad_norm": 0.06164749606551471, "learning_rate": 8.550743205879918e-08, "loss": 0.0002, "step": 18274 }, { "epoch": 4.158134243458475, "grad_norm": 0.9572014356499575, "learning_rate": 8.546232565518065e-08, "loss": 0.0038, "step": 18275 }, { "epoch": 4.158361774744027, "grad_norm": 0.083876475161656, "learning_rate": 8.541723027872394e-08, "loss": 0.0003, "step": 18276 }, { "epoch": 4.158589306029579, "grad_norm": 0.2784647116821803, "learning_rate": 8.537214593035103e-08, "loss": 0.001, "step": 18277 }, { "epoch": 4.158816837315131, "grad_norm": 0.5389149542737792, "learning_rate": 8.532707261098297e-08, "loss": 0.0028, "step": 18278 }, { "epoch": 4.159044368600682, "grad_norm": 0.2841897244332172, "learning_rate": 8.528201032154134e-08, "loss": 0.002, "step": 18279 }, { "epoch": 4.159271899886234, "grad_norm": 0.40021768175639405, "learning_rate": 8.523695906294684e-08, "loss": 0.0019, "step": 18280 }, { "epoch": 4.1594994311717866, "grad_norm": 0.42767127938006483, "learning_rate": 8.519191883612034e-08, "loss": 0.0044, "step": 18281 }, { "epoch": 4.159726962457338, "grad_norm": 0.7304054850517907, "learning_rate": 8.514688964198256e-08, "loss": 0.0028, "step": 18282 }, { "epoch": 4.15995449374289, "grad_norm": 0.02479914537832314, "learning_rate": 8.510187148145353e-08, "loss": 0.0001, "step": 18283 }, { "epoch": 4.160182025028441, "grad_norm": 0.035560180298263425, "learning_rate": 8.505686435545355e-08, "loss": 0.0001, "step": 18284 }, { "epoch": 4.160409556313994, "grad_norm": 0.29913306051294647, "learning_rate": 8.501186826490239e-08, "loss": 0.0022, "step": 18285 }, { "epoch": 4.160637087599545, "grad_norm": 0.35178124557989304, "learning_rate": 8.49668832107197e-08, "loss": 0.0025, "step": 18286 }, { "epoch": 4.160864618885097, "grad_norm": 0.16442242662281656, "learning_rate": 8.492190919382484e-08, "loss": 0.0009, "step": 18287 }, { "epoch": 4.161092150170648, "grad_norm": 0.9898855058539271, "learning_rate": 8.487694621513707e-08, "loss": 0.0085, "step": 18288 }, { "epoch": 4.161319681456201, "grad_norm": 0.38442299746378006, "learning_rate": 8.483199427557543e-08, "loss": 0.0011, "step": 18289 }, { "epoch": 4.161547212741752, "grad_norm": 0.36581200197211694, "learning_rate": 8.478705337605843e-08, "loss": 0.0022, "step": 18290 }, { "epoch": 4.161774744027304, "grad_norm": 0.4530982885741518, "learning_rate": 8.474212351750479e-08, "loss": 0.005, "step": 18291 }, { "epoch": 4.162002275312855, "grad_norm": 0.857522379769689, "learning_rate": 8.469720470083264e-08, "loss": 0.0058, "step": 18292 }, { "epoch": 4.162229806598408, "grad_norm": 0.09316856395124871, "learning_rate": 8.465229692696024e-08, "loss": 0.0003, "step": 18293 }, { "epoch": 4.162457337883959, "grad_norm": 0.4686202832828918, "learning_rate": 8.46074001968051e-08, "loss": 0.006, "step": 18294 }, { "epoch": 4.162684869169511, "grad_norm": 0.17212036814952128, "learning_rate": 8.456251451128509e-08, "loss": 0.0005, "step": 18295 }, { "epoch": 4.162912400455062, "grad_norm": 1.0765213627903585, "learning_rate": 8.45176398713176e-08, "loss": 0.0054, "step": 18296 }, { "epoch": 4.163139931740615, "grad_norm": 0.16425096448010576, "learning_rate": 8.447277627781962e-08, "loss": 0.0004, "step": 18297 }, { "epoch": 4.163367463026166, "grad_norm": 0.32303063503166535, "learning_rate": 8.442792373170825e-08, "loss": 0.0022, "step": 18298 }, { "epoch": 4.163594994311718, "grad_norm": 0.31724171956556724, "learning_rate": 8.43830822339e-08, "loss": 0.0023, "step": 18299 }, { "epoch": 4.163822525597269, "grad_norm": 1.4355313025861038, "learning_rate": 8.433825178531154e-08, "loss": 0.0026, "step": 18300 }, { "epoch": 4.164050056882822, "grad_norm": 0.02257267124712225, "learning_rate": 8.429343238685897e-08, "loss": 0.0001, "step": 18301 }, { "epoch": 4.164277588168373, "grad_norm": 0.061984234558038004, "learning_rate": 8.424862403945834e-08, "loss": 0.0002, "step": 18302 }, { "epoch": 4.164505119453925, "grad_norm": 0.2671938934665924, "learning_rate": 8.420382674402566e-08, "loss": 0.0016, "step": 18303 }, { "epoch": 4.164732650739476, "grad_norm": 0.16743167737579223, "learning_rate": 8.415904050147617e-08, "loss": 0.0004, "step": 18304 }, { "epoch": 4.164960182025029, "grad_norm": 0.8847818135234482, "learning_rate": 8.411426531272561e-08, "loss": 0.0025, "step": 18305 }, { "epoch": 4.16518771331058, "grad_norm": 0.3170332897222002, "learning_rate": 8.406950117868864e-08, "loss": 0.0009, "step": 18306 }, { "epoch": 4.165415244596132, "grad_norm": 0.3635658718975984, "learning_rate": 8.402474810028045e-08, "loss": 0.0007, "step": 18307 }, { "epoch": 4.165642775881683, "grad_norm": 0.1854516928288825, "learning_rate": 8.398000607841579e-08, "loss": 0.0007, "step": 18308 }, { "epoch": 4.165870307167236, "grad_norm": 0.22434135733518992, "learning_rate": 8.393527511400886e-08, "loss": 0.0013, "step": 18309 }, { "epoch": 4.166097838452787, "grad_norm": 0.05539314751105671, "learning_rate": 8.389055520797406e-08, "loss": 0.0001, "step": 18310 }, { "epoch": 4.166325369738339, "grad_norm": 0.05779938434583954, "learning_rate": 8.384584636122524e-08, "loss": 0.0003, "step": 18311 }, { "epoch": 4.16655290102389, "grad_norm": 1.0394454752983775, "learning_rate": 8.38011485746764e-08, "loss": 0.0111, "step": 18312 }, { "epoch": 4.166780432309443, "grad_norm": 0.3107178204865362, "learning_rate": 8.37564618492407e-08, "loss": 0.0022, "step": 18313 }, { "epoch": 4.167007963594994, "grad_norm": 0.037401491302044634, "learning_rate": 8.371178618583178e-08, "loss": 0.0001, "step": 18314 }, { "epoch": 4.167235494880546, "grad_norm": 0.1730927915966139, "learning_rate": 8.366712158536273e-08, "loss": 0.0005, "step": 18315 }, { "epoch": 4.167463026166097, "grad_norm": 0.13830933547101942, "learning_rate": 8.362246804874616e-08, "loss": 0.0006, "step": 18316 }, { "epoch": 4.16769055745165, "grad_norm": 0.27302891179278654, "learning_rate": 8.357782557689493e-08, "loss": 0.0005, "step": 18317 }, { "epoch": 4.167918088737201, "grad_norm": 0.6852367105512404, "learning_rate": 8.353319417072133e-08, "loss": 0.003, "step": 18318 }, { "epoch": 4.168145620022753, "grad_norm": 0.7491635222087372, "learning_rate": 8.348857383113771e-08, "loss": 0.0032, "step": 18319 }, { "epoch": 4.168373151308305, "grad_norm": 0.20736795354698392, "learning_rate": 8.344396455905575e-08, "loss": 0.0007, "step": 18320 }, { "epoch": 4.168600682593857, "grad_norm": 0.21024117122564775, "learning_rate": 8.33993663553874e-08, "loss": 0.0004, "step": 18321 }, { "epoch": 4.168828213879409, "grad_norm": 0.44741132127905425, "learning_rate": 8.335477922104415e-08, "loss": 0.0029, "step": 18322 }, { "epoch": 4.16905574516496, "grad_norm": 0.842519362340707, "learning_rate": 8.331020315693716e-08, "loss": 0.0027, "step": 18323 }, { "epoch": 4.169283276450512, "grad_norm": 1.4454219787214853, "learning_rate": 8.326563816397771e-08, "loss": 0.009, "step": 18324 }, { "epoch": 4.169510807736064, "grad_norm": 0.05560949504167569, "learning_rate": 8.322108424307633e-08, "loss": 0.0002, "step": 18325 }, { "epoch": 4.169738339021616, "grad_norm": 0.15302271531938744, "learning_rate": 8.317654139514388e-08, "loss": 0.0004, "step": 18326 }, { "epoch": 4.169965870307167, "grad_norm": 0.724683417589955, "learning_rate": 8.313200962109045e-08, "loss": 0.01, "step": 18327 }, { "epoch": 4.170193401592719, "grad_norm": 0.18672450623710712, "learning_rate": 8.308748892182645e-08, "loss": 0.0008, "step": 18328 }, { "epoch": 4.170420932878271, "grad_norm": 0.0982820406390865, "learning_rate": 8.304297929826177e-08, "loss": 0.0003, "step": 18329 }, { "epoch": 4.170648464163823, "grad_norm": 0.2627270433945124, "learning_rate": 8.299848075130595e-08, "loss": 0.0016, "step": 18330 }, { "epoch": 4.170875995449374, "grad_norm": 0.15519607626987453, "learning_rate": 8.295399328186869e-08, "loss": 0.0009, "step": 18331 }, { "epoch": 4.171103526734926, "grad_norm": 0.3130127466022347, "learning_rate": 8.290951689085912e-08, "loss": 0.0016, "step": 18332 }, { "epoch": 4.171331058020478, "grad_norm": 0.6503926374301603, "learning_rate": 8.286505157918602e-08, "loss": 0.0018, "step": 18333 }, { "epoch": 4.17155858930603, "grad_norm": 0.02611198399566405, "learning_rate": 8.282059734775846e-08, "loss": 0.0001, "step": 18334 }, { "epoch": 4.171786120591581, "grad_norm": 0.09596683550720152, "learning_rate": 8.277615419748489e-08, "loss": 0.0005, "step": 18335 }, { "epoch": 4.172013651877133, "grad_norm": 0.15679980382525063, "learning_rate": 8.273172212927386e-08, "loss": 0.0007, "step": 18336 }, { "epoch": 4.172241183162685, "grad_norm": 0.6732038953164869, "learning_rate": 8.268730114403316e-08, "loss": 0.0081, "step": 18337 }, { "epoch": 4.172468714448237, "grad_norm": 0.11940165851879374, "learning_rate": 8.264289124267098e-08, "loss": 0.0004, "step": 18338 }, { "epoch": 4.172696245733788, "grad_norm": 0.4245426776160863, "learning_rate": 8.259849242609481e-08, "loss": 0.0023, "step": 18339 }, { "epoch": 4.17292377701934, "grad_norm": 0.34185714524028415, "learning_rate": 8.255410469521199e-08, "loss": 0.0012, "step": 18340 }, { "epoch": 4.173151308304892, "grad_norm": 0.21861083150666547, "learning_rate": 8.250972805092974e-08, "loss": 0.0005, "step": 18341 }, { "epoch": 4.173378839590444, "grad_norm": 0.07373650876463833, "learning_rate": 8.246536249415523e-08, "loss": 0.0002, "step": 18342 }, { "epoch": 4.173606370875995, "grad_norm": 0.389740215644943, "learning_rate": 8.24210080257952e-08, "loss": 0.0013, "step": 18343 }, { "epoch": 4.173833902161547, "grad_norm": 0.15082152412115737, "learning_rate": 8.237666464675601e-08, "loss": 0.0004, "step": 18344 }, { "epoch": 4.174061433447099, "grad_norm": 0.20196131724923574, "learning_rate": 8.233233235794399e-08, "loss": 0.0008, "step": 18345 }, { "epoch": 4.174288964732651, "grad_norm": 0.1772362591479963, "learning_rate": 8.228801116026537e-08, "loss": 0.0008, "step": 18346 }, { "epoch": 4.174516496018202, "grad_norm": 0.09386562719745742, "learning_rate": 8.224370105462572e-08, "loss": 0.0002, "step": 18347 }, { "epoch": 4.174744027303754, "grad_norm": 0.07894241049125691, "learning_rate": 8.219940204193075e-08, "loss": 0.0004, "step": 18348 }, { "epoch": 4.174971558589306, "grad_norm": 0.09081500027661554, "learning_rate": 8.2155114123086e-08, "loss": 0.0003, "step": 18349 }, { "epoch": 4.175199089874858, "grad_norm": 0.10026552810701225, "learning_rate": 8.211083729899663e-08, "loss": 0.0003, "step": 18350 }, { "epoch": 4.175426621160409, "grad_norm": 1.2049425729945453, "learning_rate": 8.206657157056749e-08, "loss": 0.0104, "step": 18351 }, { "epoch": 4.175654152445961, "grad_norm": 0.3925887196644266, "learning_rate": 8.202231693870321e-08, "loss": 0.0019, "step": 18352 }, { "epoch": 4.175881683731513, "grad_norm": 0.15915733496677828, "learning_rate": 8.197807340430838e-08, "loss": 0.0011, "step": 18353 }, { "epoch": 4.176109215017065, "grad_norm": 0.13065271472581194, "learning_rate": 8.193384096828717e-08, "loss": 0.0003, "step": 18354 }, { "epoch": 4.176336746302616, "grad_norm": 0.420035230328404, "learning_rate": 8.188961963154371e-08, "loss": 0.0011, "step": 18355 }, { "epoch": 4.176564277588168, "grad_norm": 0.2595884121934602, "learning_rate": 8.184540939498171e-08, "loss": 0.002, "step": 18356 }, { "epoch": 4.17679180887372, "grad_norm": 0.24563555145166502, "learning_rate": 8.180121025950498e-08, "loss": 0.0017, "step": 18357 }, { "epoch": 4.177019340159272, "grad_norm": 0.34882756746554594, "learning_rate": 8.17570222260166e-08, "loss": 0.0019, "step": 18358 }, { "epoch": 4.177246871444824, "grad_norm": 0.4780760642769423, "learning_rate": 8.171284529541974e-08, "loss": 0.0029, "step": 18359 }, { "epoch": 4.177474402730375, "grad_norm": 0.355349161701737, "learning_rate": 8.166867946861743e-08, "loss": 0.0014, "step": 18360 }, { "epoch": 4.177701934015928, "grad_norm": 0.10556385424214777, "learning_rate": 8.162452474651216e-08, "loss": 0.0003, "step": 18361 }, { "epoch": 4.177929465301479, "grad_norm": 0.06290916814831805, "learning_rate": 8.158038113000636e-08, "loss": 0.0002, "step": 18362 }, { "epoch": 4.178156996587031, "grad_norm": 0.20189562166502664, "learning_rate": 8.153624862000254e-08, "loss": 0.001, "step": 18363 }, { "epoch": 4.178384527872582, "grad_norm": 0.11068434665872802, "learning_rate": 8.149212721740241e-08, "loss": 0.0003, "step": 18364 }, { "epoch": 4.178612059158135, "grad_norm": 0.17255215230047377, "learning_rate": 8.144801692310786e-08, "loss": 0.0005, "step": 18365 }, { "epoch": 4.178839590443686, "grad_norm": 0.06391689422821839, "learning_rate": 8.140391773802027e-08, "loss": 0.0002, "step": 18366 }, { "epoch": 4.179067121729238, "grad_norm": 0.16672541593240145, "learning_rate": 8.135982966304107e-08, "loss": 0.0006, "step": 18367 }, { "epoch": 4.179294653014789, "grad_norm": 0.5180533725029458, "learning_rate": 8.131575269907134e-08, "loss": 0.0036, "step": 18368 }, { "epoch": 4.179522184300342, "grad_norm": 0.48055751740521546, "learning_rate": 8.127168684701204e-08, "loss": 0.0068, "step": 18369 }, { "epoch": 4.179749715585893, "grad_norm": 0.251618672216388, "learning_rate": 8.12276321077636e-08, "loss": 0.0018, "step": 18370 }, { "epoch": 4.179977246871445, "grad_norm": 0.07148641160229705, "learning_rate": 8.118358848222637e-08, "loss": 0.0002, "step": 18371 }, { "epoch": 4.180204778156996, "grad_norm": 0.23179389050195243, "learning_rate": 8.113955597130082e-08, "loss": 0.0013, "step": 18372 }, { "epoch": 4.180432309442549, "grad_norm": 0.04109097935532419, "learning_rate": 8.10955345758866e-08, "loss": 0.0001, "step": 18373 }, { "epoch": 4.1806598407281, "grad_norm": 0.0773037639789068, "learning_rate": 8.105152429688346e-08, "loss": 0.0002, "step": 18374 }, { "epoch": 4.180887372013652, "grad_norm": 0.7223343114594437, "learning_rate": 8.100752513519105e-08, "loss": 0.0028, "step": 18375 }, { "epoch": 4.181114903299203, "grad_norm": 0.32212938563379634, "learning_rate": 8.096353709170863e-08, "loss": 0.0014, "step": 18376 }, { "epoch": 4.181342434584756, "grad_norm": 0.0865151833819404, "learning_rate": 8.091956016733515e-08, "loss": 0.0002, "step": 18377 }, { "epoch": 4.181569965870307, "grad_norm": 1.6657149200938335, "learning_rate": 8.087559436296931e-08, "loss": 0.0074, "step": 18378 }, { "epoch": 4.181797497155859, "grad_norm": 0.4483586734860929, "learning_rate": 8.083163967950995e-08, "loss": 0.0041, "step": 18379 }, { "epoch": 4.18202502844141, "grad_norm": 0.783734790209051, "learning_rate": 8.078769611785519e-08, "loss": 0.0072, "step": 18380 }, { "epoch": 4.182252559726963, "grad_norm": 0.04952931101731601, "learning_rate": 8.074376367890317e-08, "loss": 0.0002, "step": 18381 }, { "epoch": 4.182480091012514, "grad_norm": 0.018375902046900035, "learning_rate": 8.069984236355202e-08, "loss": 0.0001, "step": 18382 }, { "epoch": 4.182707622298066, "grad_norm": 0.2677418689918755, "learning_rate": 8.065593217269907e-08, "loss": 0.0027, "step": 18383 }, { "epoch": 4.1829351535836174, "grad_norm": 0.22518996331380398, "learning_rate": 8.061203310724213e-08, "loss": 0.0012, "step": 18384 }, { "epoch": 4.18316268486917, "grad_norm": 0.3045908659413195, "learning_rate": 8.056814516807808e-08, "loss": 0.001, "step": 18385 }, { "epoch": 4.183390216154721, "grad_norm": 0.48775733509684227, "learning_rate": 8.052426835610424e-08, "loss": 0.0059, "step": 18386 }, { "epoch": 4.183617747440273, "grad_norm": 0.1841746511806889, "learning_rate": 8.048040267221703e-08, "loss": 0.0015, "step": 18387 }, { "epoch": 4.1838452787258245, "grad_norm": 0.14179992734975896, "learning_rate": 8.043654811731312e-08, "loss": 0.0006, "step": 18388 }, { "epoch": 4.184072810011377, "grad_norm": 0.21875985692990474, "learning_rate": 8.039270469228908e-08, "loss": 0.0009, "step": 18389 }, { "epoch": 4.184300341296928, "grad_norm": 0.6082572507679145, "learning_rate": 8.03488723980405e-08, "loss": 0.0039, "step": 18390 }, { "epoch": 4.18452787258248, "grad_norm": 0.1360719847718651, "learning_rate": 8.030505123546369e-08, "loss": 0.0004, "step": 18391 }, { "epoch": 4.1847554038680315, "grad_norm": 0.20124656087865128, "learning_rate": 8.02612412054539e-08, "loss": 0.0008, "step": 18392 }, { "epoch": 4.184982935153584, "grad_norm": 0.8133501209785721, "learning_rate": 8.021744230890687e-08, "loss": 0.0042, "step": 18393 }, { "epoch": 4.185210466439135, "grad_norm": 0.33682320446494846, "learning_rate": 8.017365454671744e-08, "loss": 0.0013, "step": 18394 }, { "epoch": 4.185437997724687, "grad_norm": 0.23689381550038166, "learning_rate": 8.012987791978074e-08, "loss": 0.0011, "step": 18395 }, { "epoch": 4.1856655290102385, "grad_norm": 0.38117683527455, "learning_rate": 8.008611242899151e-08, "loss": 0.0012, "step": 18396 }, { "epoch": 4.185893060295791, "grad_norm": 0.2723572340009264, "learning_rate": 8.004235807524414e-08, "loss": 0.001, "step": 18397 }, { "epoch": 4.186120591581343, "grad_norm": 0.23225975523882414, "learning_rate": 7.999861485943297e-08, "loss": 0.0013, "step": 18398 }, { "epoch": 4.186348122866894, "grad_norm": 0.025132087503166502, "learning_rate": 7.995488278245194e-08, "loss": 0.0001, "step": 18399 }, { "epoch": 4.186575654152446, "grad_norm": 0.05755023131025612, "learning_rate": 7.991116184519497e-08, "loss": 0.0002, "step": 18400 }, { "epoch": 4.186803185437998, "grad_norm": 0.6091427875633427, "learning_rate": 7.986745204855546e-08, "loss": 0.0063, "step": 18401 }, { "epoch": 4.18703071672355, "grad_norm": 0.45756139905254417, "learning_rate": 7.982375339342686e-08, "loss": 0.0031, "step": 18402 }, { "epoch": 4.187258248009101, "grad_norm": 0.1432956568889087, "learning_rate": 7.978006588070248e-08, "loss": 0.0003, "step": 18403 }, { "epoch": 4.187485779294653, "grad_norm": 1.0791367050911784, "learning_rate": 7.973638951127489e-08, "loss": 0.0093, "step": 18404 }, { "epoch": 4.187713310580205, "grad_norm": 0.9289476193767413, "learning_rate": 7.969272428603694e-08, "loss": 0.0035, "step": 18405 }, { "epoch": 4.187940841865757, "grad_norm": 0.29564488292469865, "learning_rate": 7.964907020588094e-08, "loss": 0.0014, "step": 18406 }, { "epoch": 4.188168373151308, "grad_norm": 0.5809540772670512, "learning_rate": 7.960542727169937e-08, "loss": 0.0026, "step": 18407 }, { "epoch": 4.18839590443686, "grad_norm": 1.2908012355953704, "learning_rate": 7.956179548438379e-08, "loss": 0.0025, "step": 18408 }, { "epoch": 4.188623435722412, "grad_norm": 0.839743638227453, "learning_rate": 7.951817484482629e-08, "loss": 0.0108, "step": 18409 }, { "epoch": 4.188850967007964, "grad_norm": 0.29535422290283886, "learning_rate": 7.947456535391834e-08, "loss": 0.0013, "step": 18410 }, { "epoch": 4.189078498293515, "grad_norm": 0.06838595543587202, "learning_rate": 7.943096701255114e-08, "loss": 0.0002, "step": 18411 }, { "epoch": 4.189306029579067, "grad_norm": 0.1427789488234456, "learning_rate": 7.93873798216159e-08, "loss": 0.0007, "step": 18412 }, { "epoch": 4.189533560864619, "grad_norm": 0.4650224601159173, "learning_rate": 7.934380378200324e-08, "loss": 0.0035, "step": 18413 }, { "epoch": 4.189761092150171, "grad_norm": 1.3045991541560469, "learning_rate": 7.930023889460403e-08, "loss": 0.0094, "step": 18414 }, { "epoch": 4.189988623435722, "grad_norm": 0.06063199925453249, "learning_rate": 7.925668516030841e-08, "loss": 0.0003, "step": 18415 }, { "epoch": 4.190216154721274, "grad_norm": 1.1048496903697977, "learning_rate": 7.921314258000669e-08, "loss": 0.0103, "step": 18416 }, { "epoch": 4.190443686006826, "grad_norm": 0.5489194301925884, "learning_rate": 7.916961115458887e-08, "loss": 0.0031, "step": 18417 }, { "epoch": 4.190671217292378, "grad_norm": 0.1975307236549833, "learning_rate": 7.912609088494442e-08, "loss": 0.001, "step": 18418 }, { "epoch": 4.190898748577929, "grad_norm": 0.16775293691086374, "learning_rate": 7.908258177196309e-08, "loss": 0.0004, "step": 18419 }, { "epoch": 4.191126279863481, "grad_norm": 0.3451288490043098, "learning_rate": 7.903908381653405e-08, "loss": 0.0053, "step": 18420 }, { "epoch": 4.191353811149033, "grad_norm": 0.3456397014760861, "learning_rate": 7.899559701954606e-08, "loss": 0.001, "step": 18421 }, { "epoch": 4.191581342434585, "grad_norm": 0.5524441625805515, "learning_rate": 7.895212138188813e-08, "loss": 0.0037, "step": 18422 }, { "epoch": 4.191808873720136, "grad_norm": 0.24130838052326525, "learning_rate": 7.890865690444879e-08, "loss": 0.0008, "step": 18423 }, { "epoch": 4.192036405005688, "grad_norm": 0.25871680060949265, "learning_rate": 7.886520358811649e-08, "loss": 0.0006, "step": 18424 }, { "epoch": 4.19226393629124, "grad_norm": 0.06545225409846558, "learning_rate": 7.882176143377912e-08, "loss": 0.0002, "step": 18425 }, { "epoch": 4.192491467576792, "grad_norm": 0.38544137282484553, "learning_rate": 7.87783304423248e-08, "loss": 0.0015, "step": 18426 }, { "epoch": 4.192718998862343, "grad_norm": 0.2216098764435882, "learning_rate": 7.873491061464108e-08, "loss": 0.0006, "step": 18427 }, { "epoch": 4.192946530147895, "grad_norm": 0.44741602056030344, "learning_rate": 7.869150195161504e-08, "loss": 0.003, "step": 18428 }, { "epoch": 4.193174061433447, "grad_norm": 0.20339026813419844, "learning_rate": 7.86481044541345e-08, "loss": 0.0007, "step": 18429 }, { "epoch": 4.193401592718999, "grad_norm": 0.6233365397048101, "learning_rate": 7.86047181230859e-08, "loss": 0.0021, "step": 18430 }, { "epoch": 4.19362912400455, "grad_norm": 0.19479193794846, "learning_rate": 7.856134295935638e-08, "loss": 0.001, "step": 18431 }, { "epoch": 4.193856655290102, "grad_norm": 0.09018626814041711, "learning_rate": 7.851797896383212e-08, "loss": 0.0002, "step": 18432 }, { "epoch": 4.194084186575654, "grad_norm": 0.2688939233709731, "learning_rate": 7.847462613739962e-08, "loss": 0.0017, "step": 18433 }, { "epoch": 4.194311717861206, "grad_norm": 0.44933475400830053, "learning_rate": 7.843128448094477e-08, "loss": 0.0017, "step": 18434 }, { "epoch": 4.194539249146757, "grad_norm": 0.49446237602755405, "learning_rate": 7.838795399535346e-08, "loss": 0.0034, "step": 18435 }, { "epoch": 4.194766780432309, "grad_norm": 0.08526230291857084, "learning_rate": 7.834463468151143e-08, "loss": 0.0005, "step": 18436 }, { "epoch": 4.194994311717862, "grad_norm": 0.1230770753370024, "learning_rate": 7.830132654030376e-08, "loss": 0.0003, "step": 18437 }, { "epoch": 4.195221843003413, "grad_norm": 0.09297241999117438, "learning_rate": 7.82580295726159e-08, "loss": 0.0002, "step": 18438 }, { "epoch": 4.195449374288965, "grad_norm": 0.08523864742212596, "learning_rate": 7.821474377933257e-08, "loss": 0.0002, "step": 18439 }, { "epoch": 4.1956769055745164, "grad_norm": 0.17517132611458142, "learning_rate": 7.81714691613384e-08, "loss": 0.0006, "step": 18440 }, { "epoch": 4.195904436860069, "grad_norm": 0.09061350985807445, "learning_rate": 7.81282057195179e-08, "loss": 0.0005, "step": 18441 }, { "epoch": 4.19613196814562, "grad_norm": 0.4081971856581929, "learning_rate": 7.808495345475537e-08, "loss": 0.0032, "step": 18442 }, { "epoch": 4.196359499431172, "grad_norm": 0.24217883540694163, "learning_rate": 7.80417123679348e-08, "loss": 0.0014, "step": 18443 }, { "epoch": 4.1965870307167235, "grad_norm": 0.5178637909615642, "learning_rate": 7.799848245993988e-08, "loss": 0.0028, "step": 18444 }, { "epoch": 4.196814562002276, "grad_norm": 0.10057024646956746, "learning_rate": 7.795526373165426e-08, "loss": 0.0006, "step": 18445 }, { "epoch": 4.197042093287827, "grad_norm": 0.4549293377209386, "learning_rate": 7.79120561839613e-08, "loss": 0.0049, "step": 18446 }, { "epoch": 4.197269624573379, "grad_norm": 0.23625297061709016, "learning_rate": 7.786885981774371e-08, "loss": 0.0011, "step": 18447 }, { "epoch": 4.1974971558589305, "grad_norm": 0.31873333064736203, "learning_rate": 7.782567463388464e-08, "loss": 0.0019, "step": 18448 }, { "epoch": 4.197724687144483, "grad_norm": 0.03115501920109513, "learning_rate": 7.778250063326671e-08, "loss": 0.0001, "step": 18449 }, { "epoch": 4.197952218430034, "grad_norm": 0.09373770637107975, "learning_rate": 7.773933781677241e-08, "loss": 0.0003, "step": 18450 }, { "epoch": 4.198179749715586, "grad_norm": 0.22612499116116064, "learning_rate": 7.769618618528374e-08, "loss": 0.001, "step": 18451 }, { "epoch": 4.1984072810011375, "grad_norm": 0.5222167367035349, "learning_rate": 7.765304573968256e-08, "loss": 0.0032, "step": 18452 }, { "epoch": 4.19863481228669, "grad_norm": 0.3592796018033838, "learning_rate": 7.760991648085088e-08, "loss": 0.0014, "step": 18453 }, { "epoch": 4.198862343572241, "grad_norm": 0.0747950508548136, "learning_rate": 7.756679840966987e-08, "loss": 0.0003, "step": 18454 }, { "epoch": 4.199089874857793, "grad_norm": 0.19846419602967408, "learning_rate": 7.75236915270209e-08, "loss": 0.0011, "step": 18455 }, { "epoch": 4.1993174061433445, "grad_norm": 0.30418439493309996, "learning_rate": 7.748059583378508e-08, "loss": 0.0013, "step": 18456 }, { "epoch": 4.199544937428897, "grad_norm": 0.0658789284406926, "learning_rate": 7.743751133084322e-08, "loss": 0.0004, "step": 18457 }, { "epoch": 4.199772468714448, "grad_norm": 0.9608760004727278, "learning_rate": 7.739443801907582e-08, "loss": 0.0049, "step": 18458 }, { "epoch": 4.2, "grad_norm": 0.8158519209436847, "learning_rate": 7.735137589936305e-08, "loss": 0.0072, "step": 18459 }, { "epoch": 4.2002275312855515, "grad_norm": 0.08844630355761215, "learning_rate": 7.73083249725854e-08, "loss": 0.0003, "step": 18460 }, { "epoch": 4.200455062571104, "grad_norm": 0.2646648767949408, "learning_rate": 7.726528523962231e-08, "loss": 0.0019, "step": 18461 }, { "epoch": 4.200682593856655, "grad_norm": 0.4139961297514192, "learning_rate": 7.722225670135371e-08, "loss": 0.0018, "step": 18462 }, { "epoch": 4.200910125142207, "grad_norm": 0.16208566630584267, "learning_rate": 7.717923935865901e-08, "loss": 0.0005, "step": 18463 }, { "epoch": 4.2011376564277585, "grad_norm": 0.39184662152626065, "learning_rate": 7.713623321241745e-08, "loss": 0.0032, "step": 18464 }, { "epoch": 4.201365187713311, "grad_norm": 0.3045028863706003, "learning_rate": 7.709323826350798e-08, "loss": 0.0028, "step": 18465 }, { "epoch": 4.201592718998862, "grad_norm": 0.06239393876943588, "learning_rate": 7.705025451280906e-08, "loss": 0.0003, "step": 18466 }, { "epoch": 4.201820250284414, "grad_norm": 0.497422141355487, "learning_rate": 7.700728196119958e-08, "loss": 0.0019, "step": 18467 }, { "epoch": 4.2020477815699655, "grad_norm": 0.45028376203612386, "learning_rate": 7.696432060955758e-08, "loss": 0.0012, "step": 18468 }, { "epoch": 4.202275312855518, "grad_norm": 0.28840531390664315, "learning_rate": 7.692137045876111e-08, "loss": 0.0009, "step": 18469 }, { "epoch": 4.202502844141069, "grad_norm": 0.14645153615750556, "learning_rate": 7.687843150968828e-08, "loss": 0.0004, "step": 18470 }, { "epoch": 4.202730375426621, "grad_norm": 0.13022237889199756, "learning_rate": 7.683550376321624e-08, "loss": 0.0003, "step": 18471 }, { "epoch": 4.2029579067121725, "grad_norm": 0.0797220952868772, "learning_rate": 7.679258722022277e-08, "loss": 0.0002, "step": 18472 }, { "epoch": 4.203185437997725, "grad_norm": 0.034807662837372605, "learning_rate": 7.674968188158473e-08, "loss": 0.0001, "step": 18473 }, { "epoch": 4.203412969283276, "grad_norm": 0.6462932536935354, "learning_rate": 7.67067877481792e-08, "loss": 0.0023, "step": 18474 }, { "epoch": 4.203640500568828, "grad_norm": 0.15662856940851963, "learning_rate": 7.666390482088268e-08, "loss": 0.0005, "step": 18475 }, { "epoch": 4.20386803185438, "grad_norm": 0.1903736522399455, "learning_rate": 7.662103310057165e-08, "loss": 0.0004, "step": 18476 }, { "epoch": 4.204095563139932, "grad_norm": 0.2200483943475834, "learning_rate": 7.657817258812261e-08, "loss": 0.0012, "step": 18477 }, { "epoch": 4.204323094425484, "grad_norm": 0.07379222319221407, "learning_rate": 7.653532328441112e-08, "loss": 0.0002, "step": 18478 }, { "epoch": 4.204550625711035, "grad_norm": 0.16651806770589395, "learning_rate": 7.649248519031337e-08, "loss": 0.0006, "step": 18479 }, { "epoch": 4.204778156996587, "grad_norm": 0.15617978391952606, "learning_rate": 7.644965830670447e-08, "loss": 0.0009, "step": 18480 }, { "epoch": 4.205005688282139, "grad_norm": 0.21637147616757893, "learning_rate": 7.64068426344601e-08, "loss": 0.0009, "step": 18481 }, { "epoch": 4.205233219567691, "grad_norm": 0.12773555824903057, "learning_rate": 7.636403817445508e-08, "loss": 0.0006, "step": 18482 }, { "epoch": 4.205460750853242, "grad_norm": 0.49357556161706584, "learning_rate": 7.632124492756431e-08, "loss": 0.0074, "step": 18483 }, { "epoch": 4.205688282138794, "grad_norm": 0.13026072146732096, "learning_rate": 7.627846289466251e-08, "loss": 0.0003, "step": 18484 }, { "epoch": 4.205915813424346, "grad_norm": 0.5703781419472432, "learning_rate": 7.623569207662391e-08, "loss": 0.0065, "step": 18485 }, { "epoch": 4.206143344709898, "grad_norm": 1.374198264448146, "learning_rate": 7.61929324743229e-08, "loss": 0.005, "step": 18486 }, { "epoch": 4.206370875995449, "grad_norm": 0.23405887272300988, "learning_rate": 7.615018408863306e-08, "loss": 0.0011, "step": 18487 }, { "epoch": 4.206598407281001, "grad_norm": 0.3642580809042374, "learning_rate": 7.610744692042836e-08, "loss": 0.0017, "step": 18488 }, { "epoch": 4.206825938566553, "grad_norm": 0.3296503177139092, "learning_rate": 7.606472097058233e-08, "loss": 0.0009, "step": 18489 }, { "epoch": 4.207053469852105, "grad_norm": 0.1711607967538353, "learning_rate": 7.602200623996789e-08, "loss": 0.0009, "step": 18490 }, { "epoch": 4.207281001137656, "grad_norm": 0.4108548879824662, "learning_rate": 7.597930272945836e-08, "loss": 0.0016, "step": 18491 }, { "epoch": 4.207508532423208, "grad_norm": 0.22056333451411475, "learning_rate": 7.593661043992631e-08, "loss": 0.0006, "step": 18492 }, { "epoch": 4.20773606370876, "grad_norm": 0.33928189048987195, "learning_rate": 7.589392937224452e-08, "loss": 0.0036, "step": 18493 }, { "epoch": 4.207963594994312, "grad_norm": 0.5237158360873198, "learning_rate": 7.585125952728499e-08, "loss": 0.0022, "step": 18494 }, { "epoch": 4.208191126279863, "grad_norm": 0.4068130712491468, "learning_rate": 7.580860090592002e-08, "loss": 0.0032, "step": 18495 }, { "epoch": 4.208418657565415, "grad_norm": 0.4318865694375016, "learning_rate": 7.576595350902161e-08, "loss": 0.0023, "step": 18496 }, { "epoch": 4.208646188850967, "grad_norm": 0.31518814704474424, "learning_rate": 7.572331733746108e-08, "loss": 0.0017, "step": 18497 }, { "epoch": 4.208873720136519, "grad_norm": 0.10108604604106912, "learning_rate": 7.568069239211009e-08, "loss": 0.0004, "step": 18498 }, { "epoch": 4.20910125142207, "grad_norm": 0.503568712991137, "learning_rate": 7.563807867383963e-08, "loss": 0.0025, "step": 18499 }, { "epoch": 4.2093287827076225, "grad_norm": 0.1903692363720478, "learning_rate": 7.559547618352085e-08, "loss": 0.0007, "step": 18500 }, { "epoch": 4.209556313993174, "grad_norm": 0.1767102134814901, "learning_rate": 7.555288492202426e-08, "loss": 0.0003, "step": 18501 }, { "epoch": 4.209783845278726, "grad_norm": 0.6111390536918575, "learning_rate": 7.551030489022039e-08, "loss": 0.0063, "step": 18502 }, { "epoch": 4.210011376564277, "grad_norm": 0.29024898147827, "learning_rate": 7.546773608897967e-08, "loss": 0.0012, "step": 18503 }, { "epoch": 4.2102389078498295, "grad_norm": 0.021289064555329824, "learning_rate": 7.542517851917189e-08, "loss": 0.0001, "step": 18504 }, { "epoch": 4.210466439135381, "grad_norm": 0.023003831461754647, "learning_rate": 7.538263218166711e-08, "loss": 0.0, "step": 18505 }, { "epoch": 4.210693970420933, "grad_norm": 0.12868076895059924, "learning_rate": 7.534009707733464e-08, "loss": 0.0007, "step": 18506 }, { "epoch": 4.210921501706484, "grad_norm": 0.24948667478732497, "learning_rate": 7.529757320704399e-08, "loss": 0.0009, "step": 18507 }, { "epoch": 4.2111490329920365, "grad_norm": 0.18883154874243102, "learning_rate": 7.525506057166412e-08, "loss": 0.0003, "step": 18508 }, { "epoch": 4.211376564277588, "grad_norm": 0.41688570729464636, "learning_rate": 7.5212559172064e-08, "loss": 0.001, "step": 18509 }, { "epoch": 4.21160409556314, "grad_norm": 0.8957226117380468, "learning_rate": 7.517006900911242e-08, "loss": 0.0062, "step": 18510 }, { "epoch": 4.211831626848691, "grad_norm": 0.5490148922782432, "learning_rate": 7.512759008367753e-08, "loss": 0.0027, "step": 18511 }, { "epoch": 4.2120591581342435, "grad_norm": 0.13050805552376224, "learning_rate": 7.508512239662778e-08, "loss": 0.0004, "step": 18512 }, { "epoch": 4.212286689419795, "grad_norm": 0.06360521141607596, "learning_rate": 7.504266594883083e-08, "loss": 0.0001, "step": 18513 }, { "epoch": 4.212514220705347, "grad_norm": 0.42347135785122336, "learning_rate": 7.500022074115481e-08, "loss": 0.0011, "step": 18514 }, { "epoch": 4.212741751990899, "grad_norm": 0.38571712847226175, "learning_rate": 7.495778677446677e-08, "loss": 0.001, "step": 18515 }, { "epoch": 4.2129692832764505, "grad_norm": 0.18565551612145845, "learning_rate": 7.491536404963432e-08, "loss": 0.0006, "step": 18516 }, { "epoch": 4.213196814562003, "grad_norm": 0.08877232743653792, "learning_rate": 7.487295256752445e-08, "loss": 0.0003, "step": 18517 }, { "epoch": 4.213424345847554, "grad_norm": 0.5714151576793187, "learning_rate": 7.483055232900383e-08, "loss": 0.0045, "step": 18518 }, { "epoch": 4.213651877133106, "grad_norm": 0.3895468729040816, "learning_rate": 7.478816333493926e-08, "loss": 0.0034, "step": 18519 }, { "epoch": 4.2138794084186575, "grad_norm": 0.09852550404977482, "learning_rate": 7.474578558619681e-08, "loss": 0.0004, "step": 18520 }, { "epoch": 4.21410693970421, "grad_norm": 0.12827963203925866, "learning_rate": 7.470341908364293e-08, "loss": 0.0006, "step": 18521 }, { "epoch": 4.214334470989761, "grad_norm": 0.2706091722441168, "learning_rate": 7.466106382814316e-08, "loss": 0.0013, "step": 18522 }, { "epoch": 4.214562002275313, "grad_norm": 0.18945166837441776, "learning_rate": 7.461871982056334e-08, "loss": 0.001, "step": 18523 }, { "epoch": 4.2147895335608645, "grad_norm": 0.10302794529646947, "learning_rate": 7.457638706176912e-08, "loss": 0.0004, "step": 18524 }, { "epoch": 4.215017064846417, "grad_norm": 0.5468068903268781, "learning_rate": 7.453406555262533e-08, "loss": 0.0018, "step": 18525 }, { "epoch": 4.215244596131968, "grad_norm": 0.25092955995494415, "learning_rate": 7.449175529399721e-08, "loss": 0.0006, "step": 18526 }, { "epoch": 4.21547212741752, "grad_norm": 0.9025410183112689, "learning_rate": 7.44494562867494e-08, "loss": 0.0034, "step": 18527 }, { "epoch": 4.2156996587030715, "grad_norm": 0.21678242143163934, "learning_rate": 7.440716853174636e-08, "loss": 0.0009, "step": 18528 }, { "epoch": 4.215927189988624, "grad_norm": 0.0696023751589579, "learning_rate": 7.436489202985238e-08, "loss": 0.0003, "step": 18529 }, { "epoch": 4.216154721274175, "grad_norm": 0.27815998624947064, "learning_rate": 7.432262678193157e-08, "loss": 0.0015, "step": 18530 }, { "epoch": 4.216382252559727, "grad_norm": 0.05077731506903936, "learning_rate": 7.428037278884782e-08, "loss": 0.0002, "step": 18531 }, { "epoch": 4.2166097838452785, "grad_norm": 0.4386227079014406, "learning_rate": 7.423813005146461e-08, "loss": 0.0012, "step": 18532 }, { "epoch": 4.216837315130831, "grad_norm": 0.027140206352816287, "learning_rate": 7.419589857064549e-08, "loss": 0.0001, "step": 18533 }, { "epoch": 4.217064846416382, "grad_norm": 0.3819473271643398, "learning_rate": 7.415367834725337e-08, "loss": 0.0025, "step": 18534 }, { "epoch": 4.217292377701934, "grad_norm": 0.34275155323314044, "learning_rate": 7.411146938215117e-08, "loss": 0.0023, "step": 18535 }, { "epoch": 4.2175199089874855, "grad_norm": 0.8181790921063117, "learning_rate": 7.406927167620163e-08, "loss": 0.0096, "step": 18536 }, { "epoch": 4.217747440273038, "grad_norm": 0.32597832164811913, "learning_rate": 7.402708523026713e-08, "loss": 0.0033, "step": 18537 }, { "epoch": 4.217974971558589, "grad_norm": 0.6115291699833507, "learning_rate": 7.398491004521016e-08, "loss": 0.0005, "step": 18538 }, { "epoch": 4.218202502844141, "grad_norm": 0.08426768285464843, "learning_rate": 7.394274612189236e-08, "loss": 0.0002, "step": 18539 }, { "epoch": 4.2184300341296925, "grad_norm": 0.1611737885456848, "learning_rate": 7.39005934611757e-08, "loss": 0.0005, "step": 18540 }, { "epoch": 4.218657565415245, "grad_norm": 0.5368197760991831, "learning_rate": 7.385845206392163e-08, "loss": 0.0047, "step": 18541 }, { "epoch": 4.218885096700796, "grad_norm": 0.15503999919555386, "learning_rate": 7.381632193099133e-08, "loss": 0.0006, "step": 18542 }, { "epoch": 4.219112627986348, "grad_norm": 0.14479663734738724, "learning_rate": 7.377420306324594e-08, "loss": 0.0005, "step": 18543 }, { "epoch": 4.2193401592718995, "grad_norm": 0.291677551038152, "learning_rate": 7.373209546154628e-08, "loss": 0.003, "step": 18544 }, { "epoch": 4.219567690557452, "grad_norm": 0.030604040052411026, "learning_rate": 7.368999912675319e-08, "loss": 0.0001, "step": 18545 }, { "epoch": 4.219795221843003, "grad_norm": 2.601285432790258, "learning_rate": 7.364791405972683e-08, "loss": 0.0098, "step": 18546 }, { "epoch": 4.220022753128555, "grad_norm": 0.3023147100724286, "learning_rate": 7.360584026132718e-08, "loss": 0.0013, "step": 18547 }, { "epoch": 4.2202502844141065, "grad_norm": 0.07787039505766181, "learning_rate": 7.356377773241448e-08, "loss": 0.0003, "step": 18548 }, { "epoch": 4.220477815699659, "grad_norm": 0.4127015827188447, "learning_rate": 7.352172647384797e-08, "loss": 0.0056, "step": 18549 }, { "epoch": 4.22070534698521, "grad_norm": 0.19408558492416722, "learning_rate": 7.347968648648764e-08, "loss": 0.0009, "step": 18550 }, { "epoch": 4.220932878270762, "grad_norm": 0.37907763353798485, "learning_rate": 7.34376577711924e-08, "loss": 0.0014, "step": 18551 }, { "epoch": 4.2211604095563136, "grad_norm": 0.4635215178099999, "learning_rate": 7.339564032882135e-08, "loss": 0.0045, "step": 18552 }, { "epoch": 4.221387940841866, "grad_norm": 0.4159439016299153, "learning_rate": 7.335363416023325e-08, "loss": 0.0027, "step": 18553 }, { "epoch": 4.221615472127418, "grad_norm": 0.07716389021211853, "learning_rate": 7.331163926628644e-08, "loss": 0.0001, "step": 18554 }, { "epoch": 4.221843003412969, "grad_norm": 0.19684347013732273, "learning_rate": 7.326965564783933e-08, "loss": 0.0012, "step": 18555 }, { "epoch": 4.2220705346985214, "grad_norm": 0.6460528826135279, "learning_rate": 7.322768330574999e-08, "loss": 0.0018, "step": 18556 }, { "epoch": 4.222298065984073, "grad_norm": 0.3572154339781666, "learning_rate": 7.318572224087648e-08, "loss": 0.0028, "step": 18557 }, { "epoch": 4.222525597269625, "grad_norm": 0.33504726615210156, "learning_rate": 7.314377245407615e-08, "loss": 0.0023, "step": 18558 }, { "epoch": 4.222753128555176, "grad_norm": 0.3753584129199749, "learning_rate": 7.310183394620634e-08, "loss": 0.0013, "step": 18559 }, { "epoch": 4.2229806598407285, "grad_norm": 0.18125957013749222, "learning_rate": 7.305990671812438e-08, "loss": 0.0011, "step": 18560 }, { "epoch": 4.22320819112628, "grad_norm": 1.728812317174097, "learning_rate": 7.301799077068697e-08, "loss": 0.0173, "step": 18561 }, { "epoch": 4.223435722411832, "grad_norm": 0.10585139537101981, "learning_rate": 7.297608610475092e-08, "loss": 0.0002, "step": 18562 }, { "epoch": 4.223663253697383, "grad_norm": 0.06557207077306736, "learning_rate": 7.29341927211727e-08, "loss": 0.0002, "step": 18563 }, { "epoch": 4.2238907849829355, "grad_norm": 0.21819966105482513, "learning_rate": 7.289231062080869e-08, "loss": 0.0009, "step": 18564 }, { "epoch": 4.224118316268487, "grad_norm": 1.1027135653255296, "learning_rate": 7.285043980451468e-08, "loss": 0.0051, "step": 18565 }, { "epoch": 4.224345847554039, "grad_norm": 1.1458279092171377, "learning_rate": 7.280858027314631e-08, "loss": 0.0036, "step": 18566 }, { "epoch": 4.22457337883959, "grad_norm": 0.30203079773439906, "learning_rate": 7.276673202755942e-08, "loss": 0.0008, "step": 18567 }, { "epoch": 4.2248009101251425, "grad_norm": 0.05953434153280469, "learning_rate": 7.272489506860901e-08, "loss": 0.0003, "step": 18568 }, { "epoch": 4.225028441410694, "grad_norm": 0.21504087534904182, "learning_rate": 7.268306939715038e-08, "loss": 0.0011, "step": 18569 }, { "epoch": 4.225255972696246, "grad_norm": 0.6862780919441045, "learning_rate": 7.264125501403818e-08, "loss": 0.0083, "step": 18570 }, { "epoch": 4.225483503981797, "grad_norm": 0.10641235202135853, "learning_rate": 7.259945192012736e-08, "loss": 0.0005, "step": 18571 }, { "epoch": 4.2257110352673495, "grad_norm": 0.054999105277864904, "learning_rate": 7.255766011627197e-08, "loss": 0.0002, "step": 18572 }, { "epoch": 4.225938566552901, "grad_norm": 0.385966100411385, "learning_rate": 7.251587960332624e-08, "loss": 0.0049, "step": 18573 }, { "epoch": 4.226166097838453, "grad_norm": 0.664666387449763, "learning_rate": 7.247411038214413e-08, "loss": 0.0069, "step": 18574 }, { "epoch": 4.226393629124004, "grad_norm": 0.10234504635143811, "learning_rate": 7.243235245357922e-08, "loss": 0.0007, "step": 18575 }, { "epoch": 4.2266211604095565, "grad_norm": 0.037237987913929085, "learning_rate": 7.239060581848506e-08, "loss": 0.0001, "step": 18576 }, { "epoch": 4.226848691695108, "grad_norm": 0.7456293249758551, "learning_rate": 7.234887047771498e-08, "loss": 0.0011, "step": 18577 }, { "epoch": 4.22707622298066, "grad_norm": 0.5709313112241772, "learning_rate": 7.230714643212171e-08, "loss": 0.0017, "step": 18578 }, { "epoch": 4.227303754266211, "grad_norm": 0.31616488770904216, "learning_rate": 7.226543368255825e-08, "loss": 0.0011, "step": 18579 }, { "epoch": 4.2275312855517635, "grad_norm": 0.2872228322029813, "learning_rate": 7.222373222987695e-08, "loss": 0.0006, "step": 18580 }, { "epoch": 4.227758816837315, "grad_norm": 0.44590516167685396, "learning_rate": 7.218204207493036e-08, "loss": 0.0033, "step": 18581 }, { "epoch": 4.227986348122867, "grad_norm": 1.01064898110761, "learning_rate": 7.214036321857021e-08, "loss": 0.0019, "step": 18582 }, { "epoch": 4.228213879408418, "grad_norm": 0.12077523475613837, "learning_rate": 7.20986956616486e-08, "loss": 0.0002, "step": 18583 }, { "epoch": 4.2284414106939705, "grad_norm": 0.1662409509616041, "learning_rate": 7.205703940501713e-08, "loss": 0.0003, "step": 18584 }, { "epoch": 4.228668941979522, "grad_norm": 0.23211068450703448, "learning_rate": 7.201539444952694e-08, "loss": 0.0011, "step": 18585 }, { "epoch": 4.228896473265074, "grad_norm": 0.21568707119751324, "learning_rate": 7.197376079602956e-08, "loss": 0.0008, "step": 18586 }, { "epoch": 4.229124004550625, "grad_norm": 1.8659460562790697, "learning_rate": 7.193213844537548e-08, "loss": 0.0211, "step": 18587 }, { "epoch": 4.2293515358361775, "grad_norm": 0.5916711881017038, "learning_rate": 7.189052739841579e-08, "loss": 0.0013, "step": 18588 }, { "epoch": 4.229579067121729, "grad_norm": 0.027655836931163337, "learning_rate": 7.184892765600054e-08, "loss": 0.0001, "step": 18589 }, { "epoch": 4.229806598407281, "grad_norm": 0.17217802023134074, "learning_rate": 7.180733921898025e-08, "loss": 0.0007, "step": 18590 }, { "epoch": 4.230034129692832, "grad_norm": 0.05040215445533061, "learning_rate": 7.176576208820493e-08, "loss": 0.0002, "step": 18591 }, { "epoch": 4.2302616609783845, "grad_norm": 0.14509870814770587, "learning_rate": 7.172419626452403e-08, "loss": 0.0003, "step": 18592 }, { "epoch": 4.230489192263937, "grad_norm": 0.28407486998297865, "learning_rate": 7.168264174878752e-08, "loss": 0.0023, "step": 18593 }, { "epoch": 4.230716723549488, "grad_norm": 0.14212200186243745, "learning_rate": 7.164109854184427e-08, "loss": 0.0003, "step": 18594 }, { "epoch": 4.23094425483504, "grad_norm": 0.4399887456919768, "learning_rate": 7.159956664454363e-08, "loss": 0.0062, "step": 18595 }, { "epoch": 4.2311717861205915, "grad_norm": 0.373145020977256, "learning_rate": 7.155804605773426e-08, "loss": 0.003, "step": 18596 }, { "epoch": 4.231399317406144, "grad_norm": 0.3653122035228161, "learning_rate": 7.151653678226484e-08, "loss": 0.0014, "step": 18597 }, { "epoch": 4.231626848691695, "grad_norm": 1.1547141316202107, "learning_rate": 7.147503881898388e-08, "loss": 0.0054, "step": 18598 }, { "epoch": 4.231854379977247, "grad_norm": 0.3687532963770326, "learning_rate": 7.143355216873927e-08, "loss": 0.0014, "step": 18599 }, { "epoch": 4.2320819112627985, "grad_norm": 0.8173052986193592, "learning_rate": 7.13920768323792e-08, "loss": 0.0079, "step": 18600 }, { "epoch": 4.232309442548351, "grad_norm": 0.1602164960692855, "learning_rate": 7.135061281075107e-08, "loss": 0.0009, "step": 18601 }, { "epoch": 4.232536973833902, "grad_norm": 0.3869481169911655, "learning_rate": 7.130916010470252e-08, "loss": 0.0017, "step": 18602 }, { "epoch": 4.232764505119454, "grad_norm": 0.42638300473009266, "learning_rate": 7.126771871508058e-08, "loss": 0.002, "step": 18603 }, { "epoch": 4.2329920364050055, "grad_norm": 0.561100666420415, "learning_rate": 7.122628864273236e-08, "loss": 0.0021, "step": 18604 }, { "epoch": 4.233219567690558, "grad_norm": 0.27687604147129286, "learning_rate": 7.118486988850482e-08, "loss": 0.0012, "step": 18605 }, { "epoch": 4.233447098976109, "grad_norm": 0.08929208474228649, "learning_rate": 7.114346245324407e-08, "loss": 0.0001, "step": 18606 }, { "epoch": 4.233674630261661, "grad_norm": 0.28037508482000845, "learning_rate": 7.110206633779676e-08, "loss": 0.0017, "step": 18607 }, { "epoch": 4.2339021615472126, "grad_norm": 0.44673097159971714, "learning_rate": 7.106068154300864e-08, "loss": 0.0036, "step": 18608 }, { "epoch": 4.234129692832765, "grad_norm": 0.15438031858006507, "learning_rate": 7.101930806972572e-08, "loss": 0.0004, "step": 18609 }, { "epoch": 4.234357224118316, "grad_norm": 0.1860338622315163, "learning_rate": 7.09779459187937e-08, "loss": 0.0003, "step": 18610 }, { "epoch": 4.234584755403868, "grad_norm": 0.4357412539013417, "learning_rate": 7.093659509105768e-08, "loss": 0.0025, "step": 18611 }, { "epoch": 4.23481228668942, "grad_norm": 0.3904098881141291, "learning_rate": 7.089525558736307e-08, "loss": 0.0017, "step": 18612 }, { "epoch": 4.235039817974972, "grad_norm": 0.5556953985528764, "learning_rate": 7.08539274085545e-08, "loss": 0.0031, "step": 18613 }, { "epoch": 4.235267349260523, "grad_norm": 0.7490880985991735, "learning_rate": 7.081261055547697e-08, "loss": 0.0024, "step": 18614 }, { "epoch": 4.235494880546075, "grad_norm": 0.22460555901462373, "learning_rate": 7.077130502897455e-08, "loss": 0.0004, "step": 18615 }, { "epoch": 4.235722411831627, "grad_norm": 0.2623364423013093, "learning_rate": 7.073001082989167e-08, "loss": 0.0011, "step": 18616 }, { "epoch": 4.235949943117179, "grad_norm": 0.20165360291141338, "learning_rate": 7.06887279590724e-08, "loss": 0.0011, "step": 18617 }, { "epoch": 4.23617747440273, "grad_norm": 0.3702970574059814, "learning_rate": 7.064745641736017e-08, "loss": 0.0023, "step": 18618 }, { "epoch": 4.236405005688282, "grad_norm": 0.359141542359789, "learning_rate": 7.060619620559886e-08, "loss": 0.002, "step": 18619 }, { "epoch": 4.236632536973834, "grad_norm": 0.7599596996491259, "learning_rate": 7.056494732463143e-08, "loss": 0.0051, "step": 18620 }, { "epoch": 4.236860068259386, "grad_norm": 0.08118351050769967, "learning_rate": 7.052370977530123e-08, "loss": 0.0003, "step": 18621 }, { "epoch": 4.237087599544937, "grad_norm": 0.5891979729393628, "learning_rate": 7.048248355845077e-08, "loss": 0.0084, "step": 18622 }, { "epoch": 4.237315130830489, "grad_norm": 0.10526907916557117, "learning_rate": 7.044126867492285e-08, "loss": 0.0006, "step": 18623 }, { "epoch": 4.237542662116041, "grad_norm": 0.5586224825576067, "learning_rate": 7.040006512555986e-08, "loss": 0.0024, "step": 18624 }, { "epoch": 4.237770193401593, "grad_norm": 0.3848161353196819, "learning_rate": 7.035887291120369e-08, "loss": 0.0034, "step": 18625 }, { "epoch": 4.237997724687144, "grad_norm": 1.0116352853265786, "learning_rate": 7.031769203269652e-08, "loss": 0.0117, "step": 18626 }, { "epoch": 4.238225255972696, "grad_norm": 0.04066639868827334, "learning_rate": 7.027652249087975e-08, "loss": 0.0002, "step": 18627 }, { "epoch": 4.238452787258248, "grad_norm": 0.26550714665340336, "learning_rate": 7.023536428659502e-08, "loss": 0.0013, "step": 18628 }, { "epoch": 4.2386803185438, "grad_norm": 0.4079016312936561, "learning_rate": 7.019421742068335e-08, "loss": 0.0016, "step": 18629 }, { "epoch": 4.238907849829351, "grad_norm": 0.5186035698455947, "learning_rate": 7.015308189398586e-08, "loss": 0.0022, "step": 18630 }, { "epoch": 4.239135381114903, "grad_norm": 0.46164045600681725, "learning_rate": 7.011195770734321e-08, "loss": 0.0051, "step": 18631 }, { "epoch": 4.2393629124004555, "grad_norm": 0.07496220174934888, "learning_rate": 7.007084486159589e-08, "loss": 0.0003, "step": 18632 }, { "epoch": 4.239590443686007, "grad_norm": 0.29202392821576967, "learning_rate": 7.002974335758431e-08, "loss": 0.0015, "step": 18633 }, { "epoch": 4.239817974971559, "grad_norm": 0.09187954436870571, "learning_rate": 6.998865319614842e-08, "loss": 0.0003, "step": 18634 }, { "epoch": 4.24004550625711, "grad_norm": 0.2770761801184591, "learning_rate": 6.994757437812784e-08, "loss": 0.001, "step": 18635 }, { "epoch": 4.2402730375426625, "grad_norm": 0.16869626153834294, "learning_rate": 6.990650690436231e-08, "loss": 0.0007, "step": 18636 }, { "epoch": 4.240500568828214, "grad_norm": 0.7946452305254027, "learning_rate": 6.986545077569115e-08, "loss": 0.0137, "step": 18637 }, { "epoch": 4.240728100113766, "grad_norm": 0.26868842536413634, "learning_rate": 6.982440599295363e-08, "loss": 0.0026, "step": 18638 }, { "epoch": 4.240955631399317, "grad_norm": 0.0671973245607948, "learning_rate": 6.978337255698839e-08, "loss": 0.0002, "step": 18639 }, { "epoch": 4.2411831626848695, "grad_norm": 0.5938189716569954, "learning_rate": 6.974235046863432e-08, "loss": 0.0037, "step": 18640 }, { "epoch": 4.241410693970421, "grad_norm": 0.7261420760482286, "learning_rate": 6.970133972872966e-08, "loss": 0.0031, "step": 18641 }, { "epoch": 4.241638225255973, "grad_norm": 0.6483422139604611, "learning_rate": 6.966034033811256e-08, "loss": 0.0045, "step": 18642 }, { "epoch": 4.241865756541524, "grad_norm": 0.2543434976544596, "learning_rate": 6.961935229762102e-08, "loss": 0.0011, "step": 18643 }, { "epoch": 4.2420932878270765, "grad_norm": 0.5842315846813091, "learning_rate": 6.957837560809276e-08, "loss": 0.0041, "step": 18644 }, { "epoch": 4.242320819112628, "grad_norm": 1.7803934747889232, "learning_rate": 6.953741027036547e-08, "loss": 0.065, "step": 18645 }, { "epoch": 4.24254835039818, "grad_norm": 0.34243646297920033, "learning_rate": 6.949645628527609e-08, "loss": 0.0007, "step": 18646 }, { "epoch": 4.242775881683731, "grad_norm": 0.6708464404716645, "learning_rate": 6.945551365366191e-08, "loss": 0.0032, "step": 18647 }, { "epoch": 4.2430034129692835, "grad_norm": 0.5791413867108611, "learning_rate": 6.941458237635958e-08, "loss": 0.003, "step": 18648 }, { "epoch": 4.243230944254835, "grad_norm": 0.13141691307091816, "learning_rate": 6.93736624542056e-08, "loss": 0.0007, "step": 18649 }, { "epoch": 4.243458475540387, "grad_norm": 0.2032937170977661, "learning_rate": 6.933275388803631e-08, "loss": 0.001, "step": 18650 }, { "epoch": 4.243686006825938, "grad_norm": 0.018646470548774097, "learning_rate": 6.929185667868793e-08, "loss": 0.0001, "step": 18651 }, { "epoch": 4.2439135381114905, "grad_norm": 0.5960800966837504, "learning_rate": 6.925097082699637e-08, "loss": 0.0021, "step": 18652 }, { "epoch": 4.244141069397042, "grad_norm": 0.2856531920538372, "learning_rate": 6.921009633379716e-08, "loss": 0.0025, "step": 18653 }, { "epoch": 4.244368600682594, "grad_norm": 0.34648646635500085, "learning_rate": 6.91692331999256e-08, "loss": 0.0008, "step": 18654 }, { "epoch": 4.244596131968145, "grad_norm": 0.2639626324506149, "learning_rate": 6.912838142621704e-08, "loss": 0.001, "step": 18655 }, { "epoch": 4.2448236632536975, "grad_norm": 0.20823688422247238, "learning_rate": 6.908754101350626e-08, "loss": 0.001, "step": 18656 }, { "epoch": 4.245051194539249, "grad_norm": 0.4287756364607184, "learning_rate": 6.904671196262799e-08, "loss": 0.0007, "step": 18657 }, { "epoch": 4.245278725824801, "grad_norm": 0.38393854908700903, "learning_rate": 6.900589427441679e-08, "loss": 0.0018, "step": 18658 }, { "epoch": 4.245506257110352, "grad_norm": 0.9289570745255894, "learning_rate": 6.896508794970697e-08, "loss": 0.0018, "step": 18659 }, { "epoch": 4.2457337883959045, "grad_norm": 0.43420243820692356, "learning_rate": 6.892429298933238e-08, "loss": 0.0038, "step": 18660 }, { "epoch": 4.245961319681456, "grad_norm": 0.08741438250505806, "learning_rate": 6.888350939412675e-08, "loss": 0.0004, "step": 18661 }, { "epoch": 4.246188850967008, "grad_norm": 0.5197881154424913, "learning_rate": 6.884273716492383e-08, "loss": 0.0036, "step": 18662 }, { "epoch": 4.246416382252559, "grad_norm": 0.5108104568399418, "learning_rate": 6.880197630255665e-08, "loss": 0.0032, "step": 18663 }, { "epoch": 4.2466439135381115, "grad_norm": 0.4461242457211697, "learning_rate": 6.876122680785851e-08, "loss": 0.0022, "step": 18664 }, { "epoch": 4.246871444823663, "grad_norm": 0.2734419264458551, "learning_rate": 6.872048868166225e-08, "loss": 0.0008, "step": 18665 }, { "epoch": 4.247098976109215, "grad_norm": 0.17533790873140712, "learning_rate": 6.867976192480038e-08, "loss": 0.0005, "step": 18666 }, { "epoch": 4.247326507394766, "grad_norm": 0.046650258072654396, "learning_rate": 6.863904653810539e-08, "loss": 0.0001, "step": 18667 }, { "epoch": 4.2475540386803186, "grad_norm": 0.10444226766771308, "learning_rate": 6.859834252240918e-08, "loss": 0.0004, "step": 18668 }, { "epoch": 4.24778156996587, "grad_norm": 0.1153334163781717, "learning_rate": 6.85576498785441e-08, "loss": 0.0004, "step": 18669 }, { "epoch": 4.248009101251422, "grad_norm": 0.12993346065035705, "learning_rate": 6.851696860734121e-08, "loss": 0.0007, "step": 18670 }, { "epoch": 4.248236632536974, "grad_norm": 0.1583903268808291, "learning_rate": 6.847629870963268e-08, "loss": 0.0006, "step": 18671 }, { "epoch": 4.248464163822526, "grad_norm": 0.4255375630736067, "learning_rate": 6.843564018624932e-08, "loss": 0.002, "step": 18672 }, { "epoch": 4.248691695108078, "grad_norm": 0.7207000492447498, "learning_rate": 6.839499303802205e-08, "loss": 0.0049, "step": 18673 }, { "epoch": 4.248919226393629, "grad_norm": 0.12195855589331389, "learning_rate": 6.835435726578188e-08, "loss": 0.0008, "step": 18674 }, { "epoch": 4.249146757679181, "grad_norm": 0.22051470927364925, "learning_rate": 6.831373287035912e-08, "loss": 0.0019, "step": 18675 }, { "epoch": 4.249374288964733, "grad_norm": 0.47280636837714574, "learning_rate": 6.827311985258406e-08, "loss": 0.0022, "step": 18676 }, { "epoch": 4.249601820250285, "grad_norm": 0.14858793595360223, "learning_rate": 6.823251821328689e-08, "loss": 0.0005, "step": 18677 }, { "epoch": 4.249829351535836, "grad_norm": 0.2826027007022551, "learning_rate": 6.819192795329748e-08, "loss": 0.0019, "step": 18678 }, { "epoch": 4.250056882821388, "grad_norm": 0.045801704332453196, "learning_rate": 6.815134907344533e-08, "loss": 0.0001, "step": 18679 }, { "epoch": 4.25028441410694, "grad_norm": 0.18094601478358552, "learning_rate": 6.811078157455963e-08, "loss": 0.0017, "step": 18680 }, { "epoch": 4.250511945392492, "grad_norm": 0.8339055170367314, "learning_rate": 6.807022545746985e-08, "loss": 0.0061, "step": 18681 }, { "epoch": 4.250739476678043, "grad_norm": 0.6432252909639257, "learning_rate": 6.80296807230045e-08, "loss": 0.0053, "step": 18682 }, { "epoch": 4.250967007963595, "grad_norm": 0.05403636722286376, "learning_rate": 6.79891473719925e-08, "loss": 0.0001, "step": 18683 }, { "epoch": 4.251194539249147, "grad_norm": 0.07750308840961066, "learning_rate": 6.794862540526228e-08, "loss": 0.0003, "step": 18684 }, { "epoch": 4.251422070534699, "grad_norm": 0.5598123884120259, "learning_rate": 6.790811482364185e-08, "loss": 0.0056, "step": 18685 }, { "epoch": 4.25164960182025, "grad_norm": 0.28164296584743936, "learning_rate": 6.786761562795945e-08, "loss": 0.0025, "step": 18686 }, { "epoch": 4.251877133105802, "grad_norm": 0.1459148611424143, "learning_rate": 6.782712781904247e-08, "loss": 0.0006, "step": 18687 }, { "epoch": 4.252104664391354, "grad_norm": 0.3703390665538131, "learning_rate": 6.778665139771871e-08, "loss": 0.0005, "step": 18688 }, { "epoch": 4.252332195676906, "grad_norm": 0.5015155263685099, "learning_rate": 6.774618636481517e-08, "loss": 0.0025, "step": 18689 }, { "epoch": 4.252559726962457, "grad_norm": 0.5140630748045403, "learning_rate": 6.770573272115909e-08, "loss": 0.0012, "step": 18690 }, { "epoch": 4.252787258248009, "grad_norm": 1.248761961666872, "learning_rate": 6.76652904675773e-08, "loss": 0.0061, "step": 18691 }, { "epoch": 4.253014789533561, "grad_norm": 1.0686535388042893, "learning_rate": 6.76248596048961e-08, "loss": 0.002, "step": 18692 }, { "epoch": 4.253242320819113, "grad_norm": 1.090834503592839, "learning_rate": 6.758444013394213e-08, "loss": 0.0031, "step": 18693 }, { "epoch": 4.253469852104664, "grad_norm": 0.060376618440572724, "learning_rate": 6.754403205554122e-08, "loss": 0.0001, "step": 18694 }, { "epoch": 4.253697383390216, "grad_norm": 0.18738353720429138, "learning_rate": 6.750363537051947e-08, "loss": 0.0007, "step": 18695 }, { "epoch": 4.253924914675768, "grad_norm": 0.8100549449706606, "learning_rate": 6.746325007970226e-08, "loss": 0.0031, "step": 18696 }, { "epoch": 4.25415244596132, "grad_norm": 0.32979059376233455, "learning_rate": 6.742287618391519e-08, "loss": 0.0006, "step": 18697 }, { "epoch": 4.254379977246871, "grad_norm": 0.42716025690402265, "learning_rate": 6.738251368398355e-08, "loss": 0.0027, "step": 18698 }, { "epoch": 4.254607508532423, "grad_norm": 0.10989142411280597, "learning_rate": 6.734216258073189e-08, "loss": 0.0006, "step": 18699 }, { "epoch": 4.254835039817975, "grad_norm": 0.3573849263703024, "learning_rate": 6.730182287498527e-08, "loss": 0.0028, "step": 18700 }, { "epoch": 4.255062571103527, "grad_norm": 0.2175733448995372, "learning_rate": 6.726149456756786e-08, "loss": 0.0004, "step": 18701 }, { "epoch": 4.255290102389078, "grad_norm": 0.2820508851558104, "learning_rate": 6.722117765930424e-08, "loss": 0.0017, "step": 18702 }, { "epoch": 4.25551763367463, "grad_norm": 0.27158654210372507, "learning_rate": 6.71808721510181e-08, "loss": 0.0012, "step": 18703 }, { "epoch": 4.255745164960182, "grad_norm": 0.3002464845488334, "learning_rate": 6.71405780435333e-08, "loss": 0.0023, "step": 18704 }, { "epoch": 4.255972696245734, "grad_norm": 0.16117285935149533, "learning_rate": 6.710029533767349e-08, "loss": 0.0007, "step": 18705 }, { "epoch": 4.256200227531286, "grad_norm": 0.010623703583301104, "learning_rate": 6.706002403426185e-08, "loss": 0.0, "step": 18706 }, { "epoch": 4.256427758816837, "grad_norm": 0.6207189103203845, "learning_rate": 6.701976413412157e-08, "loss": 0.0021, "step": 18707 }, { "epoch": 4.256655290102389, "grad_norm": 0.2590967209050133, "learning_rate": 6.697951563807537e-08, "loss": 0.0009, "step": 18708 }, { "epoch": 4.256882821387941, "grad_norm": 0.5894735360416294, "learning_rate": 6.693927854694596e-08, "loss": 0.0053, "step": 18709 }, { "epoch": 4.257110352673493, "grad_norm": 0.16072607378141185, "learning_rate": 6.689905286155554e-08, "loss": 0.0008, "step": 18710 }, { "epoch": 4.257337883959044, "grad_norm": 0.4421242305442844, "learning_rate": 6.685883858272645e-08, "loss": 0.0029, "step": 18711 }, { "epoch": 4.2575654152445965, "grad_norm": 0.43241414680779766, "learning_rate": 6.681863571128051e-08, "loss": 0.0029, "step": 18712 }, { "epoch": 4.257792946530148, "grad_norm": 0.38794063886164504, "learning_rate": 6.677844424803938e-08, "loss": 0.0023, "step": 18713 }, { "epoch": 4.2580204778157, "grad_norm": 0.5710125490830703, "learning_rate": 6.673826419382454e-08, "loss": 0.0093, "step": 18714 }, { "epoch": 4.258248009101251, "grad_norm": 0.3752965785235291, "learning_rate": 6.669809554945714e-08, "loss": 0.0036, "step": 18715 }, { "epoch": 4.2584755403868035, "grad_norm": 0.21616234844766957, "learning_rate": 6.665793831575825e-08, "loss": 0.0015, "step": 18716 }, { "epoch": 4.258703071672355, "grad_norm": 0.6989661297966271, "learning_rate": 6.661779249354842e-08, "loss": 0.0056, "step": 18717 }, { "epoch": 4.258930602957907, "grad_norm": 0.378048333910046, "learning_rate": 6.65776580836483e-08, "loss": 0.0015, "step": 18718 }, { "epoch": 4.259158134243458, "grad_norm": 0.022692738841179807, "learning_rate": 6.653753508687827e-08, "loss": 0.0001, "step": 18719 }, { "epoch": 4.2593856655290105, "grad_norm": 0.25326610507126024, "learning_rate": 6.649742350405817e-08, "loss": 0.0011, "step": 18720 }, { "epoch": 4.259613196814562, "grad_norm": 0.4418173884339012, "learning_rate": 6.645732333600794e-08, "loss": 0.0014, "step": 18721 }, { "epoch": 4.259840728100114, "grad_norm": 0.24302450624547556, "learning_rate": 6.641723458354712e-08, "loss": 0.0009, "step": 18722 }, { "epoch": 4.260068259385665, "grad_norm": 0.09186900154262728, "learning_rate": 6.637715724749493e-08, "loss": 0.0003, "step": 18723 }, { "epoch": 4.2602957906712176, "grad_norm": 0.448015420736878, "learning_rate": 6.633709132867052e-08, "loss": 0.0023, "step": 18724 }, { "epoch": 4.260523321956769, "grad_norm": 0.405746681620916, "learning_rate": 6.629703682789285e-08, "loss": 0.0013, "step": 18725 }, { "epoch": 4.260750853242321, "grad_norm": 0.1750706027544282, "learning_rate": 6.625699374598067e-08, "loss": 0.0004, "step": 18726 }, { "epoch": 4.260978384527872, "grad_norm": 0.020381550193067734, "learning_rate": 6.62169620837521e-08, "loss": 0.0, "step": 18727 }, { "epoch": 4.261205915813425, "grad_norm": 0.0823528473898088, "learning_rate": 6.617694184202557e-08, "loss": 0.0004, "step": 18728 }, { "epoch": 4.261433447098976, "grad_norm": 0.1376935580033375, "learning_rate": 6.613693302161897e-08, "loss": 0.0002, "step": 18729 }, { "epoch": 4.261660978384528, "grad_norm": 0.2043867961221778, "learning_rate": 6.609693562334977e-08, "loss": 0.001, "step": 18730 }, { "epoch": 4.261888509670079, "grad_norm": 0.32816642064421275, "learning_rate": 6.605694964803567e-08, "loss": 0.0011, "step": 18731 }, { "epoch": 4.262116040955632, "grad_norm": 0.04201907283160161, "learning_rate": 6.601697509649382e-08, "loss": 0.0001, "step": 18732 }, { "epoch": 4.262343572241183, "grad_norm": 0.3500597674622321, "learning_rate": 6.597701196954145e-08, "loss": 0.0009, "step": 18733 }, { "epoch": 4.262571103526735, "grad_norm": 0.06918694823481825, "learning_rate": 6.593706026799499e-08, "loss": 0.0003, "step": 18734 }, { "epoch": 4.262798634812286, "grad_norm": 0.42346974309456875, "learning_rate": 6.589711999267126e-08, "loss": 0.0026, "step": 18735 }, { "epoch": 4.263026166097839, "grad_norm": 0.36068318497109325, "learning_rate": 6.585719114438637e-08, "loss": 0.0019, "step": 18736 }, { "epoch": 4.26325369738339, "grad_norm": 0.5328397896119337, "learning_rate": 6.581727372395645e-08, "loss": 0.0044, "step": 18737 }, { "epoch": 4.263481228668942, "grad_norm": 0.22425777538697897, "learning_rate": 6.577736773219746e-08, "loss": 0.0005, "step": 18738 }, { "epoch": 4.263708759954493, "grad_norm": 0.31282476832737394, "learning_rate": 6.57374731699248e-08, "loss": 0.0017, "step": 18739 }, { "epoch": 4.263936291240046, "grad_norm": 0.32013048035889297, "learning_rate": 6.569759003795403e-08, "loss": 0.0009, "step": 18740 }, { "epoch": 4.264163822525597, "grad_norm": 0.2021510251561477, "learning_rate": 6.565771833710031e-08, "loss": 0.0006, "step": 18741 }, { "epoch": 4.264391353811149, "grad_norm": 0.5370195464083489, "learning_rate": 6.561785806817822e-08, "loss": 0.006, "step": 18742 }, { "epoch": 4.2646188850967, "grad_norm": 0.29864233991482764, "learning_rate": 6.55780092320027e-08, "loss": 0.0019, "step": 18743 }, { "epoch": 4.264846416382253, "grad_norm": 0.03140258784118714, "learning_rate": 6.553817182938807e-08, "loss": 0.0001, "step": 18744 }, { "epoch": 4.265073947667805, "grad_norm": 0.5865636924850042, "learning_rate": 6.549834586114877e-08, "loss": 0.0027, "step": 18745 }, { "epoch": 4.265301478953356, "grad_norm": 0.05988906621170194, "learning_rate": 6.545853132809851e-08, "loss": 0.0003, "step": 18746 }, { "epoch": 4.265529010238907, "grad_norm": 0.523270611540575, "learning_rate": 6.541872823105123e-08, "loss": 0.0015, "step": 18747 }, { "epoch": 4.26575654152446, "grad_norm": 0.6702314230534323, "learning_rate": 6.53789365708203e-08, "loss": 0.0058, "step": 18748 }, { "epoch": 4.265984072810012, "grad_norm": 0.2979794026740775, "learning_rate": 6.533915634821884e-08, "loss": 0.0015, "step": 18749 }, { "epoch": 4.266211604095563, "grad_norm": 1.393705536355575, "learning_rate": 6.529938756406013e-08, "loss": 0.0055, "step": 18750 }, { "epoch": 4.266439135381115, "grad_norm": 0.34261653034757583, "learning_rate": 6.525963021915688e-08, "loss": 0.0007, "step": 18751 }, { "epoch": 4.266666666666667, "grad_norm": 0.731045947125308, "learning_rate": 6.521988431432175e-08, "loss": 0.0057, "step": 18752 }, { "epoch": 4.266894197952219, "grad_norm": 0.22263652215972668, "learning_rate": 6.518014985036694e-08, "loss": 0.0009, "step": 18753 }, { "epoch": 4.26712172923777, "grad_norm": 0.7397999772543709, "learning_rate": 6.514042682810466e-08, "loss": 0.0035, "step": 18754 }, { "epoch": 4.267349260523322, "grad_norm": 0.08428847439269538, "learning_rate": 6.510071524834676e-08, "loss": 0.0002, "step": 18755 }, { "epoch": 4.267576791808874, "grad_norm": 0.330370487232405, "learning_rate": 6.506101511190473e-08, "loss": 0.001, "step": 18756 }, { "epoch": 4.267804323094426, "grad_norm": 0.46713449818760344, "learning_rate": 6.502132641959001e-08, "loss": 0.0035, "step": 18757 }, { "epoch": 4.268031854379977, "grad_norm": 0.4822517926117485, "learning_rate": 6.49816491722139e-08, "loss": 0.0024, "step": 18758 }, { "epoch": 4.268259385665529, "grad_norm": 1.1220435985700177, "learning_rate": 6.494198337058735e-08, "loss": 0.0083, "step": 18759 }, { "epoch": 4.268486916951081, "grad_norm": 0.3724170745158072, "learning_rate": 6.490232901552096e-08, "loss": 0.0013, "step": 18760 }, { "epoch": 4.268714448236633, "grad_norm": 0.06647942798435671, "learning_rate": 6.486268610782505e-08, "loss": 0.0003, "step": 18761 }, { "epoch": 4.268941979522184, "grad_norm": 0.4932243368161479, "learning_rate": 6.482305464831015e-08, "loss": 0.0041, "step": 18762 }, { "epoch": 4.269169510807736, "grad_norm": 0.2693686925262826, "learning_rate": 6.478343463778597e-08, "loss": 0.0008, "step": 18763 }, { "epoch": 4.269397042093288, "grad_norm": 0.23560197492050838, "learning_rate": 6.474382607706233e-08, "loss": 0.0016, "step": 18764 }, { "epoch": 4.26962457337884, "grad_norm": 0.7383085519661883, "learning_rate": 6.470422896694889e-08, "loss": 0.0064, "step": 18765 }, { "epoch": 4.269852104664391, "grad_norm": 0.0721985411829328, "learning_rate": 6.466464330825498e-08, "loss": 0.0002, "step": 18766 }, { "epoch": 4.270079635949943, "grad_norm": 0.0733831952788898, "learning_rate": 6.46250691017896e-08, "loss": 0.0002, "step": 18767 }, { "epoch": 4.270307167235495, "grad_norm": 0.36487561395100926, "learning_rate": 6.458550634836136e-08, "loss": 0.0027, "step": 18768 }, { "epoch": 4.270534698521047, "grad_norm": 0.161033024821774, "learning_rate": 6.454595504877912e-08, "loss": 0.001, "step": 18769 }, { "epoch": 4.270762229806598, "grad_norm": 0.5947794913292149, "learning_rate": 6.450641520385103e-08, "loss": 0.002, "step": 18770 }, { "epoch": 4.27098976109215, "grad_norm": 0.948145946725443, "learning_rate": 6.446688681438537e-08, "loss": 0.0051, "step": 18771 }, { "epoch": 4.271217292377702, "grad_norm": 0.040927960460058074, "learning_rate": 6.442736988119008e-08, "loss": 0.0001, "step": 18772 }, { "epoch": 4.271444823663254, "grad_norm": 0.25913483835382917, "learning_rate": 6.438786440507251e-08, "loss": 0.0013, "step": 18773 }, { "epoch": 4.271672354948805, "grad_norm": 0.32476448573477795, "learning_rate": 6.434837038684048e-08, "loss": 0.0019, "step": 18774 }, { "epoch": 4.271899886234357, "grad_norm": 0.2840868684867384, "learning_rate": 6.430888782730085e-08, "loss": 0.0027, "step": 18775 }, { "epoch": 4.272127417519909, "grad_norm": 0.037374178363718, "learning_rate": 6.426941672726075e-08, "loss": 0.0001, "step": 18776 }, { "epoch": 4.272354948805461, "grad_norm": 0.014587614660744963, "learning_rate": 6.422995708752683e-08, "loss": 0.0001, "step": 18777 }, { "epoch": 4.272582480091012, "grad_norm": 0.6430697271934176, "learning_rate": 6.419050890890553e-08, "loss": 0.0034, "step": 18778 }, { "epoch": 4.272810011376564, "grad_norm": 0.03599300633873292, "learning_rate": 6.415107219220326e-08, "loss": 0.0001, "step": 18779 }, { "epoch": 4.273037542662116, "grad_norm": 0.25188417723802575, "learning_rate": 6.41116469382258e-08, "loss": 0.0017, "step": 18780 }, { "epoch": 4.273265073947668, "grad_norm": 0.41737986418438716, "learning_rate": 6.407223314777916e-08, "loss": 0.0031, "step": 18781 }, { "epoch": 4.273492605233219, "grad_norm": 0.8432440217223114, "learning_rate": 6.403283082166874e-08, "loss": 0.0075, "step": 18782 }, { "epoch": 4.273720136518771, "grad_norm": 0.0477892435219102, "learning_rate": 6.399343996069993e-08, "loss": 0.0002, "step": 18783 }, { "epoch": 4.273947667804324, "grad_norm": 0.11627568005388615, "learning_rate": 6.395406056567772e-08, "loss": 0.0003, "step": 18784 }, { "epoch": 4.274175199089875, "grad_norm": 0.28270449454079094, "learning_rate": 6.3914692637407e-08, "loss": 0.0015, "step": 18785 }, { "epoch": 4.274402730375426, "grad_norm": 0.36637117170512296, "learning_rate": 6.387533617669243e-08, "loss": 0.0034, "step": 18786 }, { "epoch": 4.274630261660978, "grad_norm": 0.12060499741613774, "learning_rate": 6.38359911843383e-08, "loss": 0.0005, "step": 18787 }, { "epoch": 4.274857792946531, "grad_norm": 0.6389967992487278, "learning_rate": 6.379665766114887e-08, "loss": 0.0046, "step": 18788 }, { "epoch": 4.275085324232082, "grad_norm": 0.18941565167987956, "learning_rate": 6.37573356079279e-08, "loss": 0.0007, "step": 18789 }, { "epoch": 4.275312855517634, "grad_norm": 0.4189314183886349, "learning_rate": 6.371802502547916e-08, "loss": 0.0047, "step": 18790 }, { "epoch": 4.275540386803185, "grad_norm": 0.42310608479260925, "learning_rate": 6.367872591460593e-08, "loss": 0.0015, "step": 18791 }, { "epoch": 4.275767918088738, "grad_norm": 0.11151062572620674, "learning_rate": 6.363943827611157e-08, "loss": 0.0004, "step": 18792 }, { "epoch": 4.275995449374289, "grad_norm": 0.4367763248668114, "learning_rate": 6.360016211079914e-08, "loss": 0.0059, "step": 18793 }, { "epoch": 4.276222980659841, "grad_norm": 0.44924684630997025, "learning_rate": 6.35608974194711e-08, "loss": 0.0035, "step": 18794 }, { "epoch": 4.276450511945392, "grad_norm": 0.576402346397991, "learning_rate": 6.352164420293023e-08, "loss": 0.0059, "step": 18795 }, { "epoch": 4.276678043230945, "grad_norm": 0.06929180157102342, "learning_rate": 6.348240246197852e-08, "loss": 0.0002, "step": 18796 }, { "epoch": 4.276905574516496, "grad_norm": 0.043736412943438885, "learning_rate": 6.344317219741811e-08, "loss": 0.0002, "step": 18797 }, { "epoch": 4.277133105802048, "grad_norm": 0.6434370000170304, "learning_rate": 6.340395341005098e-08, "loss": 0.006, "step": 18798 }, { "epoch": 4.277360637087599, "grad_norm": 0.5517101454778839, "learning_rate": 6.336474610067839e-08, "loss": 0.0043, "step": 18799 }, { "epoch": 4.277588168373152, "grad_norm": 0.03807669656447446, "learning_rate": 6.33255502701019e-08, "loss": 0.0001, "step": 18800 }, { "epoch": 4.277815699658703, "grad_norm": 0.16573350758637378, "learning_rate": 6.32863659191224e-08, "loss": 0.001, "step": 18801 }, { "epoch": 4.278043230944255, "grad_norm": 0.4216009062771506, "learning_rate": 6.324719304854094e-08, "loss": 0.0033, "step": 18802 }, { "epoch": 4.278270762229806, "grad_norm": 0.10657308481100376, "learning_rate": 6.320803165915796e-08, "loss": 0.0002, "step": 18803 }, { "epoch": 4.278498293515359, "grad_norm": 0.037178784867909126, "learning_rate": 6.316888175177393e-08, "loss": 0.0001, "step": 18804 }, { "epoch": 4.27872582480091, "grad_norm": 0.10103707971220664, "learning_rate": 6.312974332718922e-08, "loss": 0.0003, "step": 18805 }, { "epoch": 4.278953356086462, "grad_norm": 0.10083883960651384, "learning_rate": 6.309061638620328e-08, "loss": 0.0004, "step": 18806 }, { "epoch": 4.279180887372013, "grad_norm": 0.2398596334325958, "learning_rate": 6.305150092961628e-08, "loss": 0.0013, "step": 18807 }, { "epoch": 4.279408418657566, "grad_norm": 0.1529756567421419, "learning_rate": 6.301239695822727e-08, "loss": 0.0006, "step": 18808 }, { "epoch": 4.279635949943117, "grad_norm": 0.41350732020216635, "learning_rate": 6.297330447283576e-08, "loss": 0.002, "step": 18809 }, { "epoch": 4.279863481228669, "grad_norm": 0.21558532608140127, "learning_rate": 6.293422347424049e-08, "loss": 0.0009, "step": 18810 }, { "epoch": 4.28009101251422, "grad_norm": 0.22320991928931408, "learning_rate": 6.289515396324032e-08, "loss": 0.0005, "step": 18811 }, { "epoch": 4.280318543799773, "grad_norm": 0.28007495902025276, "learning_rate": 6.285609594063386e-08, "loss": 0.0016, "step": 18812 }, { "epoch": 4.280546075085324, "grad_norm": 0.031043722459692166, "learning_rate": 6.281704940721917e-08, "loss": 0.0001, "step": 18813 }, { "epoch": 4.280773606370876, "grad_norm": 0.08737737439301149, "learning_rate": 6.277801436379453e-08, "loss": 0.0002, "step": 18814 }, { "epoch": 4.281001137656427, "grad_norm": 0.3520452910692195, "learning_rate": 6.273899081115746e-08, "loss": 0.0013, "step": 18815 }, { "epoch": 4.28122866894198, "grad_norm": 0.3318077822947688, "learning_rate": 6.269997875010584e-08, "loss": 0.0006, "step": 18816 }, { "epoch": 4.281456200227531, "grad_norm": 0.28814111957825517, "learning_rate": 6.266097818143677e-08, "loss": 0.0024, "step": 18817 }, { "epoch": 4.281683731513083, "grad_norm": 0.04057601844010105, "learning_rate": 6.262198910594736e-08, "loss": 0.0002, "step": 18818 }, { "epoch": 4.281911262798634, "grad_norm": 0.2784453571676836, "learning_rate": 6.258301152443469e-08, "loss": 0.0026, "step": 18819 }, { "epoch": 4.282138794084187, "grad_norm": 0.13658501109308213, "learning_rate": 6.254404543769514e-08, "loss": 0.0004, "step": 18820 }, { "epoch": 4.282366325369738, "grad_norm": 0.39448524851462924, "learning_rate": 6.25050908465253e-08, "loss": 0.001, "step": 18821 }, { "epoch": 4.28259385665529, "grad_norm": 0.14676675233577718, "learning_rate": 6.246614775172112e-08, "loss": 0.0002, "step": 18822 }, { "epoch": 4.282821387940842, "grad_norm": 0.46233901975835556, "learning_rate": 6.242721615407885e-08, "loss": 0.0021, "step": 18823 }, { "epoch": 4.283048919226394, "grad_norm": 0.08326216561577032, "learning_rate": 6.238829605439375e-08, "loss": 0.0003, "step": 18824 }, { "epoch": 4.283276450511945, "grad_norm": 0.13121488825343253, "learning_rate": 6.234938745346156e-08, "loss": 0.0004, "step": 18825 }, { "epoch": 4.283503981797497, "grad_norm": 0.28634828943402085, "learning_rate": 6.231049035207757e-08, "loss": 0.0018, "step": 18826 }, { "epoch": 4.283731513083049, "grad_norm": 0.0717023541727757, "learning_rate": 6.227160475103646e-08, "loss": 0.0002, "step": 18827 }, { "epoch": 4.283959044368601, "grad_norm": 0.47252364322914314, "learning_rate": 6.223273065113334e-08, "loss": 0.0035, "step": 18828 }, { "epoch": 4.284186575654153, "grad_norm": 0.145307669406772, "learning_rate": 6.219386805316252e-08, "loss": 0.0002, "step": 18829 }, { "epoch": 4.284414106939704, "grad_norm": 0.3347244390119615, "learning_rate": 6.215501695791826e-08, "loss": 0.0028, "step": 18830 }, { "epoch": 4.284641638225256, "grad_norm": 0.2860585429728484, "learning_rate": 6.211617736619456e-08, "loss": 0.0014, "step": 18831 }, { "epoch": 4.284869169510808, "grad_norm": 0.38392105949568145, "learning_rate": 6.207734927878537e-08, "loss": 0.0024, "step": 18832 }, { "epoch": 4.28509670079636, "grad_norm": 0.36142870881993033, "learning_rate": 6.203853269648432e-08, "loss": 0.0009, "step": 18833 }, { "epoch": 4.285324232081911, "grad_norm": 0.09420523387358981, "learning_rate": 6.199972762008447e-08, "loss": 0.0003, "step": 18834 }, { "epoch": 4.285551763367463, "grad_norm": 3.3712298029159506, "learning_rate": 6.196093405037934e-08, "loss": 0.0168, "step": 18835 }, { "epoch": 4.285779294653015, "grad_norm": 0.5264569210483142, "learning_rate": 6.192215198816146e-08, "loss": 0.0019, "step": 18836 }, { "epoch": 4.286006825938567, "grad_norm": 0.02149163296121291, "learning_rate": 6.188338143422345e-08, "loss": 0.0001, "step": 18837 }, { "epoch": 4.286234357224118, "grad_norm": 0.20751832791445332, "learning_rate": 6.18446223893579e-08, "loss": 0.0007, "step": 18838 }, { "epoch": 4.28646188850967, "grad_norm": 0.2092974187653857, "learning_rate": 6.180587485435687e-08, "loss": 0.0005, "step": 18839 }, { "epoch": 4.286689419795222, "grad_norm": 0.3252517322874339, "learning_rate": 6.176713883001243e-08, "loss": 0.0013, "step": 18840 }, { "epoch": 4.286916951080774, "grad_norm": 0.5387747564554485, "learning_rate": 6.172841431711603e-08, "loss": 0.0056, "step": 18841 }, { "epoch": 4.287144482366325, "grad_norm": 0.21044339962431519, "learning_rate": 6.168970131645934e-08, "loss": 0.0013, "step": 18842 }, { "epoch": 4.287372013651877, "grad_norm": 0.35153953627088136, "learning_rate": 6.165099982883351e-08, "loss": 0.001, "step": 18843 }, { "epoch": 4.287599544937429, "grad_norm": 0.13689419839340114, "learning_rate": 6.161230985502947e-08, "loss": 0.0009, "step": 18844 }, { "epoch": 4.287827076222981, "grad_norm": 0.26343971381287795, "learning_rate": 6.157363139583796e-08, "loss": 0.0013, "step": 18845 }, { "epoch": 4.288054607508532, "grad_norm": 0.19177558599010902, "learning_rate": 6.153496445204958e-08, "loss": 0.0014, "step": 18846 }, { "epoch": 4.288282138794084, "grad_norm": 0.04500714036014029, "learning_rate": 6.14963090244547e-08, "loss": 0.0001, "step": 18847 }, { "epoch": 4.288509670079636, "grad_norm": 0.26329786626549356, "learning_rate": 6.145766511384328e-08, "loss": 0.0009, "step": 18848 }, { "epoch": 4.288737201365188, "grad_norm": 0.666460713658092, "learning_rate": 6.141903272100493e-08, "loss": 0.0028, "step": 18849 }, { "epoch": 4.288964732650739, "grad_norm": 0.5650597076115984, "learning_rate": 6.138041184672962e-08, "loss": 0.0019, "step": 18850 }, { "epoch": 4.289192263936291, "grad_norm": 0.5685006457631007, "learning_rate": 6.134180249180625e-08, "loss": 0.002, "step": 18851 }, { "epoch": 4.289419795221843, "grad_norm": 0.1270193431288316, "learning_rate": 6.130320465702416e-08, "loss": 0.0004, "step": 18852 }, { "epoch": 4.289647326507395, "grad_norm": 0.183532840895267, "learning_rate": 6.126461834317227e-08, "loss": 0.0008, "step": 18853 }, { "epoch": 4.289874857792946, "grad_norm": 0.30706385447464446, "learning_rate": 6.122604355103922e-08, "loss": 0.0011, "step": 18854 }, { "epoch": 4.290102389078498, "grad_norm": 0.7790406145728898, "learning_rate": 6.118748028141337e-08, "loss": 0.0055, "step": 18855 }, { "epoch": 4.29032992036405, "grad_norm": 0.28244814271506014, "learning_rate": 6.114892853508268e-08, "loss": 0.0017, "step": 18856 }, { "epoch": 4.290557451649602, "grad_norm": 0.1426359211191557, "learning_rate": 6.111038831283527e-08, "loss": 0.0004, "step": 18857 }, { "epoch": 4.290784982935153, "grad_norm": 0.1095837856375702, "learning_rate": 6.10718596154588e-08, "loss": 0.0003, "step": 18858 }, { "epoch": 4.291012514220705, "grad_norm": 0.313626671246985, "learning_rate": 6.103334244374087e-08, "loss": 0.0015, "step": 18859 }, { "epoch": 4.291240045506257, "grad_norm": 0.2415475075299367, "learning_rate": 6.099483679846842e-08, "loss": 0.0006, "step": 18860 }, { "epoch": 4.291467576791809, "grad_norm": 0.4712807667894408, "learning_rate": 6.095634268042876e-08, "loss": 0.004, "step": 18861 }, { "epoch": 4.291695108077361, "grad_norm": 0.019667156718368696, "learning_rate": 6.09178600904084e-08, "loss": 0.0001, "step": 18862 }, { "epoch": 4.291922639362912, "grad_norm": 0.19550687008552448, "learning_rate": 6.087938902919379e-08, "loss": 0.0003, "step": 18863 }, { "epoch": 4.292150170648464, "grad_norm": 0.18078905810203824, "learning_rate": 6.084092949757134e-08, "loss": 0.0006, "step": 18864 }, { "epoch": 4.292377701934016, "grad_norm": 0.035937464687728654, "learning_rate": 6.080248149632712e-08, "loss": 0.0001, "step": 18865 }, { "epoch": 4.292605233219568, "grad_norm": 0.2355858682193705, "learning_rate": 6.0764045026247e-08, "loss": 0.001, "step": 18866 }, { "epoch": 4.292832764505119, "grad_norm": 0.2223510104318187, "learning_rate": 6.072562008811644e-08, "loss": 0.0008, "step": 18867 }, { "epoch": 4.293060295790672, "grad_norm": 0.07967459199825407, "learning_rate": 6.068720668272062e-08, "loss": 0.0003, "step": 18868 }, { "epoch": 4.293287827076223, "grad_norm": 0.1318998813352236, "learning_rate": 6.0648804810845e-08, "loss": 0.0006, "step": 18869 }, { "epoch": 4.293515358361775, "grad_norm": 0.4170928029323994, "learning_rate": 6.061041447327408e-08, "loss": 0.0017, "step": 18870 }, { "epoch": 4.293742889647326, "grad_norm": 0.21269849118624176, "learning_rate": 6.05720356707927e-08, "loss": 0.0005, "step": 18871 }, { "epoch": 4.293970420932879, "grad_norm": 0.21642751298960358, "learning_rate": 6.053366840418517e-08, "loss": 0.0009, "step": 18872 }, { "epoch": 4.29419795221843, "grad_norm": 0.23617155023351763, "learning_rate": 6.049531267423581e-08, "loss": 0.0012, "step": 18873 }, { "epoch": 4.294425483503982, "grad_norm": 0.4183209393944379, "learning_rate": 6.045696848172842e-08, "loss": 0.0036, "step": 18874 }, { "epoch": 4.294653014789533, "grad_norm": 0.18780326128174613, "learning_rate": 6.04186358274466e-08, "loss": 0.0009, "step": 18875 }, { "epoch": 4.294880546075086, "grad_norm": 0.4706834161040438, "learning_rate": 6.038031471217398e-08, "loss": 0.0092, "step": 18876 }, { "epoch": 4.295108077360637, "grad_norm": 0.18195411383750382, "learning_rate": 6.034200513669355e-08, "loss": 0.0006, "step": 18877 }, { "epoch": 4.295335608646189, "grad_norm": 0.24303856234750323, "learning_rate": 6.03037071017884e-08, "loss": 0.0018, "step": 18878 }, { "epoch": 4.29556313993174, "grad_norm": 0.052556573114099475, "learning_rate": 6.026542060824141e-08, "loss": 0.0002, "step": 18879 }, { "epoch": 4.295790671217293, "grad_norm": 0.13917844363543702, "learning_rate": 6.022714565683485e-08, "loss": 0.0004, "step": 18880 }, { "epoch": 4.296018202502844, "grad_norm": 0.28303105632524295, "learning_rate": 6.018888224835123e-08, "loss": 0.0012, "step": 18881 }, { "epoch": 4.296245733788396, "grad_norm": 0.0723653214165035, "learning_rate": 6.015063038357228e-08, "loss": 0.0002, "step": 18882 }, { "epoch": 4.2964732650739474, "grad_norm": 0.23935451649352965, "learning_rate": 6.011239006328017e-08, "loss": 0.0007, "step": 18883 }, { "epoch": 4.2967007963595, "grad_norm": 0.4816370775018716, "learning_rate": 6.007416128825613e-08, "loss": 0.0038, "step": 18884 }, { "epoch": 4.296928327645051, "grad_norm": 0.18480331385470078, "learning_rate": 6.00359440592816e-08, "loss": 0.0008, "step": 18885 }, { "epoch": 4.297155858930603, "grad_norm": 0.06697057768629379, "learning_rate": 5.99977383771378e-08, "loss": 0.0003, "step": 18886 }, { "epoch": 4.2973833902161545, "grad_norm": 0.188449797065647, "learning_rate": 5.995954424260535e-08, "loss": 0.001, "step": 18887 }, { "epoch": 4.297610921501707, "grad_norm": 0.2901963554892812, "learning_rate": 5.992136165646513e-08, "loss": 0.0005, "step": 18888 }, { "epoch": 4.297838452787258, "grad_norm": 3.2255023843263957, "learning_rate": 5.988319061949722e-08, "loss": 0.0354, "step": 18889 }, { "epoch": 4.29806598407281, "grad_norm": 0.24248354781619252, "learning_rate": 5.984503113248212e-08, "loss": 0.0005, "step": 18890 }, { "epoch": 4.2982935153583615, "grad_norm": 0.23389868488947435, "learning_rate": 5.98068831961994e-08, "loss": 0.0012, "step": 18891 }, { "epoch": 4.298521046643914, "grad_norm": 0.24304805662306747, "learning_rate": 5.976874681142893e-08, "loss": 0.0012, "step": 18892 }, { "epoch": 4.298748577929465, "grad_norm": 0.08399627930739227, "learning_rate": 5.973062197895015e-08, "loss": 0.0003, "step": 18893 }, { "epoch": 4.298976109215017, "grad_norm": 0.1951133501035529, "learning_rate": 5.969250869954215e-08, "loss": 0.0008, "step": 18894 }, { "epoch": 4.2992036405005685, "grad_norm": 0.0551294019136571, "learning_rate": 5.965440697398406e-08, "loss": 0.0001, "step": 18895 }, { "epoch": 4.299431171786121, "grad_norm": 0.4989049723834436, "learning_rate": 5.961631680305439e-08, "loss": 0.0019, "step": 18896 }, { "epoch": 4.299658703071672, "grad_norm": 0.08992308647588258, "learning_rate": 5.9578238187531846e-08, "loss": 0.0003, "step": 18897 }, { "epoch": 4.299886234357224, "grad_norm": 0.3947137903205636, "learning_rate": 5.9540171128194473e-08, "loss": 0.0051, "step": 18898 }, { "epoch": 4.3001137656427755, "grad_norm": 4.638627461730851, "learning_rate": 5.950211562582048e-08, "loss": 0.0489, "step": 18899 }, { "epoch": 4.300341296928328, "grad_norm": 0.2922076172090733, "learning_rate": 5.946407168118763e-08, "loss": 0.0019, "step": 18900 }, { "epoch": 4.30056882821388, "grad_norm": 0.2961705794818995, "learning_rate": 5.942603929507337e-08, "loss": 0.001, "step": 18901 }, { "epoch": 4.300796359499431, "grad_norm": 0.21906464024804856, "learning_rate": 5.9388018468255134e-08, "loss": 0.0023, "step": 18902 }, { "epoch": 4.3010238907849825, "grad_norm": 0.2723974594881638, "learning_rate": 5.93500092015098e-08, "loss": 0.0015, "step": 18903 }, { "epoch": 4.301251422070535, "grad_norm": 0.18284124251238285, "learning_rate": 5.931201149561451e-08, "loss": 0.0011, "step": 18904 }, { "epoch": 4.301478953356087, "grad_norm": 0.5036262838680745, "learning_rate": 5.927402535134554e-08, "loss": 0.0036, "step": 18905 }, { "epoch": 4.301706484641638, "grad_norm": 0.13156402336403827, "learning_rate": 5.923605076947947e-08, "loss": 0.0005, "step": 18906 }, { "epoch": 4.30193401592719, "grad_norm": 0.08799201239211377, "learning_rate": 5.919808775079243e-08, "loss": 0.0001, "step": 18907 }, { "epoch": 4.302161547212742, "grad_norm": 0.22715617142566866, "learning_rate": 5.916013629606018e-08, "loss": 0.0014, "step": 18908 }, { "epoch": 4.302389078498294, "grad_norm": 0.4739052928923991, "learning_rate": 5.91221964060585e-08, "loss": 0.0035, "step": 18909 }, { "epoch": 4.302616609783845, "grad_norm": 0.17633179164675455, "learning_rate": 5.908426808156273e-08, "loss": 0.0011, "step": 18910 }, { "epoch": 4.302844141069397, "grad_norm": 0.09055398822617103, "learning_rate": 5.9046351323348224e-08, "loss": 0.0002, "step": 18911 }, { "epoch": 4.303071672354949, "grad_norm": 1.7961547833762204, "learning_rate": 5.900844613218965e-08, "loss": 0.0051, "step": 18912 }, { "epoch": 4.303299203640501, "grad_norm": 0.4065496113470444, "learning_rate": 5.8970552508861854e-08, "loss": 0.0015, "step": 18913 }, { "epoch": 4.303526734926052, "grad_norm": 0.17220344864480325, "learning_rate": 5.8932670454139444e-08, "loss": 0.001, "step": 18914 }, { "epoch": 4.303754266211604, "grad_norm": 0.44005593029414536, "learning_rate": 5.8894799968796446e-08, "loss": 0.0014, "step": 18915 }, { "epoch": 4.303981797497156, "grad_norm": 0.15676968314009643, "learning_rate": 5.885694105360711e-08, "loss": 0.0005, "step": 18916 }, { "epoch": 4.304209328782708, "grad_norm": 0.45344684238993727, "learning_rate": 5.8819093709345034e-08, "loss": 0.003, "step": 18917 }, { "epoch": 4.304436860068259, "grad_norm": 0.3838077889923743, "learning_rate": 5.878125793678351e-08, "loss": 0.0006, "step": 18918 }, { "epoch": 4.304664391353811, "grad_norm": 0.15054816973646365, "learning_rate": 5.874343373669641e-08, "loss": 0.0004, "step": 18919 }, { "epoch": 4.304891922639363, "grad_norm": 0.3643414182691461, "learning_rate": 5.870562110985627e-08, "loss": 0.001, "step": 18920 }, { "epoch": 4.305119453924915, "grad_norm": 0.6537526114643234, "learning_rate": 5.866782005703626e-08, "loss": 0.0028, "step": 18921 }, { "epoch": 4.305346985210466, "grad_norm": 0.30730226757049817, "learning_rate": 5.8630030579008706e-08, "loss": 0.0019, "step": 18922 }, { "epoch": 4.305574516496018, "grad_norm": 0.5242847200326629, "learning_rate": 5.8592252676546166e-08, "loss": 0.0026, "step": 18923 }, { "epoch": 4.30580204778157, "grad_norm": 0.07322037299404315, "learning_rate": 5.85544863504206e-08, "loss": 0.0002, "step": 18924 }, { "epoch": 4.306029579067122, "grad_norm": 0.5964917569327562, "learning_rate": 5.851673160140389e-08, "loss": 0.0039, "step": 18925 }, { "epoch": 4.306257110352673, "grad_norm": 0.5148584420005835, "learning_rate": 5.847898843026785e-08, "loss": 0.0047, "step": 18926 }, { "epoch": 4.306484641638225, "grad_norm": 0.48713530135039496, "learning_rate": 5.8441256837783595e-08, "loss": 0.0038, "step": 18927 }, { "epoch": 4.306712172923777, "grad_norm": 0.6667402232562316, "learning_rate": 5.840353682472259e-08, "loss": 0.0025, "step": 18928 }, { "epoch": 4.306939704209329, "grad_norm": 0.07590089162410073, "learning_rate": 5.8365828391855546e-08, "loss": 0.0002, "step": 18929 }, { "epoch": 4.30716723549488, "grad_norm": 0.2793197572402212, "learning_rate": 5.83281315399533e-08, "loss": 0.0008, "step": 18930 }, { "epoch": 4.307394766780432, "grad_norm": 0.431782732452581, "learning_rate": 5.829044626978614e-08, "loss": 0.0053, "step": 18931 }, { "epoch": 4.307622298065984, "grad_norm": 0.3059008640462848, "learning_rate": 5.8252772582124345e-08, "loss": 0.0026, "step": 18932 }, { "epoch": 4.307849829351536, "grad_norm": 0.42172934536375173, "learning_rate": 5.8215110477738074e-08, "loss": 0.0024, "step": 18933 }, { "epoch": 4.308077360637087, "grad_norm": 0.18685822790857962, "learning_rate": 5.817745995739685e-08, "loss": 0.0004, "step": 18934 }, { "epoch": 4.308304891922639, "grad_norm": 0.24717718181340725, "learning_rate": 5.813982102187033e-08, "loss": 0.0003, "step": 18935 }, { "epoch": 4.308532423208191, "grad_norm": 0.27329266363965604, "learning_rate": 5.8102193671927684e-08, "loss": 0.0025, "step": 18936 }, { "epoch": 4.308759954493743, "grad_norm": 0.047490081545189786, "learning_rate": 5.806457790833789e-08, "loss": 0.0001, "step": 18937 }, { "epoch": 4.308987485779294, "grad_norm": 0.36618723860824515, "learning_rate": 5.802697373186984e-08, "loss": 0.0013, "step": 18938 }, { "epoch": 4.309215017064846, "grad_norm": 0.19198675848007912, "learning_rate": 5.798938114329203e-08, "loss": 0.0015, "step": 18939 }, { "epoch": 4.309442548350399, "grad_norm": 0.19765135040297363, "learning_rate": 5.7951800143372996e-08, "loss": 0.0015, "step": 18940 }, { "epoch": 4.30967007963595, "grad_norm": 0.29770553978445946, "learning_rate": 5.7914230732880605e-08, "loss": 0.0025, "step": 18941 }, { "epoch": 4.309897610921501, "grad_norm": 0.3848396359836679, "learning_rate": 5.787667291258278e-08, "loss": 0.0023, "step": 18942 }, { "epoch": 4.3101251422070535, "grad_norm": 0.30263869851695896, "learning_rate": 5.7839126683247176e-08, "loss": 0.002, "step": 18943 }, { "epoch": 4.310352673492606, "grad_norm": 0.6218602487392593, "learning_rate": 5.780159204564102e-08, "loss": 0.0045, "step": 18944 }, { "epoch": 4.310580204778157, "grad_norm": 0.10393843970652733, "learning_rate": 5.776406900053155e-08, "loss": 0.0004, "step": 18945 }, { "epoch": 4.310807736063709, "grad_norm": 0.3444466713510889, "learning_rate": 5.7726557548685655e-08, "loss": 0.0037, "step": 18946 }, { "epoch": 4.3110352673492605, "grad_norm": 0.2861416135576739, "learning_rate": 5.768905769087008e-08, "loss": 0.001, "step": 18947 }, { "epoch": 4.311262798634813, "grad_norm": 0.2509256665599069, "learning_rate": 5.765156942785115e-08, "loss": 0.0009, "step": 18948 }, { "epoch": 4.311490329920364, "grad_norm": 0.557532535169228, "learning_rate": 5.7614092760395134e-08, "loss": 0.0021, "step": 18949 }, { "epoch": 4.311717861205916, "grad_norm": 0.7064565748321886, "learning_rate": 5.757662768926794e-08, "loss": 0.0038, "step": 18950 }, { "epoch": 4.3119453924914675, "grad_norm": 0.32488999184894846, "learning_rate": 5.753917421523522e-08, "loss": 0.0011, "step": 18951 }, { "epoch": 4.31217292377702, "grad_norm": 0.4784465653743235, "learning_rate": 5.750173233906253e-08, "loss": 0.003, "step": 18952 }, { "epoch": 4.312400455062571, "grad_norm": 0.30307034121108806, "learning_rate": 5.74643020615151e-08, "loss": 0.0023, "step": 18953 }, { "epoch": 4.312627986348123, "grad_norm": 0.05099221362354341, "learning_rate": 5.7426883383358e-08, "loss": 0.0002, "step": 18954 }, { "epoch": 4.3128555176336745, "grad_norm": 0.6122979363467039, "learning_rate": 5.7389476305356044e-08, "loss": 0.0027, "step": 18955 }, { "epoch": 4.313083048919227, "grad_norm": 0.4929230814436234, "learning_rate": 5.735208082827348e-08, "loss": 0.0011, "step": 18956 }, { "epoch": 4.313310580204778, "grad_norm": 0.16675090714597038, "learning_rate": 5.7314696952874905e-08, "loss": 0.0006, "step": 18957 }, { "epoch": 4.31353811149033, "grad_norm": 2.042383142945516, "learning_rate": 5.7277324679924154e-08, "loss": 0.0152, "step": 18958 }, { "epoch": 4.3137656427758815, "grad_norm": 0.13247628831012623, "learning_rate": 5.7239964010185125e-08, "loss": 0.0007, "step": 18959 }, { "epoch": 4.313993174061434, "grad_norm": 0.02191950286955536, "learning_rate": 5.720261494442145e-08, "loss": 0.0001, "step": 18960 }, { "epoch": 4.314220705346985, "grad_norm": 0.09682696564758124, "learning_rate": 5.71652774833966e-08, "loss": 0.0004, "step": 18961 }, { "epoch": 4.314448236632537, "grad_norm": 0.07462729426718312, "learning_rate": 5.7127951627873464e-08, "loss": 0.0002, "step": 18962 }, { "epoch": 4.3146757679180885, "grad_norm": 0.33742712678906756, "learning_rate": 5.709063737861495e-08, "loss": 0.0027, "step": 18963 }, { "epoch": 4.314903299203641, "grad_norm": 0.6129700601900462, "learning_rate": 5.705333473638379e-08, "loss": 0.0013, "step": 18964 }, { "epoch": 4.315130830489192, "grad_norm": 0.534975689349455, "learning_rate": 5.701604370194222e-08, "loss": 0.0038, "step": 18965 }, { "epoch": 4.315358361774744, "grad_norm": 0.8064181966242641, "learning_rate": 5.6978764276052545e-08, "loss": 0.0104, "step": 18966 }, { "epoch": 4.3155858930602955, "grad_norm": 0.2818839312661159, "learning_rate": 5.6941496459476595e-08, "loss": 0.001, "step": 18967 }, { "epoch": 4.315813424345848, "grad_norm": 0.9012429701707418, "learning_rate": 5.690424025297625e-08, "loss": 0.0017, "step": 18968 }, { "epoch": 4.316040955631399, "grad_norm": 13.384147587039937, "learning_rate": 5.686699565731285e-08, "loss": 0.0167, "step": 18969 }, { "epoch": 4.316268486916951, "grad_norm": 0.26584730570301845, "learning_rate": 5.6829762673247386e-08, "loss": 0.0012, "step": 18970 }, { "epoch": 4.3164960182025025, "grad_norm": 0.332697380393047, "learning_rate": 5.679254130154119e-08, "loss": 0.0011, "step": 18971 }, { "epoch": 4.316723549488055, "grad_norm": 0.45560749720498894, "learning_rate": 5.6755331542954694e-08, "loss": 0.0011, "step": 18972 }, { "epoch": 4.316951080773606, "grad_norm": 0.2925511853631036, "learning_rate": 5.671813339824847e-08, "loss": 0.0016, "step": 18973 }, { "epoch": 4.317178612059158, "grad_norm": 0.44855258941728443, "learning_rate": 5.668094686818303e-08, "loss": 0.0009, "step": 18974 }, { "epoch": 4.3174061433447095, "grad_norm": 0.4654740205675623, "learning_rate": 5.6643771953518095e-08, "loss": 0.0039, "step": 18975 }, { "epoch": 4.317633674630262, "grad_norm": 0.10317798907745604, "learning_rate": 5.6606608655013635e-08, "loss": 0.0004, "step": 18976 }, { "epoch": 4.317861205915813, "grad_norm": 0.5648817306138405, "learning_rate": 5.6569456973429025e-08, "loss": 0.0071, "step": 18977 }, { "epoch": 4.318088737201365, "grad_norm": 0.28751124368864706, "learning_rate": 5.653231690952374e-08, "loss": 0.0009, "step": 18978 }, { "epoch": 4.318316268486917, "grad_norm": 0.07484842109811596, "learning_rate": 5.6495188464056745e-08, "loss": 0.0002, "step": 18979 }, { "epoch": 4.318543799772469, "grad_norm": 0.06811666340758463, "learning_rate": 5.645807163778702e-08, "loss": 0.0002, "step": 18980 }, { "epoch": 4.31877133105802, "grad_norm": 0.36059375393742293, "learning_rate": 5.6420966431473126e-08, "loss": 0.0017, "step": 18981 }, { "epoch": 4.318998862343572, "grad_norm": 0.1545762622422574, "learning_rate": 5.638387284587321e-08, "loss": 0.0004, "step": 18982 }, { "epoch": 4.319226393629124, "grad_norm": 0.24671113072364717, "learning_rate": 5.634679088174576e-08, "loss": 0.0013, "step": 18983 }, { "epoch": 4.319453924914676, "grad_norm": 0.2509498033748342, "learning_rate": 5.630972053984829e-08, "loss": 0.0015, "step": 18984 }, { "epoch": 4.319681456200228, "grad_norm": 0.2796086139046096, "learning_rate": 5.62726618209386e-08, "loss": 0.0015, "step": 18985 }, { "epoch": 4.319908987485779, "grad_norm": 0.24150639110014385, "learning_rate": 5.623561472577428e-08, "loss": 0.0013, "step": 18986 }, { "epoch": 4.320136518771331, "grad_norm": 1.5955632197034797, "learning_rate": 5.6198579255112295e-08, "loss": 0.0055, "step": 18987 }, { "epoch": 4.320364050056883, "grad_norm": 0.13875423454353106, "learning_rate": 5.6161555409709675e-08, "loss": 0.0008, "step": 18988 }, { "epoch": 4.320591581342435, "grad_norm": 0.11696932933519948, "learning_rate": 5.612454319032297e-08, "loss": 0.0002, "step": 18989 }, { "epoch": 4.320819112627986, "grad_norm": 0.6665844535535113, "learning_rate": 5.608754259770893e-08, "loss": 0.0049, "step": 18990 }, { "epoch": 4.321046643913538, "grad_norm": 0.1588427828124699, "learning_rate": 5.605055363262342e-08, "loss": 0.0006, "step": 18991 }, { "epoch": 4.32127417519909, "grad_norm": 0.539864200792741, "learning_rate": 5.601357629582263e-08, "loss": 0.0035, "step": 18992 }, { "epoch": 4.321501706484642, "grad_norm": 0.20261196378643756, "learning_rate": 5.597661058806242e-08, "loss": 0.0008, "step": 18993 }, { "epoch": 4.321729237770193, "grad_norm": 0.30367105334033534, "learning_rate": 5.593965651009808e-08, "loss": 0.001, "step": 18994 }, { "epoch": 4.321956769055745, "grad_norm": 0.15709356622456372, "learning_rate": 5.590271406268506e-08, "loss": 0.0005, "step": 18995 }, { "epoch": 4.322184300341297, "grad_norm": 0.2976026799370904, "learning_rate": 5.5865783246578166e-08, "loss": 0.0015, "step": 18996 }, { "epoch": 4.322411831626849, "grad_norm": 0.15098507413614415, "learning_rate": 5.582886406253256e-08, "loss": 0.0005, "step": 18997 }, { "epoch": 4.3226393629124, "grad_norm": 0.09813944784326366, "learning_rate": 5.579195651130235e-08, "loss": 0.0005, "step": 18998 }, { "epoch": 4.3228668941979524, "grad_norm": 0.3643432927234119, "learning_rate": 5.5755060593642166e-08, "loss": 0.0026, "step": 18999 }, { "epoch": 4.323094425483504, "grad_norm": 1.5872924754831963, "learning_rate": 5.57181763103061e-08, "loss": 0.0086, "step": 19000 }, { "epoch": 4.323321956769056, "grad_norm": 1.172275103916621, "learning_rate": 5.568130366204787e-08, "loss": 0.0033, "step": 19001 }, { "epoch": 4.323549488054607, "grad_norm": 0.11005507135153858, "learning_rate": 5.5644442649621166e-08, "loss": 0.0005, "step": 19002 }, { "epoch": 4.3237770193401595, "grad_norm": 0.5342232914576933, "learning_rate": 5.560759327377929e-08, "loss": 0.0066, "step": 19003 }, { "epoch": 4.324004550625711, "grad_norm": 0.5173475473733647, "learning_rate": 5.557075553527545e-08, "loss": 0.0018, "step": 19004 }, { "epoch": 4.324232081911263, "grad_norm": 0.1888443629144907, "learning_rate": 5.5533929434862454e-08, "loss": 0.0008, "step": 19005 }, { "epoch": 4.324459613196814, "grad_norm": 0.8280032798224578, "learning_rate": 5.549711497329302e-08, "loss": 0.0049, "step": 19006 }, { "epoch": 4.3246871444823665, "grad_norm": 0.385459872204456, "learning_rate": 5.546031215131961e-08, "loss": 0.0012, "step": 19007 }, { "epoch": 4.324914675767918, "grad_norm": 0.3245412171266733, "learning_rate": 5.542352096969426e-08, "loss": 0.0015, "step": 19008 }, { "epoch": 4.32514220705347, "grad_norm": 0.27767174855553656, "learning_rate": 5.538674142916915e-08, "loss": 0.0013, "step": 19009 }, { "epoch": 4.325369738339021, "grad_norm": 0.2883607708922111, "learning_rate": 5.534997353049576e-08, "loss": 0.0021, "step": 19010 }, { "epoch": 4.3255972696245735, "grad_norm": 0.3202259992556643, "learning_rate": 5.5313217274425705e-08, "loss": 0.0035, "step": 19011 }, { "epoch": 4.325824800910125, "grad_norm": 0.036997324121675994, "learning_rate": 5.527647266171006e-08, "loss": 0.0001, "step": 19012 }, { "epoch": 4.326052332195677, "grad_norm": 0.3984962527624663, "learning_rate": 5.523973969309995e-08, "loss": 0.0006, "step": 19013 }, { "epoch": 4.326279863481228, "grad_norm": 0.4891725072865981, "learning_rate": 5.5203018369346176e-08, "loss": 0.003, "step": 19014 }, { "epoch": 4.3265073947667805, "grad_norm": 0.12291888663739751, "learning_rate": 5.516630869119903e-08, "loss": 0.0006, "step": 19015 }, { "epoch": 4.326734926052332, "grad_norm": 1.2587997340447894, "learning_rate": 5.5129610659409094e-08, "loss": 0.01, "step": 19016 }, { "epoch": 4.326962457337884, "grad_norm": 0.5311653198211357, "learning_rate": 5.509292427472612e-08, "loss": 0.0026, "step": 19017 }, { "epoch": 4.327189988623436, "grad_norm": 0.29585580867104827, "learning_rate": 5.505624953790013e-08, "loss": 0.0023, "step": 19018 }, { "epoch": 4.3274175199089875, "grad_norm": 0.7696375971602282, "learning_rate": 5.5019586449680527e-08, "loss": 0.008, "step": 19019 }, { "epoch": 4.327645051194539, "grad_norm": 0.0387358041321686, "learning_rate": 5.498293501081671e-08, "loss": 0.0001, "step": 19020 }, { "epoch": 4.327872582480091, "grad_norm": 0.07516294011866961, "learning_rate": 5.49462952220578e-08, "loss": 0.0002, "step": 19021 }, { "epoch": 4.328100113765643, "grad_norm": 0.27131462922669614, "learning_rate": 5.4909667084152574e-08, "loss": 0.0027, "step": 19022 }, { "epoch": 4.3283276450511945, "grad_norm": 0.7435399184815619, "learning_rate": 5.487305059784981e-08, "loss": 0.0018, "step": 19023 }, { "epoch": 4.328555176336747, "grad_norm": 0.12150833182211954, "learning_rate": 5.483644576389766e-08, "loss": 0.0004, "step": 19024 }, { "epoch": 4.328782707622298, "grad_norm": 0.24709617118987381, "learning_rate": 5.4799852583044336e-08, "loss": 0.0007, "step": 19025 }, { "epoch": 4.32901023890785, "grad_norm": 0.3063817006648043, "learning_rate": 5.4763271056037724e-08, "loss": 0.0008, "step": 19026 }, { "epoch": 4.3292377701934015, "grad_norm": 0.34071243269159596, "learning_rate": 5.4726701183625485e-08, "loss": 0.0021, "step": 19027 }, { "epoch": 4.329465301478954, "grad_norm": 0.3913547380841971, "learning_rate": 5.469014296655521e-08, "loss": 0.0021, "step": 19028 }, { "epoch": 4.329692832764505, "grad_norm": 0.11538439682384123, "learning_rate": 5.465359640557381e-08, "loss": 0.0007, "step": 19029 }, { "epoch": 4.329920364050057, "grad_norm": 0.33890499641315575, "learning_rate": 5.4617061501428465e-08, "loss": 0.0028, "step": 19030 }, { "epoch": 4.3301478953356085, "grad_norm": 0.2727220024706601, "learning_rate": 5.45805382548658e-08, "loss": 0.0012, "step": 19031 }, { "epoch": 4.330375426621161, "grad_norm": 1.9055117003471602, "learning_rate": 5.4544026666632085e-08, "loss": 0.0156, "step": 19032 }, { "epoch": 4.330602957906712, "grad_norm": 0.295648931654826, "learning_rate": 5.450752673747381e-08, "loss": 0.0015, "step": 19033 }, { "epoch": 4.330830489192264, "grad_norm": 0.5074217700037502, "learning_rate": 5.4471038468136837e-08, "loss": 0.0035, "step": 19034 }, { "epoch": 4.3310580204778155, "grad_norm": 0.496855756868313, "learning_rate": 5.443456185936703e-08, "loss": 0.0037, "step": 19035 }, { "epoch": 4.331285551763368, "grad_norm": 0.10021229277374574, "learning_rate": 5.4398096911909834e-08, "loss": 0.0006, "step": 19036 }, { "epoch": 4.331513083048919, "grad_norm": 0.15511742099138393, "learning_rate": 5.4361643626510556e-08, "loss": 0.0008, "step": 19037 }, { "epoch": 4.331740614334471, "grad_norm": 0.19382913545264768, "learning_rate": 5.432520200391422e-08, "loss": 0.0003, "step": 19038 }, { "epoch": 4.3319681456200225, "grad_norm": 1.048935445268082, "learning_rate": 5.4288772044865447e-08, "loss": 0.0068, "step": 19039 }, { "epoch": 4.332195676905575, "grad_norm": 0.10844232993962477, "learning_rate": 5.425235375010912e-08, "loss": 0.0004, "step": 19040 }, { "epoch": 4.332423208191126, "grad_norm": 0.6195206378738083, "learning_rate": 5.42159471203893e-08, "loss": 0.0032, "step": 19041 }, { "epoch": 4.332650739476678, "grad_norm": 0.39497843886900313, "learning_rate": 5.417955215645032e-08, "loss": 0.0043, "step": 19042 }, { "epoch": 4.3328782707622295, "grad_norm": 0.11729297614205003, "learning_rate": 5.414316885903589e-08, "loss": 0.0006, "step": 19043 }, { "epoch": 4.333105802047782, "grad_norm": 0.23315763614601945, "learning_rate": 5.410679722888952e-08, "loss": 0.0009, "step": 19044 }, { "epoch": 4.333333333333333, "grad_norm": 0.07874929951718285, "learning_rate": 5.4070437266754634e-08, "loss": 0.0003, "step": 19045 }, { "epoch": 4.333560864618885, "grad_norm": 0.6698908483781998, "learning_rate": 5.403408897337439e-08, "loss": 0.0045, "step": 19046 }, { "epoch": 4.3337883959044365, "grad_norm": 0.8111885693015832, "learning_rate": 5.399775234949181e-08, "loss": 0.0061, "step": 19047 }, { "epoch": 4.334015927189989, "grad_norm": 0.39322458146343087, "learning_rate": 5.396142739584935e-08, "loss": 0.003, "step": 19048 }, { "epoch": 4.33424345847554, "grad_norm": 0.6625525349309838, "learning_rate": 5.392511411318961e-08, "loss": 0.002, "step": 19049 }, { "epoch": 4.334470989761092, "grad_norm": 0.6680125418867713, "learning_rate": 5.388881250225464e-08, "loss": 0.0022, "step": 19050 }, { "epoch": 4.3346985210466435, "grad_norm": 0.193497013973803, "learning_rate": 5.385252256378634e-08, "loss": 0.0009, "step": 19051 }, { "epoch": 4.334926052332196, "grad_norm": 0.12317080413019361, "learning_rate": 5.3816244298526415e-08, "loss": 0.0005, "step": 19052 }, { "epoch": 4.335153583617747, "grad_norm": 1.7326105036145272, "learning_rate": 5.377997770721642e-08, "loss": 0.0071, "step": 19053 }, { "epoch": 4.335381114903299, "grad_norm": 3.1681184373376174, "learning_rate": 5.374372279059764e-08, "loss": 0.0171, "step": 19054 }, { "epoch": 4.335608646188851, "grad_norm": 0.25765758789467386, "learning_rate": 5.370747954941087e-08, "loss": 0.0006, "step": 19055 }, { "epoch": 4.335836177474403, "grad_norm": 0.044486653802508755, "learning_rate": 5.3671247984396976e-08, "loss": 0.0001, "step": 19056 }, { "epoch": 4.336063708759955, "grad_norm": 0.8995812754537461, "learning_rate": 5.363502809629655e-08, "loss": 0.0066, "step": 19057 }, { "epoch": 4.336291240045506, "grad_norm": 0.033483729281130054, "learning_rate": 5.359881988584954e-08, "loss": 0.0001, "step": 19058 }, { "epoch": 4.336518771331058, "grad_norm": 0.19139625846573904, "learning_rate": 5.356262335379621e-08, "loss": 0.0005, "step": 19059 }, { "epoch": 4.33674630261661, "grad_norm": 0.04104496333343473, "learning_rate": 5.35264385008763e-08, "loss": 0.0001, "step": 19060 }, { "epoch": 4.336973833902162, "grad_norm": 0.5887208941675329, "learning_rate": 5.349026532782957e-08, "loss": 0.0007, "step": 19061 }, { "epoch": 4.337201365187713, "grad_norm": 1.353644034614493, "learning_rate": 5.345410383539508e-08, "loss": 0.005, "step": 19062 }, { "epoch": 4.3374288964732655, "grad_norm": 0.3309883958967388, "learning_rate": 5.341795402431189e-08, "loss": 0.0009, "step": 19063 }, { "epoch": 4.337656427758817, "grad_norm": 0.08919250253604398, "learning_rate": 5.3381815895319005e-08, "loss": 0.0004, "step": 19064 }, { "epoch": 4.337883959044369, "grad_norm": 0.15078643960230065, "learning_rate": 5.3345689449154775e-08, "loss": 0.0006, "step": 19065 }, { "epoch": 4.33811149032992, "grad_norm": 1.0989562075516848, "learning_rate": 5.3309574686557786e-08, "loss": 0.0042, "step": 19066 }, { "epoch": 4.3383390216154725, "grad_norm": 0.45176746524000216, "learning_rate": 5.327347160826612e-08, "loss": 0.0027, "step": 19067 }, { "epoch": 4.338566552901024, "grad_norm": 0.9665802081622, "learning_rate": 5.323738021501768e-08, "loss": 0.0031, "step": 19068 }, { "epoch": 4.338794084186576, "grad_norm": 0.45836527571703933, "learning_rate": 5.320130050755004e-08, "loss": 0.0034, "step": 19069 }, { "epoch": 4.339021615472127, "grad_norm": 0.2833222744034056, "learning_rate": 5.316523248660055e-08, "loss": 0.0014, "step": 19070 }, { "epoch": 4.3392491467576795, "grad_norm": 0.09609011137456581, "learning_rate": 5.312917615290653e-08, "loss": 0.0003, "step": 19071 }, { "epoch": 4.339476678043231, "grad_norm": 0.8026694375929744, "learning_rate": 5.309313150720474e-08, "loss": 0.0048, "step": 19072 }, { "epoch": 4.339704209328783, "grad_norm": 0.3104218207209773, "learning_rate": 5.30570985502319e-08, "loss": 0.0044, "step": 19073 }, { "epoch": 4.339931740614334, "grad_norm": 0.5914394758420305, "learning_rate": 5.30210772827245e-08, "loss": 0.0047, "step": 19074 }, { "epoch": 4.3401592718998865, "grad_norm": 0.18675485368177913, "learning_rate": 5.298506770541889e-08, "loss": 0.001, "step": 19075 }, { "epoch": 4.340386803185438, "grad_norm": 0.2202089522532855, "learning_rate": 5.2949069819050875e-08, "loss": 0.0011, "step": 19076 }, { "epoch": 4.34061433447099, "grad_norm": 0.421465330353456, "learning_rate": 5.2913083624356114e-08, "loss": 0.0011, "step": 19077 }, { "epoch": 4.340841865756541, "grad_norm": 0.4152290856754608, "learning_rate": 5.2877109122070334e-08, "loss": 0.0018, "step": 19078 }, { "epoch": 4.3410693970420935, "grad_norm": 0.45523563606010603, "learning_rate": 5.284114631292851e-08, "loss": 0.0014, "step": 19079 }, { "epoch": 4.341296928327645, "grad_norm": 0.26208457496262655, "learning_rate": 5.280519519766582e-08, "loss": 0.001, "step": 19080 }, { "epoch": 4.341524459613197, "grad_norm": 0.025522635326736938, "learning_rate": 5.2769255777017084e-08, "loss": 0.0001, "step": 19081 }, { "epoch": 4.341751990898748, "grad_norm": 0.12317909976480149, "learning_rate": 5.273332805171665e-08, "loss": 0.0005, "step": 19082 }, { "epoch": 4.3419795221843005, "grad_norm": 0.2291571085320216, "learning_rate": 5.269741202249906e-08, "loss": 0.0011, "step": 19083 }, { "epoch": 4.342207053469852, "grad_norm": 1.147467761305952, "learning_rate": 5.266150769009819e-08, "loss": 0.0112, "step": 19084 }, { "epoch": 4.342434584755404, "grad_norm": 0.5660321504294183, "learning_rate": 5.262561505524795e-08, "loss": 0.0036, "step": 19085 }, { "epoch": 4.342662116040955, "grad_norm": 0.08967321856592755, "learning_rate": 5.258973411868186e-08, "loss": 0.0004, "step": 19086 }, { "epoch": 4.3428896473265075, "grad_norm": 0.3693238969810105, "learning_rate": 5.2553864881133215e-08, "loss": 0.0006, "step": 19087 }, { "epoch": 4.343117178612059, "grad_norm": 0.0447138687591552, "learning_rate": 5.251800734333533e-08, "loss": 0.0001, "step": 19088 }, { "epoch": 4.343344709897611, "grad_norm": 0.4811798689214373, "learning_rate": 5.24821615060208e-08, "loss": 0.0029, "step": 19089 }, { "epoch": 4.343572241183162, "grad_norm": 0.45232527413501566, "learning_rate": 5.244632736992244e-08, "loss": 0.0045, "step": 19090 }, { "epoch": 4.3437997724687145, "grad_norm": 2.625641553058841, "learning_rate": 5.241050493577253e-08, "loss": 0.0037, "step": 19091 }, { "epoch": 4.344027303754266, "grad_norm": 0.11449019340935845, "learning_rate": 5.2374694204303314e-08, "loss": 0.0004, "step": 19092 }, { "epoch": 4.344254835039818, "grad_norm": 0.35215422208283237, "learning_rate": 5.2338895176246506e-08, "loss": 0.0012, "step": 19093 }, { "epoch": 4.344482366325369, "grad_norm": 0.42803073450388807, "learning_rate": 5.2303107852333944e-08, "loss": 0.0038, "step": 19094 }, { "epoch": 4.3447098976109215, "grad_norm": 0.26347600771096585, "learning_rate": 5.2267332233297134e-08, "loss": 0.0008, "step": 19095 }, { "epoch": 4.344937428896474, "grad_norm": 0.10609330424510996, "learning_rate": 5.223156831986702e-08, "loss": 0.0003, "step": 19096 }, { "epoch": 4.345164960182025, "grad_norm": 0.3082982368123334, "learning_rate": 5.219581611277474e-08, "loss": 0.0011, "step": 19097 }, { "epoch": 4.345392491467576, "grad_norm": 0.32051912120000225, "learning_rate": 5.216007561275084e-08, "loss": 0.0008, "step": 19098 }, { "epoch": 4.3456200227531285, "grad_norm": 0.17229232865740468, "learning_rate": 5.212434682052604e-08, "loss": 0.0008, "step": 19099 }, { "epoch": 4.345847554038681, "grad_norm": 0.08405946516255294, "learning_rate": 5.208862973683025e-08, "loss": 0.0005, "step": 19100 }, { "epoch": 4.346075085324232, "grad_norm": 0.2746175793611209, "learning_rate": 5.2052924362393714e-08, "loss": 0.0022, "step": 19101 }, { "epoch": 4.346302616609784, "grad_norm": 0.12604235358135757, "learning_rate": 5.201723069794613e-08, "loss": 0.0009, "step": 19102 }, { "epoch": 4.3465301478953355, "grad_norm": 0.7121236559494408, "learning_rate": 5.1981548744216915e-08, "loss": 0.008, "step": 19103 }, { "epoch": 4.346757679180888, "grad_norm": 0.7375114292923155, "learning_rate": 5.194587850193555e-08, "loss": 0.0034, "step": 19104 }, { "epoch": 4.346985210466439, "grad_norm": 0.10571716042721173, "learning_rate": 5.1910219971830765e-08, "loss": 0.0003, "step": 19105 }, { "epoch": 4.347212741751991, "grad_norm": 0.13878463729462404, "learning_rate": 5.1874573154631555e-08, "loss": 0.0005, "step": 19106 }, { "epoch": 4.3474402730375425, "grad_norm": 0.1175031566333529, "learning_rate": 5.18389380510665e-08, "loss": 0.0007, "step": 19107 }, { "epoch": 4.347667804323095, "grad_norm": 0.3915942321503702, "learning_rate": 5.180331466186378e-08, "loss": 0.0025, "step": 19108 }, { "epoch": 4.347895335608646, "grad_norm": 0.3910598365842172, "learning_rate": 5.176770298775162e-08, "loss": 0.0032, "step": 19109 }, { "epoch": 4.348122866894198, "grad_norm": 0.6706069713543761, "learning_rate": 5.1732103029457774e-08, "loss": 0.0046, "step": 19110 }, { "epoch": 4.3483503981797496, "grad_norm": 0.30206750444905783, "learning_rate": 5.169651478770986e-08, "loss": 0.001, "step": 19111 }, { "epoch": 4.348577929465302, "grad_norm": 0.24894570506900576, "learning_rate": 5.166093826323514e-08, "loss": 0.0008, "step": 19112 }, { "epoch": 4.348805460750853, "grad_norm": 0.22135658109110914, "learning_rate": 5.162537345676087e-08, "loss": 0.0015, "step": 19113 }, { "epoch": 4.349032992036405, "grad_norm": 0.6242659506795878, "learning_rate": 5.1589820369013917e-08, "loss": 0.0024, "step": 19114 }, { "epoch": 4.349260523321957, "grad_norm": 0.1627856603407246, "learning_rate": 5.155427900072084e-08, "loss": 0.001, "step": 19115 }, { "epoch": 4.349488054607509, "grad_norm": 0.2453986191854774, "learning_rate": 5.1518749352608145e-08, "loss": 0.002, "step": 19116 }, { "epoch": 4.34971558589306, "grad_norm": 0.22205327560580324, "learning_rate": 5.1483231425401846e-08, "loss": 0.0007, "step": 19117 }, { "epoch": 4.349943117178612, "grad_norm": 0.9061592757870899, "learning_rate": 5.1447725219828105e-08, "loss": 0.0133, "step": 19118 }, { "epoch": 4.350170648464164, "grad_norm": 0.016286200228382963, "learning_rate": 5.141223073661231e-08, "loss": 0.0001, "step": 19119 }, { "epoch": 4.350398179749716, "grad_norm": 0.3435477445948607, "learning_rate": 5.137674797647998e-08, "loss": 0.0022, "step": 19120 }, { "epoch": 4.350625711035267, "grad_norm": 0.19776438035210084, "learning_rate": 5.134127694015653e-08, "loss": 0.0007, "step": 19121 }, { "epoch": 4.350853242320819, "grad_norm": 0.3451188131284384, "learning_rate": 5.1305817628366714e-08, "loss": 0.0015, "step": 19122 }, { "epoch": 4.351080773606371, "grad_norm": 0.296907451256382, "learning_rate": 5.127037004183537e-08, "loss": 0.0012, "step": 19123 }, { "epoch": 4.351308304891923, "grad_norm": 0.2783099789407249, "learning_rate": 5.123493418128685e-08, "loss": 0.0022, "step": 19124 }, { "epoch": 4.351535836177474, "grad_norm": 0.2027908158718397, "learning_rate": 5.11995100474455e-08, "loss": 0.001, "step": 19125 }, { "epoch": 4.351763367463026, "grad_norm": 0.24577159159909823, "learning_rate": 5.1164097641035264e-08, "loss": 0.001, "step": 19126 }, { "epoch": 4.351990898748578, "grad_norm": 1.0568568799687765, "learning_rate": 5.112869696277993e-08, "loss": 0.003, "step": 19127 }, { "epoch": 4.35221843003413, "grad_norm": 0.1942993254105472, "learning_rate": 5.1093308013403163e-08, "loss": 0.0007, "step": 19128 }, { "epoch": 4.352445961319681, "grad_norm": 0.11215504166791868, "learning_rate": 5.105793079362799e-08, "loss": 0.0006, "step": 19129 }, { "epoch": 4.352673492605233, "grad_norm": 0.7685926467740986, "learning_rate": 5.1022565304177654e-08, "loss": 0.0035, "step": 19130 }, { "epoch": 4.352901023890785, "grad_norm": 0.264493501125408, "learning_rate": 5.0987211545774976e-08, "loss": 0.0008, "step": 19131 }, { "epoch": 4.353128555176337, "grad_norm": 0.406238255918837, "learning_rate": 5.09518695191423e-08, "loss": 0.004, "step": 19132 }, { "epoch": 4.353356086461888, "grad_norm": 0.5367491700927494, "learning_rate": 5.0916539225002034e-08, "loss": 0.0084, "step": 19133 }, { "epoch": 4.35358361774744, "grad_norm": 0.07849151971346122, "learning_rate": 5.088122066407637e-08, "loss": 0.0002, "step": 19134 }, { "epoch": 4.3538111490329925, "grad_norm": 0.41713502207725905, "learning_rate": 5.0845913837087245e-08, "loss": 0.0017, "step": 19135 }, { "epoch": 4.354038680318544, "grad_norm": 0.3654064181704694, "learning_rate": 5.0810618744755944e-08, "loss": 0.0025, "step": 19136 }, { "epoch": 4.354266211604095, "grad_norm": 0.8443057285637375, "learning_rate": 5.077533538780419e-08, "loss": 0.0063, "step": 19137 }, { "epoch": 4.354493742889647, "grad_norm": 0.4992554091166344, "learning_rate": 5.0740063766952924e-08, "loss": 0.0034, "step": 19138 }, { "epoch": 4.3547212741751995, "grad_norm": 0.6355974608309385, "learning_rate": 5.07048038829229e-08, "loss": 0.004, "step": 19139 }, { "epoch": 4.354948805460751, "grad_norm": 0.2546529467072551, "learning_rate": 5.066955573643499e-08, "loss": 0.0009, "step": 19140 }, { "epoch": 4.355176336746303, "grad_norm": 0.15441870966110288, "learning_rate": 5.063431932820946e-08, "loss": 0.0003, "step": 19141 }, { "epoch": 4.355403868031854, "grad_norm": 0.6934727357490779, "learning_rate": 5.059909465896663e-08, "loss": 0.0028, "step": 19142 }, { "epoch": 4.3556313993174065, "grad_norm": 0.4076707810777417, "learning_rate": 5.056388172942628e-08, "loss": 0.0032, "step": 19143 }, { "epoch": 4.355858930602958, "grad_norm": 0.2283669473355214, "learning_rate": 5.052868054030824e-08, "loss": 0.0009, "step": 19144 }, { "epoch": 4.35608646188851, "grad_norm": 0.12393568833871886, "learning_rate": 5.049349109233194e-08, "loss": 0.0004, "step": 19145 }, { "epoch": 4.356313993174061, "grad_norm": 1.9309641911582938, "learning_rate": 5.045831338621632e-08, "loss": 0.0171, "step": 19146 }, { "epoch": 4.3565415244596135, "grad_norm": 0.0627669379317213, "learning_rate": 5.042314742268059e-08, "loss": 0.0003, "step": 19147 }, { "epoch": 4.356769055745165, "grad_norm": 0.6930914948646901, "learning_rate": 5.038799320244349e-08, "loss": 0.0033, "step": 19148 }, { "epoch": 4.356996587030717, "grad_norm": 0.2242846896333448, "learning_rate": 5.0352850726223536e-08, "loss": 0.0007, "step": 19149 }, { "epoch": 4.357224118316268, "grad_norm": 0.16556244251521124, "learning_rate": 5.031771999473883e-08, "loss": 0.0006, "step": 19150 }, { "epoch": 4.3574516496018205, "grad_norm": 0.8235561124245362, "learning_rate": 5.028260100870742e-08, "loss": 0.0032, "step": 19151 }, { "epoch": 4.357679180887372, "grad_norm": 0.16439783649911938, "learning_rate": 5.02474937688472e-08, "loss": 0.0005, "step": 19152 }, { "epoch": 4.357906712172924, "grad_norm": 0.06714165656846499, "learning_rate": 5.021239827587544e-08, "loss": 0.0001, "step": 19153 }, { "epoch": 4.358134243458475, "grad_norm": 0.3174201996767602, "learning_rate": 5.0177314530509626e-08, "loss": 0.0013, "step": 19154 }, { "epoch": 4.3583617747440275, "grad_norm": 0.4464114776411711, "learning_rate": 5.014224253346675e-08, "loss": 0.002, "step": 19155 }, { "epoch": 4.358589306029579, "grad_norm": 0.17643373871319884, "learning_rate": 5.010718228546374e-08, "loss": 0.0007, "step": 19156 }, { "epoch": 4.358816837315131, "grad_norm": 0.6811099637661175, "learning_rate": 5.0072133787217035e-08, "loss": 0.0054, "step": 19157 }, { "epoch": 4.359044368600682, "grad_norm": 0.03431101245297398, "learning_rate": 5.0037097039442944e-08, "loss": 0.0001, "step": 19158 }, { "epoch": 4.3592718998862345, "grad_norm": 0.3041552244866395, "learning_rate": 5.000207204285762e-08, "loss": 0.0013, "step": 19159 }, { "epoch": 4.359499431171786, "grad_norm": 0.837905601901694, "learning_rate": 4.996705879817675e-08, "loss": 0.0037, "step": 19160 }, { "epoch": 4.359726962457338, "grad_norm": 0.29874464715649207, "learning_rate": 4.9932057306116207e-08, "loss": 0.0008, "step": 19161 }, { "epoch": 4.359954493742889, "grad_norm": 0.15870956057788133, "learning_rate": 4.98970675673912e-08, "loss": 0.0007, "step": 19162 }, { "epoch": 4.3601820250284415, "grad_norm": 0.5120956836236106, "learning_rate": 4.9862089582716904e-08, "loss": 0.0034, "step": 19163 }, { "epoch": 4.360409556313993, "grad_norm": 0.48858671044549284, "learning_rate": 4.9827123352808244e-08, "loss": 0.0012, "step": 19164 }, { "epoch": 4.360637087599545, "grad_norm": 0.07955415066249721, "learning_rate": 4.979216887837972e-08, "loss": 0.0002, "step": 19165 }, { "epoch": 4.360864618885096, "grad_norm": 0.5423025637134297, "learning_rate": 4.975722616014575e-08, "loss": 0.0021, "step": 19166 }, { "epoch": 4.3610921501706486, "grad_norm": 0.2854970169928749, "learning_rate": 4.972229519882063e-08, "loss": 0.0019, "step": 19167 }, { "epoch": 4.3613196814562, "grad_norm": 0.05908458248996631, "learning_rate": 4.9687375995118305e-08, "loss": 0.0001, "step": 19168 }, { "epoch": 4.361547212741752, "grad_norm": 0.4296086189583812, "learning_rate": 4.9652468549752365e-08, "loss": 0.0023, "step": 19169 }, { "epoch": 4.361774744027303, "grad_norm": 0.6899332881823297, "learning_rate": 4.961757286343613e-08, "loss": 0.0071, "step": 19170 }, { "epoch": 4.362002275312856, "grad_norm": 0.23926461966156695, "learning_rate": 4.958268893688313e-08, "loss": 0.0011, "step": 19171 }, { "epoch": 4.362229806598407, "grad_norm": 0.6919345428728234, "learning_rate": 4.9547816770805986e-08, "loss": 0.0102, "step": 19172 }, { "epoch": 4.362457337883959, "grad_norm": 0.7909633760515586, "learning_rate": 4.9512956365917605e-08, "loss": 0.0023, "step": 19173 }, { "epoch": 4.362684869169511, "grad_norm": 0.19124250075223767, "learning_rate": 4.947810772293039e-08, "loss": 0.0003, "step": 19174 }, { "epoch": 4.362912400455063, "grad_norm": 0.08286533565956238, "learning_rate": 4.9443270842556777e-08, "loss": 0.0002, "step": 19175 }, { "epoch": 4.363139931740614, "grad_norm": 0.3554201191880723, "learning_rate": 4.940844572550861e-08, "loss": 0.001, "step": 19176 }, { "epoch": 4.363367463026166, "grad_norm": 0.09662813915213407, "learning_rate": 4.937363237249762e-08, "loss": 0.0004, "step": 19177 }, { "epoch": 4.363594994311718, "grad_norm": 0.32525831363001906, "learning_rate": 4.933883078423539e-08, "loss": 0.0015, "step": 19178 }, { "epoch": 4.36382252559727, "grad_norm": 0.18369096933364432, "learning_rate": 4.930404096143315e-08, "loss": 0.0009, "step": 19179 }, { "epoch": 4.364050056882822, "grad_norm": 0.6310648887518294, "learning_rate": 4.926926290480194e-08, "loss": 0.0059, "step": 19180 }, { "epoch": 4.364277588168373, "grad_norm": 0.10828537500059385, "learning_rate": 4.923449661505257e-08, "loss": 0.0005, "step": 19181 }, { "epoch": 4.364505119453925, "grad_norm": 0.3190748990565077, "learning_rate": 4.91997420928958e-08, "loss": 0.0018, "step": 19182 }, { "epoch": 4.364732650739477, "grad_norm": 0.5234593975455051, "learning_rate": 4.9164999339041746e-08, "loss": 0.0034, "step": 19183 }, { "epoch": 4.364960182025029, "grad_norm": 0.3021043244451028, "learning_rate": 4.91302683542004e-08, "loss": 0.0012, "step": 19184 }, { "epoch": 4.36518771331058, "grad_norm": 0.052703649966060286, "learning_rate": 4.909554913908182e-08, "loss": 0.0002, "step": 19185 }, { "epoch": 4.365415244596132, "grad_norm": 0.3481115499242992, "learning_rate": 4.906084169439537e-08, "loss": 0.0016, "step": 19186 }, { "epoch": 4.365642775881684, "grad_norm": 0.20377071826707682, "learning_rate": 4.9026146020850555e-08, "loss": 0.0006, "step": 19187 }, { "epoch": 4.365870307167236, "grad_norm": 0.07872310315974007, "learning_rate": 4.899146211915659e-08, "loss": 0.0003, "step": 19188 }, { "epoch": 4.366097838452787, "grad_norm": 0.7620371419277673, "learning_rate": 4.895678999002208e-08, "loss": 0.0027, "step": 19189 }, { "epoch": 4.366325369738339, "grad_norm": 0.4094430760124107, "learning_rate": 4.8922129634155976e-08, "loss": 0.0009, "step": 19190 }, { "epoch": 4.366552901023891, "grad_norm": 0.2385794069076131, "learning_rate": 4.888748105226632e-08, "loss": 0.0008, "step": 19191 }, { "epoch": 4.366780432309443, "grad_norm": 0.5837473610576872, "learning_rate": 4.885284424506164e-08, "loss": 0.0037, "step": 19192 }, { "epoch": 4.367007963594994, "grad_norm": 0.3149378284145056, "learning_rate": 4.8818219213249486e-08, "loss": 0.0018, "step": 19193 }, { "epoch": 4.367235494880546, "grad_norm": 1.2790406783145658, "learning_rate": 4.878360595753771e-08, "loss": 0.0033, "step": 19194 }, { "epoch": 4.367463026166098, "grad_norm": 0.17678972725726377, "learning_rate": 4.874900447863387e-08, "loss": 0.0012, "step": 19195 }, { "epoch": 4.36769055745165, "grad_norm": 0.17276498535334744, "learning_rate": 4.8714414777244965e-08, "loss": 0.0006, "step": 19196 }, { "epoch": 4.367918088737201, "grad_norm": 0.08663035438404301, "learning_rate": 4.8679836854077995e-08, "loss": 0.0002, "step": 19197 }, { "epoch": 4.368145620022753, "grad_norm": 0.42859796774475206, "learning_rate": 4.864527070983969e-08, "loss": 0.0008, "step": 19198 }, { "epoch": 4.368373151308305, "grad_norm": 0.42318424674911786, "learning_rate": 4.861071634523658e-08, "loss": 0.0008, "step": 19199 }, { "epoch": 4.368600682593857, "grad_norm": 0.32650056916801334, "learning_rate": 4.8576173760974674e-08, "loss": 0.002, "step": 19200 }, { "epoch": 4.368828213879408, "grad_norm": 0.13960031876232934, "learning_rate": 4.8541642957760096e-08, "loss": 0.0006, "step": 19201 }, { "epoch": 4.36905574516496, "grad_norm": 0.2861096018720446, "learning_rate": 4.850712393629879e-08, "loss": 0.001, "step": 19202 }, { "epoch": 4.369283276450512, "grad_norm": 0.21902124105795126, "learning_rate": 4.84726166972959e-08, "loss": 0.0009, "step": 19203 }, { "epoch": 4.369510807736064, "grad_norm": 0.6593573781705243, "learning_rate": 4.843812124145697e-08, "loss": 0.0032, "step": 19204 }, { "epoch": 4.369738339021615, "grad_norm": 0.5420995817924742, "learning_rate": 4.840363756948685e-08, "loss": 0.0024, "step": 19205 }, { "epoch": 4.369965870307167, "grad_norm": 0.22139841494370516, "learning_rate": 4.8369165682090464e-08, "loss": 0.0013, "step": 19206 }, { "epoch": 4.370193401592719, "grad_norm": 0.3166613197002353, "learning_rate": 4.833470557997218e-08, "loss": 0.0014, "step": 19207 }, { "epoch": 4.370420932878271, "grad_norm": 0.2634327046675445, "learning_rate": 4.830025726383643e-08, "loss": 0.0034, "step": 19208 }, { "epoch": 4.370648464163822, "grad_norm": 0.2967387746053416, "learning_rate": 4.826582073438738e-08, "loss": 0.001, "step": 19209 }, { "epoch": 4.370875995449374, "grad_norm": 0.05160531301320133, "learning_rate": 4.823139599232855e-08, "loss": 0.0001, "step": 19210 }, { "epoch": 4.371103526734926, "grad_norm": 0.5230141016323416, "learning_rate": 4.819698303836377e-08, "loss": 0.0028, "step": 19211 }, { "epoch": 4.371331058020478, "grad_norm": 0.18262551447438577, "learning_rate": 4.816258187319622e-08, "loss": 0.0008, "step": 19212 }, { "epoch": 4.37155858930603, "grad_norm": 0.28573357616132644, "learning_rate": 4.812819249752923e-08, "loss": 0.0015, "step": 19213 }, { "epoch": 4.371786120591581, "grad_norm": 0.11674359139201569, "learning_rate": 4.8093814912065285e-08, "loss": 0.0005, "step": 19214 }, { "epoch": 4.372013651877133, "grad_norm": 0.2162042737971034, "learning_rate": 4.80594491175073e-08, "loss": 0.0021, "step": 19215 }, { "epoch": 4.372241183162685, "grad_norm": 0.24924154081250843, "learning_rate": 4.8025095114557635e-08, "loss": 0.0013, "step": 19216 }, { "epoch": 4.372468714448237, "grad_norm": 0.3042140444353096, "learning_rate": 4.799075290391822e-08, "loss": 0.0015, "step": 19217 }, { "epoch": 4.372696245733788, "grad_norm": 0.49996834033011317, "learning_rate": 4.7956422486291136e-08, "loss": 0.0031, "step": 19218 }, { "epoch": 4.3729237770193405, "grad_norm": 0.08769999026362885, "learning_rate": 4.792210386237804e-08, "loss": 0.0003, "step": 19219 }, { "epoch": 4.373151308304892, "grad_norm": 0.2510063309987947, "learning_rate": 4.788779703288019e-08, "loss": 0.0011, "step": 19220 }, { "epoch": 4.373378839590444, "grad_norm": 0.25206173465781206, "learning_rate": 4.785350199849874e-08, "loss": 0.0032, "step": 19221 }, { "epoch": 4.373606370875995, "grad_norm": 1.1977153237382123, "learning_rate": 4.781921875993481e-08, "loss": 0.0055, "step": 19222 }, { "epoch": 4.3738339021615475, "grad_norm": 0.29785114444640415, "learning_rate": 4.778494731788902e-08, "loss": 0.0011, "step": 19223 }, { "epoch": 4.374061433447099, "grad_norm": 0.8710805911223974, "learning_rate": 4.7750687673061775e-08, "loss": 0.0039, "step": 19224 }, { "epoch": 4.374288964732651, "grad_norm": 0.3879733096964569, "learning_rate": 4.771643982615329e-08, "loss": 0.0016, "step": 19225 }, { "epoch": 4.374516496018202, "grad_norm": 0.035411703657685, "learning_rate": 4.768220377786348e-08, "loss": 0.0001, "step": 19226 }, { "epoch": 4.374744027303755, "grad_norm": 0.23964747406271883, "learning_rate": 4.764797952889214e-08, "loss": 0.0008, "step": 19227 }, { "epoch": 4.374971558589306, "grad_norm": 1.624365590069546, "learning_rate": 4.761376707993884e-08, "loss": 0.0056, "step": 19228 }, { "epoch": 4.375199089874858, "grad_norm": 0.23436574352052178, "learning_rate": 4.7579566431702545e-08, "loss": 0.0014, "step": 19229 }, { "epoch": 4.375426621160409, "grad_norm": 0.02758231854944097, "learning_rate": 4.754537758488255e-08, "loss": 0.0001, "step": 19230 }, { "epoch": 4.375654152445962, "grad_norm": 0.12419161945380333, "learning_rate": 4.7511200540177394e-08, "loss": 0.0006, "step": 19231 }, { "epoch": 4.375881683731513, "grad_norm": 0.015010776137284507, "learning_rate": 4.747703529828576e-08, "loss": 0.0, "step": 19232 }, { "epoch": 4.376109215017065, "grad_norm": 0.028483531730715685, "learning_rate": 4.74428818599057e-08, "loss": 0.0001, "step": 19233 }, { "epoch": 4.376336746302616, "grad_norm": 0.26405334218342613, "learning_rate": 4.7408740225735466e-08, "loss": 0.0013, "step": 19234 }, { "epoch": 4.376564277588169, "grad_norm": 0.43999721873663283, "learning_rate": 4.737461039647284e-08, "loss": 0.0016, "step": 19235 }, { "epoch": 4.37679180887372, "grad_norm": 0.19803596477178934, "learning_rate": 4.734049237281518e-08, "loss": 0.0008, "step": 19236 }, { "epoch": 4.377019340159272, "grad_norm": 0.23399540321578777, "learning_rate": 4.730638615546006e-08, "loss": 0.0014, "step": 19237 }, { "epoch": 4.377246871444823, "grad_norm": 0.05051005451250576, "learning_rate": 4.727229174510441e-08, "loss": 0.0003, "step": 19238 }, { "epoch": 4.377474402730376, "grad_norm": 0.12355781859935588, "learning_rate": 4.7238209142444895e-08, "loss": 0.0003, "step": 19239 }, { "epoch": 4.377701934015927, "grad_norm": 0.4159623189459971, "learning_rate": 4.720413834817833e-08, "loss": 0.0016, "step": 19240 }, { "epoch": 4.377929465301479, "grad_norm": 0.8336810083233578, "learning_rate": 4.717007936300096e-08, "loss": 0.0027, "step": 19241 }, { "epoch": 4.37815699658703, "grad_norm": 0.6391848000870588, "learning_rate": 4.713603218760909e-08, "loss": 0.0036, "step": 19242 }, { "epoch": 4.378384527872583, "grad_norm": 0.24743966308006352, "learning_rate": 4.7101996822698226e-08, "loss": 0.0013, "step": 19243 }, { "epoch": 4.378612059158134, "grad_norm": 0.318809480988677, "learning_rate": 4.7067973268964324e-08, "loss": 0.0023, "step": 19244 }, { "epoch": 4.378839590443686, "grad_norm": 0.04561204263982593, "learning_rate": 4.70339615271026e-08, "loss": 0.0002, "step": 19245 }, { "epoch": 4.379067121729237, "grad_norm": 0.1286110394309535, "learning_rate": 4.699996159780812e-08, "loss": 0.0011, "step": 19246 }, { "epoch": 4.37929465301479, "grad_norm": 0.7829438430935828, "learning_rate": 4.696597348177588e-08, "loss": 0.0013, "step": 19247 }, { "epoch": 4.379522184300341, "grad_norm": 0.4669303383284775, "learning_rate": 4.693199717970047e-08, "loss": 0.0034, "step": 19248 }, { "epoch": 4.379749715585893, "grad_norm": 0.034984963549680156, "learning_rate": 4.689803269227654e-08, "loss": 0.0001, "step": 19249 }, { "epoch": 4.379977246871444, "grad_norm": 0.1213604305229348, "learning_rate": 4.6864080020197905e-08, "loss": 0.0001, "step": 19250 }, { "epoch": 4.380204778156997, "grad_norm": 0.2918890892366157, "learning_rate": 4.683013916415881e-08, "loss": 0.0024, "step": 19251 }, { "epoch": 4.380432309442549, "grad_norm": 0.5509644031989729, "learning_rate": 4.679621012485279e-08, "loss": 0.0062, "step": 19252 }, { "epoch": 4.3806598407281, "grad_norm": 1.1930591480491506, "learning_rate": 4.6762292902973186e-08, "loss": 0.0107, "step": 19253 }, { "epoch": 4.380887372013651, "grad_norm": 0.17609095363035027, "learning_rate": 4.672838749921332e-08, "loss": 0.0007, "step": 19254 }, { "epoch": 4.381114903299204, "grad_norm": 0.11894694069179076, "learning_rate": 4.66944939142662e-08, "loss": 0.0008, "step": 19255 }, { "epoch": 4.381342434584756, "grad_norm": 0.20317786705829075, "learning_rate": 4.666061214882459e-08, "loss": 0.0012, "step": 19256 }, { "epoch": 4.381569965870307, "grad_norm": 0.2058229425174154, "learning_rate": 4.662674220358085e-08, "loss": 0.0007, "step": 19257 }, { "epoch": 4.381797497155859, "grad_norm": 0.11830394502470472, "learning_rate": 4.6592884079227215e-08, "loss": 0.0002, "step": 19258 }, { "epoch": 4.382025028441411, "grad_norm": 0.05665610540803552, "learning_rate": 4.6559037776455835e-08, "loss": 0.0001, "step": 19259 }, { "epoch": 4.382252559726963, "grad_norm": 0.14765146303796442, "learning_rate": 4.6525203295958166e-08, "loss": 0.0008, "step": 19260 }, { "epoch": 4.382480091012514, "grad_norm": 0.21741142176169698, "learning_rate": 4.649138063842602e-08, "loss": 0.001, "step": 19261 }, { "epoch": 4.382707622298066, "grad_norm": 0.4276666796252504, "learning_rate": 4.6457569804550516e-08, "loss": 0.0051, "step": 19262 }, { "epoch": 4.382935153583618, "grad_norm": 0.03165476925131165, "learning_rate": 4.6423770795022833e-08, "loss": 0.0001, "step": 19263 }, { "epoch": 4.38316268486917, "grad_norm": 0.7392403587762189, "learning_rate": 4.6389983610533664e-08, "loss": 0.0024, "step": 19264 }, { "epoch": 4.383390216154721, "grad_norm": 0.4506072732585306, "learning_rate": 4.635620825177344e-08, "loss": 0.0038, "step": 19265 }, { "epoch": 4.383617747440273, "grad_norm": 0.08928017822918759, "learning_rate": 4.63224447194327e-08, "loss": 0.0003, "step": 19266 }, { "epoch": 4.383845278725825, "grad_norm": 0.04521826143382406, "learning_rate": 4.628869301420133e-08, "loss": 0.0002, "step": 19267 }, { "epoch": 4.384072810011377, "grad_norm": 0.2930508922187907, "learning_rate": 4.625495313676919e-08, "loss": 0.001, "step": 19268 }, { "epoch": 4.384300341296928, "grad_norm": 0.2787096552771157, "learning_rate": 4.622122508782585e-08, "loss": 0.0017, "step": 19269 }, { "epoch": 4.38452787258248, "grad_norm": 0.5239544443930436, "learning_rate": 4.618750886806085e-08, "loss": 0.0009, "step": 19270 }, { "epoch": 4.384755403868032, "grad_norm": 0.18792566362360244, "learning_rate": 4.615380447816308e-08, "loss": 0.0007, "step": 19271 }, { "epoch": 4.384982935153584, "grad_norm": 1.6580794990472196, "learning_rate": 4.612011191882135e-08, "loss": 0.004, "step": 19272 }, { "epoch": 4.385210466439135, "grad_norm": 0.5822942583828284, "learning_rate": 4.6086431190724425e-08, "loss": 0.0015, "step": 19273 }, { "epoch": 4.385437997724687, "grad_norm": 0.4198198919106611, "learning_rate": 4.605276229456057e-08, "loss": 0.0024, "step": 19274 }, { "epoch": 4.385665529010239, "grad_norm": 0.8744088047317001, "learning_rate": 4.601910523101792e-08, "loss": 0.0041, "step": 19275 }, { "epoch": 4.385893060295791, "grad_norm": 0.17009584521866997, "learning_rate": 4.598546000078454e-08, "loss": 0.0003, "step": 19276 }, { "epoch": 4.386120591581342, "grad_norm": 0.1446890142066853, "learning_rate": 4.595182660454785e-08, "loss": 0.0003, "step": 19277 }, { "epoch": 4.386348122866894, "grad_norm": 1.1793560729335468, "learning_rate": 4.5918205042995436e-08, "loss": 0.0041, "step": 19278 }, { "epoch": 4.386575654152446, "grad_norm": 0.11229709236791516, "learning_rate": 4.588459531681425e-08, "loss": 0.0004, "step": 19279 }, { "epoch": 4.386803185437998, "grad_norm": 0.035603568368971876, "learning_rate": 4.585099742669151e-08, "loss": 0.0002, "step": 19280 }, { "epoch": 4.387030716723549, "grad_norm": 0.3173722354333918, "learning_rate": 4.5817411373313414e-08, "loss": 0.0064, "step": 19281 }, { "epoch": 4.387258248009101, "grad_norm": 0.40279351863023355, "learning_rate": 4.578383715736698e-08, "loss": 0.0011, "step": 19282 }, { "epoch": 4.387485779294653, "grad_norm": 0.6803393034395465, "learning_rate": 4.575027477953811e-08, "loss": 0.0015, "step": 19283 }, { "epoch": 4.387713310580205, "grad_norm": 0.08486303501275738, "learning_rate": 4.571672424051265e-08, "loss": 0.0003, "step": 19284 }, { "epoch": 4.387940841865756, "grad_norm": 0.5066387920281586, "learning_rate": 4.5683185540976586e-08, "loss": 0.003, "step": 19285 }, { "epoch": 4.388168373151308, "grad_norm": 0.7274840033357864, "learning_rate": 4.564965868161513e-08, "loss": 0.0048, "step": 19286 }, { "epoch": 4.38839590443686, "grad_norm": 0.08577210963815497, "learning_rate": 4.561614366311362e-08, "loss": 0.0003, "step": 19287 }, { "epoch": 4.388623435722412, "grad_norm": 0.13552904813589756, "learning_rate": 4.558264048615702e-08, "loss": 0.0006, "step": 19288 }, { "epoch": 4.388850967007963, "grad_norm": 0.45897200769783847, "learning_rate": 4.554914915143017e-08, "loss": 0.0024, "step": 19289 }, { "epoch": 4.389078498293515, "grad_norm": 0.10179047595866934, "learning_rate": 4.5515669659617556e-08, "loss": 0.0004, "step": 19290 }, { "epoch": 4.389306029579068, "grad_norm": 1.2650564148744052, "learning_rate": 4.5482202011403255e-08, "loss": 0.0294, "step": 19291 }, { "epoch": 4.389533560864619, "grad_norm": 0.3972369611671396, "learning_rate": 4.5448746207471526e-08, "loss": 0.0058, "step": 19292 }, { "epoch": 4.38976109215017, "grad_norm": 0.7575495133696906, "learning_rate": 4.541530224850592e-08, "loss": 0.0063, "step": 19293 }, { "epoch": 4.389988623435722, "grad_norm": 0.3981407552741829, "learning_rate": 4.5381870135190046e-08, "loss": 0.0009, "step": 19294 }, { "epoch": 4.390216154721275, "grad_norm": 0.2453899493794672, "learning_rate": 4.5348449868207326e-08, "loss": 0.0016, "step": 19295 }, { "epoch": 4.390443686006826, "grad_norm": 0.1390566651445293, "learning_rate": 4.5315041448240686e-08, "loss": 0.0008, "step": 19296 }, { "epoch": 4.390671217292378, "grad_norm": 0.42406548943502737, "learning_rate": 4.528164487597297e-08, "loss": 0.0024, "step": 19297 }, { "epoch": 4.390898748577929, "grad_norm": 0.38679539049113804, "learning_rate": 4.524826015208664e-08, "loss": 0.0015, "step": 19298 }, { "epoch": 4.391126279863482, "grad_norm": 0.3340480724292729, "learning_rate": 4.521488727726425e-08, "loss": 0.001, "step": 19299 }, { "epoch": 4.391353811149033, "grad_norm": 0.05777626499021938, "learning_rate": 4.518152625218764e-08, "loss": 0.0002, "step": 19300 }, { "epoch": 4.391581342434585, "grad_norm": 0.6320635603922619, "learning_rate": 4.514817707753867e-08, "loss": 0.0019, "step": 19301 }, { "epoch": 4.391808873720136, "grad_norm": 0.10247576414168595, "learning_rate": 4.511483975399918e-08, "loss": 0.0002, "step": 19302 }, { "epoch": 4.392036405005689, "grad_norm": 0.1124199825584409, "learning_rate": 4.5081514282250274e-08, "loss": 0.0005, "step": 19303 }, { "epoch": 4.39226393629124, "grad_norm": 1.6091654844311263, "learning_rate": 4.5048200662973216e-08, "loss": 0.007, "step": 19304 }, { "epoch": 4.392491467576792, "grad_norm": 0.09517178128869115, "learning_rate": 4.501489889684865e-08, "loss": 0.0003, "step": 19305 }, { "epoch": 4.392718998862343, "grad_norm": 0.0469155272620543, "learning_rate": 4.4981608984557554e-08, "loss": 0.0001, "step": 19306 }, { "epoch": 4.392946530147896, "grad_norm": 1.7022417624789576, "learning_rate": 4.494833092677994e-08, "loss": 0.0099, "step": 19307 }, { "epoch": 4.393174061433447, "grad_norm": 0.45714456254023106, "learning_rate": 4.491506472419617e-08, "loss": 0.002, "step": 19308 }, { "epoch": 4.393401592718999, "grad_norm": 0.2270277655773233, "learning_rate": 4.488181037748618e-08, "loss": 0.0008, "step": 19309 }, { "epoch": 4.39362912400455, "grad_norm": 0.10851014432919343, "learning_rate": 4.48485678873295e-08, "loss": 0.0004, "step": 19310 }, { "epoch": 4.393856655290103, "grad_norm": 0.8536013036673484, "learning_rate": 4.481533725440565e-08, "loss": 0.0062, "step": 19311 }, { "epoch": 4.394084186575654, "grad_norm": 0.4080909401767996, "learning_rate": 4.478211847939361e-08, "loss": 0.0037, "step": 19312 }, { "epoch": 4.394311717861206, "grad_norm": 0.4077624538197392, "learning_rate": 4.474891156297262e-08, "loss": 0.0036, "step": 19313 }, { "epoch": 4.394539249146757, "grad_norm": 0.09463301114879591, "learning_rate": 4.471571650582103e-08, "loss": 0.0004, "step": 19314 }, { "epoch": 4.39476678043231, "grad_norm": 0.27121698711240194, "learning_rate": 4.468253330861754e-08, "loss": 0.0019, "step": 19315 }, { "epoch": 4.394994311717861, "grad_norm": 0.057215633957730994, "learning_rate": 4.464936197204034e-08, "loss": 0.0002, "step": 19316 }, { "epoch": 4.395221843003413, "grad_norm": 0.3091658523155091, "learning_rate": 4.461620249676717e-08, "loss": 0.0022, "step": 19317 }, { "epoch": 4.395449374288964, "grad_norm": 0.0911124378654012, "learning_rate": 4.458305488347602e-08, "loss": 0.0003, "step": 19318 }, { "epoch": 4.395676905574517, "grad_norm": 0.5446860241522788, "learning_rate": 4.454991913284419e-08, "loss": 0.003, "step": 19319 }, { "epoch": 4.395904436860068, "grad_norm": 0.24439093606979512, "learning_rate": 4.4516795245549e-08, "loss": 0.0008, "step": 19320 }, { "epoch": 4.39613196814562, "grad_norm": 0.02119046858728296, "learning_rate": 4.4483683222267326e-08, "loss": 0.0, "step": 19321 }, { "epoch": 4.396359499431171, "grad_norm": 0.17607025596114234, "learning_rate": 4.445058306367607e-08, "loss": 0.0003, "step": 19322 }, { "epoch": 4.396587030716724, "grad_norm": 0.10665158609922679, "learning_rate": 4.441749477045169e-08, "loss": 0.0005, "step": 19323 }, { "epoch": 4.396814562002275, "grad_norm": 0.4857345133758943, "learning_rate": 4.4384418343270395e-08, "loss": 0.0034, "step": 19324 }, { "epoch": 4.397042093287827, "grad_norm": 0.40125639300411203, "learning_rate": 4.43513537828083e-08, "loss": 0.0023, "step": 19325 }, { "epoch": 4.397269624573378, "grad_norm": 0.3841415480505611, "learning_rate": 4.431830108974112e-08, "loss": 0.0039, "step": 19326 }, { "epoch": 4.397497155858931, "grad_norm": 0.43367114619706004, "learning_rate": 4.428526026474429e-08, "loss": 0.0043, "step": 19327 }, { "epoch": 4.397724687144482, "grad_norm": 0.1770124865350497, "learning_rate": 4.425223130849324e-08, "loss": 0.0003, "step": 19328 }, { "epoch": 4.397952218430034, "grad_norm": 0.47883844263255626, "learning_rate": 4.421921422166298e-08, "loss": 0.0018, "step": 19329 }, { "epoch": 4.398179749715586, "grad_norm": 0.05505866536371523, "learning_rate": 4.4186209004928454e-08, "loss": 0.0002, "step": 19330 }, { "epoch": 4.398407281001138, "grad_norm": 0.593294607215404, "learning_rate": 4.4153215658963996e-08, "loss": 0.0042, "step": 19331 }, { "epoch": 4.398634812286689, "grad_norm": 0.8301300493572408, "learning_rate": 4.4120234184444127e-08, "loss": 0.0087, "step": 19332 }, { "epoch": 4.398862343572241, "grad_norm": 0.6130530466900636, "learning_rate": 4.408726458204282e-08, "loss": 0.0031, "step": 19333 }, { "epoch": 4.399089874857793, "grad_norm": 0.29396096830151364, "learning_rate": 4.405430685243385e-08, "loss": 0.0027, "step": 19334 }, { "epoch": 4.399317406143345, "grad_norm": 0.3071503586913125, "learning_rate": 4.402136099629091e-08, "loss": 0.0009, "step": 19335 }, { "epoch": 4.399544937428897, "grad_norm": 0.17071179610393986, "learning_rate": 4.398842701428735e-08, "loss": 0.0005, "step": 19336 }, { "epoch": 4.399772468714448, "grad_norm": 0.1292634159336672, "learning_rate": 4.395550490709638e-08, "loss": 0.0006, "step": 19337 }, { "epoch": 4.4, "grad_norm": 0.26393424762847634, "learning_rate": 4.3922594675390586e-08, "loss": 0.0009, "step": 19338 }, { "epoch": 4.400227531285552, "grad_norm": 0.5903546106239163, "learning_rate": 4.388969631984291e-08, "loss": 0.0014, "step": 19339 }, { "epoch": 4.400455062571104, "grad_norm": 0.1493353564585139, "learning_rate": 4.385680984112558e-08, "loss": 0.0005, "step": 19340 }, { "epoch": 4.400682593856655, "grad_norm": 0.11245342835623465, "learning_rate": 4.3823935239910646e-08, "loss": 0.0003, "step": 19341 }, { "epoch": 4.400910125142207, "grad_norm": 0.17270238271371097, "learning_rate": 4.379107251687012e-08, "loss": 0.0005, "step": 19342 }, { "epoch": 4.401137656427759, "grad_norm": 0.17896285692043795, "learning_rate": 4.375822167267563e-08, "loss": 0.0008, "step": 19343 }, { "epoch": 4.401365187713311, "grad_norm": 0.03209390870986379, "learning_rate": 4.3725382707998715e-08, "loss": 0.0001, "step": 19344 }, { "epoch": 4.401592718998862, "grad_norm": 0.36306247240249756, "learning_rate": 4.369255562351037e-08, "loss": 0.0013, "step": 19345 }, { "epoch": 4.401820250284414, "grad_norm": 0.09660262076218233, "learning_rate": 4.365974041988151e-08, "loss": 0.0003, "step": 19346 }, { "epoch": 4.402047781569966, "grad_norm": 0.6108955287483265, "learning_rate": 4.362693709778286e-08, "loss": 0.0053, "step": 19347 }, { "epoch": 4.402275312855518, "grad_norm": 0.04886693513190867, "learning_rate": 4.3594145657884915e-08, "loss": 0.0002, "step": 19348 }, { "epoch": 4.402502844141069, "grad_norm": 0.2906875569390977, "learning_rate": 4.3561366100857913e-08, "loss": 0.0005, "step": 19349 }, { "epoch": 4.402730375426621, "grad_norm": 0.20927450805830594, "learning_rate": 4.352859842737159e-08, "loss": 0.0005, "step": 19350 }, { "epoch": 4.402957906712173, "grad_norm": 0.2695154857808765, "learning_rate": 4.349584263809596e-08, "loss": 0.0023, "step": 19351 }, { "epoch": 4.403185437997725, "grad_norm": 0.048100874550654434, "learning_rate": 4.346309873370036e-08, "loss": 0.0001, "step": 19352 }, { "epoch": 4.403412969283276, "grad_norm": 0.12248495985057856, "learning_rate": 4.343036671485383e-08, "loss": 0.0004, "step": 19353 }, { "epoch": 4.403640500568828, "grad_norm": 0.2869193077691184, "learning_rate": 4.339764658222549e-08, "loss": 0.0016, "step": 19354 }, { "epoch": 4.40386803185438, "grad_norm": 0.5926733649288867, "learning_rate": 4.336493833648418e-08, "loss": 0.0024, "step": 19355 }, { "epoch": 4.404095563139932, "grad_norm": 0.12659420275219685, "learning_rate": 4.3332241978298326e-08, "loss": 0.0004, "step": 19356 }, { "epoch": 4.404323094425483, "grad_norm": 0.09125701501488194, "learning_rate": 4.329955750833614e-08, "loss": 0.0003, "step": 19357 }, { "epoch": 4.404550625711035, "grad_norm": 0.05557767695472571, "learning_rate": 4.3266884927265704e-08, "loss": 0.0002, "step": 19358 }, { "epoch": 4.404778156996587, "grad_norm": 0.29108914499576394, "learning_rate": 4.323422423575481e-08, "loss": 0.0009, "step": 19359 }, { "epoch": 4.405005688282139, "grad_norm": 0.07944647052834933, "learning_rate": 4.320157543447077e-08, "loss": 0.0002, "step": 19360 }, { "epoch": 4.40523321956769, "grad_norm": 0.3424813142950092, "learning_rate": 4.316893852408105e-08, "loss": 0.0022, "step": 19361 }, { "epoch": 4.405460750853242, "grad_norm": 0.6343809362684589, "learning_rate": 4.313631350525267e-08, "loss": 0.0046, "step": 19362 }, { "epoch": 4.405688282138794, "grad_norm": 0.036313569373887335, "learning_rate": 4.310370037865247e-08, "loss": 0.0001, "step": 19363 }, { "epoch": 4.405915813424346, "grad_norm": 0.3541959206784199, "learning_rate": 4.307109914494692e-08, "loss": 0.0034, "step": 19364 }, { "epoch": 4.406143344709897, "grad_norm": 0.21158512352369996, "learning_rate": 4.30385098048023e-08, "loss": 0.0003, "step": 19365 }, { "epoch": 4.406370875995449, "grad_norm": 0.18683812318914486, "learning_rate": 4.300593235888481e-08, "loss": 0.0006, "step": 19366 }, { "epoch": 4.406598407281001, "grad_norm": 0.33154133618997783, "learning_rate": 4.29733668078601e-08, "loss": 0.0011, "step": 19367 }, { "epoch": 4.406825938566553, "grad_norm": 0.8881292711445397, "learning_rate": 4.294081315239382e-08, "loss": 0.0024, "step": 19368 }, { "epoch": 4.407053469852105, "grad_norm": 0.13830561763049024, "learning_rate": 4.2908271393151335e-08, "loss": 0.0009, "step": 19369 }, { "epoch": 4.407281001137656, "grad_norm": 0.01764227279536407, "learning_rate": 4.2875741530797875e-08, "loss": 0.0001, "step": 19370 }, { "epoch": 4.407508532423208, "grad_norm": 0.4166721643050337, "learning_rate": 4.284322356599806e-08, "loss": 0.0016, "step": 19371 }, { "epoch": 4.40773606370876, "grad_norm": 0.39219610994519155, "learning_rate": 4.281071749941655e-08, "loss": 0.0044, "step": 19372 }, { "epoch": 4.407963594994312, "grad_norm": 0.08180840734714875, "learning_rate": 4.2778223331717825e-08, "loss": 0.0002, "step": 19373 }, { "epoch": 4.408191126279863, "grad_norm": 0.13042373765273904, "learning_rate": 4.274574106356587e-08, "loss": 0.0008, "step": 19374 }, { "epoch": 4.408418657565416, "grad_norm": 0.5409523286528194, "learning_rate": 4.271327069562452e-08, "loss": 0.0038, "step": 19375 }, { "epoch": 4.408646188850967, "grad_norm": 0.6132382214212135, "learning_rate": 4.2680812228557616e-08, "loss": 0.0025, "step": 19376 }, { "epoch": 4.408873720136519, "grad_norm": 0.34425284158301706, "learning_rate": 4.264836566302846e-08, "loss": 0.001, "step": 19377 }, { "epoch": 4.40910125142207, "grad_norm": 0.17495600666081865, "learning_rate": 4.2615930999700267e-08, "loss": 0.0006, "step": 19378 }, { "epoch": 4.409328782707623, "grad_norm": 0.027620049343851413, "learning_rate": 4.2583508239235696e-08, "loss": 0.0001, "step": 19379 }, { "epoch": 4.409556313993174, "grad_norm": 1.4716582409401342, "learning_rate": 4.2551097382297696e-08, "loss": 0.0155, "step": 19380 }, { "epoch": 4.409783845278726, "grad_norm": 0.3780304490026535, "learning_rate": 4.2518698429548435e-08, "loss": 0.0011, "step": 19381 }, { "epoch": 4.410011376564277, "grad_norm": 0.1737161237922597, "learning_rate": 4.248631138165024e-08, "loss": 0.0006, "step": 19382 }, { "epoch": 4.41023890784983, "grad_norm": 0.22670800762773802, "learning_rate": 4.245393623926508e-08, "loss": 0.0004, "step": 19383 }, { "epoch": 4.410466439135381, "grad_norm": 0.3914444773409929, "learning_rate": 4.242157300305451e-08, "loss": 0.0023, "step": 19384 }, { "epoch": 4.410693970420933, "grad_norm": 0.3163992306943037, "learning_rate": 4.238922167368015e-08, "loss": 0.002, "step": 19385 }, { "epoch": 4.4109215017064844, "grad_norm": 0.4020533422295023, "learning_rate": 4.2356882251803e-08, "loss": 0.0041, "step": 19386 }, { "epoch": 4.411149032992037, "grad_norm": 0.1478269752159144, "learning_rate": 4.2324554738084195e-08, "loss": 0.0006, "step": 19387 }, { "epoch": 4.411376564277588, "grad_norm": 0.14251999437516438, "learning_rate": 4.229223913318426e-08, "loss": 0.0007, "step": 19388 }, { "epoch": 4.41160409556314, "grad_norm": 0.30235886503079795, "learning_rate": 4.2259935437763824e-08, "loss": 0.0012, "step": 19389 }, { "epoch": 4.4118316268486915, "grad_norm": 0.11233034406129179, "learning_rate": 4.222764365248315e-08, "loss": 0.0004, "step": 19390 }, { "epoch": 4.412059158134244, "grad_norm": 0.2910042479517773, "learning_rate": 4.219536377800204e-08, "loss": 0.0038, "step": 19391 }, { "epoch": 4.412286689419795, "grad_norm": 0.27160272603116126, "learning_rate": 4.216309581498046e-08, "loss": 0.0019, "step": 19392 }, { "epoch": 4.412514220705347, "grad_norm": 0.37257077841654374, "learning_rate": 4.213083976407767e-08, "loss": 0.0022, "step": 19393 }, { "epoch": 4.4127417519908985, "grad_norm": 0.5123605779744466, "learning_rate": 4.2098595625953145e-08, "loss": 0.0027, "step": 19394 }, { "epoch": 4.412969283276451, "grad_norm": 0.5565791487543268, "learning_rate": 4.206636340126566e-08, "loss": 0.0036, "step": 19395 }, { "epoch": 4.413196814562002, "grad_norm": 1.2925732211509129, "learning_rate": 4.2034143090674136e-08, "loss": 0.0083, "step": 19396 }, { "epoch": 4.413424345847554, "grad_norm": 0.15503434883169964, "learning_rate": 4.200193469483714e-08, "loss": 0.0004, "step": 19397 }, { "epoch": 4.4136518771331055, "grad_norm": 0.1081676915895199, "learning_rate": 4.196973821441283e-08, "loss": 0.0004, "step": 19398 }, { "epoch": 4.413879408418658, "grad_norm": 0.34720925535192665, "learning_rate": 4.193755365005943e-08, "loss": 0.0011, "step": 19399 }, { "epoch": 4.414106939704209, "grad_norm": 0.03201618644347574, "learning_rate": 4.190538100243446e-08, "loss": 0.0001, "step": 19400 }, { "epoch": 4.414334470989761, "grad_norm": 0.4097054958020988, "learning_rate": 4.187322027219574e-08, "loss": 0.0024, "step": 19401 }, { "epoch": 4.4145620022753125, "grad_norm": 0.7372610530854525, "learning_rate": 4.184107146000031e-08, "loss": 0.0029, "step": 19402 }, { "epoch": 4.414789533560865, "grad_norm": 0.48438635813441794, "learning_rate": 4.180893456650542e-08, "loss": 0.0016, "step": 19403 }, { "epoch": 4.415017064846416, "grad_norm": 0.21808905718541954, "learning_rate": 4.1776809592367905e-08, "loss": 0.0009, "step": 19404 }, { "epoch": 4.415244596131968, "grad_norm": 0.7925602963861056, "learning_rate": 4.1744696538244185e-08, "loss": 0.0065, "step": 19405 }, { "epoch": 4.4154721274175195, "grad_norm": 21.57711204153217, "learning_rate": 4.171259540479082e-08, "loss": 0.0622, "step": 19406 }, { "epoch": 4.415699658703072, "grad_norm": 0.2564397984832841, "learning_rate": 4.1680506192663675e-08, "loss": 0.0009, "step": 19407 }, { "epoch": 4.415927189988624, "grad_norm": 0.5266974602489123, "learning_rate": 4.164842890251861e-08, "loss": 0.0041, "step": 19408 }, { "epoch": 4.416154721274175, "grad_norm": 0.5376588488988294, "learning_rate": 4.161636353501149e-08, "loss": 0.0033, "step": 19409 }, { "epoch": 4.4163822525597265, "grad_norm": 0.4479971291718544, "learning_rate": 4.158431009079734e-08, "loss": 0.0012, "step": 19410 }, { "epoch": 4.416609783845279, "grad_norm": 1.1424428233545842, "learning_rate": 4.155226857053149e-08, "loss": 0.0143, "step": 19411 }, { "epoch": 4.416837315130831, "grad_norm": 0.23026097859593272, "learning_rate": 4.1520238974868675e-08, "loss": 0.0011, "step": 19412 }, { "epoch": 4.417064846416382, "grad_norm": 0.409234608800112, "learning_rate": 4.148822130446366e-08, "loss": 0.0019, "step": 19413 }, { "epoch": 4.417292377701934, "grad_norm": 0.26138430002915514, "learning_rate": 4.145621555997063e-08, "loss": 0.002, "step": 19414 }, { "epoch": 4.417519908987486, "grad_norm": 0.06170148148751485, "learning_rate": 4.142422174204387e-08, "loss": 0.0002, "step": 19415 }, { "epoch": 4.417747440273038, "grad_norm": 0.7417561480466653, "learning_rate": 4.139223985133736e-08, "loss": 0.0021, "step": 19416 }, { "epoch": 4.417974971558589, "grad_norm": 0.3277716563768136, "learning_rate": 4.1360269888504545e-08, "loss": 0.0026, "step": 19417 }, { "epoch": 4.418202502844141, "grad_norm": 0.044913297933203436, "learning_rate": 4.1328311854198986e-08, "loss": 0.0002, "step": 19418 }, { "epoch": 4.418430034129693, "grad_norm": 0.2087446331166921, "learning_rate": 4.129636574907371e-08, "loss": 0.0003, "step": 19419 }, { "epoch": 4.418657565415245, "grad_norm": 0.26988085848345106, "learning_rate": 4.126443157378188e-08, "loss": 0.0015, "step": 19420 }, { "epoch": 4.418885096700796, "grad_norm": 0.36549795690181786, "learning_rate": 4.1232509328975806e-08, "loss": 0.0026, "step": 19421 }, { "epoch": 4.419112627986348, "grad_norm": 0.565085421236804, "learning_rate": 4.120059901530818e-08, "loss": 0.0026, "step": 19422 }, { "epoch": 4.4193401592719, "grad_norm": 0.275780860064789, "learning_rate": 4.116870063343124e-08, "loss": 0.0017, "step": 19423 }, { "epoch": 4.419567690557452, "grad_norm": 0.25208208307934565, "learning_rate": 4.1136814183996705e-08, "loss": 0.001, "step": 19424 }, { "epoch": 4.419795221843003, "grad_norm": 0.5367163429572148, "learning_rate": 4.110493966765647e-08, "loss": 0.0041, "step": 19425 }, { "epoch": 4.420022753128555, "grad_norm": 0.4865246730757509, "learning_rate": 4.107307708506182e-08, "loss": 0.0033, "step": 19426 }, { "epoch": 4.420250284414107, "grad_norm": 0.020814081786225198, "learning_rate": 4.104122643686419e-08, "loss": 0.0001, "step": 19427 }, { "epoch": 4.420477815699659, "grad_norm": 0.2601399768878721, "learning_rate": 4.1009387723714295e-08, "loss": 0.0012, "step": 19428 }, { "epoch": 4.42070534698521, "grad_norm": 0.18501529679360654, "learning_rate": 4.097756094626301e-08, "loss": 0.0011, "step": 19429 }, { "epoch": 4.420932878270762, "grad_norm": 0.12827497739375646, "learning_rate": 4.094574610516086e-08, "loss": 0.0005, "step": 19430 }, { "epoch": 4.421160409556314, "grad_norm": 0.33316471069362635, "learning_rate": 4.0913943201057944e-08, "loss": 0.0018, "step": 19431 }, { "epoch": 4.421387940841866, "grad_norm": 0.27823411361477546, "learning_rate": 4.088215223460444e-08, "loss": 0.0017, "step": 19432 }, { "epoch": 4.421615472127417, "grad_norm": 0.4464345819527863, "learning_rate": 4.085037320644997e-08, "loss": 0.0045, "step": 19433 }, { "epoch": 4.421843003412969, "grad_norm": 0.11932451212751452, "learning_rate": 4.0818606117243935e-08, "loss": 0.0007, "step": 19434 }, { "epoch": 4.422070534698521, "grad_norm": 0.13073196803964054, "learning_rate": 4.078685096763568e-08, "loss": 0.0004, "step": 19435 }, { "epoch": 4.422298065984073, "grad_norm": 0.5323356150186552, "learning_rate": 4.075510775827428e-08, "loss": 0.0014, "step": 19436 }, { "epoch": 4.422525597269624, "grad_norm": 0.24480743617030198, "learning_rate": 4.072337648980865e-08, "loss": 0.0004, "step": 19437 }, { "epoch": 4.422753128555176, "grad_norm": 0.6339900577296653, "learning_rate": 4.069165716288695e-08, "loss": 0.002, "step": 19438 }, { "epoch": 4.422980659840728, "grad_norm": 0.3203948582779969, "learning_rate": 4.0659949778157835e-08, "loss": 0.0012, "step": 19439 }, { "epoch": 4.42320819112628, "grad_norm": 0.40098476335911704, "learning_rate": 4.062825433626919e-08, "loss": 0.0024, "step": 19440 }, { "epoch": 4.423435722411831, "grad_norm": 0.20885063719547528, "learning_rate": 4.059657083786868e-08, "loss": 0.0007, "step": 19441 }, { "epoch": 4.4236632536973834, "grad_norm": 0.7493343295629099, "learning_rate": 4.0564899283603926e-08, "loss": 0.0049, "step": 19442 }, { "epoch": 4.423890784982935, "grad_norm": 0.4401520590993684, "learning_rate": 4.0533239674122384e-08, "loss": 0.0041, "step": 19443 }, { "epoch": 4.424118316268487, "grad_norm": 0.29106385382530203, "learning_rate": 4.0501592010071046e-08, "loss": 0.001, "step": 19444 }, { "epoch": 4.424345847554038, "grad_norm": 0.11982526923804064, "learning_rate": 4.0469956292096616e-08, "loss": 0.0003, "step": 19445 }, { "epoch": 4.4245733788395905, "grad_norm": 0.14301538128865512, "learning_rate": 4.0438332520845934e-08, "loss": 0.0007, "step": 19446 }, { "epoch": 4.424800910125143, "grad_norm": 0.20931234489731732, "learning_rate": 4.040672069696508e-08, "loss": 0.0012, "step": 19447 }, { "epoch": 4.425028441410694, "grad_norm": 0.6904504943424091, "learning_rate": 4.0375120821100135e-08, "loss": 0.0042, "step": 19448 }, { "epoch": 4.425255972696245, "grad_norm": 0.2515102852189536, "learning_rate": 4.034353289389705e-08, "loss": 0.0011, "step": 19449 }, { "epoch": 4.4254835039817975, "grad_norm": 0.0575418329687554, "learning_rate": 4.03119569160014e-08, "loss": 0.0002, "step": 19450 }, { "epoch": 4.42571103526735, "grad_norm": 0.1899400649332391, "learning_rate": 4.028039288805866e-08, "loss": 0.0012, "step": 19451 }, { "epoch": 4.425938566552901, "grad_norm": 0.6073908990931196, "learning_rate": 4.024884081071378e-08, "loss": 0.0033, "step": 19452 }, { "epoch": 4.426166097838453, "grad_norm": 0.08519743997806442, "learning_rate": 4.021730068461162e-08, "loss": 0.0003, "step": 19453 }, { "epoch": 4.4263936291240045, "grad_norm": 0.08695795359961282, "learning_rate": 4.018577251039699e-08, "loss": 0.0002, "step": 19454 }, { "epoch": 4.426621160409557, "grad_norm": 2.420715518110611, "learning_rate": 4.015425628871396e-08, "loss": 0.0296, "step": 19455 }, { "epoch": 4.426848691695108, "grad_norm": 0.09840505218605233, "learning_rate": 4.012275202020688e-08, "loss": 0.0003, "step": 19456 }, { "epoch": 4.42707622298066, "grad_norm": 0.8511745679533024, "learning_rate": 4.009125970551954e-08, "loss": 0.0078, "step": 19457 }, { "epoch": 4.4273037542662115, "grad_norm": 0.12023969077469061, "learning_rate": 4.00597793452958e-08, "loss": 0.0002, "step": 19458 }, { "epoch": 4.427531285551764, "grad_norm": 0.6079586215081358, "learning_rate": 4.0028310940178896e-08, "loss": 0.0026, "step": 19459 }, { "epoch": 4.427758816837315, "grad_norm": 0.4530900681112371, "learning_rate": 3.999685449081185e-08, "loss": 0.0024, "step": 19460 }, { "epoch": 4.427986348122867, "grad_norm": 0.278184467743196, "learning_rate": 3.9965409997837835e-08, "loss": 0.001, "step": 19461 }, { "epoch": 4.4282138794084185, "grad_norm": 0.2214006762331104, "learning_rate": 3.993397746189932e-08, "loss": 0.0007, "step": 19462 }, { "epoch": 4.428441410693971, "grad_norm": 0.22064138592610075, "learning_rate": 3.990255688363878e-08, "loss": 0.0008, "step": 19463 }, { "epoch": 4.428668941979522, "grad_norm": 0.05018006234567504, "learning_rate": 3.987114826369848e-08, "loss": 0.0001, "step": 19464 }, { "epoch": 4.428896473265074, "grad_norm": 0.1830994090132181, "learning_rate": 3.983975160272033e-08, "loss": 0.0007, "step": 19465 }, { "epoch": 4.4291240045506255, "grad_norm": 0.43733877945905175, "learning_rate": 3.980836690134604e-08, "loss": 0.0031, "step": 19466 }, { "epoch": 4.429351535836178, "grad_norm": 0.7088955599585448, "learning_rate": 3.977699416021684e-08, "loss": 0.0033, "step": 19467 }, { "epoch": 4.429579067121729, "grad_norm": 0.24252090888009745, "learning_rate": 3.9745633379974225e-08, "loss": 0.0016, "step": 19468 }, { "epoch": 4.429806598407281, "grad_norm": 0.3272367988402886, "learning_rate": 3.9714284561258866e-08, "loss": 0.0018, "step": 19469 }, { "epoch": 4.4300341296928325, "grad_norm": 0.027337898099112815, "learning_rate": 3.9682947704711777e-08, "loss": 0.0001, "step": 19470 }, { "epoch": 4.430261660978385, "grad_norm": 0.29654051603350073, "learning_rate": 3.9651622810973274e-08, "loss": 0.0016, "step": 19471 }, { "epoch": 4.430489192263936, "grad_norm": 0.38751666168605764, "learning_rate": 3.962030988068355e-08, "loss": 0.0026, "step": 19472 }, { "epoch": 4.430716723549488, "grad_norm": 0.09721829206823443, "learning_rate": 3.958900891448264e-08, "loss": 0.0002, "step": 19473 }, { "epoch": 4.4309442548350395, "grad_norm": 0.12177766177980652, "learning_rate": 3.955771991301018e-08, "loss": 0.0003, "step": 19474 }, { "epoch": 4.431171786120592, "grad_norm": 0.19457054783726171, "learning_rate": 3.952644287690578e-08, "loss": 0.0008, "step": 19475 }, { "epoch": 4.431399317406143, "grad_norm": 0.6360631828905495, "learning_rate": 3.94951778068086e-08, "loss": 0.0048, "step": 19476 }, { "epoch": 4.431626848691695, "grad_norm": 0.6811045716164607, "learning_rate": 3.9463924703357774e-08, "loss": 0.0029, "step": 19477 }, { "epoch": 4.4318543799772465, "grad_norm": 0.12961883175604338, "learning_rate": 3.943268356719203e-08, "loss": 0.0003, "step": 19478 }, { "epoch": 4.432081911262799, "grad_norm": 0.21767181277520176, "learning_rate": 3.940145439894967e-08, "loss": 0.0014, "step": 19479 }, { "epoch": 4.43230944254835, "grad_norm": 0.35645236660030816, "learning_rate": 3.937023719926922e-08, "loss": 0.0021, "step": 19480 }, { "epoch": 4.432536973833902, "grad_norm": 0.3061563488307636, "learning_rate": 3.933903196878849e-08, "loss": 0.0005, "step": 19481 }, { "epoch": 4.4327645051194535, "grad_norm": 0.14121685485372693, "learning_rate": 3.9307838708145324e-08, "loss": 0.0002, "step": 19482 }, { "epoch": 4.432992036405006, "grad_norm": 0.26601692528068244, "learning_rate": 3.9276657417977316e-08, "loss": 0.0011, "step": 19483 }, { "epoch": 4.433219567690557, "grad_norm": 0.030941116437065184, "learning_rate": 3.924548809892188e-08, "loss": 0.0001, "step": 19484 }, { "epoch": 4.433447098976109, "grad_norm": 0.406508823885247, "learning_rate": 3.921433075161587e-08, "loss": 0.0014, "step": 19485 }, { "epoch": 4.433674630261661, "grad_norm": 0.30547772576606613, "learning_rate": 3.9183185376696e-08, "loss": 0.0011, "step": 19486 }, { "epoch": 4.433902161547213, "grad_norm": 0.45646903098901587, "learning_rate": 3.915205197479906e-08, "loss": 0.0031, "step": 19487 }, { "epoch": 4.434129692832764, "grad_norm": 0.49981147546757887, "learning_rate": 3.9120930546561185e-08, "loss": 0.0017, "step": 19488 }, { "epoch": 4.434357224118316, "grad_norm": 1.1518051087459455, "learning_rate": 3.908982109261848e-08, "loss": 0.0015, "step": 19489 }, { "epoch": 4.434584755403868, "grad_norm": 0.1948061192761138, "learning_rate": 3.905872361360683e-08, "loss": 0.0009, "step": 19490 }, { "epoch": 4.43481228668942, "grad_norm": 0.2806379520646796, "learning_rate": 3.9027638110161744e-08, "loss": 0.0026, "step": 19491 }, { "epoch": 4.435039817974972, "grad_norm": 0.755648030131221, "learning_rate": 3.8996564582918646e-08, "loss": 0.0048, "step": 19492 }, { "epoch": 4.435267349260523, "grad_norm": 0.05455644572377462, "learning_rate": 3.8965503032512496e-08, "loss": 0.0002, "step": 19493 }, { "epoch": 4.435494880546075, "grad_norm": 0.17131095018840772, "learning_rate": 3.8934453459578205e-08, "loss": 0.0005, "step": 19494 }, { "epoch": 4.435722411831627, "grad_norm": 0.2951039991915845, "learning_rate": 3.890341586475034e-08, "loss": 0.0022, "step": 19495 }, { "epoch": 4.435949943117179, "grad_norm": 0.2801084303438512, "learning_rate": 3.8872390248663255e-08, "loss": 0.0009, "step": 19496 }, { "epoch": 4.43617747440273, "grad_norm": 0.9754308498878171, "learning_rate": 3.884137661195116e-08, "loss": 0.0036, "step": 19497 }, { "epoch": 4.436405005688282, "grad_norm": 0.5852748938949897, "learning_rate": 3.8810374955247714e-08, "loss": 0.0055, "step": 19498 }, { "epoch": 4.436632536973834, "grad_norm": 0.12964974558349526, "learning_rate": 3.877938527918672e-08, "loss": 0.0004, "step": 19499 }, { "epoch": 4.436860068259386, "grad_norm": 0.21871108433290193, "learning_rate": 3.874840758440142e-08, "loss": 0.0011, "step": 19500 }, { "epoch": 4.437087599544937, "grad_norm": 0.22810791183153528, "learning_rate": 3.8717441871525123e-08, "loss": 0.0006, "step": 19501 }, { "epoch": 4.4373151308304895, "grad_norm": 0.13039990883118663, "learning_rate": 3.8686488141190456e-08, "loss": 0.0005, "step": 19502 }, { "epoch": 4.437542662116041, "grad_norm": 0.06402678768827233, "learning_rate": 3.865554639403017e-08, "loss": 0.0002, "step": 19503 }, { "epoch": 4.437770193401593, "grad_norm": 0.5397172109588844, "learning_rate": 3.862461663067682e-08, "loss": 0.0027, "step": 19504 }, { "epoch": 4.437997724687144, "grad_norm": 0.024982660478206523, "learning_rate": 3.859369885176233e-08, "loss": 0.0001, "step": 19505 }, { "epoch": 4.4382252559726965, "grad_norm": 0.14179661380710618, "learning_rate": 3.8562793057918686e-08, "loss": 0.0006, "step": 19506 }, { "epoch": 4.438452787258248, "grad_norm": 0.1738786142604957, "learning_rate": 3.8531899249777544e-08, "loss": 0.0006, "step": 19507 }, { "epoch": 4.4386803185438, "grad_norm": 0.09579787244266254, "learning_rate": 3.850101742797034e-08, "loss": 0.0004, "step": 19508 }, { "epoch": 4.438907849829351, "grad_norm": 1.5888373458684637, "learning_rate": 3.847014759312817e-08, "loss": 0.0044, "step": 19509 }, { "epoch": 4.4391353811149035, "grad_norm": 0.15915358766623192, "learning_rate": 3.843928974588199e-08, "loss": 0.0004, "step": 19510 }, { "epoch": 4.439362912400455, "grad_norm": 0.15686060551325087, "learning_rate": 3.84084438868626e-08, "loss": 0.0005, "step": 19511 }, { "epoch": 4.439590443686007, "grad_norm": 0.2081556496071916, "learning_rate": 3.8377610016700204e-08, "loss": 0.0005, "step": 19512 }, { "epoch": 4.439817974971558, "grad_norm": 0.026732697319694358, "learning_rate": 3.8346788136025204e-08, "loss": 0.0001, "step": 19513 }, { "epoch": 4.4400455062571105, "grad_norm": 0.4206999933974508, "learning_rate": 3.831597824546736e-08, "loss": 0.0039, "step": 19514 }, { "epoch": 4.440273037542662, "grad_norm": 0.29167261079400997, "learning_rate": 3.828518034565659e-08, "loss": 0.0005, "step": 19515 }, { "epoch": 4.440500568828214, "grad_norm": 0.729654879483519, "learning_rate": 3.825439443722205e-08, "loss": 0.0024, "step": 19516 }, { "epoch": 4.440728100113765, "grad_norm": 0.10409888157361516, "learning_rate": 3.822362052079316e-08, "loss": 0.0004, "step": 19517 }, { "epoch": 4.4409556313993175, "grad_norm": 0.11121728547907601, "learning_rate": 3.819285859699894e-08, "loss": 0.0004, "step": 19518 }, { "epoch": 4.441183162684869, "grad_norm": 0.36659096632208577, "learning_rate": 3.81621086664679e-08, "loss": 0.0018, "step": 19519 }, { "epoch": 4.441410693970421, "grad_norm": 0.22120875749498128, "learning_rate": 3.813137072982871e-08, "loss": 0.0006, "step": 19520 }, { "epoch": 4.441638225255972, "grad_norm": 0.4380275034055589, "learning_rate": 3.810064478770942e-08, "loss": 0.0008, "step": 19521 }, { "epoch": 4.4418657565415245, "grad_norm": 0.495078359550999, "learning_rate": 3.806993084073819e-08, "loss": 0.0018, "step": 19522 }, { "epoch": 4.442093287827076, "grad_norm": 0.14941489548231038, "learning_rate": 3.803922888954252e-08, "loss": 0.0001, "step": 19523 }, { "epoch": 4.442320819112628, "grad_norm": 0.6519642841038126, "learning_rate": 3.8008538934750096e-08, "loss": 0.0033, "step": 19524 }, { "epoch": 4.44254835039818, "grad_norm": 0.1790219720520199, "learning_rate": 3.797786097698819e-08, "loss": 0.0005, "step": 19525 }, { "epoch": 4.4427758816837315, "grad_norm": 0.22281582690019705, "learning_rate": 3.79471950168836e-08, "loss": 0.0004, "step": 19526 }, { "epoch": 4.443003412969283, "grad_norm": 0.36087842012593596, "learning_rate": 3.7916541055063386e-08, "loss": 0.0025, "step": 19527 }, { "epoch": 4.443230944254835, "grad_norm": 0.24402233403935825, "learning_rate": 3.788589909215379e-08, "loss": 0.0024, "step": 19528 }, { "epoch": 4.443458475540387, "grad_norm": 0.08462364960562485, "learning_rate": 3.785526912878104e-08, "loss": 0.0003, "step": 19529 }, { "epoch": 4.4436860068259385, "grad_norm": 0.09211507862776083, "learning_rate": 3.782465116557145e-08, "loss": 0.0005, "step": 19530 }, { "epoch": 4.443913538111491, "grad_norm": 0.1310367937772175, "learning_rate": 3.779404520315048e-08, "loss": 0.0002, "step": 19531 }, { "epoch": 4.444141069397042, "grad_norm": 0.3420013021559435, "learning_rate": 3.776345124214396e-08, "loss": 0.0027, "step": 19532 }, { "epoch": 4.444368600682594, "grad_norm": 0.5425212678501442, "learning_rate": 3.7732869283176944e-08, "loss": 0.0045, "step": 19533 }, { "epoch": 4.4445961319681455, "grad_norm": 1.5469952319027829, "learning_rate": 3.770229932687462e-08, "loss": 0.0068, "step": 19534 }, { "epoch": 4.444823663253698, "grad_norm": 0.2221550013311543, "learning_rate": 3.767174137386164e-08, "loss": 0.001, "step": 19535 }, { "epoch": 4.445051194539249, "grad_norm": 0.2627162995358036, "learning_rate": 3.7641195424762617e-08, "loss": 0.001, "step": 19536 }, { "epoch": 4.445278725824801, "grad_norm": 0.5581817399589999, "learning_rate": 3.761066148020194e-08, "loss": 0.0069, "step": 19537 }, { "epoch": 4.4455062571103525, "grad_norm": 0.3364455054857443, "learning_rate": 3.7580139540803476e-08, "loss": 0.002, "step": 19538 }, { "epoch": 4.445733788395905, "grad_norm": 0.5218274486999561, "learning_rate": 3.75496296071913e-08, "loss": 0.0032, "step": 19539 }, { "epoch": 4.445961319681456, "grad_norm": 0.4748321348188102, "learning_rate": 3.751913167998882e-08, "loss": 0.0027, "step": 19540 }, { "epoch": 4.446188850967008, "grad_norm": 0.10262608408995622, "learning_rate": 3.748864575981927e-08, "loss": 0.0003, "step": 19541 }, { "epoch": 4.4464163822525595, "grad_norm": 0.0827126568614664, "learning_rate": 3.7458171847305767e-08, "loss": 0.0004, "step": 19542 }, { "epoch": 4.446643913538112, "grad_norm": 0.880887060032437, "learning_rate": 3.7427709943071296e-08, "loss": 0.003, "step": 19543 }, { "epoch": 4.446871444823663, "grad_norm": 0.24856488399824567, "learning_rate": 3.7397260047738404e-08, "loss": 0.0014, "step": 19544 }, { "epoch": 4.447098976109215, "grad_norm": 1.4380679406134114, "learning_rate": 3.736682216192923e-08, "loss": 0.0062, "step": 19545 }, { "epoch": 4.4473265073947665, "grad_norm": 0.7435912294512708, "learning_rate": 3.733639628626613e-08, "loss": 0.0011, "step": 19546 }, { "epoch": 4.447554038680319, "grad_norm": 0.41932421913617823, "learning_rate": 3.730598242137083e-08, "loss": 0.0041, "step": 19547 }, { "epoch": 4.44778156996587, "grad_norm": 0.25694976465874386, "learning_rate": 3.727558056786491e-08, "loss": 0.0016, "step": 19548 }, { "epoch": 4.448009101251422, "grad_norm": 0.9717437210597056, "learning_rate": 3.7245190726369684e-08, "loss": 0.0032, "step": 19549 }, { "epoch": 4.4482366325369735, "grad_norm": 0.2361588636260971, "learning_rate": 3.721481289750639e-08, "loss": 0.0008, "step": 19550 }, { "epoch": 4.448464163822526, "grad_norm": 0.3895928573440999, "learning_rate": 3.718444708189585e-08, "loss": 0.0019, "step": 19551 }, { "epoch": 4.448691695108077, "grad_norm": 0.014724571753808842, "learning_rate": 3.715409328015868e-08, "loss": 0.0, "step": 19552 }, { "epoch": 4.448919226393629, "grad_norm": 1.0836175998994635, "learning_rate": 3.712375149291528e-08, "loss": 0.0078, "step": 19553 }, { "epoch": 4.4491467576791806, "grad_norm": 0.7341595058399168, "learning_rate": 3.709342172078578e-08, "loss": 0.004, "step": 19554 }, { "epoch": 4.449374288964733, "grad_norm": 0.25443268974798006, "learning_rate": 3.706310396438997e-08, "loss": 0.0005, "step": 19555 }, { "epoch": 4.449601820250284, "grad_norm": 0.26590465619599835, "learning_rate": 3.703279822434756e-08, "loss": 0.0008, "step": 19556 }, { "epoch": 4.449829351535836, "grad_norm": 1.6003507396558974, "learning_rate": 3.7002504501277914e-08, "loss": 0.0027, "step": 19557 }, { "epoch": 4.450056882821388, "grad_norm": 0.2197680572388068, "learning_rate": 3.697222279580033e-08, "loss": 0.001, "step": 19558 }, { "epoch": 4.45028441410694, "grad_norm": 0.048205280965699794, "learning_rate": 3.6941953108533543e-08, "loss": 0.0001, "step": 19559 }, { "epoch": 4.450511945392491, "grad_norm": 0.6134226420106846, "learning_rate": 3.6911695440096236e-08, "loss": 0.0037, "step": 19560 }, { "epoch": 4.450739476678043, "grad_norm": 0.4591752048471223, "learning_rate": 3.688144979110686e-08, "loss": 0.0047, "step": 19561 }, { "epoch": 4.450967007963595, "grad_norm": 0.24282586048734237, "learning_rate": 3.685121616218347e-08, "loss": 0.0009, "step": 19562 }, { "epoch": 4.451194539249147, "grad_norm": 0.1052856504508873, "learning_rate": 3.682099455394411e-08, "loss": 0.0004, "step": 19563 }, { "epoch": 4.451422070534699, "grad_norm": 0.538269418956862, "learning_rate": 3.6790784967006415e-08, "loss": 0.0011, "step": 19564 }, { "epoch": 4.45164960182025, "grad_norm": 0.2091374678518691, "learning_rate": 3.6760587401987945e-08, "loss": 0.0016, "step": 19565 }, { "epoch": 4.451877133105802, "grad_norm": 1.2268839997658572, "learning_rate": 3.673040185950577e-08, "loss": 0.0064, "step": 19566 }, { "epoch": 4.452104664391354, "grad_norm": 0.5171874339638864, "learning_rate": 3.6700228340176694e-08, "loss": 0.002, "step": 19567 }, { "epoch": 4.452332195676906, "grad_norm": 0.1870143338869749, "learning_rate": 3.667006684461759e-08, "loss": 0.0008, "step": 19568 }, { "epoch": 4.452559726962457, "grad_norm": 0.32943612510594367, "learning_rate": 3.6639917373444755e-08, "loss": 0.0046, "step": 19569 }, { "epoch": 4.4527872582480095, "grad_norm": 0.44411884751373404, "learning_rate": 3.6609779927274516e-08, "loss": 0.002, "step": 19570 }, { "epoch": 4.453014789533561, "grad_norm": 0.20985846886950768, "learning_rate": 3.6579654506722766e-08, "loss": 0.0008, "step": 19571 }, { "epoch": 4.453242320819113, "grad_norm": 0.04571708702002036, "learning_rate": 3.654954111240533e-08, "loss": 0.0002, "step": 19572 }, { "epoch": 4.453469852104664, "grad_norm": 0.044440406734356036, "learning_rate": 3.651943974493761e-08, "loss": 0.0002, "step": 19573 }, { "epoch": 4.4536973833902165, "grad_norm": 0.3397292216407664, "learning_rate": 3.648935040493469e-08, "loss": 0.002, "step": 19574 }, { "epoch": 4.453924914675768, "grad_norm": 3.397485017465776, "learning_rate": 3.645927309301168e-08, "loss": 0.0114, "step": 19575 }, { "epoch": 4.45415244596132, "grad_norm": 0.17646903161102157, "learning_rate": 3.642920780978325e-08, "loss": 0.0005, "step": 19576 }, { "epoch": 4.454379977246871, "grad_norm": 0.1522529833750333, "learning_rate": 3.639915455586382e-08, "loss": 0.0003, "step": 19577 }, { "epoch": 4.4546075085324235, "grad_norm": 0.2527745473260677, "learning_rate": 3.636911333186784e-08, "loss": 0.001, "step": 19578 }, { "epoch": 4.454835039817975, "grad_norm": 0.1618741745092934, "learning_rate": 3.633908413840905e-08, "loss": 0.0007, "step": 19579 }, { "epoch": 4.455062571103527, "grad_norm": 0.05913454377959451, "learning_rate": 3.63090669761014e-08, "loss": 0.0002, "step": 19580 }, { "epoch": 4.455290102389078, "grad_norm": 0.622227316154267, "learning_rate": 3.627906184555822e-08, "loss": 0.0041, "step": 19581 }, { "epoch": 4.4555176336746305, "grad_norm": 0.5616066697779366, "learning_rate": 3.624906874739285e-08, "loss": 0.0029, "step": 19582 }, { "epoch": 4.455745164960182, "grad_norm": 0.04817516236645051, "learning_rate": 3.6219087682218196e-08, "loss": 0.0001, "step": 19583 }, { "epoch": 4.455972696245734, "grad_norm": 0.11026969697551753, "learning_rate": 3.61891186506471e-08, "loss": 0.0003, "step": 19584 }, { "epoch": 4.456200227531285, "grad_norm": 0.5346562917076662, "learning_rate": 3.6159161653292195e-08, "loss": 0.0013, "step": 19585 }, { "epoch": 4.4564277588168375, "grad_norm": 0.10512332612674136, "learning_rate": 3.6129216690765495e-08, "loss": 0.0004, "step": 19586 }, { "epoch": 4.456655290102389, "grad_norm": 0.11529143793688643, "learning_rate": 3.609928376367921e-08, "loss": 0.0005, "step": 19587 }, { "epoch": 4.456882821387941, "grad_norm": 0.6226463065382013, "learning_rate": 3.6069362872644954e-08, "loss": 0.0105, "step": 19588 }, { "epoch": 4.457110352673492, "grad_norm": 0.572170519945423, "learning_rate": 3.603945401827444e-08, "loss": 0.0004, "step": 19589 }, { "epoch": 4.4573378839590445, "grad_norm": 0.0888267415129441, "learning_rate": 3.600955720117871e-08, "loss": 0.0004, "step": 19590 }, { "epoch": 4.457565415244596, "grad_norm": 0.7529521033850439, "learning_rate": 3.597967242196909e-08, "loss": 0.0047, "step": 19591 }, { "epoch": 4.457792946530148, "grad_norm": 0.4227831343772402, "learning_rate": 3.59497996812562e-08, "loss": 0.0024, "step": 19592 }, { "epoch": 4.458020477815699, "grad_norm": 0.03558518415178315, "learning_rate": 3.5919938979650586e-08, "loss": 0.0001, "step": 19593 }, { "epoch": 4.4582480091012515, "grad_norm": 0.4670471546849867, "learning_rate": 3.58900903177626e-08, "loss": 0.0016, "step": 19594 }, { "epoch": 4.458475540386803, "grad_norm": 0.04800372909130123, "learning_rate": 3.586025369620223e-08, "loss": 0.0002, "step": 19595 }, { "epoch": 4.458703071672355, "grad_norm": 0.1063024092374518, "learning_rate": 3.5830429115579285e-08, "loss": 0.0003, "step": 19596 }, { "epoch": 4.458930602957906, "grad_norm": 0.8086073557070791, "learning_rate": 3.580061657650347e-08, "loss": 0.0023, "step": 19597 }, { "epoch": 4.4591581342434585, "grad_norm": 0.05169035229254106, "learning_rate": 3.577081607958381e-08, "loss": 0.0001, "step": 19598 }, { "epoch": 4.45938566552901, "grad_norm": 0.5268953477216722, "learning_rate": 3.574102762542976e-08, "loss": 0.003, "step": 19599 }, { "epoch": 4.459613196814562, "grad_norm": 0.12900146086244582, "learning_rate": 3.571125121464979e-08, "loss": 0.0005, "step": 19600 }, { "epoch": 4.459840728100113, "grad_norm": 0.41207754325972895, "learning_rate": 3.5681486847852633e-08, "loss": 0.0017, "step": 19601 }, { "epoch": 4.4600682593856655, "grad_norm": 0.11949991070630361, "learning_rate": 3.565173452564657e-08, "loss": 0.0007, "step": 19602 }, { "epoch": 4.460295790671218, "grad_norm": 0.4412027945784249, "learning_rate": 3.562199424863973e-08, "loss": 0.0016, "step": 19603 }, { "epoch": 4.460523321956769, "grad_norm": 0.5132318039486544, "learning_rate": 3.559226601744001e-08, "loss": 0.0043, "step": 19604 }, { "epoch": 4.460750853242321, "grad_norm": 0.6754978303059443, "learning_rate": 3.556254983265485e-08, "loss": 0.0023, "step": 19605 }, { "epoch": 4.4609783845278725, "grad_norm": 0.28169581370318963, "learning_rate": 3.553284569489168e-08, "loss": 0.0018, "step": 19606 }, { "epoch": 4.461205915813425, "grad_norm": 0.17297558086966086, "learning_rate": 3.550315360475759e-08, "loss": 0.0003, "step": 19607 }, { "epoch": 4.461433447098976, "grad_norm": 0.20958619784551347, "learning_rate": 3.5473473562859446e-08, "loss": 0.0005, "step": 19608 }, { "epoch": 4.461660978384528, "grad_norm": 0.4197236057829696, "learning_rate": 3.5443805569803785e-08, "loss": 0.0016, "step": 19609 }, { "epoch": 4.4618885096700796, "grad_norm": 0.13604911006301493, "learning_rate": 3.541414962619706e-08, "loss": 0.0003, "step": 19610 }, { "epoch": 4.462116040955632, "grad_norm": 0.07569929201399032, "learning_rate": 3.5384505732645317e-08, "loss": 0.0002, "step": 19611 }, { "epoch": 4.462343572241183, "grad_norm": 0.17081428337214333, "learning_rate": 3.535487388975446e-08, "loss": 0.0006, "step": 19612 }, { "epoch": 4.462571103526735, "grad_norm": 2.7562156531507553, "learning_rate": 3.532525409813012e-08, "loss": 0.0196, "step": 19613 }, { "epoch": 4.462798634812287, "grad_norm": 0.11467444066294713, "learning_rate": 3.5295646358377635e-08, "loss": 0.0004, "step": 19614 }, { "epoch": 4.463026166097839, "grad_norm": 0.2508452188751679, "learning_rate": 3.5266050671102155e-08, "loss": 0.0008, "step": 19615 }, { "epoch": 4.46325369738339, "grad_norm": 0.13762368194095184, "learning_rate": 3.5236467036908473e-08, "loss": 0.0007, "step": 19616 }, { "epoch": 4.463481228668942, "grad_norm": 0.14334518656143755, "learning_rate": 3.520689545640138e-08, "loss": 0.0005, "step": 19617 }, { "epoch": 4.463708759954494, "grad_norm": 0.07274766537709197, "learning_rate": 3.517733593018519e-08, "loss": 0.0002, "step": 19618 }, { "epoch": 4.463936291240046, "grad_norm": 0.16665569642125913, "learning_rate": 3.514778845886399e-08, "loss": 0.0007, "step": 19619 }, { "epoch": 4.464163822525597, "grad_norm": 0.33358685837414037, "learning_rate": 3.511825304304182e-08, "loss": 0.0008, "step": 19620 }, { "epoch": 4.464391353811149, "grad_norm": 0.05099765786975437, "learning_rate": 3.5088729683322165e-08, "loss": 0.0002, "step": 19621 }, { "epoch": 4.464618885096701, "grad_norm": 0.5332358913945765, "learning_rate": 3.5059218380308555e-08, "loss": 0.0027, "step": 19622 }, { "epoch": 4.464846416382253, "grad_norm": 0.42159057367197167, "learning_rate": 3.5029719134604054e-08, "loss": 0.0022, "step": 19623 }, { "epoch": 4.465073947667804, "grad_norm": 0.03419908835491342, "learning_rate": 3.500023194681158e-08, "loss": 0.0001, "step": 19624 }, { "epoch": 4.465301478953356, "grad_norm": 0.334893009111999, "learning_rate": 3.4970756817533924e-08, "loss": 0.0011, "step": 19625 }, { "epoch": 4.465529010238908, "grad_norm": 0.038129070380743035, "learning_rate": 3.4941293747373306e-08, "loss": 0.0001, "step": 19626 }, { "epoch": 4.46575654152446, "grad_norm": 0.09614318343304794, "learning_rate": 3.4911842736932155e-08, "loss": 0.0003, "step": 19627 }, { "epoch": 4.465984072810011, "grad_norm": 0.44453557553505085, "learning_rate": 3.488240378681208e-08, "loss": 0.0018, "step": 19628 }, { "epoch": 4.466211604095563, "grad_norm": 0.8954205360770485, "learning_rate": 3.4852976897615035e-08, "loss": 0.0085, "step": 19629 }, { "epoch": 4.466439135381115, "grad_norm": 0.29991704794695934, "learning_rate": 3.4823562069942264e-08, "loss": 0.0017, "step": 19630 }, { "epoch": 4.466666666666667, "grad_norm": 0.08818878093665473, "learning_rate": 3.479415930439503e-08, "loss": 0.0004, "step": 19631 }, { "epoch": 4.466894197952218, "grad_norm": 0.19084807464299489, "learning_rate": 3.476476860157438e-08, "loss": 0.0012, "step": 19632 }, { "epoch": 4.46712172923777, "grad_norm": 0.23411308266416908, "learning_rate": 3.47353899620808e-08, "loss": 0.001, "step": 19633 }, { "epoch": 4.467349260523322, "grad_norm": 0.826642182380504, "learning_rate": 3.4706023386514856e-08, "loss": 0.0031, "step": 19634 }, { "epoch": 4.467576791808874, "grad_norm": 0.23647409384317755, "learning_rate": 3.467666887547676e-08, "loss": 0.0016, "step": 19635 }, { "epoch": 4.467804323094425, "grad_norm": 0.18178691748786543, "learning_rate": 3.464732642956638e-08, "loss": 0.0005, "step": 19636 }, { "epoch": 4.468031854379977, "grad_norm": 0.3657287950890348, "learning_rate": 3.4617996049383375e-08, "loss": 0.0016, "step": 19637 }, { "epoch": 4.468259385665529, "grad_norm": 0.08219182486134055, "learning_rate": 3.458867773552733e-08, "loss": 0.0004, "step": 19638 }, { "epoch": 4.468486916951081, "grad_norm": 0.38535219790007935, "learning_rate": 3.4559371488597576e-08, "loss": 0.0021, "step": 19639 }, { "epoch": 4.468714448236632, "grad_norm": 0.46830671447179956, "learning_rate": 3.4530077309192774e-08, "loss": 0.0036, "step": 19640 }, { "epoch": 4.468941979522184, "grad_norm": 0.22589141941822238, "learning_rate": 3.450079519791191e-08, "loss": 0.0008, "step": 19641 }, { "epoch": 4.4691695108077365, "grad_norm": 0.2675401463246548, "learning_rate": 3.447152515535339e-08, "loss": 0.0017, "step": 19642 }, { "epoch": 4.469397042093288, "grad_norm": 1.4839054676566792, "learning_rate": 3.44422671821152e-08, "loss": 0.0083, "step": 19643 }, { "epoch": 4.46962457337884, "grad_norm": 0.27962450259557686, "learning_rate": 3.4413021278795616e-08, "loss": 0.0014, "step": 19644 }, { "epoch": 4.469852104664391, "grad_norm": 2.5246651194842618, "learning_rate": 3.4383787445992215e-08, "loss": 0.0112, "step": 19645 }, { "epoch": 4.4700796359499435, "grad_norm": 0.22703670211238175, "learning_rate": 3.435456568430265e-08, "loss": 0.0009, "step": 19646 }, { "epoch": 4.470307167235495, "grad_norm": 0.7088399772833927, "learning_rate": 3.4325355994324005e-08, "loss": 0.0077, "step": 19647 }, { "epoch": 4.470534698521047, "grad_norm": 0.04100780829495019, "learning_rate": 3.429615837665323e-08, "loss": 0.0001, "step": 19648 }, { "epoch": 4.470762229806598, "grad_norm": 0.49922281025177456, "learning_rate": 3.426697283188729e-08, "loss": 0.0017, "step": 19649 }, { "epoch": 4.4709897610921505, "grad_norm": 0.4215976789050676, "learning_rate": 3.4237799360622296e-08, "loss": 0.0021, "step": 19650 }, { "epoch": 4.471217292377702, "grad_norm": 0.6666518456016223, "learning_rate": 3.4208637963455e-08, "loss": 0.0036, "step": 19651 }, { "epoch": 4.471444823663254, "grad_norm": 0.6644423989236874, "learning_rate": 3.417948864098103e-08, "loss": 0.0026, "step": 19652 }, { "epoch": 4.471672354948805, "grad_norm": 0.1762240762462266, "learning_rate": 3.415035139379644e-08, "loss": 0.0006, "step": 19653 }, { "epoch": 4.4718998862343575, "grad_norm": 0.6316067525741959, "learning_rate": 3.4121226222496514e-08, "loss": 0.0042, "step": 19654 }, { "epoch": 4.472127417519909, "grad_norm": 0.743208375052731, "learning_rate": 3.409211312767648e-08, "loss": 0.0044, "step": 19655 }, { "epoch": 4.472354948805461, "grad_norm": 0.14684064071360645, "learning_rate": 3.406301210993147e-08, "loss": 0.0009, "step": 19656 }, { "epoch": 4.472582480091012, "grad_norm": 0.6675620775046379, "learning_rate": 3.4033923169856236e-08, "loss": 0.0044, "step": 19657 }, { "epoch": 4.4728100113765645, "grad_norm": 0.7720869192088833, "learning_rate": 3.400484630804543e-08, "loss": 0.0032, "step": 19658 }, { "epoch": 4.473037542662116, "grad_norm": 0.2460084507457841, "learning_rate": 3.3975781525093094e-08, "loss": 0.0011, "step": 19659 }, { "epoch": 4.473265073947668, "grad_norm": 0.15719250839015025, "learning_rate": 3.394672882159347e-08, "loss": 0.0003, "step": 19660 }, { "epoch": 4.473492605233219, "grad_norm": 0.18818969430098745, "learning_rate": 3.3917688198140256e-08, "loss": 0.001, "step": 19661 }, { "epoch": 4.4737201365187715, "grad_norm": 0.10996091830540876, "learning_rate": 3.3888659655326866e-08, "loss": 0.0005, "step": 19662 }, { "epoch": 4.473947667804323, "grad_norm": 0.41214946084484183, "learning_rate": 3.385964319374671e-08, "loss": 0.0037, "step": 19663 }, { "epoch": 4.474175199089875, "grad_norm": 0.2597720524592664, "learning_rate": 3.3830638813992856e-08, "loss": 0.0023, "step": 19664 }, { "epoch": 4.474402730375426, "grad_norm": 0.0683906624285157, "learning_rate": 3.380164651665817e-08, "loss": 0.0003, "step": 19665 }, { "epoch": 4.4746302616609785, "grad_norm": 0.1992628487979273, "learning_rate": 3.377266630233502e-08, "loss": 0.0006, "step": 19666 }, { "epoch": 4.47485779294653, "grad_norm": 0.31770855000065884, "learning_rate": 3.374369817161577e-08, "loss": 0.0023, "step": 19667 }, { "epoch": 4.475085324232082, "grad_norm": 0.09135304865638565, "learning_rate": 3.371474212509253e-08, "loss": 0.0003, "step": 19668 }, { "epoch": 4.475312855517633, "grad_norm": 0.1516505240175211, "learning_rate": 3.3685798163357044e-08, "loss": 0.0001, "step": 19669 }, { "epoch": 4.4755403868031856, "grad_norm": 0.03681979044138763, "learning_rate": 3.365686628700085e-08, "loss": 0.0001, "step": 19670 }, { "epoch": 4.475767918088737, "grad_norm": 0.19013668571963696, "learning_rate": 3.3627946496615355e-08, "loss": 0.0004, "step": 19671 }, { "epoch": 4.475995449374289, "grad_norm": 0.8370153288711949, "learning_rate": 3.359903879279168e-08, "loss": 0.002, "step": 19672 }, { "epoch": 4.47622298065984, "grad_norm": 0.8470805847632227, "learning_rate": 3.3570143176120535e-08, "loss": 0.0068, "step": 19673 }, { "epoch": 4.476450511945393, "grad_norm": 0.7724685335185736, "learning_rate": 3.354125964719242e-08, "loss": 0.0063, "step": 19674 }, { "epoch": 4.476678043230944, "grad_norm": 0.5360939387141954, "learning_rate": 3.351238820659783e-08, "loss": 0.0041, "step": 19675 }, { "epoch": 4.476905574516496, "grad_norm": 0.38926259689345843, "learning_rate": 3.3483528854926645e-08, "loss": 0.0023, "step": 19676 }, { "epoch": 4.477133105802047, "grad_norm": 0.5510590838207485, "learning_rate": 3.345468159276888e-08, "loss": 0.0068, "step": 19677 }, { "epoch": 4.4773606370876, "grad_norm": 0.04152828822254775, "learning_rate": 3.342584642071406e-08, "loss": 0.0001, "step": 19678 }, { "epoch": 4.477588168373151, "grad_norm": 0.10346003754582424, "learning_rate": 3.3397023339351576e-08, "loss": 0.0004, "step": 19679 }, { "epoch": 4.477815699658703, "grad_norm": 0.912809564028315, "learning_rate": 3.336821234927047e-08, "loss": 0.002, "step": 19680 }, { "epoch": 4.478043230944255, "grad_norm": 0.874938821431051, "learning_rate": 3.333941345105951e-08, "loss": 0.0043, "step": 19681 }, { "epoch": 4.478270762229807, "grad_norm": 0.4258762335581461, "learning_rate": 3.331062664530746e-08, "loss": 0.0038, "step": 19682 }, { "epoch": 4.478498293515359, "grad_norm": 1.121289356607527, "learning_rate": 3.3281851932602454e-08, "loss": 0.0094, "step": 19683 }, { "epoch": 4.47872582480091, "grad_norm": 0.2672553115123004, "learning_rate": 3.325308931353272e-08, "loss": 0.0012, "step": 19684 }, { "epoch": 4.478953356086462, "grad_norm": 0.3936912857887229, "learning_rate": 3.3224338788686236e-08, "loss": 0.0011, "step": 19685 }, { "epoch": 4.479180887372014, "grad_norm": 0.5983067897760772, "learning_rate": 3.3195600358650326e-08, "loss": 0.0036, "step": 19686 }, { "epoch": 4.479408418657566, "grad_norm": 0.6823783426249197, "learning_rate": 3.316687402401264e-08, "loss": 0.0016, "step": 19687 }, { "epoch": 4.479635949943117, "grad_norm": 0.1729349849864378, "learning_rate": 3.313815978536007e-08, "loss": 0.0006, "step": 19688 }, { "epoch": 4.479863481228669, "grad_norm": 0.6070628700312184, "learning_rate": 3.310945764327965e-08, "loss": 0.0008, "step": 19689 }, { "epoch": 4.480091012514221, "grad_norm": 0.18544096071712066, "learning_rate": 3.308076759835778e-08, "loss": 0.0003, "step": 19690 }, { "epoch": 4.480318543799773, "grad_norm": 0.46301466664255975, "learning_rate": 3.305208965118108e-08, "loss": 0.0016, "step": 19691 }, { "epoch": 4.480546075085324, "grad_norm": 0.11274021166734438, "learning_rate": 3.30234238023356e-08, "loss": 0.0005, "step": 19692 }, { "epoch": 4.480773606370876, "grad_norm": 0.2554417970524231, "learning_rate": 3.299477005240706e-08, "loss": 0.0008, "step": 19693 }, { "epoch": 4.481001137656428, "grad_norm": 0.1104833182173422, "learning_rate": 3.296612840198131e-08, "loss": 0.0005, "step": 19694 }, { "epoch": 4.48122866894198, "grad_norm": 0.33234469290735913, "learning_rate": 3.293749885164357e-08, "loss": 0.0017, "step": 19695 }, { "epoch": 4.481456200227531, "grad_norm": 0.40120771426944213, "learning_rate": 3.290888140197915e-08, "loss": 0.0017, "step": 19696 }, { "epoch": 4.481683731513083, "grad_norm": 0.29222542400529256, "learning_rate": 3.2880276053572706e-08, "loss": 0.0029, "step": 19697 }, { "epoch": 4.481911262798635, "grad_norm": 0.6230505762701918, "learning_rate": 3.285168280700905e-08, "loss": 0.0016, "step": 19698 }, { "epoch": 4.482138794084187, "grad_norm": 0.06812333725645525, "learning_rate": 3.2823101662872584e-08, "loss": 0.0002, "step": 19699 }, { "epoch": 4.482366325369738, "grad_norm": 0.43040672347202086, "learning_rate": 3.279453262174728e-08, "loss": 0.0014, "step": 19700 }, { "epoch": 4.48259385665529, "grad_norm": 0.03907675236693335, "learning_rate": 3.2765975684217325e-08, "loss": 0.0001, "step": 19701 }, { "epoch": 4.482821387940842, "grad_norm": 0.04560885800936414, "learning_rate": 3.273743085086607e-08, "loss": 0.0002, "step": 19702 }, { "epoch": 4.483048919226394, "grad_norm": 0.06738713016610555, "learning_rate": 3.270889812227715e-08, "loss": 0.0002, "step": 19703 }, { "epoch": 4.483276450511945, "grad_norm": 0.4989443800554277, "learning_rate": 3.2680377499033494e-08, "loss": 0.0014, "step": 19704 }, { "epoch": 4.483503981797497, "grad_norm": 0.3445519002904101, "learning_rate": 3.2651868981718185e-08, "loss": 0.0026, "step": 19705 }, { "epoch": 4.483731513083049, "grad_norm": 0.355010529615326, "learning_rate": 3.262337257091387e-08, "loss": 0.0018, "step": 19706 }, { "epoch": 4.483959044368601, "grad_norm": 1.002898247053264, "learning_rate": 3.259488826720295e-08, "loss": 0.0046, "step": 19707 }, { "epoch": 4.484186575654152, "grad_norm": 0.45860785857452246, "learning_rate": 3.256641607116758e-08, "loss": 0.0037, "step": 19708 }, { "epoch": 4.484414106939704, "grad_norm": 0.2983469511409749, "learning_rate": 3.253795598338959e-08, "loss": 0.002, "step": 19709 }, { "epoch": 4.484641638225256, "grad_norm": 0.20380402619119106, "learning_rate": 3.250950800445081e-08, "loss": 0.0004, "step": 19710 }, { "epoch": 4.484869169510808, "grad_norm": 0.5277554788470297, "learning_rate": 3.248107213493258e-08, "loss": 0.0032, "step": 19711 }, { "epoch": 4.485096700796359, "grad_norm": 0.0661220245090879, "learning_rate": 3.245264837541603e-08, "loss": 0.0002, "step": 19712 }, { "epoch": 4.485324232081911, "grad_norm": 0.22963505696765085, "learning_rate": 3.2424236726482234e-08, "loss": 0.0009, "step": 19713 }, { "epoch": 4.485551763367463, "grad_norm": 0.098624857982904, "learning_rate": 3.2395837188711687e-08, "loss": 0.0003, "step": 19714 }, { "epoch": 4.485779294653015, "grad_norm": 0.4461528055609552, "learning_rate": 3.236744976268504e-08, "loss": 0.0049, "step": 19715 }, { "epoch": 4.486006825938566, "grad_norm": 0.33959280822338594, "learning_rate": 3.233907444898231e-08, "loss": 0.0018, "step": 19716 }, { "epoch": 4.486234357224118, "grad_norm": 0.43797481906123903, "learning_rate": 3.2310711248183454e-08, "loss": 0.0022, "step": 19717 }, { "epoch": 4.48646188850967, "grad_norm": 0.10299248612737827, "learning_rate": 3.228236016086829e-08, "loss": 0.0005, "step": 19718 }, { "epoch": 4.486689419795222, "grad_norm": 0.11172783080961564, "learning_rate": 3.225402118761607e-08, "loss": 0.0003, "step": 19719 }, { "epoch": 4.486916951080774, "grad_norm": 0.6566120934901298, "learning_rate": 3.22256943290062e-08, "loss": 0.0026, "step": 19720 }, { "epoch": 4.487144482366325, "grad_norm": 0.735889073709075, "learning_rate": 3.219737958561744e-08, "loss": 0.0067, "step": 19721 }, { "epoch": 4.4873720136518775, "grad_norm": 0.5413674292539674, "learning_rate": 3.216907695802865e-08, "loss": 0.0027, "step": 19722 }, { "epoch": 4.487599544937429, "grad_norm": 0.47588585582176457, "learning_rate": 3.214078644681817e-08, "loss": 0.0037, "step": 19723 }, { "epoch": 4.487827076222981, "grad_norm": 1.297078519519403, "learning_rate": 3.2112508052564165e-08, "loss": 0.0094, "step": 19724 }, { "epoch": 4.488054607508532, "grad_norm": 0.09556692148144656, "learning_rate": 3.2084241775844836e-08, "loss": 0.0004, "step": 19725 }, { "epoch": 4.4882821387940846, "grad_norm": 1.1454257848500828, "learning_rate": 3.205598761723758e-08, "loss": 0.0048, "step": 19726 }, { "epoch": 4.488509670079636, "grad_norm": 0.06679667697305273, "learning_rate": 3.202774557732012e-08, "loss": 0.0002, "step": 19727 }, { "epoch": 4.488737201365188, "grad_norm": 0.788465832364573, "learning_rate": 3.199951565666951e-08, "loss": 0.0031, "step": 19728 }, { "epoch": 4.488964732650739, "grad_norm": 0.0367152630071016, "learning_rate": 3.197129785586277e-08, "loss": 0.0001, "step": 19729 }, { "epoch": 4.489192263936292, "grad_norm": 0.3031629254882445, "learning_rate": 3.19430921754766e-08, "loss": 0.0019, "step": 19730 }, { "epoch": 4.489419795221843, "grad_norm": 0.16577007702984742, "learning_rate": 3.191489861608749e-08, "loss": 0.0006, "step": 19731 }, { "epoch": 4.489647326507395, "grad_norm": 0.6915615920043604, "learning_rate": 3.188671717827177e-08, "loss": 0.0036, "step": 19732 }, { "epoch": 4.489874857792946, "grad_norm": 0.09449507485459577, "learning_rate": 3.185854786260517e-08, "loss": 0.0002, "step": 19733 }, { "epoch": 4.490102389078499, "grad_norm": 0.18697189233430814, "learning_rate": 3.1830390669663685e-08, "loss": 0.0002, "step": 19734 }, { "epoch": 4.49032992036405, "grad_norm": 0.4758059935611895, "learning_rate": 3.1802245600022626e-08, "loss": 0.005, "step": 19735 }, { "epoch": 4.490557451649602, "grad_norm": 0.735427230427315, "learning_rate": 3.1774112654257354e-08, "loss": 0.0036, "step": 19736 }, { "epoch": 4.490784982935153, "grad_norm": 0.2951135823937033, "learning_rate": 3.174599183294262e-08, "loss": 0.0014, "step": 19737 }, { "epoch": 4.491012514220706, "grad_norm": 1.0596405146792145, "learning_rate": 3.1717883136653386e-08, "loss": 0.0073, "step": 19738 }, { "epoch": 4.491240045506257, "grad_norm": 0.31780337018466176, "learning_rate": 3.168978656596412e-08, "loss": 0.0007, "step": 19739 }, { "epoch": 4.491467576791809, "grad_norm": 0.8544993905336894, "learning_rate": 3.166170212144902e-08, "loss": 0.0021, "step": 19740 }, { "epoch": 4.49169510807736, "grad_norm": 3.290312215229139, "learning_rate": 3.163362980368207e-08, "loss": 0.0033, "step": 19741 }, { "epoch": 4.491922639362913, "grad_norm": 0.15069453003892466, "learning_rate": 3.16055696132371e-08, "loss": 0.0005, "step": 19742 }, { "epoch": 4.492150170648464, "grad_norm": 0.19921490146508894, "learning_rate": 3.1577521550687356e-08, "loss": 0.0006, "step": 19743 }, { "epoch": 4.492377701934016, "grad_norm": 0.20896555387045787, "learning_rate": 3.154948561660633e-08, "loss": 0.0012, "step": 19744 }, { "epoch": 4.492605233219567, "grad_norm": 0.05747904403543183, "learning_rate": 3.1521461811566956e-08, "loss": 0.0003, "step": 19745 }, { "epoch": 4.49283276450512, "grad_norm": 0.4723419156186595, "learning_rate": 3.149345013614212e-08, "loss": 0.0025, "step": 19746 }, { "epoch": 4.493060295790671, "grad_norm": 0.1009007746721421, "learning_rate": 3.146545059090407e-08, "loss": 0.0002, "step": 19747 }, { "epoch": 4.493287827076223, "grad_norm": 0.41268071052754357, "learning_rate": 3.1437463176425334e-08, "loss": 0.0008, "step": 19748 }, { "epoch": 4.493515358361774, "grad_norm": 0.47834639955897845, "learning_rate": 3.140948789327774e-08, "loss": 0.0007, "step": 19749 }, { "epoch": 4.493742889647327, "grad_norm": 1.0388203610799522, "learning_rate": 3.138152474203307e-08, "loss": 0.004, "step": 19750 }, { "epoch": 4.493970420932878, "grad_norm": 0.7339221320927389, "learning_rate": 3.135357372326286e-08, "loss": 0.0076, "step": 19751 }, { "epoch": 4.49419795221843, "grad_norm": 0.04573645399609814, "learning_rate": 3.132563483753834e-08, "loss": 0.0002, "step": 19752 }, { "epoch": 4.494425483503981, "grad_norm": 0.7730519648631446, "learning_rate": 3.12977080854307e-08, "loss": 0.0088, "step": 19753 }, { "epoch": 4.494653014789534, "grad_norm": 0.12846345675335544, "learning_rate": 3.126979346751061e-08, "loss": 0.0004, "step": 19754 }, { "epoch": 4.494880546075085, "grad_norm": 0.42835195622957084, "learning_rate": 3.12418909843485e-08, "loss": 0.0029, "step": 19755 }, { "epoch": 4.495108077360637, "grad_norm": 0.34177167384911533, "learning_rate": 3.1214000636514764e-08, "loss": 0.0016, "step": 19756 }, { "epoch": 4.495335608646188, "grad_norm": 0.6728381338471353, "learning_rate": 3.118612242457936e-08, "loss": 0.0025, "step": 19757 }, { "epoch": 4.495563139931741, "grad_norm": 0.398504776078637, "learning_rate": 3.115825634911203e-08, "loss": 0.0012, "step": 19758 }, { "epoch": 4.495790671217293, "grad_norm": 0.45303817447316475, "learning_rate": 3.113040241068239e-08, "loss": 0.0034, "step": 19759 }, { "epoch": 4.496018202502844, "grad_norm": 0.1887473902094913, "learning_rate": 3.1102560609859794e-08, "loss": 0.0008, "step": 19760 }, { "epoch": 4.496245733788396, "grad_norm": 0.07668203713589677, "learning_rate": 3.107473094721321e-08, "loss": 0.0002, "step": 19761 }, { "epoch": 4.496473265073948, "grad_norm": 0.26123083263330527, "learning_rate": 3.104691342331129e-08, "loss": 0.0015, "step": 19762 }, { "epoch": 4.4967007963595, "grad_norm": 0.6460658835699257, "learning_rate": 3.101910803872281e-08, "loss": 0.0031, "step": 19763 }, { "epoch": 4.496928327645051, "grad_norm": 0.031533974452706406, "learning_rate": 3.0991314794015795e-08, "loss": 0.0001, "step": 19764 }, { "epoch": 4.497155858930603, "grad_norm": 0.1112518964736659, "learning_rate": 3.096353368975846e-08, "loss": 0.0006, "step": 19765 }, { "epoch": 4.497383390216155, "grad_norm": 0.26753139658346287, "learning_rate": 3.093576472651856e-08, "loss": 0.002, "step": 19766 }, { "epoch": 4.497610921501707, "grad_norm": 1.2662822395640116, "learning_rate": 3.090800790486376e-08, "loss": 0.0062, "step": 19767 }, { "epoch": 4.497838452787258, "grad_norm": 0.42500982117873776, "learning_rate": 3.088026322536124e-08, "loss": 0.0013, "step": 19768 }, { "epoch": 4.49806598407281, "grad_norm": 0.09336346037190055, "learning_rate": 3.085253068857798e-08, "loss": 0.0004, "step": 19769 }, { "epoch": 4.498293515358362, "grad_norm": 0.17606736358305217, "learning_rate": 3.082481029508096e-08, "loss": 0.0007, "step": 19770 }, { "epoch": 4.498521046643914, "grad_norm": 0.1622837307547081, "learning_rate": 3.079710204543638e-08, "loss": 0.0004, "step": 19771 }, { "epoch": 4.498748577929465, "grad_norm": 0.5610304867489284, "learning_rate": 3.076940594021111e-08, "loss": 0.0022, "step": 19772 }, { "epoch": 4.498976109215017, "grad_norm": 0.11484566627631866, "learning_rate": 3.074172197997077e-08, "loss": 0.0005, "step": 19773 }, { "epoch": 4.499203640500569, "grad_norm": 0.09367813309571456, "learning_rate": 3.071405016528132e-08, "loss": 0.0004, "step": 19774 }, { "epoch": 4.499431171786121, "grad_norm": 0.2539685402385842, "learning_rate": 3.068639049670835e-08, "loss": 0.0011, "step": 19775 }, { "epoch": 4.499658703071672, "grad_norm": 0.12076258450514343, "learning_rate": 3.0658742974817026e-08, "loss": 0.0003, "step": 19776 }, { "epoch": 4.499886234357224, "grad_norm": 0.2549760582245949, "learning_rate": 3.0631107600172516e-08, "loss": 0.0008, "step": 19777 }, { "epoch": 4.500113765642776, "grad_norm": 0.050745704193349485, "learning_rate": 3.060348437333959e-08, "loss": 0.0001, "step": 19778 }, { "epoch": 4.500341296928328, "grad_norm": 0.18333285675028715, "learning_rate": 3.0575873294882984e-08, "loss": 0.0006, "step": 19779 }, { "epoch": 4.500568828213879, "grad_norm": 0.7605173252052985, "learning_rate": 3.054827436536692e-08, "loss": 0.0083, "step": 19780 }, { "epoch": 4.500796359499431, "grad_norm": 0.043342129661302436, "learning_rate": 3.0520687585355315e-08, "loss": 0.0002, "step": 19781 }, { "epoch": 4.501023890784983, "grad_norm": 0.5165863562864077, "learning_rate": 3.0493112955412225e-08, "loss": 0.0019, "step": 19782 }, { "epoch": 4.501251422070535, "grad_norm": 0.4309846955655129, "learning_rate": 3.046555047610103e-08, "loss": 0.0024, "step": 19783 }, { "epoch": 4.501478953356086, "grad_norm": 0.22627792206678388, "learning_rate": 3.043800014798509e-08, "loss": 0.0009, "step": 19784 }, { "epoch": 4.501706484641638, "grad_norm": 5.464657573142055, "learning_rate": 3.041046197162764e-08, "loss": 0.0043, "step": 19785 }, { "epoch": 4.50193401592719, "grad_norm": 0.041350223786758976, "learning_rate": 3.0382935947591426e-08, "loss": 0.0001, "step": 19786 }, { "epoch": 4.502161547212742, "grad_norm": 0.45232033374570485, "learning_rate": 3.035542207643898e-08, "loss": 0.0016, "step": 19787 }, { "epoch": 4.502389078498293, "grad_norm": 0.44255570010365913, "learning_rate": 3.032792035873262e-08, "loss": 0.0018, "step": 19788 }, { "epoch": 4.502616609783845, "grad_norm": 0.09127535883590707, "learning_rate": 3.0300430795034625e-08, "loss": 0.0004, "step": 19789 }, { "epoch": 4.502844141069397, "grad_norm": 0.5384365902382026, "learning_rate": 3.0272953385906554e-08, "loss": 0.0078, "step": 19790 }, { "epoch": 4.503071672354949, "grad_norm": 0.4916522364317422, "learning_rate": 3.0245488131910096e-08, "loss": 0.002, "step": 19791 }, { "epoch": 4.5032992036405, "grad_norm": 0.1330527522851877, "learning_rate": 3.0218035033606695e-08, "loss": 0.0004, "step": 19792 }, { "epoch": 4.503526734926052, "grad_norm": 0.8010372865673111, "learning_rate": 3.019059409155735e-08, "loss": 0.0031, "step": 19793 }, { "epoch": 4.503754266211605, "grad_norm": 0.2870847588447608, "learning_rate": 3.0163165306322934e-08, "loss": 0.0025, "step": 19794 }, { "epoch": 4.503981797497156, "grad_norm": 0.5011387563127612, "learning_rate": 3.013574867846397e-08, "loss": 0.0046, "step": 19795 }, { "epoch": 4.504209328782707, "grad_norm": 0.08976252841145078, "learning_rate": 3.010834420854092e-08, "loss": 0.0004, "step": 19796 }, { "epoch": 4.504436860068259, "grad_norm": 0.1514938013341812, "learning_rate": 3.0080951897113814e-08, "loss": 0.001, "step": 19797 }, { "epoch": 4.504664391353812, "grad_norm": 0.21087022986014312, "learning_rate": 3.005357174474241e-08, "loss": 0.0011, "step": 19798 }, { "epoch": 4.504891922639363, "grad_norm": 0.17662164594516078, "learning_rate": 3.002620375198655e-08, "loss": 0.0006, "step": 19799 }, { "epoch": 4.505119453924914, "grad_norm": 1.068089193559954, "learning_rate": 2.999884791940535e-08, "loss": 0.0076, "step": 19800 }, { "epoch": 4.505346985210466, "grad_norm": 0.7013837811922997, "learning_rate": 2.997150424755811e-08, "loss": 0.0014, "step": 19801 }, { "epoch": 4.505574516496019, "grad_norm": 0.17356132820255524, "learning_rate": 2.994417273700347e-08, "loss": 0.0009, "step": 19802 }, { "epoch": 4.50580204778157, "grad_norm": 0.6468147893013465, "learning_rate": 2.9916853388300204e-08, "loss": 0.0021, "step": 19803 }, { "epoch": 4.506029579067122, "grad_norm": 0.057560183688066625, "learning_rate": 2.988954620200657e-08, "loss": 0.0001, "step": 19804 }, { "epoch": 4.506257110352673, "grad_norm": 0.12594857005344862, "learning_rate": 2.986225117868072e-08, "loss": 0.0005, "step": 19805 }, { "epoch": 4.506484641638226, "grad_norm": 0.644939442831978, "learning_rate": 2.983496831888054e-08, "loss": 0.0033, "step": 19806 }, { "epoch": 4.506712172923777, "grad_norm": 0.22403486314295665, "learning_rate": 2.980769762316357e-08, "loss": 0.0011, "step": 19807 }, { "epoch": 4.506939704209329, "grad_norm": 0.6585324639565111, "learning_rate": 2.978043909208729e-08, "loss": 0.0044, "step": 19808 }, { "epoch": 4.50716723549488, "grad_norm": 0.28632540980918686, "learning_rate": 2.9753192726208677e-08, "loss": 0.0007, "step": 19809 }, { "epoch": 4.507394766780433, "grad_norm": 0.5413169210012974, "learning_rate": 2.9725958526084786e-08, "loss": 0.0035, "step": 19810 }, { "epoch": 4.507622298065984, "grad_norm": 0.3499088749840024, "learning_rate": 2.969873649227198e-08, "loss": 0.002, "step": 19811 }, { "epoch": 4.507849829351536, "grad_norm": 0.2539165470547404, "learning_rate": 2.9671526625326756e-08, "loss": 0.0011, "step": 19812 }, { "epoch": 4.508077360637087, "grad_norm": 0.4574847592566261, "learning_rate": 2.9644328925805267e-08, "loss": 0.0047, "step": 19813 }, { "epoch": 4.50830489192264, "grad_norm": 0.057327781127854624, "learning_rate": 2.9617143394263316e-08, "loss": 0.0002, "step": 19814 }, { "epoch": 4.508532423208191, "grad_norm": 0.1443739592251808, "learning_rate": 2.9589970031256647e-08, "loss": 0.0002, "step": 19815 }, { "epoch": 4.508759954493743, "grad_norm": 0.26954695092412184, "learning_rate": 2.95628088373405e-08, "loss": 0.0008, "step": 19816 }, { "epoch": 4.508987485779294, "grad_norm": 0.11097887633888556, "learning_rate": 2.9535659813070064e-08, "loss": 0.0002, "step": 19817 }, { "epoch": 4.509215017064847, "grad_norm": 0.29381737672785, "learning_rate": 2.9508522959000167e-08, "loss": 0.0022, "step": 19818 }, { "epoch": 4.509442548350398, "grad_norm": 0.29683046635033217, "learning_rate": 2.948139827568544e-08, "loss": 0.001, "step": 19819 }, { "epoch": 4.50967007963595, "grad_norm": 0.6765625236807766, "learning_rate": 2.9454285763680437e-08, "loss": 0.002, "step": 19820 }, { "epoch": 4.509897610921501, "grad_norm": 0.689285099567375, "learning_rate": 2.942718542353902e-08, "loss": 0.0037, "step": 19821 }, { "epoch": 4.510125142207054, "grad_norm": 0.11063713741532104, "learning_rate": 2.940009725581526e-08, "loss": 0.0002, "step": 19822 }, { "epoch": 4.510352673492605, "grad_norm": 0.7860411998834453, "learning_rate": 2.9373021261062603e-08, "loss": 0.0038, "step": 19823 }, { "epoch": 4.510580204778157, "grad_norm": 0.4781150710883901, "learning_rate": 2.9345957439834705e-08, "loss": 0.0018, "step": 19824 }, { "epoch": 4.510807736063708, "grad_norm": 0.3668913787585502, "learning_rate": 2.9318905792684453e-08, "loss": 0.001, "step": 19825 }, { "epoch": 4.511035267349261, "grad_norm": 1.4266642666422786, "learning_rate": 2.9291866320164814e-08, "loss": 0.005, "step": 19826 }, { "epoch": 4.511262798634812, "grad_norm": 0.9246502392713292, "learning_rate": 2.9264839022828533e-08, "loss": 0.0045, "step": 19827 }, { "epoch": 4.511490329920364, "grad_norm": 0.171229363300339, "learning_rate": 2.9237823901227813e-08, "loss": 0.0004, "step": 19828 }, { "epoch": 4.5117178612059154, "grad_norm": 0.21993284029476443, "learning_rate": 2.9210820955914918e-08, "loss": 0.0006, "step": 19829 }, { "epoch": 4.511945392491468, "grad_norm": 0.6653018383785184, "learning_rate": 2.9183830187441768e-08, "loss": 0.0046, "step": 19830 }, { "epoch": 4.512172923777019, "grad_norm": 0.49007968119259676, "learning_rate": 2.9156851596359863e-08, "loss": 0.0012, "step": 19831 }, { "epoch": 4.512400455062571, "grad_norm": 0.5866780951183063, "learning_rate": 2.9129885183220645e-08, "loss": 0.0031, "step": 19832 }, { "epoch": 4.512627986348123, "grad_norm": 0.3474633447882451, "learning_rate": 2.910293094857533e-08, "loss": 0.0013, "step": 19833 }, { "epoch": 4.512855517633675, "grad_norm": 0.08075805630812663, "learning_rate": 2.9075988892974805e-08, "loss": 0.0002, "step": 19834 }, { "epoch": 4.513083048919226, "grad_norm": 0.048902185762789825, "learning_rate": 2.9049059016969666e-08, "loss": 0.0001, "step": 19835 }, { "epoch": 4.513310580204778, "grad_norm": 0.5117689806740622, "learning_rate": 2.9022141321110378e-08, "loss": 0.0009, "step": 19836 }, { "epoch": 4.51353811149033, "grad_norm": 0.1197969464796892, "learning_rate": 2.8995235805946987e-08, "loss": 0.0007, "step": 19837 }, { "epoch": 4.513765642775882, "grad_norm": 0.15548279540514512, "learning_rate": 2.8968342472029328e-08, "loss": 0.0005, "step": 19838 }, { "epoch": 4.513993174061433, "grad_norm": 0.23791978976209985, "learning_rate": 2.8941461319907313e-08, "loss": 0.0017, "step": 19839 }, { "epoch": 4.514220705346985, "grad_norm": 0.9492536197022463, "learning_rate": 2.891459235013015e-08, "loss": 0.0063, "step": 19840 }, { "epoch": 4.514448236632537, "grad_norm": 0.03138917377088246, "learning_rate": 2.888773556324713e-08, "loss": 0.0001, "step": 19841 }, { "epoch": 4.514675767918089, "grad_norm": 0.14911494716147194, "learning_rate": 2.886089095980697e-08, "loss": 0.0003, "step": 19842 }, { "epoch": 4.514903299203641, "grad_norm": 0.36543007716335035, "learning_rate": 2.883405854035848e-08, "loss": 0.0016, "step": 19843 }, { "epoch": 4.515130830489192, "grad_norm": 0.17888603230497604, "learning_rate": 2.8807238305449958e-08, "loss": 0.0006, "step": 19844 }, { "epoch": 4.515358361774744, "grad_norm": 1.1195133796067918, "learning_rate": 2.8780430255629585e-08, "loss": 0.0093, "step": 19845 }, { "epoch": 4.515585893060296, "grad_norm": 0.5283602772433282, "learning_rate": 2.8753634391445394e-08, "loss": 0.0031, "step": 19846 }, { "epoch": 4.515813424345848, "grad_norm": 0.38969660535907574, "learning_rate": 2.8726850713444858e-08, "loss": 0.0018, "step": 19847 }, { "epoch": 4.516040955631399, "grad_norm": 0.12987971986393027, "learning_rate": 2.8700079222175532e-08, "loss": 0.0005, "step": 19848 }, { "epoch": 4.516268486916951, "grad_norm": 0.15579068892227366, "learning_rate": 2.8673319918184546e-08, "loss": 0.0004, "step": 19849 }, { "epoch": 4.516496018202503, "grad_norm": 0.43538965343310426, "learning_rate": 2.8646572802018616e-08, "loss": 0.0047, "step": 19850 }, { "epoch": 4.516723549488055, "grad_norm": 0.1722213239760953, "learning_rate": 2.8619837874224662e-08, "loss": 0.0007, "step": 19851 }, { "epoch": 4.516951080773606, "grad_norm": 0.3829719936217328, "learning_rate": 2.8593115135348986e-08, "loss": 0.002, "step": 19852 }, { "epoch": 4.517178612059158, "grad_norm": 0.46675154073092917, "learning_rate": 2.856640458593782e-08, "loss": 0.0018, "step": 19853 }, { "epoch": 4.51740614334471, "grad_norm": 0.1388512162940158, "learning_rate": 2.853970622653697e-08, "loss": 0.001, "step": 19854 }, { "epoch": 4.517633674630262, "grad_norm": 0.5944162645016746, "learning_rate": 2.8513020057692256e-08, "loss": 0.0021, "step": 19855 }, { "epoch": 4.517861205915813, "grad_norm": 0.7126760130100391, "learning_rate": 2.8486346079948934e-08, "loss": 0.0062, "step": 19856 }, { "epoch": 4.518088737201365, "grad_norm": 0.41152152196499153, "learning_rate": 2.8459684293852194e-08, "loss": 0.0016, "step": 19857 }, { "epoch": 4.518316268486917, "grad_norm": 0.05192030042514333, "learning_rate": 2.8433034699947014e-08, "loss": 0.0002, "step": 19858 }, { "epoch": 4.518543799772469, "grad_norm": 0.23040951863279024, "learning_rate": 2.840639729877803e-08, "loss": 0.0008, "step": 19859 }, { "epoch": 4.51877133105802, "grad_norm": 0.16678887076047252, "learning_rate": 2.837977209088974e-08, "loss": 0.0004, "step": 19860 }, { "epoch": 4.518998862343572, "grad_norm": 0.4886424103887423, "learning_rate": 2.8353159076826286e-08, "loss": 0.0028, "step": 19861 }, { "epoch": 4.519226393629124, "grad_norm": 0.09212511115356486, "learning_rate": 2.8326558257131473e-08, "loss": 0.0003, "step": 19862 }, { "epoch": 4.519453924914676, "grad_norm": 0.3146916906796192, "learning_rate": 2.82999696323491e-08, "loss": 0.0012, "step": 19863 }, { "epoch": 4.519681456200227, "grad_norm": 0.0582042603349988, "learning_rate": 2.8273393203022482e-08, "loss": 0.0001, "step": 19864 }, { "epoch": 4.519908987485779, "grad_norm": 0.405593341679237, "learning_rate": 2.8246828969694802e-08, "loss": 0.0016, "step": 19865 }, { "epoch": 4.520136518771331, "grad_norm": 0.5045284889426257, "learning_rate": 2.8220276932909087e-08, "loss": 0.0028, "step": 19866 }, { "epoch": 4.520364050056883, "grad_norm": 0.18287855108794995, "learning_rate": 2.8193737093208038e-08, "loss": 0.0006, "step": 19867 }, { "epoch": 4.520591581342434, "grad_norm": 0.09958886285201315, "learning_rate": 2.8167209451133993e-08, "loss": 0.0001, "step": 19868 }, { "epoch": 4.520819112627986, "grad_norm": 0.3583199961781679, "learning_rate": 2.8140694007229087e-08, "loss": 0.0017, "step": 19869 }, { "epoch": 4.521046643913538, "grad_norm": 0.4190994557267919, "learning_rate": 2.8114190762035322e-08, "loss": 0.0021, "step": 19870 }, { "epoch": 4.52127417519909, "grad_norm": 0.6649009269455609, "learning_rate": 2.8087699716094346e-08, "loss": 0.0023, "step": 19871 }, { "epoch": 4.521501706484642, "grad_norm": 0.32909629356956355, "learning_rate": 2.806122086994753e-08, "loss": 0.0012, "step": 19872 }, { "epoch": 4.521729237770193, "grad_norm": 0.2007798455994848, "learning_rate": 2.8034754224136178e-08, "loss": 0.0003, "step": 19873 }, { "epoch": 4.521956769055745, "grad_norm": 1.139331785795484, "learning_rate": 2.800829977920118e-08, "loss": 0.0056, "step": 19874 }, { "epoch": 4.522184300341297, "grad_norm": 0.23357809011206687, "learning_rate": 2.7981857535683206e-08, "loss": 0.0014, "step": 19875 }, { "epoch": 4.522411831626849, "grad_norm": 0.19951877180171004, "learning_rate": 2.7955427494122667e-08, "loss": 0.0009, "step": 19876 }, { "epoch": 4.5226393629124, "grad_norm": 0.43432619505551284, "learning_rate": 2.792900965505975e-08, "loss": 0.0007, "step": 19877 }, { "epoch": 4.522866894197952, "grad_norm": 0.3753358461949492, "learning_rate": 2.790260401903437e-08, "loss": 0.007, "step": 19878 }, { "epoch": 4.523094425483504, "grad_norm": 0.2972072242310814, "learning_rate": 2.787621058658617e-08, "loss": 0.0021, "step": 19879 }, { "epoch": 4.523321956769056, "grad_norm": 0.45058881423105573, "learning_rate": 2.7849829358254782e-08, "loss": 0.003, "step": 19880 }, { "epoch": 4.523549488054607, "grad_norm": 1.1764326682221466, "learning_rate": 2.7823460334579154e-08, "loss": 0.005, "step": 19881 }, { "epoch": 4.52377701934016, "grad_norm": 0.05101085095411466, "learning_rate": 2.7797103516098363e-08, "loss": 0.0002, "step": 19882 }, { "epoch": 4.524004550625711, "grad_norm": 0.37037435996753426, "learning_rate": 2.7770758903351013e-08, "loss": 0.0063, "step": 19883 }, { "epoch": 4.524232081911263, "grad_norm": 0.3371568290466121, "learning_rate": 2.774442649687563e-08, "loss": 0.0009, "step": 19884 }, { "epoch": 4.5244596131968144, "grad_norm": 0.21413576039655033, "learning_rate": 2.771810629721032e-08, "loss": 0.0014, "step": 19885 }, { "epoch": 4.524687144482367, "grad_norm": 0.9027481730087037, "learning_rate": 2.7691798304892993e-08, "loss": 0.0033, "step": 19886 }, { "epoch": 4.524914675767918, "grad_norm": 0.09068435865778292, "learning_rate": 2.7665502520461477e-08, "loss": 0.0002, "step": 19887 }, { "epoch": 4.52514220705347, "grad_norm": 0.4828411106922105, "learning_rate": 2.7639218944453052e-08, "loss": 0.0032, "step": 19888 }, { "epoch": 4.5253697383390215, "grad_norm": 0.28931147236801913, "learning_rate": 2.761294757740507e-08, "loss": 0.0014, "step": 19889 }, { "epoch": 4.525597269624574, "grad_norm": 0.5317765509827187, "learning_rate": 2.7586688419854317e-08, "loss": 0.0057, "step": 19890 }, { "epoch": 4.525824800910125, "grad_norm": 0.09525032198911683, "learning_rate": 2.756044147233759e-08, "loss": 0.0002, "step": 19891 }, { "epoch": 4.526052332195677, "grad_norm": 0.11241621165576278, "learning_rate": 2.753420673539113e-08, "loss": 0.0005, "step": 19892 }, { "epoch": 4.5262798634812285, "grad_norm": 0.06988437817171908, "learning_rate": 2.7507984209551447e-08, "loss": 0.0003, "step": 19893 }, { "epoch": 4.526507394766781, "grad_norm": 0.34570557126007534, "learning_rate": 2.7481773895354296e-08, "loss": 0.0014, "step": 19894 }, { "epoch": 4.526734926052332, "grad_norm": 0.24784680755371571, "learning_rate": 2.7455575793335285e-08, "loss": 0.0008, "step": 19895 }, { "epoch": 4.526962457337884, "grad_norm": 0.48488156271085664, "learning_rate": 2.742938990403003e-08, "loss": 0.0061, "step": 19896 }, { "epoch": 4.5271899886234355, "grad_norm": 0.3722733265305504, "learning_rate": 2.7403216227973594e-08, "loss": 0.002, "step": 19897 }, { "epoch": 4.527417519908988, "grad_norm": 0.3786774522290843, "learning_rate": 2.737705476570096e-08, "loss": 0.002, "step": 19898 }, { "epoch": 4.527645051194539, "grad_norm": 0.5664394374897213, "learning_rate": 2.735090551774691e-08, "loss": 0.0017, "step": 19899 }, { "epoch": 4.527872582480091, "grad_norm": 0.09944676585079536, "learning_rate": 2.7324768484645743e-08, "loss": 0.0006, "step": 19900 }, { "epoch": 4.5281001137656425, "grad_norm": 0.45670274257162763, "learning_rate": 2.7298643666931816e-08, "loss": 0.0018, "step": 19901 }, { "epoch": 4.528327645051195, "grad_norm": 0.06618880132452053, "learning_rate": 2.727253106513887e-08, "loss": 0.0003, "step": 19902 }, { "epoch": 4.528555176336746, "grad_norm": 0.9725854697503177, "learning_rate": 2.724643067980079e-08, "loss": 0.0039, "step": 19903 }, { "epoch": 4.528782707622298, "grad_norm": 0.1904360408170877, "learning_rate": 2.7220342511450823e-08, "loss": 0.0007, "step": 19904 }, { "epoch": 4.5290102389078495, "grad_norm": 0.28364113554000087, "learning_rate": 2.7194266560622296e-08, "loss": 0.0004, "step": 19905 }, { "epoch": 4.529237770193402, "grad_norm": 0.05604865641616285, "learning_rate": 2.7168202827848254e-08, "loss": 0.0002, "step": 19906 }, { "epoch": 4.529465301478953, "grad_norm": 0.23615669211279802, "learning_rate": 2.7142151313661115e-08, "loss": 0.0014, "step": 19907 }, { "epoch": 4.529692832764505, "grad_norm": 0.285356395866372, "learning_rate": 2.7116112018593586e-08, "loss": 0.0014, "step": 19908 }, { "epoch": 4.5299203640500565, "grad_norm": 0.1974705774042731, "learning_rate": 2.7090084943177662e-08, "loss": 0.0008, "step": 19909 }, { "epoch": 4.530147895335609, "grad_norm": 1.275738734030865, "learning_rate": 2.7064070087945494e-08, "loss": 0.0067, "step": 19910 }, { "epoch": 4.530375426621161, "grad_norm": 1.1903274034155846, "learning_rate": 2.703806745342853e-08, "loss": 0.0053, "step": 19911 }, { "epoch": 4.530602957906712, "grad_norm": 0.29864338882008507, "learning_rate": 2.701207704015843e-08, "loss": 0.0013, "step": 19912 }, { "epoch": 4.5308304891922635, "grad_norm": 0.5345246226911434, "learning_rate": 2.698609884866636e-08, "loss": 0.0024, "step": 19913 }, { "epoch": 4.531058020477816, "grad_norm": 0.16006134536970013, "learning_rate": 2.6960132879483154e-08, "loss": 0.0012, "step": 19914 }, { "epoch": 4.531285551763368, "grad_norm": 0.10441267206638245, "learning_rate": 2.693417913313963e-08, "loss": 0.0002, "step": 19915 }, { "epoch": 4.531513083048919, "grad_norm": 0.01957276065464247, "learning_rate": 2.690823761016606e-08, "loss": 0.0001, "step": 19916 }, { "epoch": 4.5317406143344705, "grad_norm": 0.2547068763296539, "learning_rate": 2.6882308311092855e-08, "loss": 0.0008, "step": 19917 }, { "epoch": 4.531968145620023, "grad_norm": 1.0460453763111632, "learning_rate": 2.685639123644987e-08, "loss": 0.0069, "step": 19918 }, { "epoch": 4.532195676905575, "grad_norm": 0.028611162081874142, "learning_rate": 2.6830486386766747e-08, "loss": 0.0001, "step": 19919 }, { "epoch": 4.532423208191126, "grad_norm": 0.1522837802187625, "learning_rate": 2.6804593762572996e-08, "loss": 0.0005, "step": 19920 }, { "epoch": 4.532650739476678, "grad_norm": 0.5086292866360212, "learning_rate": 2.6778713364397844e-08, "loss": 0.0043, "step": 19921 }, { "epoch": 4.53287827076223, "grad_norm": 0.15688579300464356, "learning_rate": 2.6752845192770175e-08, "loss": 0.0005, "step": 19922 }, { "epoch": 4.533105802047782, "grad_norm": 0.09689201848851804, "learning_rate": 2.6726989248218732e-08, "loss": 0.0002, "step": 19923 }, { "epoch": 4.533333333333333, "grad_norm": 0.13667367570225772, "learning_rate": 2.6701145531271984e-08, "loss": 0.0005, "step": 19924 }, { "epoch": 4.533560864618885, "grad_norm": 0.311316288107842, "learning_rate": 2.6675314042457976e-08, "loss": 0.0029, "step": 19925 }, { "epoch": 4.533788395904437, "grad_norm": 0.041189437950787694, "learning_rate": 2.6649494782304762e-08, "loss": 0.0001, "step": 19926 }, { "epoch": 4.534015927189989, "grad_norm": 0.09463865038669127, "learning_rate": 2.662368775134018e-08, "loss": 0.0005, "step": 19927 }, { "epoch": 4.53424345847554, "grad_norm": 0.4167120946607618, "learning_rate": 2.659789295009145e-08, "loss": 0.0034, "step": 19928 }, { "epoch": 4.534470989761092, "grad_norm": 0.29798404498443376, "learning_rate": 2.6572110379085926e-08, "loss": 0.0011, "step": 19929 }, { "epoch": 4.534698521046644, "grad_norm": 0.5759168416940964, "learning_rate": 2.6546340038850484e-08, "loss": 0.0042, "step": 19930 }, { "epoch": 4.534926052332196, "grad_norm": 0.15629202211515789, "learning_rate": 2.6520581929911844e-08, "loss": 0.0003, "step": 19931 }, { "epoch": 4.535153583617747, "grad_norm": 0.5528661179801434, "learning_rate": 2.64948360527964e-08, "loss": 0.0033, "step": 19932 }, { "epoch": 4.535381114903299, "grad_norm": 0.0289016025298705, "learning_rate": 2.646910240803033e-08, "loss": 0.0001, "step": 19933 }, { "epoch": 4.535608646188851, "grad_norm": 0.23156524658913072, "learning_rate": 2.64433809961398e-08, "loss": 0.0019, "step": 19934 }, { "epoch": 4.535836177474403, "grad_norm": 1.1828484554258145, "learning_rate": 2.64176718176503e-08, "loss": 0.0073, "step": 19935 }, { "epoch": 4.536063708759954, "grad_norm": 0.1560354746411206, "learning_rate": 2.639197487308731e-08, "loss": 0.0005, "step": 19936 }, { "epoch": 4.536291240045506, "grad_norm": 0.5981582270983824, "learning_rate": 2.6366290162976176e-08, "loss": 0.0016, "step": 19937 }, { "epoch": 4.536518771331058, "grad_norm": 0.7746657721036627, "learning_rate": 2.6340617687841546e-08, "loss": 0.0066, "step": 19938 }, { "epoch": 4.53674630261661, "grad_norm": 0.4981558152820581, "learning_rate": 2.6314957448208348e-08, "loss": 0.0028, "step": 19939 }, { "epoch": 4.536973833902161, "grad_norm": 0.5367386344071212, "learning_rate": 2.628930944460102e-08, "loss": 0.0011, "step": 19940 }, { "epoch": 4.537201365187713, "grad_norm": 0.5028848584170766, "learning_rate": 2.6263673677543733e-08, "loss": 0.003, "step": 19941 }, { "epoch": 4.537428896473265, "grad_norm": 0.5961706743190794, "learning_rate": 2.6238050147560368e-08, "loss": 0.0025, "step": 19942 }, { "epoch": 4.537656427758817, "grad_norm": 0.29707417337887143, "learning_rate": 2.6212438855174744e-08, "loss": 0.0012, "step": 19943 }, { "epoch": 4.537883959044368, "grad_norm": 0.7836855514778195, "learning_rate": 2.618683980091026e-08, "loss": 0.0048, "step": 19944 }, { "epoch": 4.5381114903299204, "grad_norm": 0.6123920966959716, "learning_rate": 2.6161252985289972e-08, "loss": 0.0025, "step": 19945 }, { "epoch": 4.538339021615472, "grad_norm": 0.24724404242269651, "learning_rate": 2.6135678408837002e-08, "loss": 0.0015, "step": 19946 }, { "epoch": 4.538566552901024, "grad_norm": 0.3495048542256481, "learning_rate": 2.611011607207399e-08, "loss": 0.0008, "step": 19947 }, { "epoch": 4.538794084186575, "grad_norm": 1.0908153810303938, "learning_rate": 2.6084565975523433e-08, "loss": 0.0086, "step": 19948 }, { "epoch": 4.5390216154721275, "grad_norm": 0.09428550432824316, "learning_rate": 2.605902811970748e-08, "loss": 0.0002, "step": 19949 }, { "epoch": 4.53924914675768, "grad_norm": 0.02484972897078891, "learning_rate": 2.603350250514808e-08, "loss": 0.0001, "step": 19950 }, { "epoch": 4.539476678043231, "grad_norm": 0.07121010694644496, "learning_rate": 2.600798913236696e-08, "loss": 0.0002, "step": 19951 }, { "epoch": 4.539704209328782, "grad_norm": 0.1347572444238406, "learning_rate": 2.598248800188552e-08, "loss": 0.0006, "step": 19952 }, { "epoch": 4.5399317406143345, "grad_norm": 0.18816761614436428, "learning_rate": 2.5956999114224932e-08, "loss": 0.0009, "step": 19953 }, { "epoch": 4.540159271899887, "grad_norm": 0.15236155464125398, "learning_rate": 2.593152246990617e-08, "loss": 0.0002, "step": 19954 }, { "epoch": 4.540386803185438, "grad_norm": 0.3851552709186297, "learning_rate": 2.590605806945e-08, "loss": 0.0013, "step": 19955 }, { "epoch": 4.540614334470989, "grad_norm": 0.09378273888173647, "learning_rate": 2.5880605913376904e-08, "loss": 0.0003, "step": 19956 }, { "epoch": 4.5408418657565415, "grad_norm": 0.12219105489583672, "learning_rate": 2.5855166002206818e-08, "loss": 0.0007, "step": 19957 }, { "epoch": 4.541069397042094, "grad_norm": 0.45080189505103124, "learning_rate": 2.582973833646002e-08, "loss": 0.0015, "step": 19958 }, { "epoch": 4.541296928327645, "grad_norm": 0.2681701103597812, "learning_rate": 2.5804322916655814e-08, "loss": 0.0004, "step": 19959 }, { "epoch": 4.541524459613197, "grad_norm": 1.1168049644412439, "learning_rate": 2.5778919743314063e-08, "loss": 0.0015, "step": 19960 }, { "epoch": 4.5417519908987485, "grad_norm": 0.2055630999310336, "learning_rate": 2.5753528816953658e-08, "loss": 0.0007, "step": 19961 }, { "epoch": 4.541979522184301, "grad_norm": 0.547664464809546, "learning_rate": 2.5728150138093765e-08, "loss": 0.0019, "step": 19962 }, { "epoch": 4.542207053469852, "grad_norm": 0.5126810623544722, "learning_rate": 2.5702783707253e-08, "loss": 0.0019, "step": 19963 }, { "epoch": 4.542434584755404, "grad_norm": 0.14457170465489871, "learning_rate": 2.567742952494963e-08, "loss": 0.0004, "step": 19964 }, { "epoch": 4.5426621160409555, "grad_norm": 0.4155844786017055, "learning_rate": 2.5652087591702053e-08, "loss": 0.0016, "step": 19965 }, { "epoch": 4.542889647326508, "grad_norm": 0.6197204275668082, "learning_rate": 2.5626757908028128e-08, "loss": 0.0032, "step": 19966 }, { "epoch": 4.543117178612059, "grad_norm": 0.15182165316957277, "learning_rate": 2.5601440474445627e-08, "loss": 0.0004, "step": 19967 }, { "epoch": 4.543344709897611, "grad_norm": 0.2895177944435518, "learning_rate": 2.557613529147192e-08, "loss": 0.0018, "step": 19968 }, { "epoch": 4.5435722411831625, "grad_norm": 0.293465325432388, "learning_rate": 2.5550842359624227e-08, "loss": 0.0012, "step": 19969 }, { "epoch": 4.543799772468715, "grad_norm": 0.1249914147465563, "learning_rate": 2.5525561679419496e-08, "loss": 0.0002, "step": 19970 }, { "epoch": 4.544027303754266, "grad_norm": 0.5555962246357861, "learning_rate": 2.5500293251374263e-08, "loss": 0.0019, "step": 19971 }, { "epoch": 4.544254835039818, "grad_norm": 0.2674644374927478, "learning_rate": 2.5475037076005196e-08, "loss": 0.0007, "step": 19972 }, { "epoch": 4.5444823663253695, "grad_norm": 0.07464558100282448, "learning_rate": 2.544979315382834e-08, "loss": 0.0005, "step": 19973 }, { "epoch": 4.544709897610922, "grad_norm": 0.358382488935112, "learning_rate": 2.5424561485359808e-08, "loss": 0.0022, "step": 19974 }, { "epoch": 4.544937428896473, "grad_norm": 0.5940701167022912, "learning_rate": 2.5399342071115164e-08, "loss": 0.0044, "step": 19975 }, { "epoch": 4.545164960182025, "grad_norm": 0.13459174353036935, "learning_rate": 2.5374134911609756e-08, "loss": 0.0004, "step": 19976 }, { "epoch": 4.5453924914675765, "grad_norm": 0.342028006482037, "learning_rate": 2.5348940007358935e-08, "loss": 0.0023, "step": 19977 }, { "epoch": 4.545620022753129, "grad_norm": 0.7470348400005598, "learning_rate": 2.53237573588775e-08, "loss": 0.0044, "step": 19978 }, { "epoch": 4.54584755403868, "grad_norm": 0.37956595271699645, "learning_rate": 2.529858696668018e-08, "loss": 0.0021, "step": 19979 }, { "epoch": 4.546075085324232, "grad_norm": 0.006847297457721901, "learning_rate": 2.527342883128149e-08, "loss": 0.0, "step": 19980 }, { "epoch": 4.5463026166097835, "grad_norm": 0.2999951263343252, "learning_rate": 2.5248282953195676e-08, "loss": 0.0007, "step": 19981 }, { "epoch": 4.546530147895336, "grad_norm": 0.32699052004599477, "learning_rate": 2.5223149332936486e-08, "loss": 0.0012, "step": 19982 }, { "epoch": 4.546757679180887, "grad_norm": 0.12994986414760087, "learning_rate": 2.5198027971017614e-08, "loss": 0.0008, "step": 19983 }, { "epoch": 4.546985210466439, "grad_norm": 0.05024558865030469, "learning_rate": 2.5172918867952673e-08, "loss": 0.0001, "step": 19984 }, { "epoch": 4.5472127417519905, "grad_norm": 0.27484776653569554, "learning_rate": 2.5147822024254657e-08, "loss": 0.0014, "step": 19985 }, { "epoch": 4.547440273037543, "grad_norm": 0.01771072259179436, "learning_rate": 2.5122737440436627e-08, "loss": 0.0, "step": 19986 }, { "epoch": 4.547667804323094, "grad_norm": 0.07788409447126747, "learning_rate": 2.509766511701123e-08, "loss": 0.0002, "step": 19987 }, { "epoch": 4.547895335608646, "grad_norm": 0.3160007097016756, "learning_rate": 2.507260505449083e-08, "loss": 0.0009, "step": 19988 }, { "epoch": 4.548122866894198, "grad_norm": 0.34488231525567953, "learning_rate": 2.5047557253387727e-08, "loss": 0.0019, "step": 19989 }, { "epoch": 4.54835039817975, "grad_norm": 0.26018728230313615, "learning_rate": 2.502252171421374e-08, "loss": 0.0011, "step": 19990 }, { "epoch": 4.548577929465301, "grad_norm": 0.13717006637008203, "learning_rate": 2.499749843748067e-08, "loss": 0.0003, "step": 19991 }, { "epoch": 4.548805460750853, "grad_norm": 0.6485082258499404, "learning_rate": 2.4972487423699856e-08, "loss": 0.0026, "step": 19992 }, { "epoch": 4.549032992036405, "grad_norm": 0.1467019018614543, "learning_rate": 2.494748867338241e-08, "loss": 0.0004, "step": 19993 }, { "epoch": 4.549260523321957, "grad_norm": 0.23872319290737723, "learning_rate": 2.4922502187039523e-08, "loss": 0.0004, "step": 19994 }, { "epoch": 4.549488054607508, "grad_norm": 0.9091178525310938, "learning_rate": 2.489752796518155e-08, "loss": 0.0012, "step": 19995 }, { "epoch": 4.54971558589306, "grad_norm": 0.05945395268511198, "learning_rate": 2.4872566008319195e-08, "loss": 0.0002, "step": 19996 }, { "epoch": 4.549943117178612, "grad_norm": 0.2242703145551216, "learning_rate": 2.484761631696239e-08, "loss": 0.0012, "step": 19997 }, { "epoch": 4.550170648464164, "grad_norm": 0.8602740963747072, "learning_rate": 2.4822678891621294e-08, "loss": 0.0118, "step": 19998 }, { "epoch": 4.550398179749716, "grad_norm": 0.11609826914001094, "learning_rate": 2.4797753732805352e-08, "loss": 0.0005, "step": 19999 }, { "epoch": 4.550625711035267, "grad_norm": 0.0808766225409653, "learning_rate": 2.477284084102409e-08, "loss": 0.0001, "step": 20000 }, { "epoch": 4.5508532423208194, "grad_norm": 0.4371755159804087, "learning_rate": 2.4747940216786822e-08, "loss": 0.0022, "step": 20001 }, { "epoch": 4.551080773606371, "grad_norm": 0.40837687979553466, "learning_rate": 2.472305186060224e-08, "loss": 0.003, "step": 20002 }, { "epoch": 4.551308304891923, "grad_norm": 0.6272527311328081, "learning_rate": 2.4698175772979242e-08, "loss": 0.0014, "step": 20003 }, { "epoch": 4.551535836177474, "grad_norm": 0.27075348083764905, "learning_rate": 2.467331195442603e-08, "loss": 0.0019, "step": 20004 }, { "epoch": 4.5517633674630265, "grad_norm": 0.44484916913821104, "learning_rate": 2.464846040545095e-08, "loss": 0.0034, "step": 20005 }, { "epoch": 4.551990898748578, "grad_norm": 0.47230542463830244, "learning_rate": 2.4623621126561788e-08, "loss": 0.0048, "step": 20006 }, { "epoch": 4.55221843003413, "grad_norm": 0.09444042671582163, "learning_rate": 2.45987941182662e-08, "loss": 0.0003, "step": 20007 }, { "epoch": 4.552445961319681, "grad_norm": 0.16554956489182918, "learning_rate": 2.4573979381071824e-08, "loss": 0.0004, "step": 20008 }, { "epoch": 4.5526734926052335, "grad_norm": 1.3857156879348411, "learning_rate": 2.4549176915485627e-08, "loss": 0.0022, "step": 20009 }, { "epoch": 4.552901023890785, "grad_norm": 0.11181980459481224, "learning_rate": 2.4524386722014625e-08, "loss": 0.0002, "step": 20010 }, { "epoch": 4.553128555176337, "grad_norm": 0.15659235993191326, "learning_rate": 2.4499608801165294e-08, "loss": 0.0002, "step": 20011 }, { "epoch": 4.553356086461888, "grad_norm": 0.2013411000565247, "learning_rate": 2.447484315344438e-08, "loss": 0.0009, "step": 20012 }, { "epoch": 4.5535836177474405, "grad_norm": 0.13444864732843353, "learning_rate": 2.4450089779357727e-08, "loss": 0.0003, "step": 20013 }, { "epoch": 4.553811149032992, "grad_norm": 0.8264858445693216, "learning_rate": 2.442534867941139e-08, "loss": 0.0048, "step": 20014 }, { "epoch": 4.554038680318544, "grad_norm": 1.1696836479390944, "learning_rate": 2.4400619854111144e-08, "loss": 0.005, "step": 20015 }, { "epoch": 4.554266211604095, "grad_norm": 0.3602362707048241, "learning_rate": 2.437590330396214e-08, "loss": 0.0019, "step": 20016 }, { "epoch": 4.5544937428896475, "grad_norm": 0.2650610111231153, "learning_rate": 2.4351199029469806e-08, "loss": 0.0007, "step": 20017 }, { "epoch": 4.554721274175199, "grad_norm": 0.24656745885175474, "learning_rate": 2.4326507031138812e-08, "loss": 0.0012, "step": 20018 }, { "epoch": 4.554948805460751, "grad_norm": 0.6692568008235142, "learning_rate": 2.4301827309473962e-08, "loss": 0.0104, "step": 20019 }, { "epoch": 4.555176336746302, "grad_norm": 1.0114232445588944, "learning_rate": 2.4277159864979707e-08, "loss": 0.0053, "step": 20020 }, { "epoch": 4.5554038680318545, "grad_norm": 0.14131885734769267, "learning_rate": 2.4252504698160028e-08, "loss": 0.0004, "step": 20021 }, { "epoch": 4.555631399317406, "grad_norm": 0.25749357671320056, "learning_rate": 2.4227861809519025e-08, "loss": 0.0011, "step": 20022 }, { "epoch": 4.555858930602958, "grad_norm": 0.1853454774622643, "learning_rate": 2.4203231199560194e-08, "loss": 0.0006, "step": 20023 }, { "epoch": 4.556086461888509, "grad_norm": 0.7316487161644817, "learning_rate": 2.417861286878709e-08, "loss": 0.001, "step": 20024 }, { "epoch": 4.5563139931740615, "grad_norm": 0.5540345404261053, "learning_rate": 2.4154006817702775e-08, "loss": 0.0047, "step": 20025 }, { "epoch": 4.556541524459613, "grad_norm": 0.180066404633734, "learning_rate": 2.4129413046810045e-08, "loss": 0.0005, "step": 20026 }, { "epoch": 4.556769055745165, "grad_norm": 0.056227083433785435, "learning_rate": 2.4104831556611835e-08, "loss": 0.0002, "step": 20027 }, { "epoch": 4.556996587030717, "grad_norm": 0.7615889388142724, "learning_rate": 2.408026234761024e-08, "loss": 0.0049, "step": 20028 }, { "epoch": 4.5572241183162685, "grad_norm": 0.3861773851223757, "learning_rate": 2.4055705420307703e-08, "loss": 0.0028, "step": 20029 }, { "epoch": 4.55745164960182, "grad_norm": 0.0976123475379024, "learning_rate": 2.403116077520584e-08, "loss": 0.0005, "step": 20030 }, { "epoch": 4.557679180887372, "grad_norm": 0.10414211683974262, "learning_rate": 2.4006628412806475e-08, "loss": 0.0003, "step": 20031 }, { "epoch": 4.557906712172924, "grad_norm": 0.1526612321170799, "learning_rate": 2.3982108333610935e-08, "loss": 0.0006, "step": 20032 }, { "epoch": 4.5581342434584755, "grad_norm": 0.3384088729569732, "learning_rate": 2.3957600538120352e-08, "loss": 0.0011, "step": 20033 }, { "epoch": 4.558361774744027, "grad_norm": 0.2871280726557149, "learning_rate": 2.393310502683578e-08, "loss": 0.0019, "step": 20034 }, { "epoch": 4.558589306029579, "grad_norm": 0.029405524673737173, "learning_rate": 2.3908621800257658e-08, "loss": 0.0001, "step": 20035 }, { "epoch": 4.558816837315131, "grad_norm": 0.34721355151836847, "learning_rate": 2.3884150858886484e-08, "loss": 0.0017, "step": 20036 }, { "epoch": 4.5590443686006825, "grad_norm": 0.20438127830324096, "learning_rate": 2.3859692203222346e-08, "loss": 0.001, "step": 20037 }, { "epoch": 4.559271899886235, "grad_norm": 0.47495247704424265, "learning_rate": 2.383524583376519e-08, "loss": 0.0052, "step": 20038 }, { "epoch": 4.559499431171786, "grad_norm": 1.0052007965346044, "learning_rate": 2.3810811751014616e-08, "loss": 0.0012, "step": 20039 }, { "epoch": 4.559726962457338, "grad_norm": 0.11380899097320521, "learning_rate": 2.3786389955469947e-08, "loss": 0.0004, "step": 20040 }, { "epoch": 4.5599544937428895, "grad_norm": 0.31549573102924167, "learning_rate": 2.3761980447630507e-08, "loss": 0.0011, "step": 20041 }, { "epoch": 4.560182025028442, "grad_norm": 0.44351899369564046, "learning_rate": 2.373758322799506e-08, "loss": 0.0013, "step": 20042 }, { "epoch": 4.560409556313993, "grad_norm": 0.4287950141823027, "learning_rate": 2.3713198297062242e-08, "loss": 0.004, "step": 20043 }, { "epoch": 4.560637087599545, "grad_norm": 0.03479418591472257, "learning_rate": 2.368882565533047e-08, "loss": 0.0001, "step": 20044 }, { "epoch": 4.5608646188850965, "grad_norm": 0.5543315299529434, "learning_rate": 2.3664465303297814e-08, "loss": 0.0055, "step": 20045 }, { "epoch": 4.561092150170649, "grad_norm": 0.7827519878169544, "learning_rate": 2.3640117241462147e-08, "loss": 0.0045, "step": 20046 }, { "epoch": 4.5613196814562, "grad_norm": 0.003031336999839716, "learning_rate": 2.361578147032119e-08, "loss": 0.0, "step": 20047 }, { "epoch": 4.561547212741752, "grad_norm": 0.2648216246425992, "learning_rate": 2.3591457990372256e-08, "loss": 0.001, "step": 20048 }, { "epoch": 4.5617747440273035, "grad_norm": 0.6435263500169938, "learning_rate": 2.3567146802112516e-08, "loss": 0.0041, "step": 20049 }, { "epoch": 4.562002275312856, "grad_norm": 0.15582491070591442, "learning_rate": 2.3542847906038862e-08, "loss": 0.0003, "step": 20050 }, { "epoch": 4.562229806598407, "grad_norm": 0.14525531150051632, "learning_rate": 2.3518561302647915e-08, "loss": 0.0006, "step": 20051 }, { "epoch": 4.562457337883959, "grad_norm": 0.5531677442139212, "learning_rate": 2.3494286992435874e-08, "loss": 0.0021, "step": 20052 }, { "epoch": 4.5626848691695105, "grad_norm": 0.10135087234770251, "learning_rate": 2.347002497589901e-08, "loss": 0.0003, "step": 20053 }, { "epoch": 4.562912400455063, "grad_norm": 0.28489718359400307, "learning_rate": 2.344577525353324e-08, "loss": 0.0015, "step": 20054 }, { "epoch": 4.563139931740614, "grad_norm": 0.07513983384428424, "learning_rate": 2.3421537825834145e-08, "loss": 0.0002, "step": 20055 }, { "epoch": 4.563367463026166, "grad_norm": 0.13836448578419558, "learning_rate": 2.339731269329709e-08, "loss": 0.0005, "step": 20056 }, { "epoch": 4.563594994311718, "grad_norm": 0.04076331214048131, "learning_rate": 2.337309985641717e-08, "loss": 0.0002, "step": 20057 }, { "epoch": 4.56382252559727, "grad_norm": 0.17259426060158437, "learning_rate": 2.334889931568926e-08, "loss": 0.0006, "step": 20058 }, { "epoch": 4.564050056882821, "grad_norm": 0.1816269131221202, "learning_rate": 2.332471107160797e-08, "loss": 0.0009, "step": 20059 }, { "epoch": 4.564277588168373, "grad_norm": 0.7236341161566794, "learning_rate": 2.330053512466762e-08, "loss": 0.0037, "step": 20060 }, { "epoch": 4.564505119453925, "grad_norm": 0.13387547123688115, "learning_rate": 2.327637147536241e-08, "loss": 0.0003, "step": 20061 }, { "epoch": 4.564732650739477, "grad_norm": 0.453085456053698, "learning_rate": 2.3252220124186167e-08, "loss": 0.0015, "step": 20062 }, { "epoch": 4.564960182025028, "grad_norm": 0.13889367238854725, "learning_rate": 2.322808107163253e-08, "loss": 0.0009, "step": 20063 }, { "epoch": 4.56518771331058, "grad_norm": 0.5194902382723452, "learning_rate": 2.320395431819479e-08, "loss": 0.0053, "step": 20064 }, { "epoch": 4.565415244596132, "grad_norm": 0.14013821681738384, "learning_rate": 2.3179839864366088e-08, "loss": 0.0003, "step": 20065 }, { "epoch": 4.565642775881684, "grad_norm": 0.13916883241390113, "learning_rate": 2.3155737710639296e-08, "loss": 0.0004, "step": 20066 }, { "epoch": 4.565870307167236, "grad_norm": 0.38779844539187724, "learning_rate": 2.3131647857506935e-08, "loss": 0.0039, "step": 20067 }, { "epoch": 4.566097838452787, "grad_norm": 0.2145566772185385, "learning_rate": 2.310757030546139e-08, "loss": 0.0006, "step": 20068 }, { "epoch": 4.566325369738339, "grad_norm": 1.2236652707839903, "learning_rate": 2.3083505054994902e-08, "loss": 0.0096, "step": 20069 }, { "epoch": 4.566552901023891, "grad_norm": 0.3010701325162349, "learning_rate": 2.305945210659924e-08, "loss": 0.0025, "step": 20070 }, { "epoch": 4.566780432309443, "grad_norm": 0.39368077174940674, "learning_rate": 2.303541146076588e-08, "loss": 0.0037, "step": 20071 }, { "epoch": 4.567007963594994, "grad_norm": 0.2461042891098077, "learning_rate": 2.301138311798637e-08, "loss": 0.0013, "step": 20072 }, { "epoch": 4.567235494880546, "grad_norm": 0.15207406043010488, "learning_rate": 2.2987367078751572e-08, "loss": 0.0006, "step": 20073 }, { "epoch": 4.567463026166098, "grad_norm": 0.16053171126022414, "learning_rate": 2.296336334355248e-08, "loss": 0.0005, "step": 20074 }, { "epoch": 4.56769055745165, "grad_norm": 0.5260293052537088, "learning_rate": 2.2939371912879747e-08, "loss": 0.0029, "step": 20075 }, { "epoch": 4.567918088737201, "grad_norm": 0.26376861391749756, "learning_rate": 2.291539278722353e-08, "loss": 0.0006, "step": 20076 }, { "epoch": 4.5681456200227535, "grad_norm": 0.421985710727618, "learning_rate": 2.289142596707414e-08, "loss": 0.0029, "step": 20077 }, { "epoch": 4.568373151308305, "grad_norm": 0.3691460932559337, "learning_rate": 2.2867471452921176e-08, "loss": 0.0011, "step": 20078 }, { "epoch": 4.568600682593857, "grad_norm": 0.20080958607705182, "learning_rate": 2.284352924525439e-08, "loss": 0.0006, "step": 20079 }, { "epoch": 4.568828213879408, "grad_norm": 0.10635772250219266, "learning_rate": 2.2819599344562974e-08, "loss": 0.0001, "step": 20080 }, { "epoch": 4.5690557451649605, "grad_norm": 0.4180336738973749, "learning_rate": 2.279568175133626e-08, "loss": 0.001, "step": 20081 }, { "epoch": 4.569283276450512, "grad_norm": 0.6919899853547243, "learning_rate": 2.2771776466062877e-08, "loss": 0.0072, "step": 20082 }, { "epoch": 4.569510807736064, "grad_norm": 0.16383784186942627, "learning_rate": 2.2747883489231403e-08, "loss": 0.0008, "step": 20083 }, { "epoch": 4.569738339021615, "grad_norm": 0.48764361000106804, "learning_rate": 2.272400282133033e-08, "loss": 0.0024, "step": 20084 }, { "epoch": 4.5699658703071675, "grad_norm": 0.11917299693649756, "learning_rate": 2.2700134462847467e-08, "loss": 0.0003, "step": 20085 }, { "epoch": 4.570193401592719, "grad_norm": 0.34301304208225736, "learning_rate": 2.267627841427089e-08, "loss": 0.0017, "step": 20086 }, { "epoch": 4.570420932878271, "grad_norm": 0.18371745018061617, "learning_rate": 2.2652434676087993e-08, "loss": 0.0011, "step": 20087 }, { "epoch": 4.570648464163822, "grad_norm": 0.05384031816883389, "learning_rate": 2.2628603248786295e-08, "loss": 0.0002, "step": 20088 }, { "epoch": 4.5708759954493745, "grad_norm": 0.4697662468648551, "learning_rate": 2.2604784132852774e-08, "loss": 0.0028, "step": 20089 }, { "epoch": 4.571103526734926, "grad_norm": 0.12661732765668088, "learning_rate": 2.2580977328774123e-08, "loss": 0.0006, "step": 20090 }, { "epoch": 4.571331058020478, "grad_norm": 0.10740051664346702, "learning_rate": 2.2557182837037105e-08, "loss": 0.0003, "step": 20091 }, { "epoch": 4.571558589306029, "grad_norm": 0.689351485404701, "learning_rate": 2.2533400658127923e-08, "loss": 0.0026, "step": 20092 }, { "epoch": 4.5717861205915815, "grad_norm": 0.2474974784275877, "learning_rate": 2.2509630792532586e-08, "loss": 0.0003, "step": 20093 }, { "epoch": 4.572013651877133, "grad_norm": 0.16057194053010398, "learning_rate": 2.248587324073709e-08, "loss": 0.0006, "step": 20094 }, { "epoch": 4.572241183162685, "grad_norm": 0.3579170121543524, "learning_rate": 2.246212800322681e-08, "loss": 0.002, "step": 20095 }, { "epoch": 4.572468714448236, "grad_norm": 0.45010950761465945, "learning_rate": 2.2438395080487257e-08, "loss": 0.006, "step": 20096 }, { "epoch": 4.5726962457337885, "grad_norm": 0.23415467917797259, "learning_rate": 2.241467447300326e-08, "loss": 0.0013, "step": 20097 }, { "epoch": 4.57292377701934, "grad_norm": 0.31613934495971696, "learning_rate": 2.239096618125977e-08, "loss": 0.001, "step": 20098 }, { "epoch": 4.573151308304892, "grad_norm": 0.12486083328290619, "learning_rate": 2.2367270205741267e-08, "loss": 0.0006, "step": 20099 }, { "epoch": 4.573378839590443, "grad_norm": 0.3105739352564605, "learning_rate": 2.2343586546932154e-08, "loss": 0.0008, "step": 20100 }, { "epoch": 4.5736063708759955, "grad_norm": 0.668278473277801, "learning_rate": 2.2319915205316416e-08, "loss": 0.0099, "step": 20101 }, { "epoch": 4.573833902161547, "grad_norm": 0.5204965885304633, "learning_rate": 2.229625618137776e-08, "loss": 0.0041, "step": 20102 }, { "epoch": 4.574061433447099, "grad_norm": 0.8447478951470463, "learning_rate": 2.2272609475599975e-08, "loss": 0.0063, "step": 20103 }, { "epoch": 4.57428896473265, "grad_norm": 0.19200352653508193, "learning_rate": 2.2248975088466075e-08, "loss": 0.0015, "step": 20104 }, { "epoch": 4.5745164960182025, "grad_norm": 0.36139600450011566, "learning_rate": 2.222535302045935e-08, "loss": 0.0015, "step": 20105 }, { "epoch": 4.574744027303755, "grad_norm": 0.2563243976843117, "learning_rate": 2.2201743272062397e-08, "loss": 0.0009, "step": 20106 }, { "epoch": 4.574971558589306, "grad_norm": 0.3301376915423777, "learning_rate": 2.2178145843757827e-08, "loss": 0.0025, "step": 20107 }, { "epoch": 4.575199089874857, "grad_norm": 0.06432884572160069, "learning_rate": 2.2154560736028023e-08, "loss": 0.0002, "step": 20108 }, { "epoch": 4.5754266211604095, "grad_norm": 0.8081817329786254, "learning_rate": 2.2130987949354892e-08, "loss": 0.0035, "step": 20109 }, { "epoch": 4.575654152445962, "grad_norm": 0.04621465482250069, "learning_rate": 2.2107427484220275e-08, "loss": 0.0002, "step": 20110 }, { "epoch": 4.575881683731513, "grad_norm": 0.23663354695176067, "learning_rate": 2.208387934110566e-08, "loss": 0.0007, "step": 20111 }, { "epoch": 4.576109215017064, "grad_norm": 0.49844799380210064, "learning_rate": 2.2060343520492466e-08, "loss": 0.0025, "step": 20112 }, { "epoch": 4.5763367463026166, "grad_norm": 0.1726113905414273, "learning_rate": 2.203682002286149e-08, "loss": 0.0004, "step": 20113 }, { "epoch": 4.576564277588169, "grad_norm": 0.1547652011677314, "learning_rate": 2.2013308848693667e-08, "loss": 0.0005, "step": 20114 }, { "epoch": 4.57679180887372, "grad_norm": 0.5454749852963283, "learning_rate": 2.1989809998469586e-08, "loss": 0.0017, "step": 20115 }, { "epoch": 4.577019340159272, "grad_norm": 0.5355756245083627, "learning_rate": 2.1966323472669276e-08, "loss": 0.0015, "step": 20116 }, { "epoch": 4.577246871444824, "grad_norm": 0.1436448850035471, "learning_rate": 2.194284927177305e-08, "loss": 0.0006, "step": 20117 }, { "epoch": 4.577474402730376, "grad_norm": 0.23843285130160177, "learning_rate": 2.1919387396260456e-08, "loss": 0.0009, "step": 20118 }, { "epoch": 4.577701934015927, "grad_norm": 0.01060427685089847, "learning_rate": 2.1895937846611177e-08, "loss": 0.0, "step": 20119 }, { "epoch": 4.577929465301479, "grad_norm": 0.30598565568435926, "learning_rate": 2.1872500623304275e-08, "loss": 0.0013, "step": 20120 }, { "epoch": 4.578156996587031, "grad_norm": 0.06899210505039123, "learning_rate": 2.184907572681895e-08, "loss": 0.0002, "step": 20121 }, { "epoch": 4.578384527872583, "grad_norm": 0.025522090616296712, "learning_rate": 2.1825663157633918e-08, "loss": 0.0, "step": 20122 }, { "epoch": 4.578612059158134, "grad_norm": 0.09268229375281811, "learning_rate": 2.180226291622761e-08, "loss": 0.0003, "step": 20123 }, { "epoch": 4.578839590443686, "grad_norm": 0.37469032561333626, "learning_rate": 2.1778875003078405e-08, "loss": 0.0023, "step": 20124 }, { "epoch": 4.579067121729238, "grad_norm": 0.3371632826392107, "learning_rate": 2.175549941866417e-08, "loss": 0.0009, "step": 20125 }, { "epoch": 4.57929465301479, "grad_norm": 0.9170150972827877, "learning_rate": 2.173213616346287e-08, "loss": 0.0055, "step": 20126 }, { "epoch": 4.579522184300341, "grad_norm": 0.3380452479344499, "learning_rate": 2.170878523795175e-08, "loss": 0.0008, "step": 20127 }, { "epoch": 4.579749715585893, "grad_norm": 0.056630568985762554, "learning_rate": 2.1685446642608214e-08, "loss": 0.0002, "step": 20128 }, { "epoch": 4.579977246871445, "grad_norm": 0.18316023475497936, "learning_rate": 2.166212037790924e-08, "loss": 0.0008, "step": 20129 }, { "epoch": 4.580204778156997, "grad_norm": 0.28184529037955314, "learning_rate": 2.1638806444331597e-08, "loss": 0.0012, "step": 20130 }, { "epoch": 4.580432309442548, "grad_norm": 0.1879089733352442, "learning_rate": 2.161550484235171e-08, "loss": 0.0009, "step": 20131 }, { "epoch": 4.5806598407281, "grad_norm": 0.17286477879755238, "learning_rate": 2.1592215572445937e-08, "loss": 0.001, "step": 20132 }, { "epoch": 4.580887372013652, "grad_norm": 0.017205324362013653, "learning_rate": 2.1568938635090077e-08, "loss": 0.0001, "step": 20133 }, { "epoch": 4.581114903299204, "grad_norm": 0.2838810769359092, "learning_rate": 2.1545674030759993e-08, "loss": 0.0018, "step": 20134 }, { "epoch": 4.581342434584755, "grad_norm": 0.04710417983801237, "learning_rate": 2.1522421759931147e-08, "loss": 0.0001, "step": 20135 }, { "epoch": 4.581569965870307, "grad_norm": 0.38126003627474125, "learning_rate": 2.149918182307885e-08, "loss": 0.0029, "step": 20136 }, { "epoch": 4.581797497155859, "grad_norm": 0.10753009564767929, "learning_rate": 2.147595422067793e-08, "loss": 0.0002, "step": 20137 }, { "epoch": 4.582025028441411, "grad_norm": 0.2629519293388211, "learning_rate": 2.1452738953203352e-08, "loss": 0.0016, "step": 20138 }, { "epoch": 4.582252559726962, "grad_norm": 0.032840180270189846, "learning_rate": 2.142953602112939e-08, "loss": 0.0001, "step": 20139 }, { "epoch": 4.582480091012514, "grad_norm": 0.8041237127404026, "learning_rate": 2.1406345424930182e-08, "loss": 0.004, "step": 20140 }, { "epoch": 4.582707622298066, "grad_norm": 0.5544190878669278, "learning_rate": 2.1383167165080002e-08, "loss": 0.003, "step": 20141 }, { "epoch": 4.582935153583618, "grad_norm": 0.5462890754793792, "learning_rate": 2.1360001242052285e-08, "loss": 0.0026, "step": 20142 }, { "epoch": 4.583162684869169, "grad_norm": 0.7071340136082813, "learning_rate": 2.1336847656320755e-08, "loss": 0.0031, "step": 20143 }, { "epoch": 4.583390216154721, "grad_norm": 0.07057178967570138, "learning_rate": 2.1313706408358432e-08, "loss": 0.0002, "step": 20144 }, { "epoch": 4.5836177474402735, "grad_norm": 0.538761340211312, "learning_rate": 2.1290577498638416e-08, "loss": 0.0035, "step": 20145 }, { "epoch": 4.583845278725825, "grad_norm": 0.15164027932396748, "learning_rate": 2.1267460927633236e-08, "loss": 0.0009, "step": 20146 }, { "epoch": 4.584072810011376, "grad_norm": 0.1675439412576427, "learning_rate": 2.124435669581551e-08, "loss": 0.0007, "step": 20147 }, { "epoch": 4.584300341296928, "grad_norm": 0.35865778887124294, "learning_rate": 2.1221264803657492e-08, "loss": 0.0021, "step": 20148 }, { "epoch": 4.5845278725824805, "grad_norm": 0.37857605650326764, "learning_rate": 2.119818525163096e-08, "loss": 0.0018, "step": 20149 }, { "epoch": 4.584755403868032, "grad_norm": 0.3922137450814263, "learning_rate": 2.117511804020783e-08, "loss": 0.0012, "step": 20150 }, { "epoch": 4.584982935153583, "grad_norm": 0.1802908957020407, "learning_rate": 2.115206316985939e-08, "loss": 0.0005, "step": 20151 }, { "epoch": 4.585210466439135, "grad_norm": 0.6628496825767447, "learning_rate": 2.1129020641056792e-08, "loss": 0.0027, "step": 20152 }, { "epoch": 4.5854379977246875, "grad_norm": 0.020823510750388644, "learning_rate": 2.1105990454271112e-08, "loss": 0.0001, "step": 20153 }, { "epoch": 4.585665529010239, "grad_norm": 0.39610748567467013, "learning_rate": 2.1082972609973024e-08, "loss": 0.0089, "step": 20154 }, { "epoch": 4.585893060295791, "grad_norm": 0.06133815904874504, "learning_rate": 2.105996710863298e-08, "loss": 0.0001, "step": 20155 }, { "epoch": 4.586120591581342, "grad_norm": 0.20470435013231136, "learning_rate": 2.103697395072109e-08, "loss": 0.0011, "step": 20156 }, { "epoch": 4.5863481228668945, "grad_norm": 0.3134243293335314, "learning_rate": 2.1013993136707392e-08, "loss": 0.0024, "step": 20157 }, { "epoch": 4.586575654152446, "grad_norm": 0.27786105577436104, "learning_rate": 2.0991024667061584e-08, "loss": 0.001, "step": 20158 }, { "epoch": 4.586803185437998, "grad_norm": 0.5049932922851285, "learning_rate": 2.0968068542252945e-08, "loss": 0.005, "step": 20159 }, { "epoch": 4.587030716723549, "grad_norm": 0.2858886511195159, "learning_rate": 2.094512476275075e-08, "loss": 0.0043, "step": 20160 }, { "epoch": 4.5872582480091015, "grad_norm": 0.4018597907698234, "learning_rate": 2.0922193329023925e-08, "loss": 0.0008, "step": 20161 }, { "epoch": 4.587485779294653, "grad_norm": 0.08281712495374617, "learning_rate": 2.08992742415412e-08, "loss": 0.0003, "step": 20162 }, { "epoch": 4.587713310580205, "grad_norm": 0.4262993044670787, "learning_rate": 2.0876367500770946e-08, "loss": 0.0014, "step": 20163 }, { "epoch": 4.587940841865756, "grad_norm": 0.10209861100046949, "learning_rate": 2.0853473107181332e-08, "loss": 0.0002, "step": 20164 }, { "epoch": 4.5881683731513085, "grad_norm": 0.4657511004707397, "learning_rate": 2.0830591061240315e-08, "loss": 0.0031, "step": 20165 }, { "epoch": 4.58839590443686, "grad_norm": 2.488000998374545, "learning_rate": 2.080772136341551e-08, "loss": 0.0127, "step": 20166 }, { "epoch": 4.588623435722412, "grad_norm": 1.2041860833235525, "learning_rate": 2.0784864014174316e-08, "loss": 0.0094, "step": 20167 }, { "epoch": 4.588850967007963, "grad_norm": 0.29229838665605545, "learning_rate": 2.0762019013983932e-08, "loss": 0.0012, "step": 20168 }, { "epoch": 4.5890784982935156, "grad_norm": 0.4584849159655329, "learning_rate": 2.0739186363311348e-08, "loss": 0.0068, "step": 20169 }, { "epoch": 4.589306029579067, "grad_norm": 0.023974025090761165, "learning_rate": 2.071636606262313e-08, "loss": 0.0001, "step": 20170 }, { "epoch": 4.589533560864619, "grad_norm": 1.138017045323941, "learning_rate": 2.0693558112385646e-08, "loss": 0.0027, "step": 20171 }, { "epoch": 4.58976109215017, "grad_norm": 0.05590252135129492, "learning_rate": 2.0670762513065115e-08, "loss": 0.0001, "step": 20172 }, { "epoch": 4.589988623435723, "grad_norm": 0.13580600318020591, "learning_rate": 2.0647979265127418e-08, "loss": 0.0003, "step": 20173 }, { "epoch": 4.590216154721274, "grad_norm": 0.06066182814464144, "learning_rate": 2.0625208369038155e-08, "loss": 0.0003, "step": 20174 }, { "epoch": 4.590443686006826, "grad_norm": 0.41073047016954206, "learning_rate": 2.0602449825262856e-08, "loss": 0.0009, "step": 20175 }, { "epoch": 4.590671217292377, "grad_norm": 0.09761526509898852, "learning_rate": 2.0579703634266563e-08, "loss": 0.0003, "step": 20176 }, { "epoch": 4.59089874857793, "grad_norm": 0.1872896169286031, "learning_rate": 2.0556969796514185e-08, "loss": 0.0002, "step": 20177 }, { "epoch": 4.591126279863481, "grad_norm": 0.2211297783276508, "learning_rate": 2.053424831247035e-08, "loss": 0.0009, "step": 20178 }, { "epoch": 4.591353811149033, "grad_norm": 0.06797176609992404, "learning_rate": 2.0511539182599476e-08, "loss": 0.0002, "step": 20179 }, { "epoch": 4.591581342434584, "grad_norm": 0.6026375274781789, "learning_rate": 2.0488842407365635e-08, "loss": 0.0047, "step": 20180 }, { "epoch": 4.591808873720137, "grad_norm": 0.3616118739197657, "learning_rate": 2.04661579872327e-08, "loss": 0.0018, "step": 20181 }, { "epoch": 4.592036405005688, "grad_norm": 0.03513121366779115, "learning_rate": 2.044348592266446e-08, "loss": 0.0001, "step": 20182 }, { "epoch": 4.59226393629124, "grad_norm": 0.39729352032377974, "learning_rate": 2.0420826214124085e-08, "loss": 0.0021, "step": 20183 }, { "epoch": 4.592491467576792, "grad_norm": 0.1185132132681757, "learning_rate": 2.0398178862074888e-08, "loss": 0.0004, "step": 20184 }, { "epoch": 4.592718998862344, "grad_norm": 0.4638686545111523, "learning_rate": 2.0375543866979554e-08, "loss": 0.0051, "step": 20185 }, { "epoch": 4.592946530147895, "grad_norm": 0.19519502547563547, "learning_rate": 2.035292122930084e-08, "loss": 0.0006, "step": 20186 }, { "epoch": 4.593174061433447, "grad_norm": 0.11410334924346005, "learning_rate": 2.0330310949500942e-08, "loss": 0.0005, "step": 20187 }, { "epoch": 4.593401592718999, "grad_norm": 0.7782145480852203, "learning_rate": 2.03077130280422e-08, "loss": 0.0031, "step": 20188 }, { "epoch": 4.593629124004551, "grad_norm": 0.7799150850712792, "learning_rate": 2.0285127465386328e-08, "loss": 0.0093, "step": 20189 }, { "epoch": 4.593856655290102, "grad_norm": 0.05235336329315015, "learning_rate": 2.026255426199497e-08, "loss": 0.0002, "step": 20190 }, { "epoch": 4.594084186575654, "grad_norm": 0.8105764257551322, "learning_rate": 2.0239993418329494e-08, "loss": 0.0058, "step": 20191 }, { "epoch": 4.594311717861206, "grad_norm": 0.25244949669877687, "learning_rate": 2.0217444934850922e-08, "loss": 0.0016, "step": 20192 }, { "epoch": 4.594539249146758, "grad_norm": 0.2549882077881721, "learning_rate": 2.0194908812020268e-08, "loss": 0.0018, "step": 20193 }, { "epoch": 4.59476678043231, "grad_norm": 0.9058669976635724, "learning_rate": 2.0172385050297935e-08, "loss": 0.0053, "step": 20194 }, { "epoch": 4.594994311717861, "grad_norm": 0.6431579096232973, "learning_rate": 2.0149873650144385e-08, "loss": 0.0045, "step": 20195 }, { "epoch": 4.595221843003413, "grad_norm": 0.6665319086214482, "learning_rate": 2.01273746120198e-08, "loss": 0.0041, "step": 20196 }, { "epoch": 4.595449374288965, "grad_norm": 0.23492855750452712, "learning_rate": 2.010488793638375e-08, "loss": 0.001, "step": 20197 }, { "epoch": 4.595676905574517, "grad_norm": 0.30884089691117583, "learning_rate": 2.0082413623696075e-08, "loss": 0.0021, "step": 20198 }, { "epoch": 4.595904436860068, "grad_norm": 0.44249416528857183, "learning_rate": 2.005995167441599e-08, "loss": 0.0037, "step": 20199 }, { "epoch": 4.59613196814562, "grad_norm": 0.3661749567988224, "learning_rate": 2.003750208900257e-08, "loss": 0.0017, "step": 20200 }, { "epoch": 4.596359499431172, "grad_norm": 0.4644194303119179, "learning_rate": 2.0015064867914686e-08, "loss": 0.0024, "step": 20201 }, { "epoch": 4.596587030716724, "grad_norm": 0.1502156176919139, "learning_rate": 1.999264001161086e-08, "loss": 0.0004, "step": 20202 }, { "epoch": 4.596814562002275, "grad_norm": 0.05773558042899384, "learning_rate": 1.9970227520549543e-08, "loss": 0.0002, "step": 20203 }, { "epoch": 4.597042093287827, "grad_norm": 1.3274873801992462, "learning_rate": 1.9947827395188633e-08, "loss": 0.0115, "step": 20204 }, { "epoch": 4.597269624573379, "grad_norm": 0.5963020627010794, "learning_rate": 1.99254396359861e-08, "loss": 0.0094, "step": 20205 }, { "epoch": 4.597497155858931, "grad_norm": 0.5734090690126981, "learning_rate": 1.9903064243399355e-08, "loss": 0.0058, "step": 20206 }, { "epoch": 4.597724687144482, "grad_norm": 0.5075813385559981, "learning_rate": 1.9880701217885807e-08, "loss": 0.0064, "step": 20207 }, { "epoch": 4.597952218430034, "grad_norm": 0.10146483105180323, "learning_rate": 1.9858350559902596e-08, "loss": 0.0004, "step": 20208 }, { "epoch": 4.598179749715586, "grad_norm": 0.3901856339499816, "learning_rate": 1.9836012269906364e-08, "loss": 0.0009, "step": 20209 }, { "epoch": 4.598407281001138, "grad_norm": 0.015996285056180705, "learning_rate": 1.981368634835376e-08, "loss": 0.0, "step": 20210 }, { "epoch": 4.598634812286689, "grad_norm": 0.13445359934040568, "learning_rate": 1.979137279570102e-08, "loss": 0.0003, "step": 20211 }, { "epoch": 4.598862343572241, "grad_norm": 0.23777565086951302, "learning_rate": 1.9769071612404307e-08, "loss": 0.0009, "step": 20212 }, { "epoch": 4.599089874857793, "grad_norm": 0.030386885308905466, "learning_rate": 1.9746782798919227e-08, "loss": 0.0001, "step": 20213 }, { "epoch": 4.599317406143345, "grad_norm": 0.04811010208665552, "learning_rate": 1.9724506355701528e-08, "loss": 0.0002, "step": 20214 }, { "epoch": 4.599544937428896, "grad_norm": 0.23550978854001317, "learning_rate": 1.97022422832064e-08, "loss": 0.001, "step": 20215 }, { "epoch": 4.599772468714448, "grad_norm": 0.1565045851007897, "learning_rate": 1.9679990581888826e-08, "loss": 0.0006, "step": 20216 }, { "epoch": 4.6, "grad_norm": 0.30308432642725863, "learning_rate": 1.965775125220379e-08, "loss": 0.0005, "step": 20217 }, { "epoch": 4.600227531285552, "grad_norm": 0.574616465436433, "learning_rate": 1.9635524294605578e-08, "loss": 0.0011, "step": 20218 }, { "epoch": 4.600455062571103, "grad_norm": 0.9660091165830709, "learning_rate": 1.9613309709548625e-08, "loss": 0.0053, "step": 20219 }, { "epoch": 4.600682593856655, "grad_norm": 0.16085710790014995, "learning_rate": 1.959110749748687e-08, "loss": 0.0003, "step": 20220 }, { "epoch": 4.600910125142207, "grad_norm": 0.1906523818326168, "learning_rate": 1.9568917658874115e-08, "loss": 0.0008, "step": 20221 }, { "epoch": 4.601137656427759, "grad_norm": 0.33173078742507445, "learning_rate": 1.9546740194163886e-08, "loss": 0.0017, "step": 20222 }, { "epoch": 4.601365187713311, "grad_norm": 0.0973793786627744, "learning_rate": 1.9524575103809507e-08, "loss": 0.0003, "step": 20223 }, { "epoch": 4.601592718998862, "grad_norm": 0.3553064201449493, "learning_rate": 1.9502422388263942e-08, "loss": 0.0016, "step": 20224 }, { "epoch": 4.601820250284414, "grad_norm": 0.23978339939545398, "learning_rate": 1.9480282047979818e-08, "loss": 0.001, "step": 20225 }, { "epoch": 4.602047781569966, "grad_norm": 0.2728451716279607, "learning_rate": 1.9458154083409965e-08, "loss": 0.0011, "step": 20226 }, { "epoch": 4.602275312855518, "grad_norm": 0.9278376759290804, "learning_rate": 1.9436038495006317e-08, "loss": 0.001, "step": 20227 }, { "epoch": 4.602502844141069, "grad_norm": 0.43853297778101696, "learning_rate": 1.941393528322094e-08, "loss": 0.005, "step": 20228 }, { "epoch": 4.602730375426621, "grad_norm": 0.3185139205729471, "learning_rate": 1.9391844448505837e-08, "loss": 0.0006, "step": 20229 }, { "epoch": 4.602957906712173, "grad_norm": 0.14880978565896558, "learning_rate": 1.936976599131217e-08, "loss": 0.0007, "step": 20230 }, { "epoch": 4.603185437997725, "grad_norm": 0.17214740561181607, "learning_rate": 1.9347699912091386e-08, "loss": 0.0005, "step": 20231 }, { "epoch": 4.603412969283276, "grad_norm": 0.10794890922310657, "learning_rate": 1.9325646211294374e-08, "loss": 0.0005, "step": 20232 }, { "epoch": 4.603640500568829, "grad_norm": 0.5138328288097022, "learning_rate": 1.9303604889372023e-08, "loss": 0.0025, "step": 20233 }, { "epoch": 4.60386803185438, "grad_norm": 0.49026418933739113, "learning_rate": 1.92815759467746e-08, "loss": 0.0027, "step": 20234 }, { "epoch": 4.604095563139932, "grad_norm": 0.01940502808490364, "learning_rate": 1.9259559383952435e-08, "loss": 0.0001, "step": 20235 }, { "epoch": 4.604323094425483, "grad_norm": 0.9159922737150714, "learning_rate": 1.923755520135559e-08, "loss": 0.0028, "step": 20236 }, { "epoch": 4.604550625711036, "grad_norm": 0.7563090770006556, "learning_rate": 1.921556339943363e-08, "loss": 0.0013, "step": 20237 }, { "epoch": 4.604778156996587, "grad_norm": 0.14044102532868988, "learning_rate": 1.91935839786362e-08, "loss": 0.0004, "step": 20238 }, { "epoch": 4.605005688282139, "grad_norm": 0.06723979501126007, "learning_rate": 1.9171616939412456e-08, "loss": 0.0002, "step": 20239 }, { "epoch": 4.60523321956769, "grad_norm": 0.6459280674010961, "learning_rate": 1.91496622822112e-08, "loss": 0.0031, "step": 20240 }, { "epoch": 4.605460750853243, "grad_norm": 0.040928032862111434, "learning_rate": 1.912772000748131e-08, "loss": 0.0001, "step": 20241 }, { "epoch": 4.605688282138794, "grad_norm": 1.2297824513258606, "learning_rate": 1.9105790115671248e-08, "loss": 0.0108, "step": 20242 }, { "epoch": 4.605915813424346, "grad_norm": 0.3946230075992199, "learning_rate": 1.908387260722927e-08, "loss": 0.0016, "step": 20243 }, { "epoch": 4.606143344709897, "grad_norm": 0.04023446497234926, "learning_rate": 1.9061967482603138e-08, "loss": 0.0001, "step": 20244 }, { "epoch": 4.60637087599545, "grad_norm": 0.15144882818026828, "learning_rate": 1.904007474224083e-08, "loss": 0.0004, "step": 20245 }, { "epoch": 4.606598407281001, "grad_norm": 0.1477808497945856, "learning_rate": 1.9018194386589557e-08, "loss": 0.0006, "step": 20246 }, { "epoch": 4.606825938566553, "grad_norm": 0.33055032162572695, "learning_rate": 1.8996326416096526e-08, "loss": 0.0016, "step": 20247 }, { "epoch": 4.607053469852104, "grad_norm": 0.04679410055163726, "learning_rate": 1.897447083120875e-08, "loss": 0.0001, "step": 20248 }, { "epoch": 4.607281001137657, "grad_norm": 0.19485015823094012, "learning_rate": 1.8952627632372876e-08, "loss": 0.0007, "step": 20249 }, { "epoch": 4.607508532423208, "grad_norm": 0.12472381383773828, "learning_rate": 1.893079682003543e-08, "loss": 0.0007, "step": 20250 }, { "epoch": 4.60773606370876, "grad_norm": 0.17263139186635193, "learning_rate": 1.8908978394642507e-08, "loss": 0.0009, "step": 20251 }, { "epoch": 4.607963594994311, "grad_norm": 0.2489058057900545, "learning_rate": 1.8887172356640143e-08, "loss": 0.001, "step": 20252 }, { "epoch": 4.608191126279864, "grad_norm": 0.35746659166501205, "learning_rate": 1.8865378706473883e-08, "loss": 0.0017, "step": 20253 }, { "epoch": 4.608418657565415, "grad_norm": 0.5558064284597667, "learning_rate": 1.8843597444589205e-08, "loss": 0.0025, "step": 20254 }, { "epoch": 4.608646188850967, "grad_norm": 0.2985778245077683, "learning_rate": 1.8821828571431167e-08, "loss": 0.0004, "step": 20255 }, { "epoch": 4.608873720136518, "grad_norm": 0.09519482634862457, "learning_rate": 1.88000720874449e-08, "loss": 0.0004, "step": 20256 }, { "epoch": 4.609101251422071, "grad_norm": 0.5441545587967803, "learning_rate": 1.8778327993074983e-08, "loss": 0.0054, "step": 20257 }, { "epoch": 4.609328782707622, "grad_norm": 1.8977201698301245, "learning_rate": 1.8756596288765778e-08, "loss": 0.0207, "step": 20258 }, { "epoch": 4.609556313993174, "grad_norm": 0.26075990627392015, "learning_rate": 1.8734876974961374e-08, "loss": 0.0017, "step": 20259 }, { "epoch": 4.609783845278725, "grad_norm": 0.18923380302166506, "learning_rate": 1.8713170052105865e-08, "loss": 0.0005, "step": 20260 }, { "epoch": 4.610011376564278, "grad_norm": 0.2629643359133237, "learning_rate": 1.869147552064264e-08, "loss": 0.002, "step": 20261 }, { "epoch": 4.61023890784983, "grad_norm": 0.16723831515525736, "learning_rate": 1.8669793381015445e-08, "loss": 0.0006, "step": 20262 }, { "epoch": 4.610466439135381, "grad_norm": 0.2706068093653165, "learning_rate": 1.8648123633667123e-08, "loss": 0.001, "step": 20263 }, { "epoch": 4.610693970420932, "grad_norm": 0.06654195641469102, "learning_rate": 1.8626466279040718e-08, "loss": 0.0002, "step": 20264 }, { "epoch": 4.610921501706485, "grad_norm": 0.3232583483991191, "learning_rate": 1.860482131757886e-08, "loss": 0.0015, "step": 20265 }, { "epoch": 4.611149032992037, "grad_norm": 1.1967366118400045, "learning_rate": 1.858318874972384e-08, "loss": 0.0018, "step": 20266 }, { "epoch": 4.611376564277588, "grad_norm": 0.4408721251524964, "learning_rate": 1.85615685759178e-08, "loss": 0.0029, "step": 20267 }, { "epoch": 4.611604095563139, "grad_norm": 0.182839392550764, "learning_rate": 1.8539960796602683e-08, "loss": 0.0011, "step": 20268 }, { "epoch": 4.611831626848692, "grad_norm": 0.45735616360006226, "learning_rate": 1.8518365412220142e-08, "loss": 0.0012, "step": 20269 }, { "epoch": 4.612059158134244, "grad_norm": 0.04886274601138363, "learning_rate": 1.849678242321143e-08, "loss": 0.0001, "step": 20270 }, { "epoch": 4.612286689419795, "grad_norm": 0.17903208937164572, "learning_rate": 1.8475211830017782e-08, "loss": 0.0006, "step": 20271 }, { "epoch": 4.612514220705347, "grad_norm": 0.33530664136833893, "learning_rate": 1.8453653633080034e-08, "loss": 0.0024, "step": 20272 }, { "epoch": 4.612741751990899, "grad_norm": 0.34502499082786314, "learning_rate": 1.8432107832838733e-08, "loss": 0.0004, "step": 20273 }, { "epoch": 4.612969283276451, "grad_norm": 0.3920168324819652, "learning_rate": 1.841057442973422e-08, "loss": 0.0016, "step": 20274 }, { "epoch": 4.613196814562002, "grad_norm": 0.7256295942170699, "learning_rate": 1.838905342420663e-08, "loss": 0.0066, "step": 20275 }, { "epoch": 4.613424345847554, "grad_norm": 0.024502178088909827, "learning_rate": 1.8367544816695962e-08, "loss": 0.0001, "step": 20276 }, { "epoch": 4.613651877133106, "grad_norm": 0.6201999322819773, "learning_rate": 1.834604860764165e-08, "loss": 0.0027, "step": 20277 }, { "epoch": 4.613879408418658, "grad_norm": 0.30124763590704273, "learning_rate": 1.8324564797483e-08, "loss": 0.002, "step": 20278 }, { "epoch": 4.614106939704209, "grad_norm": 0.14485150718051906, "learning_rate": 1.830309338665924e-08, "loss": 0.0005, "step": 20279 }, { "epoch": 4.614334470989761, "grad_norm": 0.4975745527302196, "learning_rate": 1.828163437560905e-08, "loss": 0.0013, "step": 20280 }, { "epoch": 4.614562002275313, "grad_norm": 0.08881949458064661, "learning_rate": 1.8260187764771105e-08, "loss": 0.0003, "step": 20281 }, { "epoch": 4.614789533560865, "grad_norm": 0.2786518694553217, "learning_rate": 1.8238753554583737e-08, "loss": 0.0003, "step": 20282 }, { "epoch": 4.615017064846416, "grad_norm": 0.08141662643949278, "learning_rate": 1.8217331745485138e-08, "loss": 0.0003, "step": 20283 }, { "epoch": 4.615244596131968, "grad_norm": 0.4875356653209458, "learning_rate": 1.819592233791294e-08, "loss": 0.0036, "step": 20284 }, { "epoch": 4.61547212741752, "grad_norm": 0.01838575416518233, "learning_rate": 1.817452533230471e-08, "loss": 0.0, "step": 20285 }, { "epoch": 4.615699658703072, "grad_norm": 1.1309884098429996, "learning_rate": 1.8153140729097948e-08, "loss": 0.0065, "step": 20286 }, { "epoch": 4.615927189988623, "grad_norm": 0.48992741421846403, "learning_rate": 1.8131768528729525e-08, "loss": 0.0025, "step": 20287 }, { "epoch": 4.616154721274175, "grad_norm": 0.4896893381278748, "learning_rate": 1.8110408731636318e-08, "loss": 0.0031, "step": 20288 }, { "epoch": 4.616382252559727, "grad_norm": 0.10013743683124764, "learning_rate": 1.8089061338254915e-08, "loss": 0.0003, "step": 20289 }, { "epoch": 4.616609783845279, "grad_norm": 0.7979553363344127, "learning_rate": 1.806772634902164e-08, "loss": 0.0047, "step": 20290 }, { "epoch": 4.61683731513083, "grad_norm": 0.27947732306570305, "learning_rate": 1.804640376437246e-08, "loss": 0.0005, "step": 20291 }, { "epoch": 4.617064846416382, "grad_norm": 0.6491452629793899, "learning_rate": 1.8025093584743138e-08, "loss": 0.0035, "step": 20292 }, { "epoch": 4.617292377701934, "grad_norm": 0.1667447699183522, "learning_rate": 1.8003795810569434e-08, "loss": 0.0005, "step": 20293 }, { "epoch": 4.617519908987486, "grad_norm": 0.13725066197762084, "learning_rate": 1.7982510442286355e-08, "loss": 0.0007, "step": 20294 }, { "epoch": 4.617747440273037, "grad_norm": 0.32103196004378526, "learning_rate": 1.7961237480329032e-08, "loss": 0.0012, "step": 20295 }, { "epoch": 4.617974971558589, "grad_norm": 0.3629522250258979, "learning_rate": 1.79399769251324e-08, "loss": 0.0013, "step": 20296 }, { "epoch": 4.618202502844141, "grad_norm": 0.26531532873582403, "learning_rate": 1.791872877713076e-08, "loss": 0.0015, "step": 20297 }, { "epoch": 4.618430034129693, "grad_norm": 0.1991353993045997, "learning_rate": 1.7897493036758556e-08, "loss": 0.0005, "step": 20298 }, { "epoch": 4.618657565415244, "grad_norm": 0.5278012759708162, "learning_rate": 1.787626970444968e-08, "loss": 0.0024, "step": 20299 }, { "epoch": 4.618885096700796, "grad_norm": 0.28816614333448687, "learning_rate": 1.785505878063802e-08, "loss": 0.002, "step": 20300 }, { "epoch": 4.619112627986349, "grad_norm": 0.22261897684375706, "learning_rate": 1.783386026575698e-08, "loss": 0.0004, "step": 20301 }, { "epoch": 4.6193401592719, "grad_norm": 0.1896721826217488, "learning_rate": 1.7812674160239824e-08, "loss": 0.0008, "step": 20302 }, { "epoch": 4.619567690557451, "grad_norm": 0.024390107401370938, "learning_rate": 1.7791500464519675e-08, "loss": 0.0001, "step": 20303 }, { "epoch": 4.619795221843003, "grad_norm": 0.6926479895188471, "learning_rate": 1.7770339179029177e-08, "loss": 0.0035, "step": 20304 }, { "epoch": 4.620022753128556, "grad_norm": 0.3561324592245305, "learning_rate": 1.77491903042009e-08, "loss": 0.002, "step": 20305 }, { "epoch": 4.620250284414107, "grad_norm": 0.11510892562605582, "learning_rate": 1.7728053840466998e-08, "loss": 0.0009, "step": 20306 }, { "epoch": 4.620477815699658, "grad_norm": 0.22234886799205786, "learning_rate": 1.7706929788259553e-08, "loss": 0.0008, "step": 20307 }, { "epoch": 4.62070534698521, "grad_norm": 0.3311138120995077, "learning_rate": 1.768581814801017e-08, "loss": 0.0011, "step": 20308 }, { "epoch": 4.620932878270763, "grad_norm": 0.3849612007009982, "learning_rate": 1.766471892015044e-08, "loss": 0.0018, "step": 20309 }, { "epoch": 4.621160409556314, "grad_norm": 0.060342914329537325, "learning_rate": 1.764363210511169e-08, "loss": 0.0002, "step": 20310 }, { "epoch": 4.621387940841866, "grad_norm": 0.7386306095036933, "learning_rate": 1.762255770332462e-08, "loss": 0.0029, "step": 20311 }, { "epoch": 4.621615472127417, "grad_norm": 0.2947874345710951, "learning_rate": 1.760149571522027e-08, "loss": 0.0014, "step": 20312 }, { "epoch": 4.62184300341297, "grad_norm": 0.2932430571217068, "learning_rate": 1.758044614122878e-08, "loss": 0.0028, "step": 20313 }, { "epoch": 4.622070534698521, "grad_norm": 0.267732607082769, "learning_rate": 1.7559408981780712e-08, "loss": 0.0021, "step": 20314 }, { "epoch": 4.622298065984073, "grad_norm": 0.47590198579624415, "learning_rate": 1.753838423730572e-08, "loss": 0.0021, "step": 20315 }, { "epoch": 4.622525597269624, "grad_norm": 0.34301184811720364, "learning_rate": 1.7517371908233675e-08, "loss": 0.0028, "step": 20316 }, { "epoch": 4.622753128555177, "grad_norm": 0.29845784232667355, "learning_rate": 1.7496371994994015e-08, "loss": 0.0015, "step": 20317 }, { "epoch": 4.622980659840728, "grad_norm": 0.6757422815673266, "learning_rate": 1.747538449801585e-08, "loss": 0.0051, "step": 20318 }, { "epoch": 4.62320819112628, "grad_norm": 0.36945067570010404, "learning_rate": 1.7454409417728343e-08, "loss": 0.001, "step": 20319 }, { "epoch": 4.623435722411831, "grad_norm": 0.08530438971083497, "learning_rate": 1.7433446754559907e-08, "loss": 0.0002, "step": 20320 }, { "epoch": 4.623663253697384, "grad_norm": 0.3110562452566633, "learning_rate": 1.7412496508939154e-08, "loss": 0.0025, "step": 20321 }, { "epoch": 4.623890784982935, "grad_norm": 0.37614369204286535, "learning_rate": 1.7391558681294218e-08, "loss": 0.0012, "step": 20322 }, { "epoch": 4.624118316268487, "grad_norm": 0.13343397659386327, "learning_rate": 1.7370633272053016e-08, "loss": 0.0004, "step": 20323 }, { "epoch": 4.624345847554038, "grad_norm": 0.27629719999958247, "learning_rate": 1.7349720281643335e-08, "loss": 0.0008, "step": 20324 }, { "epoch": 4.624573378839591, "grad_norm": 0.8427125711442445, "learning_rate": 1.7328819710492468e-08, "loss": 0.0009, "step": 20325 }, { "epoch": 4.624800910125142, "grad_norm": 1.1027254892724538, "learning_rate": 1.7307931559027643e-08, "loss": 0.0118, "step": 20326 }, { "epoch": 4.625028441410694, "grad_norm": 0.4343271988024275, "learning_rate": 1.7287055827675815e-08, "loss": 0.0036, "step": 20327 }, { "epoch": 4.625255972696245, "grad_norm": 0.24421404997515586, "learning_rate": 1.726619251686337e-08, "loss": 0.0011, "step": 20328 }, { "epoch": 4.625483503981798, "grad_norm": 0.24871137611533894, "learning_rate": 1.724534162701713e-08, "loss": 0.0008, "step": 20329 }, { "epoch": 4.625711035267349, "grad_norm": 0.7436663111113421, "learning_rate": 1.7224503158563e-08, "loss": 0.008, "step": 20330 }, { "epoch": 4.625938566552901, "grad_norm": 0.14665478577458788, "learning_rate": 1.7203677111927026e-08, "loss": 0.0006, "step": 20331 }, { "epoch": 4.6261660978384525, "grad_norm": 0.1126658355739534, "learning_rate": 1.7182863487534633e-08, "loss": 0.0006, "step": 20332 }, { "epoch": 4.626393629124005, "grad_norm": 0.7653564526516836, "learning_rate": 1.716206228581145e-08, "loss": 0.0047, "step": 20333 }, { "epoch": 4.626621160409556, "grad_norm": 0.04129888573178435, "learning_rate": 1.714127350718249e-08, "loss": 0.0001, "step": 20334 }, { "epoch": 4.626848691695108, "grad_norm": 0.7214281208808552, "learning_rate": 1.712049715207255e-08, "loss": 0.0026, "step": 20335 }, { "epoch": 4.6270762229806595, "grad_norm": 0.17129979839992535, "learning_rate": 1.7099733220906496e-08, "loss": 0.0011, "step": 20336 }, { "epoch": 4.627303754266212, "grad_norm": 0.06970216417529643, "learning_rate": 1.7078981714108574e-08, "loss": 0.0003, "step": 20337 }, { "epoch": 4.627531285551763, "grad_norm": 0.5265811367918624, "learning_rate": 1.7058242632102887e-08, "loss": 0.0032, "step": 20338 }, { "epoch": 4.627758816837315, "grad_norm": 0.02697783540460268, "learning_rate": 1.7037515975313266e-08, "loss": 0.0001, "step": 20339 }, { "epoch": 4.627986348122867, "grad_norm": 0.2788883797095397, "learning_rate": 1.7016801744163465e-08, "loss": 0.0012, "step": 20340 }, { "epoch": 4.628213879408419, "grad_norm": 0.08469459821566672, "learning_rate": 1.6996099939076762e-08, "loss": 0.0004, "step": 20341 }, { "epoch": 4.62844141069397, "grad_norm": 0.0738530422514608, "learning_rate": 1.697541056047622e-08, "loss": 0.0003, "step": 20342 }, { "epoch": 4.628668941979522, "grad_norm": 0.5040557310153301, "learning_rate": 1.695473360878476e-08, "loss": 0.0046, "step": 20343 }, { "epoch": 4.628896473265074, "grad_norm": 0.339949294546646, "learning_rate": 1.6934069084424965e-08, "loss": 0.0017, "step": 20344 }, { "epoch": 4.629124004550626, "grad_norm": 0.28612024101191597, "learning_rate": 1.6913416987819204e-08, "loss": 0.0015, "step": 20345 }, { "epoch": 4.629351535836177, "grad_norm": 0.7501963547646613, "learning_rate": 1.6892777319389568e-08, "loss": 0.0086, "step": 20346 }, { "epoch": 4.629579067121729, "grad_norm": 0.5944889834534649, "learning_rate": 1.687215007955781e-08, "loss": 0.0067, "step": 20347 }, { "epoch": 4.629806598407281, "grad_norm": 0.7579150432787531, "learning_rate": 1.6851535268745596e-08, "loss": 0.0075, "step": 20348 }, { "epoch": 4.630034129692833, "grad_norm": 0.33690914994007864, "learning_rate": 1.6830932887374127e-08, "loss": 0.0024, "step": 20349 }, { "epoch": 4.630261660978385, "grad_norm": 1.3219172052287604, "learning_rate": 1.6810342935864726e-08, "loss": 0.0105, "step": 20350 }, { "epoch": 4.630489192263936, "grad_norm": 0.6121523939295683, "learning_rate": 1.678976541463803e-08, "loss": 0.0012, "step": 20351 }, { "epoch": 4.630716723549488, "grad_norm": 0.2879878671199614, "learning_rate": 1.676920032411461e-08, "loss": 0.0014, "step": 20352 }, { "epoch": 4.63094425483504, "grad_norm": 0.28194755840061464, "learning_rate": 1.6748647664714886e-08, "loss": 0.0006, "step": 20353 }, { "epoch": 4.631171786120592, "grad_norm": 0.03580122277266583, "learning_rate": 1.6728107436858808e-08, "loss": 0.0001, "step": 20354 }, { "epoch": 4.631399317406143, "grad_norm": 0.0629535997678236, "learning_rate": 1.670757964096617e-08, "loss": 0.0003, "step": 20355 }, { "epoch": 4.631626848691695, "grad_norm": 0.09134442681541749, "learning_rate": 1.6687064277456574e-08, "loss": 0.0001, "step": 20356 }, { "epoch": 4.631854379977247, "grad_norm": 0.3511682216386791, "learning_rate": 1.6666561346749337e-08, "loss": 0.0041, "step": 20357 }, { "epoch": 4.632081911262799, "grad_norm": 0.2705822550416859, "learning_rate": 1.6646070849263425e-08, "loss": 0.0006, "step": 20358 }, { "epoch": 4.63230944254835, "grad_norm": 0.03426455971769238, "learning_rate": 1.662559278541781e-08, "loss": 0.0001, "step": 20359 }, { "epoch": 4.632536973833902, "grad_norm": 0.4122156696682815, "learning_rate": 1.660512715563084e-08, "loss": 0.0049, "step": 20360 }, { "epoch": 4.632764505119454, "grad_norm": 0.151574579882696, "learning_rate": 1.658467396032079e-08, "loss": 0.0003, "step": 20361 }, { "epoch": 4.632992036405006, "grad_norm": 0.22418876035294488, "learning_rate": 1.656423319990573e-08, "loss": 0.0012, "step": 20362 }, { "epoch": 4.633219567690557, "grad_norm": 0.14584996573484443, "learning_rate": 1.6543804874803448e-08, "loss": 0.0004, "step": 20363 }, { "epoch": 4.633447098976109, "grad_norm": 0.35979027539112296, "learning_rate": 1.652338898543153e-08, "loss": 0.0039, "step": 20364 }, { "epoch": 4.633674630261661, "grad_norm": 0.7577111872005747, "learning_rate": 1.650298553220721e-08, "loss": 0.0034, "step": 20365 }, { "epoch": 4.633902161547213, "grad_norm": 0.47499870995287113, "learning_rate": 1.6482594515547373e-08, "loss": 0.0024, "step": 20366 }, { "epoch": 4.634129692832764, "grad_norm": 0.19565007721991398, "learning_rate": 1.6462215935868914e-08, "loss": 0.0005, "step": 20367 }, { "epoch": 4.634357224118316, "grad_norm": 0.2625156031892522, "learning_rate": 1.6441849793588163e-08, "loss": 0.0008, "step": 20368 }, { "epoch": 4.634584755403868, "grad_norm": 0.06178918780326043, "learning_rate": 1.6421496089121526e-08, "loss": 0.0001, "step": 20369 }, { "epoch": 4.63481228668942, "grad_norm": 0.31946279483794754, "learning_rate": 1.6401154822884986e-08, "loss": 0.0005, "step": 20370 }, { "epoch": 4.635039817974971, "grad_norm": 0.4924861536417435, "learning_rate": 1.6380825995294253e-08, "loss": 0.0023, "step": 20371 }, { "epoch": 4.635267349260523, "grad_norm": 1.28950005539066, "learning_rate": 1.6360509606764828e-08, "loss": 0.01, "step": 20372 }, { "epoch": 4.635494880546075, "grad_norm": 0.3019507485152119, "learning_rate": 1.63402056577118e-08, "loss": 0.0013, "step": 20373 }, { "epoch": 4.635722411831627, "grad_norm": 0.25218862535222475, "learning_rate": 1.6319914148550387e-08, "loss": 0.0011, "step": 20374 }, { "epoch": 4.635949943117178, "grad_norm": 0.40771919907523585, "learning_rate": 1.629963507969512e-08, "loss": 0.0023, "step": 20375 }, { "epoch": 4.63617747440273, "grad_norm": 0.07804416065273447, "learning_rate": 1.6279368451560528e-08, "loss": 0.0001, "step": 20376 }, { "epoch": 4.636405005688282, "grad_norm": 0.2442180061351114, "learning_rate": 1.6259114264560795e-08, "loss": 0.0009, "step": 20377 }, { "epoch": 4.636632536973834, "grad_norm": 0.14751744885985332, "learning_rate": 1.6238872519109965e-08, "loss": 0.0001, "step": 20378 }, { "epoch": 4.636860068259386, "grad_norm": 0.2508707460273159, "learning_rate": 1.6218643215621736e-08, "loss": 0.0012, "step": 20379 }, { "epoch": 4.637087599544937, "grad_norm": 0.6946414099739429, "learning_rate": 1.6198426354509385e-08, "loss": 0.0016, "step": 20380 }, { "epoch": 4.637315130830489, "grad_norm": 0.3237494902066939, "learning_rate": 1.617822193618633e-08, "loss": 0.0014, "step": 20381 }, { "epoch": 4.637542662116041, "grad_norm": 0.2115137128776552, "learning_rate": 1.61580299610653e-08, "loss": 0.0005, "step": 20382 }, { "epoch": 4.637770193401593, "grad_norm": 0.7129077325810884, "learning_rate": 1.6137850429559234e-08, "loss": 0.0066, "step": 20383 }, { "epoch": 4.637997724687144, "grad_norm": 0.6981825100869029, "learning_rate": 1.6117683342080427e-08, "loss": 0.0053, "step": 20384 }, { "epoch": 4.638225255972696, "grad_norm": 0.15353676329497212, "learning_rate": 1.6097528699040924e-08, "loss": 0.0012, "step": 20385 }, { "epoch": 4.638452787258248, "grad_norm": 0.07065235430537636, "learning_rate": 1.6077386500852887e-08, "loss": 0.0004, "step": 20386 }, { "epoch": 4.6386803185438, "grad_norm": 0.6364803389599236, "learning_rate": 1.6057256747927867e-08, "loss": 0.0022, "step": 20387 }, { "epoch": 4.6389078498293514, "grad_norm": 0.5658496574748237, "learning_rate": 1.6037139440677197e-08, "loss": 0.0054, "step": 20388 }, { "epoch": 4.639135381114904, "grad_norm": 0.15808749360808821, "learning_rate": 1.6017034579512217e-08, "loss": 0.0006, "step": 20389 }, { "epoch": 4.639362912400455, "grad_norm": 0.40721855437252974, "learning_rate": 1.5996942164843777e-08, "loss": 0.0024, "step": 20390 }, { "epoch": 4.639590443686007, "grad_norm": 0.5473068402683311, "learning_rate": 1.5976862197082528e-08, "loss": 0.006, "step": 20391 }, { "epoch": 4.6398179749715585, "grad_norm": 0.13011790113519953, "learning_rate": 1.5956794676638756e-08, "loss": 0.0009, "step": 20392 }, { "epoch": 4.640045506257111, "grad_norm": 0.06081781862216256, "learning_rate": 1.5936739603922767e-08, "loss": 0.0001, "step": 20393 }, { "epoch": 4.640273037542662, "grad_norm": 0.6800355467450967, "learning_rate": 1.5916696979344296e-08, "loss": 0.0084, "step": 20394 }, { "epoch": 4.640500568828214, "grad_norm": 0.4171291625939157, "learning_rate": 1.5896666803313086e-08, "loss": 0.001, "step": 20395 }, { "epoch": 4.6407281001137655, "grad_norm": 1.0605576174500777, "learning_rate": 1.58766490762384e-08, "loss": 0.0007, "step": 20396 }, { "epoch": 4.640955631399318, "grad_norm": 0.19157797886586778, "learning_rate": 1.5856643798529624e-08, "loss": 0.0011, "step": 20397 }, { "epoch": 4.641183162684869, "grad_norm": 0.2772328068510498, "learning_rate": 1.583665097059539e-08, "loss": 0.0013, "step": 20398 }, { "epoch": 4.641410693970421, "grad_norm": 0.30141650843361745, "learning_rate": 1.5816670592844335e-08, "loss": 0.0021, "step": 20399 }, { "epoch": 4.6416382252559725, "grad_norm": 2.037369257525199, "learning_rate": 1.579670266568495e-08, "loss": 0.0461, "step": 20400 }, { "epoch": 4.641865756541525, "grad_norm": 0.052564532651865135, "learning_rate": 1.577674718952517e-08, "loss": 0.0002, "step": 20401 }, { "epoch": 4.642093287827076, "grad_norm": 0.17167339999350176, "learning_rate": 1.5756804164772933e-08, "loss": 0.0006, "step": 20402 }, { "epoch": 4.642320819112628, "grad_norm": 0.11817574353119623, "learning_rate": 1.57368735918359e-08, "loss": 0.0004, "step": 20403 }, { "epoch": 4.6425483503981795, "grad_norm": 0.22385037028749677, "learning_rate": 1.5716955471121313e-08, "loss": 0.0009, "step": 20404 }, { "epoch": 4.642775881683732, "grad_norm": 0.4001639851247802, "learning_rate": 1.5697049803036353e-08, "loss": 0.0028, "step": 20405 }, { "epoch": 4.643003412969283, "grad_norm": 0.14049612614681098, "learning_rate": 1.5677156587987768e-08, "loss": 0.0005, "step": 20406 }, { "epoch": 4.643230944254835, "grad_norm": 0.2367485385271757, "learning_rate": 1.5657275826382255e-08, "loss": 0.0051, "step": 20407 }, { "epoch": 4.6434584755403865, "grad_norm": 0.137863559117044, "learning_rate": 1.5637407518625942e-08, "loss": 0.0002, "step": 20408 }, { "epoch": 4.643686006825939, "grad_norm": 0.039336507634608886, "learning_rate": 1.56175516651251e-08, "loss": 0.0001, "step": 20409 }, { "epoch": 4.64391353811149, "grad_norm": 1.0256664703758724, "learning_rate": 1.5597708266285524e-08, "loss": 0.0162, "step": 20410 }, { "epoch": 4.644141069397042, "grad_norm": 0.020748951765641673, "learning_rate": 1.557787732251258e-08, "loss": 0.0, "step": 20411 }, { "epoch": 4.6443686006825935, "grad_norm": 0.23475937457983267, "learning_rate": 1.555805883421184e-08, "loss": 0.001, "step": 20412 }, { "epoch": 4.644596131968146, "grad_norm": 0.6059386304526092, "learning_rate": 1.5538252801788194e-08, "loss": 0.0072, "step": 20413 }, { "epoch": 4.644823663253697, "grad_norm": 0.20492731241194426, "learning_rate": 1.5518459225646528e-08, "loss": 0.0011, "step": 20414 }, { "epoch": 4.645051194539249, "grad_norm": 0.4154531769713809, "learning_rate": 1.5498678106191308e-08, "loss": 0.0024, "step": 20415 }, { "epoch": 4.6452787258248005, "grad_norm": 0.44363953822169777, "learning_rate": 1.5478909443826795e-08, "loss": 0.0024, "step": 20416 }, { "epoch": 4.645506257110353, "grad_norm": 0.2570677780754066, "learning_rate": 1.5459153238957178e-08, "loss": 0.0013, "step": 20417 }, { "epoch": 4.645733788395905, "grad_norm": 1.030540908123511, "learning_rate": 1.5439409491986092e-08, "loss": 0.0059, "step": 20418 }, { "epoch": 4.645961319681456, "grad_norm": 0.03469034002289326, "learning_rate": 1.5419678203317175e-08, "loss": 0.0001, "step": 20419 }, { "epoch": 4.6461888509670075, "grad_norm": 0.0456841428805224, "learning_rate": 1.5399959373353577e-08, "loss": 0.0002, "step": 20420 }, { "epoch": 4.64641638225256, "grad_norm": 0.5034823208674719, "learning_rate": 1.5380253002498443e-08, "loss": 0.0011, "step": 20421 }, { "epoch": 4.646643913538112, "grad_norm": 0.14714192125954823, "learning_rate": 1.5360559091154443e-08, "loss": 0.0006, "step": 20422 }, { "epoch": 4.646871444823663, "grad_norm": 0.9717169397125354, "learning_rate": 1.5340877639724097e-08, "loss": 0.0024, "step": 20423 }, { "epoch": 4.6470989761092145, "grad_norm": 0.48701985939292725, "learning_rate": 1.5321208648609724e-08, "loss": 0.0022, "step": 20424 }, { "epoch": 4.647326507394767, "grad_norm": 0.477421951392982, "learning_rate": 1.5301552118213228e-08, "loss": 0.0034, "step": 20425 }, { "epoch": 4.647554038680319, "grad_norm": 0.40432611458936746, "learning_rate": 1.5281908048936436e-08, "loss": 0.0037, "step": 20426 }, { "epoch": 4.64778156996587, "grad_norm": 0.13569947249371905, "learning_rate": 1.5262276441180697e-08, "loss": 0.0002, "step": 20427 }, { "epoch": 4.648009101251422, "grad_norm": 0.1483893163446517, "learning_rate": 1.524265729534749e-08, "loss": 0.0004, "step": 20428 }, { "epoch": 4.648236632536974, "grad_norm": 0.2587540249871645, "learning_rate": 1.5223050611837543e-08, "loss": 0.0011, "step": 20429 }, { "epoch": 4.648464163822526, "grad_norm": 0.7043621377692035, "learning_rate": 1.520345639105171e-08, "loss": 0.0042, "step": 20430 }, { "epoch": 4.648691695108077, "grad_norm": 0.5175890136196533, "learning_rate": 1.5183874633390438e-08, "loss": 0.0021, "step": 20431 }, { "epoch": 4.648919226393629, "grad_norm": 1.802333717569383, "learning_rate": 1.516430533925396e-08, "loss": 0.0116, "step": 20432 }, { "epoch": 4.649146757679181, "grad_norm": 0.32976586241116146, "learning_rate": 1.514474850904224e-08, "loss": 0.0026, "step": 20433 }, { "epoch": 4.649374288964733, "grad_norm": 0.6193721258459896, "learning_rate": 1.512520414315495e-08, "loss": 0.0031, "step": 20434 }, { "epoch": 4.649601820250284, "grad_norm": 0.10673201640671698, "learning_rate": 1.5105672241991567e-08, "loss": 0.0005, "step": 20435 }, { "epoch": 4.649829351535836, "grad_norm": 0.4933650492938631, "learning_rate": 1.5086152805951216e-08, "loss": 0.0005, "step": 20436 }, { "epoch": 4.650056882821388, "grad_norm": 0.14873993873338415, "learning_rate": 1.5066645835432882e-08, "loss": 0.0005, "step": 20437 }, { "epoch": 4.65028441410694, "grad_norm": 0.5405862291689415, "learning_rate": 1.504715133083534e-08, "loss": 0.0052, "step": 20438 }, { "epoch": 4.650511945392491, "grad_norm": 0.22517833683786825, "learning_rate": 1.502766929255696e-08, "loss": 0.001, "step": 20439 }, { "epoch": 4.650739476678043, "grad_norm": 0.024083288078546913, "learning_rate": 1.500819972099589e-08, "loss": 0.0001, "step": 20440 }, { "epoch": 4.650967007963595, "grad_norm": 0.34255869444781883, "learning_rate": 1.4988742616550074e-08, "loss": 0.0015, "step": 20441 }, { "epoch": 4.651194539249147, "grad_norm": 0.35098964400812416, "learning_rate": 1.4969297979617117e-08, "loss": 0.002, "step": 20442 }, { "epoch": 4.651422070534698, "grad_norm": 0.45484055843266835, "learning_rate": 1.4949865810594544e-08, "loss": 0.0034, "step": 20443 }, { "epoch": 4.6516496018202504, "grad_norm": 0.07050036901886139, "learning_rate": 1.49304461098794e-08, "loss": 0.0003, "step": 20444 }, { "epoch": 4.651877133105802, "grad_norm": 0.3576096257556389, "learning_rate": 1.4911038877868724e-08, "loss": 0.0031, "step": 20445 }, { "epoch": 4.652104664391354, "grad_norm": 0.12574603698797066, "learning_rate": 1.4891644114959011e-08, "loss": 0.0003, "step": 20446 }, { "epoch": 4.652332195676905, "grad_norm": 0.04811246368095709, "learning_rate": 1.4872261821546816e-08, "loss": 0.0002, "step": 20447 }, { "epoch": 4.6525597269624575, "grad_norm": 1.0312839338469302, "learning_rate": 1.485289199802821e-08, "loss": 0.0026, "step": 20448 }, { "epoch": 4.652787258248009, "grad_norm": 0.38217871950991555, "learning_rate": 1.4833534644798919e-08, "loss": 0.0028, "step": 20449 }, { "epoch": 4.653014789533561, "grad_norm": 0.6132239343621448, "learning_rate": 1.4814189762254808e-08, "loss": 0.0032, "step": 20450 }, { "epoch": 4.653242320819112, "grad_norm": 0.2724436511087701, "learning_rate": 1.4794857350791113e-08, "loss": 0.0009, "step": 20451 }, { "epoch": 4.6534698521046645, "grad_norm": 0.10585410978815174, "learning_rate": 1.4775537410803079e-08, "loss": 0.0003, "step": 20452 }, { "epoch": 4.653697383390216, "grad_norm": 0.1905419862828898, "learning_rate": 1.4756229942685524e-08, "loss": 0.0005, "step": 20453 }, { "epoch": 4.653924914675768, "grad_norm": 0.21549930291798303, "learning_rate": 1.4736934946832856e-08, "loss": 0.0007, "step": 20454 }, { "epoch": 4.654152445961319, "grad_norm": 0.2081899439222663, "learning_rate": 1.4717652423639692e-08, "loss": 0.001, "step": 20455 }, { "epoch": 4.6543799772468715, "grad_norm": 0.11068659770115542, "learning_rate": 1.469838237349995e-08, "loss": 0.0003, "step": 20456 }, { "epoch": 4.654607508532424, "grad_norm": 0.5062228720730075, "learning_rate": 1.4679124796807692e-08, "loss": 0.0009, "step": 20457 }, { "epoch": 4.654835039817975, "grad_norm": 0.2536350037837862, "learning_rate": 1.4659879693956283e-08, "loss": 0.0012, "step": 20458 }, { "epoch": 4.655062571103526, "grad_norm": 0.1397851518913742, "learning_rate": 1.4640647065339159e-08, "loss": 0.0004, "step": 20459 }, { "epoch": 4.6552901023890785, "grad_norm": 0.7787030637537353, "learning_rate": 1.4621426911349407e-08, "loss": 0.0019, "step": 20460 }, { "epoch": 4.655517633674631, "grad_norm": 0.04182859616725477, "learning_rate": 1.4602219232379839e-08, "loss": 0.0001, "step": 20461 }, { "epoch": 4.655745164960182, "grad_norm": 0.5280193872461979, "learning_rate": 1.4583024028822988e-08, "loss": 0.007, "step": 20462 }, { "epoch": 4.655972696245733, "grad_norm": 0.5691706709142974, "learning_rate": 1.4563841301071177e-08, "loss": 0.0031, "step": 20463 }, { "epoch": 4.6562002275312855, "grad_norm": 0.22340602953061284, "learning_rate": 1.4544671049516527e-08, "loss": 0.0011, "step": 20464 }, { "epoch": 4.656427758816838, "grad_norm": 0.06263053960112426, "learning_rate": 1.4525513274550803e-08, "loss": 0.0001, "step": 20465 }, { "epoch": 4.656655290102389, "grad_norm": 0.2766680012362003, "learning_rate": 1.4506367976565572e-08, "loss": 0.0017, "step": 20466 }, { "epoch": 4.656882821387941, "grad_norm": 0.03716789467963273, "learning_rate": 1.4487235155952184e-08, "loss": 0.0001, "step": 20467 }, { "epoch": 4.6571103526734925, "grad_norm": 0.045868033614830744, "learning_rate": 1.4468114813101439e-08, "loss": 0.0001, "step": 20468 }, { "epoch": 4.657337883959045, "grad_norm": 0.5918238899648376, "learning_rate": 1.4449006948404412e-08, "loss": 0.0033, "step": 20469 }, { "epoch": 4.657565415244596, "grad_norm": 0.1393781834316949, "learning_rate": 1.4429911562251417e-08, "loss": 0.0007, "step": 20470 }, { "epoch": 4.657792946530148, "grad_norm": 0.3623201118616007, "learning_rate": 1.4410828655032904e-08, "loss": 0.0014, "step": 20471 }, { "epoch": 4.6580204778156995, "grad_norm": 0.19778552530072804, "learning_rate": 1.4391758227138841e-08, "loss": 0.0013, "step": 20472 }, { "epoch": 4.658248009101252, "grad_norm": 0.8339125915452981, "learning_rate": 1.4372700278958915e-08, "loss": 0.0036, "step": 20473 }, { "epoch": 4.658475540386803, "grad_norm": 0.7427675690986483, "learning_rate": 1.4353654810882676e-08, "loss": 0.0051, "step": 20474 }, { "epoch": 4.658703071672355, "grad_norm": 0.4072299783380654, "learning_rate": 1.4334621823299394e-08, "loss": 0.0014, "step": 20475 }, { "epoch": 4.6589306029579065, "grad_norm": 0.38511676320744986, "learning_rate": 1.4315601316597997e-08, "loss": 0.0019, "step": 20476 }, { "epoch": 4.659158134243459, "grad_norm": 0.06802764124648948, "learning_rate": 1.4296593291167337e-08, "loss": 0.0001, "step": 20477 }, { "epoch": 4.65938566552901, "grad_norm": 0.6897178339712599, "learning_rate": 1.4277597747395927e-08, "loss": 0.0049, "step": 20478 }, { "epoch": 4.659613196814562, "grad_norm": 0.07638054382989538, "learning_rate": 1.4258614685671927e-08, "loss": 0.0001, "step": 20479 }, { "epoch": 4.6598407281001135, "grad_norm": 0.3191451311937652, "learning_rate": 1.4239644106383218e-08, "loss": 0.0018, "step": 20480 }, { "epoch": 4.660068259385666, "grad_norm": 1.0474162596735286, "learning_rate": 1.4220686009917758e-08, "loss": 0.0113, "step": 20481 }, { "epoch": 4.660295790671217, "grad_norm": 0.5091681918972695, "learning_rate": 1.4201740396662805e-08, "loss": 0.0017, "step": 20482 }, { "epoch": 4.660523321956769, "grad_norm": 0.2365539635665122, "learning_rate": 1.4182807267005618e-08, "loss": 0.0013, "step": 20483 }, { "epoch": 4.6607508532423205, "grad_norm": 0.12669486271951608, "learning_rate": 1.416388662133325e-08, "loss": 0.0004, "step": 20484 }, { "epoch": 4.660978384527873, "grad_norm": 0.2986986871030851, "learning_rate": 1.4144978460032335e-08, "loss": 0.0021, "step": 20485 }, { "epoch": 4.661205915813424, "grad_norm": 0.06422236772488789, "learning_rate": 1.412608278348937e-08, "loss": 0.0002, "step": 20486 }, { "epoch": 4.661433447098976, "grad_norm": 0.5035459712401471, "learning_rate": 1.4107199592090434e-08, "loss": 0.0035, "step": 20487 }, { "epoch": 4.6616609783845275, "grad_norm": 0.5381025559598492, "learning_rate": 1.4088328886221678e-08, "loss": 0.0049, "step": 20488 }, { "epoch": 4.66188850967008, "grad_norm": 0.23764189921561826, "learning_rate": 1.4069470666268486e-08, "loss": 0.0012, "step": 20489 }, { "epoch": 4.662116040955631, "grad_norm": 0.058012509529166735, "learning_rate": 1.4050624932616525e-08, "loss": 0.0001, "step": 20490 }, { "epoch": 4.662343572241183, "grad_norm": 0.32875815387634993, "learning_rate": 1.40317916856509e-08, "loss": 0.0022, "step": 20491 }, { "epoch": 4.6625711035267345, "grad_norm": 0.03774105642887296, "learning_rate": 1.4012970925756444e-08, "loss": 0.0001, "step": 20492 }, { "epoch": 4.662798634812287, "grad_norm": 0.030211615696434622, "learning_rate": 1.399416265331799e-08, "loss": 0.0001, "step": 20493 }, { "epoch": 4.663026166097838, "grad_norm": 0.28095970035810863, "learning_rate": 1.3975366868719808e-08, "loss": 0.0017, "step": 20494 }, { "epoch": 4.66325369738339, "grad_norm": 0.18085003367720964, "learning_rate": 1.3956583572346111e-08, "loss": 0.0005, "step": 20495 }, { "epoch": 4.663481228668942, "grad_norm": 0.17034753071678857, "learning_rate": 1.3937812764580686e-08, "loss": 0.0005, "step": 20496 }, { "epoch": 4.663708759954494, "grad_norm": 0.055553761106506436, "learning_rate": 1.3919054445807323e-08, "loss": 0.0001, "step": 20497 }, { "epoch": 4.663936291240045, "grad_norm": 0.2526118599591887, "learning_rate": 1.3900308616409327e-08, "loss": 0.0022, "step": 20498 }, { "epoch": 4.664163822525597, "grad_norm": 0.8042950614985493, "learning_rate": 1.3881575276769865e-08, "loss": 0.0014, "step": 20499 }, { "epoch": 4.664391353811149, "grad_norm": 0.03376582953819655, "learning_rate": 1.3862854427271824e-08, "loss": 0.0001, "step": 20500 }, { "epoch": 4.664618885096701, "grad_norm": 0.6693814884913094, "learning_rate": 1.3844146068297746e-08, "loss": 0.0035, "step": 20501 }, { "epoch": 4.664846416382253, "grad_norm": 0.07063813034833936, "learning_rate": 1.3825450200230032e-08, "loss": 0.0002, "step": 20502 }, { "epoch": 4.665073947667804, "grad_norm": 0.17804898480823067, "learning_rate": 1.3806766823450806e-08, "loss": 0.0005, "step": 20503 }, { "epoch": 4.6653014789533565, "grad_norm": 0.061948013211081225, "learning_rate": 1.3788095938341919e-08, "loss": 0.0002, "step": 20504 }, { "epoch": 4.665529010238908, "grad_norm": 0.5686676896181516, "learning_rate": 1.3769437545285078e-08, "loss": 0.0027, "step": 20505 }, { "epoch": 4.66575654152446, "grad_norm": 0.19534218631289677, "learning_rate": 1.3750791644661366e-08, "loss": 0.0004, "step": 20506 }, { "epoch": 4.665984072810011, "grad_norm": 0.26335123741213146, "learning_rate": 1.3732158236852145e-08, "loss": 0.001, "step": 20507 }, { "epoch": 4.6662116040955635, "grad_norm": 0.09667666657125296, "learning_rate": 1.3713537322238012e-08, "loss": 0.0002, "step": 20508 }, { "epoch": 4.666439135381115, "grad_norm": 0.7224885507664425, "learning_rate": 1.3694928901199708e-08, "loss": 0.0065, "step": 20509 }, { "epoch": 4.666666666666667, "grad_norm": 0.15694577145044245, "learning_rate": 1.3676332974117552e-08, "loss": 0.0005, "step": 20510 }, { "epoch": 4.666894197952218, "grad_norm": 0.4009973842571462, "learning_rate": 1.3657749541371445e-08, "loss": 0.0008, "step": 20511 }, { "epoch": 4.6671217292377705, "grad_norm": 0.04154670269949268, "learning_rate": 1.3639178603341435e-08, "loss": 0.0001, "step": 20512 }, { "epoch": 4.667349260523322, "grad_norm": 0.5389463077735329, "learning_rate": 1.3620620160406938e-08, "loss": 0.0075, "step": 20513 }, { "epoch": 4.667576791808874, "grad_norm": 0.20532258057630512, "learning_rate": 1.3602074212947237e-08, "loss": 0.0008, "step": 20514 }, { "epoch": 4.667804323094425, "grad_norm": 0.4018848400965431, "learning_rate": 1.3583540761341468e-08, "loss": 0.0017, "step": 20515 }, { "epoch": 4.6680318543799775, "grad_norm": 2.9148221408983073, "learning_rate": 1.356501980596829e-08, "loss": 0.0102, "step": 20516 }, { "epoch": 4.668259385665529, "grad_norm": 0.02252748284645352, "learning_rate": 1.3546511347206359e-08, "loss": 0.0001, "step": 20517 }, { "epoch": 4.668486916951081, "grad_norm": 0.1128829705936453, "learning_rate": 1.3528015385433912e-08, "loss": 0.0004, "step": 20518 }, { "epoch": 4.668714448236632, "grad_norm": 0.16916352895405534, "learning_rate": 1.3509531921029052e-08, "loss": 0.0009, "step": 20519 }, { "epoch": 4.6689419795221845, "grad_norm": 0.28630600112800914, "learning_rate": 1.349106095436932e-08, "loss": 0.0011, "step": 20520 }, { "epoch": 4.669169510807736, "grad_norm": 0.2218395669031437, "learning_rate": 1.3472602485832472e-08, "loss": 0.0011, "step": 20521 }, { "epoch": 4.669397042093288, "grad_norm": 0.23855935604485867, "learning_rate": 1.3454156515795639e-08, "loss": 0.0009, "step": 20522 }, { "epoch": 4.669624573378839, "grad_norm": 0.5282911783085822, "learning_rate": 1.3435723044635877e-08, "loss": 0.0014, "step": 20523 }, { "epoch": 4.6698521046643915, "grad_norm": 0.46466858860288773, "learning_rate": 1.3417302072729899e-08, "loss": 0.0046, "step": 20524 }, { "epoch": 4.670079635949943, "grad_norm": 0.056554940567939915, "learning_rate": 1.3398893600454141e-08, "loss": 0.0002, "step": 20525 }, { "epoch": 4.670307167235495, "grad_norm": 0.24336398007123275, "learning_rate": 1.3380497628185037e-08, "loss": 0.0012, "step": 20526 }, { "epoch": 4.670534698521046, "grad_norm": 0.5013713025850147, "learning_rate": 1.336211415629833e-08, "loss": 0.0033, "step": 20527 }, { "epoch": 4.6707622298065985, "grad_norm": 0.06638014085975821, "learning_rate": 1.33437431851699e-08, "loss": 0.0002, "step": 20528 }, { "epoch": 4.67098976109215, "grad_norm": 0.40730754492737453, "learning_rate": 1.3325384715175138e-08, "loss": 0.0017, "step": 20529 }, { "epoch": 4.671217292377702, "grad_norm": 0.22536399756945, "learning_rate": 1.3307038746689232e-08, "loss": 0.0005, "step": 20530 }, { "epoch": 4.671444823663253, "grad_norm": 0.2605505446218591, "learning_rate": 1.3288705280087299e-08, "loss": 0.0013, "step": 20531 }, { "epoch": 4.6716723549488055, "grad_norm": 0.06251676719450802, "learning_rate": 1.327038431574383e-08, "loss": 0.0002, "step": 20532 }, { "epoch": 4.671899886234357, "grad_norm": 0.5952412284778363, "learning_rate": 1.3252075854033459e-08, "loss": 0.002, "step": 20533 }, { "epoch": 4.672127417519909, "grad_norm": 0.7527922473884603, "learning_rate": 1.3233779895330257e-08, "loss": 0.0034, "step": 20534 }, { "epoch": 4.672354948805461, "grad_norm": 0.290635307376329, "learning_rate": 1.3215496440008232e-08, "loss": 0.001, "step": 20535 }, { "epoch": 4.6725824800910125, "grad_norm": 0.44524714837563756, "learning_rate": 1.3197225488440976e-08, "loss": 0.0024, "step": 20536 }, { "epoch": 4.672810011376564, "grad_norm": 0.2266394510407441, "learning_rate": 1.317896704100194e-08, "loss": 0.0009, "step": 20537 }, { "epoch": 4.673037542662116, "grad_norm": 0.36432353269696294, "learning_rate": 1.3160721098064432e-08, "loss": 0.0023, "step": 20538 }, { "epoch": 4.673265073947668, "grad_norm": 0.16907286675970004, "learning_rate": 1.3142487660001147e-08, "loss": 0.0005, "step": 20539 }, { "epoch": 4.6734926052332195, "grad_norm": 0.0859322148581991, "learning_rate": 1.3124266727184907e-08, "loss": 0.0002, "step": 20540 }, { "epoch": 4.673720136518772, "grad_norm": 0.022414811941389653, "learning_rate": 1.3106058299988122e-08, "loss": 0.0001, "step": 20541 }, { "epoch": 4.673947667804323, "grad_norm": 0.695909183569883, "learning_rate": 1.308786237878272e-08, "loss": 0.0062, "step": 20542 }, { "epoch": 4.674175199089875, "grad_norm": 0.04828152525931738, "learning_rate": 1.3069678963940832e-08, "loss": 0.0002, "step": 20543 }, { "epoch": 4.6744027303754265, "grad_norm": 0.29892657436384557, "learning_rate": 1.3051508055833967e-08, "loss": 0.0009, "step": 20544 }, { "epoch": 4.674630261660979, "grad_norm": 0.19962143953646944, "learning_rate": 1.3033349654833633e-08, "loss": 0.001, "step": 20545 }, { "epoch": 4.67485779294653, "grad_norm": 0.3096186521752422, "learning_rate": 1.3015203761310782e-08, "loss": 0.0028, "step": 20546 }, { "epoch": 4.675085324232082, "grad_norm": 0.39860710016899015, "learning_rate": 1.2997070375636439e-08, "loss": 0.0037, "step": 20547 }, { "epoch": 4.6753128555176335, "grad_norm": 0.0528610902761315, "learning_rate": 1.297894949818114e-08, "loss": 0.0001, "step": 20548 }, { "epoch": 4.675540386803186, "grad_norm": 0.3864942543927512, "learning_rate": 1.2960841129315213e-08, "loss": 0.0023, "step": 20549 }, { "epoch": 4.675767918088737, "grad_norm": 0.12138654772650147, "learning_rate": 1.294274526940878e-08, "loss": 0.0009, "step": 20550 }, { "epoch": 4.675995449374289, "grad_norm": 0.14430583443194853, "learning_rate": 1.2924661918831683e-08, "loss": 0.0008, "step": 20551 }, { "epoch": 4.6762229806598405, "grad_norm": 0.4585356128784613, "learning_rate": 1.2906591077953626e-08, "loss": 0.0034, "step": 20552 }, { "epoch": 4.676450511945393, "grad_norm": 0.36078392495044453, "learning_rate": 1.2888532747143758e-08, "loss": 0.0037, "step": 20553 }, { "epoch": 4.676678043230944, "grad_norm": 0.017424154099945872, "learning_rate": 1.28704869267713e-08, "loss": 0.0, "step": 20554 }, { "epoch": 4.676905574516496, "grad_norm": 0.1110804921482203, "learning_rate": 1.2852453617205051e-08, "loss": 0.0003, "step": 20555 }, { "epoch": 4.6771331058020476, "grad_norm": 0.3833671365204608, "learning_rate": 1.2834432818813538e-08, "loss": 0.0031, "step": 20556 }, { "epoch": 4.6773606370876, "grad_norm": 0.7860053815632544, "learning_rate": 1.2816424531965076e-08, "loss": 0.006, "step": 20557 }, { "epoch": 4.677588168373151, "grad_norm": 0.43457190409704854, "learning_rate": 1.2798428757027707e-08, "loss": 0.0025, "step": 20558 }, { "epoch": 4.677815699658703, "grad_norm": 0.08201174417882837, "learning_rate": 1.2780445494369326e-08, "loss": 0.0003, "step": 20559 }, { "epoch": 4.678043230944255, "grad_norm": 0.28710202101904436, "learning_rate": 1.276247474435742e-08, "loss": 0.0026, "step": 20560 }, { "epoch": 4.678270762229807, "grad_norm": 1.0724536525002895, "learning_rate": 1.2744516507359195e-08, "loss": 0.003, "step": 20561 }, { "epoch": 4.678498293515358, "grad_norm": 0.8184929607663229, "learning_rate": 1.2726570783741787e-08, "loss": 0.0028, "step": 20562 }, { "epoch": 4.67872582480091, "grad_norm": 0.19345531823006024, "learning_rate": 1.2708637573871919e-08, "loss": 0.0005, "step": 20563 }, { "epoch": 4.678953356086462, "grad_norm": 0.17428049600264575, "learning_rate": 1.269071687811617e-08, "loss": 0.0007, "step": 20564 }, { "epoch": 4.679180887372014, "grad_norm": 0.450935856619226, "learning_rate": 1.2672808696840775e-08, "loss": 0.0043, "step": 20565 }, { "epoch": 4.679408418657565, "grad_norm": 0.5579962187396018, "learning_rate": 1.2654913030411762e-08, "loss": 0.0011, "step": 20566 }, { "epoch": 4.679635949943117, "grad_norm": 0.5729307715039722, "learning_rate": 1.263702987919488e-08, "loss": 0.0017, "step": 20567 }, { "epoch": 4.679863481228669, "grad_norm": 0.23659128131003113, "learning_rate": 1.2619159243555599e-08, "loss": 0.0011, "step": 20568 }, { "epoch": 4.680091012514221, "grad_norm": 1.023442707492374, "learning_rate": 1.2601301123859183e-08, "loss": 0.0029, "step": 20569 }, { "epoch": 4.680318543799773, "grad_norm": 0.35893258083320256, "learning_rate": 1.258345552047055e-08, "loss": 0.003, "step": 20570 }, { "epoch": 4.680546075085324, "grad_norm": 0.8207185580935423, "learning_rate": 1.2565622433754616e-08, "loss": 0.0075, "step": 20571 }, { "epoch": 4.680773606370876, "grad_norm": 0.35281858833189134, "learning_rate": 1.2547801864075601e-08, "loss": 0.002, "step": 20572 }, { "epoch": 4.681001137656428, "grad_norm": 0.23666509926813584, "learning_rate": 1.2529993811798008e-08, "loss": 0.0006, "step": 20573 }, { "epoch": 4.68122866894198, "grad_norm": 0.36108726446842776, "learning_rate": 1.2512198277285642e-08, "loss": 0.0039, "step": 20574 }, { "epoch": 4.681456200227531, "grad_norm": 0.14931899602822338, "learning_rate": 1.2494415260902102e-08, "loss": 0.0006, "step": 20575 }, { "epoch": 4.681683731513083, "grad_norm": 0.052149774607600556, "learning_rate": 1.2476644763011053e-08, "loss": 0.0002, "step": 20576 }, { "epoch": 4.681911262798635, "grad_norm": 0.4868144209280675, "learning_rate": 1.245888678397554e-08, "loss": 0.0022, "step": 20577 }, { "epoch": 4.682138794084187, "grad_norm": 0.6415029971049485, "learning_rate": 1.2441141324158676e-08, "loss": 0.0061, "step": 20578 }, { "epoch": 4.682366325369738, "grad_norm": 0.3717300643490141, "learning_rate": 1.2423408383922947e-08, "loss": 0.0024, "step": 20579 }, { "epoch": 4.6825938566552905, "grad_norm": 0.10809940604668655, "learning_rate": 1.240568796363091e-08, "loss": 0.0003, "step": 20580 }, { "epoch": 4.682821387940842, "grad_norm": 0.013755614535299492, "learning_rate": 1.2387980063644709e-08, "loss": 0.0001, "step": 20581 }, { "epoch": 4.683048919226394, "grad_norm": 0.14511342266682412, "learning_rate": 1.2370284684326204e-08, "loss": 0.0008, "step": 20582 }, { "epoch": 4.683276450511945, "grad_norm": 0.1682092027366273, "learning_rate": 1.235260182603705e-08, "loss": 0.0005, "step": 20583 }, { "epoch": 4.6835039817974975, "grad_norm": 0.15275524703027374, "learning_rate": 1.2334931489138765e-08, "loss": 0.0006, "step": 20584 }, { "epoch": 4.683731513083049, "grad_norm": 0.033370435229524306, "learning_rate": 1.231727367399245e-08, "loss": 0.0001, "step": 20585 }, { "epoch": 4.683959044368601, "grad_norm": 0.2895479539535502, "learning_rate": 1.2299628380958994e-08, "loss": 0.0006, "step": 20586 }, { "epoch": 4.684186575654152, "grad_norm": 0.27390522124519595, "learning_rate": 1.2281995610399014e-08, "loss": 0.001, "step": 20587 }, { "epoch": 4.6844141069397045, "grad_norm": 0.17488813973776576, "learning_rate": 1.2264375362672914e-08, "loss": 0.0004, "step": 20588 }, { "epoch": 4.684641638225256, "grad_norm": 0.33711006198467525, "learning_rate": 1.2246767638140755e-08, "loss": 0.0034, "step": 20589 }, { "epoch": 4.684869169510808, "grad_norm": 0.314648612885255, "learning_rate": 1.2229172437162525e-08, "loss": 0.0013, "step": 20590 }, { "epoch": 4.685096700796359, "grad_norm": 0.045769894804796554, "learning_rate": 1.221158976009773e-08, "loss": 0.0001, "step": 20591 }, { "epoch": 4.6853242320819115, "grad_norm": 0.52640728298522, "learning_rate": 1.2194019607305804e-08, "loss": 0.003, "step": 20592 }, { "epoch": 4.685551763367463, "grad_norm": 0.6079271491564753, "learning_rate": 1.2176461979145835e-08, "loss": 0.0034, "step": 20593 }, { "epoch": 4.685779294653015, "grad_norm": 0.3651203644826718, "learning_rate": 1.2158916875976562e-08, "loss": 0.0032, "step": 20594 }, { "epoch": 4.686006825938566, "grad_norm": 0.1639191138599426, "learning_rate": 1.2141384298156796e-08, "loss": 0.0006, "step": 20595 }, { "epoch": 4.6862343572241185, "grad_norm": 0.5264637906308548, "learning_rate": 1.2123864246044656e-08, "loss": 0.0044, "step": 20596 }, { "epoch": 4.68646188850967, "grad_norm": 0.09581747505717274, "learning_rate": 1.2106356719998255e-08, "loss": 0.0005, "step": 20597 }, { "epoch": 4.686689419795222, "grad_norm": 0.2172798083288085, "learning_rate": 1.2088861720375502e-08, "loss": 0.0006, "step": 20598 }, { "epoch": 4.686916951080773, "grad_norm": 0.368093713595616, "learning_rate": 1.207137924753396e-08, "loss": 0.0024, "step": 20599 }, { "epoch": 4.6871444823663255, "grad_norm": 0.5727739144712346, "learning_rate": 1.205390930183091e-08, "loss": 0.0055, "step": 20600 }, { "epoch": 4.687372013651877, "grad_norm": 0.30594993074413873, "learning_rate": 1.203645188362329e-08, "loss": 0.0008, "step": 20601 }, { "epoch": 4.687599544937429, "grad_norm": 0.15650053591431504, "learning_rate": 1.2019006993268107e-08, "loss": 0.0005, "step": 20602 }, { "epoch": 4.68782707622298, "grad_norm": 0.9822479608198804, "learning_rate": 1.200157463112174e-08, "loss": 0.0034, "step": 20603 }, { "epoch": 4.6880546075085325, "grad_norm": 0.1288774990096922, "learning_rate": 1.1984154797540573e-08, "loss": 0.0005, "step": 20604 }, { "epoch": 4.688282138794084, "grad_norm": 0.8998442702565834, "learning_rate": 1.196674749288057e-08, "loss": 0.0051, "step": 20605 }, { "epoch": 4.688509670079636, "grad_norm": 0.30519867064969175, "learning_rate": 1.1949352717497558e-08, "loss": 0.0019, "step": 20606 }, { "epoch": 4.688737201365187, "grad_norm": 41.44328604051166, "learning_rate": 1.1931970471747017e-08, "loss": 0.1179, "step": 20607 }, { "epoch": 4.6889647326507395, "grad_norm": 0.5063778236167858, "learning_rate": 1.1914600755984219e-08, "loss": 0.0054, "step": 20608 }, { "epoch": 4.689192263936292, "grad_norm": 0.3807896963662319, "learning_rate": 1.1897243570564155e-08, "loss": 0.0022, "step": 20609 }, { "epoch": 4.689419795221843, "grad_norm": 0.09520241861752388, "learning_rate": 1.1879898915841611e-08, "loss": 0.0002, "step": 20610 }, { "epoch": 4.689647326507394, "grad_norm": 0.8271319756321345, "learning_rate": 1.186256679217096e-08, "loss": 0.0034, "step": 20611 }, { "epoch": 4.6898748577929465, "grad_norm": 0.03205382287256661, "learning_rate": 1.1845247199906706e-08, "loss": 0.0001, "step": 20612 }, { "epoch": 4.690102389078499, "grad_norm": 0.319014299588191, "learning_rate": 1.1827940139402528e-08, "loss": 0.0011, "step": 20613 }, { "epoch": 4.69032992036405, "grad_norm": 0.05101669439896346, "learning_rate": 1.1810645611012375e-08, "loss": 0.0001, "step": 20614 }, { "epoch": 4.690557451649601, "grad_norm": 0.3203793516764119, "learning_rate": 1.179336361508951e-08, "loss": 0.0017, "step": 20615 }, { "epoch": 4.690784982935154, "grad_norm": 1.2847652291973983, "learning_rate": 1.1776094151987328e-08, "loss": 0.0059, "step": 20616 }, { "epoch": 4.691012514220706, "grad_norm": 0.5559205370972959, "learning_rate": 1.1758837222058742e-08, "loss": 0.003, "step": 20617 }, { "epoch": 4.691240045506257, "grad_norm": 0.12563423987919933, "learning_rate": 1.1741592825656316e-08, "loss": 0.001, "step": 20618 }, { "epoch": 4.691467576791809, "grad_norm": 0.15858276556568351, "learning_rate": 1.1724360963132758e-08, "loss": 0.0007, "step": 20619 }, { "epoch": 4.691695108077361, "grad_norm": 0.6568616273747725, "learning_rate": 1.1707141634839937e-08, "loss": 0.0055, "step": 20620 }, { "epoch": 4.691922639362913, "grad_norm": 0.6450190122478658, "learning_rate": 1.1689934841130069e-08, "loss": 0.0024, "step": 20621 }, { "epoch": 4.692150170648464, "grad_norm": 0.263549989511167, "learning_rate": 1.167274058235468e-08, "loss": 0.001, "step": 20622 }, { "epoch": 4.692377701934016, "grad_norm": 0.11954171505562829, "learning_rate": 1.1655558858865227e-08, "loss": 0.0002, "step": 20623 }, { "epoch": 4.692605233219568, "grad_norm": 0.39268470803651606, "learning_rate": 1.1638389671012815e-08, "loss": 0.004, "step": 20624 }, { "epoch": 4.69283276450512, "grad_norm": 0.4773333249558129, "learning_rate": 1.1621233019148414e-08, "loss": 0.0029, "step": 20625 }, { "epoch": 4.693060295790671, "grad_norm": 0.6508136171995498, "learning_rate": 1.1604088903622718e-08, "loss": 0.0096, "step": 20626 }, { "epoch": 4.693287827076223, "grad_norm": 0.44720749740325644, "learning_rate": 1.1586957324786e-08, "loss": 0.0007, "step": 20627 }, { "epoch": 4.693515358361775, "grad_norm": 0.08562687864566818, "learning_rate": 1.1569838282988467e-08, "loss": 0.0001, "step": 20628 }, { "epoch": 4.693742889647327, "grad_norm": 0.6581632548766347, "learning_rate": 1.1552731778580045e-08, "loss": 0.0062, "step": 20629 }, { "epoch": 4.693970420932878, "grad_norm": 0.06130830537911831, "learning_rate": 1.153563781191018e-08, "loss": 0.0002, "step": 20630 }, { "epoch": 4.69419795221843, "grad_norm": 0.08997605345322153, "learning_rate": 1.1518556383328522e-08, "loss": 0.0003, "step": 20631 }, { "epoch": 4.694425483503982, "grad_norm": 0.2722482546199925, "learning_rate": 1.1501487493183957e-08, "loss": 0.001, "step": 20632 }, { "epoch": 4.694653014789534, "grad_norm": 0.35743754602576927, "learning_rate": 1.1484431141825445e-08, "loss": 0.0037, "step": 20633 }, { "epoch": 4.694880546075085, "grad_norm": 0.20504695030552658, "learning_rate": 1.1467387329601524e-08, "loss": 0.0015, "step": 20634 }, { "epoch": 4.695108077360637, "grad_norm": 0.3143391714637972, "learning_rate": 1.1450356056860664e-08, "loss": 0.0012, "step": 20635 }, { "epoch": 4.695335608646189, "grad_norm": 0.47218087352503557, "learning_rate": 1.1433337323950785e-08, "loss": 0.0023, "step": 20636 }, { "epoch": 4.695563139931741, "grad_norm": 1.2826605062125795, "learning_rate": 1.1416331131219802e-08, "loss": 0.0057, "step": 20637 }, { "epoch": 4.695790671217292, "grad_norm": 0.48872864371620195, "learning_rate": 1.1399337479015282e-08, "loss": 0.0026, "step": 20638 }, { "epoch": 4.696018202502844, "grad_norm": 0.137358000373225, "learning_rate": 1.1382356367684588e-08, "loss": 0.0003, "step": 20639 }, { "epoch": 4.696245733788396, "grad_norm": 0.04033123983822095, "learning_rate": 1.1365387797574734e-08, "loss": 0.0001, "step": 20640 }, { "epoch": 4.696473265073948, "grad_norm": 0.7584587156568288, "learning_rate": 1.1348431769032456e-08, "loss": 0.0034, "step": 20641 }, { "epoch": 4.696700796359499, "grad_norm": 0.7839585008583343, "learning_rate": 1.133148828240449e-08, "loss": 0.0058, "step": 20642 }, { "epoch": 4.696928327645051, "grad_norm": 0.3742898770845685, "learning_rate": 1.131455733803695e-08, "loss": 0.0017, "step": 20643 }, { "epoch": 4.697155858930603, "grad_norm": 1.082854008619993, "learning_rate": 1.1297638936275945e-08, "loss": 0.0017, "step": 20644 }, { "epoch": 4.697383390216155, "grad_norm": 0.3050499048835016, "learning_rate": 1.1280733077467312e-08, "loss": 0.0007, "step": 20645 }, { "epoch": 4.697610921501706, "grad_norm": 0.07697998083696897, "learning_rate": 1.1263839761956469e-08, "loss": 0.0001, "step": 20646 }, { "epoch": 4.697838452787258, "grad_norm": 0.12392595013770673, "learning_rate": 1.1246958990088833e-08, "loss": 0.0004, "step": 20647 }, { "epoch": 4.6980659840728105, "grad_norm": 1.1970390735036607, "learning_rate": 1.1230090762209267e-08, "loss": 0.0104, "step": 20648 }, { "epoch": 4.698293515358362, "grad_norm": 1.2841594662343623, "learning_rate": 1.1213235078662495e-08, "loss": 0.007, "step": 20649 }, { "epoch": 4.698521046643913, "grad_norm": 0.33233892994533787, "learning_rate": 1.1196391939793175e-08, "loss": 0.0021, "step": 20650 }, { "epoch": 4.698748577929465, "grad_norm": 0.16603585683518146, "learning_rate": 1.1179561345945471e-08, "loss": 0.0008, "step": 20651 }, { "epoch": 4.6989761092150175, "grad_norm": 0.38891934083262003, "learning_rate": 1.116274329746335e-08, "loss": 0.0032, "step": 20652 }, { "epoch": 4.699203640500569, "grad_norm": 0.40661151569278664, "learning_rate": 1.1145937794690559e-08, "loss": 0.0029, "step": 20653 }, { "epoch": 4.69943117178612, "grad_norm": 0.1476372000357519, "learning_rate": 1.1129144837970645e-08, "loss": 0.0004, "step": 20654 }, { "epoch": 4.699658703071672, "grad_norm": 0.45675577320642696, "learning_rate": 1.1112364427646738e-08, "loss": 0.0012, "step": 20655 }, { "epoch": 4.6998862343572245, "grad_norm": 0.38690227341958366, "learning_rate": 1.1095596564061826e-08, "loss": 0.0024, "step": 20656 }, { "epoch": 4.700113765642776, "grad_norm": 0.7883604817446679, "learning_rate": 1.107884124755855e-08, "loss": 0.0053, "step": 20657 }, { "epoch": 4.700341296928328, "grad_norm": 0.4797562702034537, "learning_rate": 1.1062098478479416e-08, "loss": 0.003, "step": 20658 }, { "epoch": 4.700568828213879, "grad_norm": 0.22850591195616032, "learning_rate": 1.104536825716665e-08, "loss": 0.0021, "step": 20659 }, { "epoch": 4.7007963594994315, "grad_norm": 0.5406218738912627, "learning_rate": 1.1028650583962131e-08, "loss": 0.001, "step": 20660 }, { "epoch": 4.701023890784983, "grad_norm": 0.07523289120589562, "learning_rate": 1.1011945459207598e-08, "loss": 0.0001, "step": 20661 }, { "epoch": 4.701251422070535, "grad_norm": 0.22103750512583598, "learning_rate": 1.0995252883244447e-08, "loss": 0.0008, "step": 20662 }, { "epoch": 4.701478953356086, "grad_norm": 0.14121535608939076, "learning_rate": 1.0978572856413794e-08, "loss": 0.0007, "step": 20663 }, { "epoch": 4.7017064846416385, "grad_norm": 1.064508723003361, "learning_rate": 1.0961905379056545e-08, "loss": 0.0102, "step": 20664 }, { "epoch": 4.70193401592719, "grad_norm": 0.33033323250754437, "learning_rate": 1.094525045151347e-08, "loss": 0.0019, "step": 20665 }, { "epoch": 4.702161547212742, "grad_norm": 0.35981936823070865, "learning_rate": 1.0928608074124853e-08, "loss": 0.0032, "step": 20666 }, { "epoch": 4.702389078498293, "grad_norm": 0.01917581977109395, "learning_rate": 1.0911978247230906e-08, "loss": 0.0001, "step": 20667 }, { "epoch": 4.7026166097838455, "grad_norm": 0.06693140615213149, "learning_rate": 1.0895360971171429e-08, "loss": 0.0003, "step": 20668 }, { "epoch": 4.702844141069397, "grad_norm": 0.21254701908278975, "learning_rate": 1.0878756246286078e-08, "loss": 0.0004, "step": 20669 }, { "epoch": 4.703071672354949, "grad_norm": 0.10562395675459575, "learning_rate": 1.0862164072914238e-08, "loss": 0.0004, "step": 20670 }, { "epoch": 4.7032992036405, "grad_norm": 0.04414231782035408, "learning_rate": 1.084558445139508e-08, "loss": 0.0001, "step": 20671 }, { "epoch": 4.7035267349260526, "grad_norm": 0.39114895250593645, "learning_rate": 1.082901738206736e-08, "loss": 0.0026, "step": 20672 }, { "epoch": 4.703754266211604, "grad_norm": 0.2051919239911792, "learning_rate": 1.0812462865269768e-08, "loss": 0.0006, "step": 20673 }, { "epoch": 4.703981797497156, "grad_norm": 0.24778216407162035, "learning_rate": 1.0795920901340573e-08, "loss": 0.0013, "step": 20674 }, { "epoch": 4.704209328782707, "grad_norm": 0.4364462669875013, "learning_rate": 1.0779391490617839e-08, "loss": 0.0015, "step": 20675 }, { "epoch": 4.70443686006826, "grad_norm": 0.3302074426647711, "learning_rate": 1.0762874633439557e-08, "loss": 0.0018, "step": 20676 }, { "epoch": 4.704664391353811, "grad_norm": 0.19649137472429878, "learning_rate": 1.07463703301431e-08, "loss": 0.0011, "step": 20677 }, { "epoch": 4.704891922639363, "grad_norm": 0.2502266338365557, "learning_rate": 1.0729878581065902e-08, "loss": 0.001, "step": 20678 }, { "epoch": 4.705119453924914, "grad_norm": 0.2997469657825439, "learning_rate": 1.071339938654506e-08, "loss": 0.0012, "step": 20679 }, { "epoch": 4.705346985210467, "grad_norm": 0.5859801375759544, "learning_rate": 1.0696932746917314e-08, "loss": 0.0007, "step": 20680 }, { "epoch": 4.705574516496018, "grad_norm": 0.2982838290486502, "learning_rate": 1.06804786625192e-08, "loss": 0.0014, "step": 20681 }, { "epoch": 4.70580204778157, "grad_norm": 0.425956369025294, "learning_rate": 1.066403713368698e-08, "loss": 0.0022, "step": 20682 }, { "epoch": 4.706029579067121, "grad_norm": 0.5109950099549658, "learning_rate": 1.0647608160756842e-08, "loss": 0.0029, "step": 20683 }, { "epoch": 4.706257110352674, "grad_norm": 0.09452230637011044, "learning_rate": 1.063119174406435e-08, "loss": 0.0002, "step": 20684 }, { "epoch": 4.706484641638225, "grad_norm": 0.8153864022317389, "learning_rate": 1.061478788394521e-08, "loss": 0.0069, "step": 20685 }, { "epoch": 4.706712172923777, "grad_norm": 0.3492609286012888, "learning_rate": 1.0598396580734569e-08, "loss": 0.0027, "step": 20686 }, { "epoch": 4.706939704209329, "grad_norm": 0.24339353970329916, "learning_rate": 1.0582017834767505e-08, "loss": 0.0018, "step": 20687 }, { "epoch": 4.707167235494881, "grad_norm": 0.07725188778656653, "learning_rate": 1.0565651646378753e-08, "loss": 0.0003, "step": 20688 }, { "epoch": 4.707394766780432, "grad_norm": 0.9072908898510555, "learning_rate": 1.0549298015902765e-08, "loss": 0.0026, "step": 20689 }, { "epoch": 4.707622298065984, "grad_norm": 0.2247820915602537, "learning_rate": 1.0532956943673858e-08, "loss": 0.0005, "step": 20690 }, { "epoch": 4.707849829351536, "grad_norm": 0.07817304038114245, "learning_rate": 1.0516628430025864e-08, "loss": 0.0002, "step": 20691 }, { "epoch": 4.708077360637088, "grad_norm": 0.08974730646394023, "learning_rate": 1.050031247529268e-08, "loss": 0.0004, "step": 20692 }, { "epoch": 4.708304891922639, "grad_norm": 0.18944115448370535, "learning_rate": 1.048400907980772e-08, "loss": 0.0007, "step": 20693 }, { "epoch": 4.708532423208191, "grad_norm": 0.11833830148949188, "learning_rate": 1.046771824390419e-08, "loss": 0.0005, "step": 20694 }, { "epoch": 4.708759954493743, "grad_norm": 0.09101612404483957, "learning_rate": 1.0451439967915019e-08, "loss": 0.0003, "step": 20695 }, { "epoch": 4.708987485779295, "grad_norm": 0.22675698000776517, "learning_rate": 1.0435174252172928e-08, "loss": 0.0008, "step": 20696 }, { "epoch": 4.709215017064847, "grad_norm": 0.04147407424557685, "learning_rate": 1.0418921097010287e-08, "loss": 0.0001, "step": 20697 }, { "epoch": 4.709442548350398, "grad_norm": 0.463277511425028, "learning_rate": 1.0402680502759401e-08, "loss": 0.0016, "step": 20698 }, { "epoch": 4.70967007963595, "grad_norm": 0.5949982311186113, "learning_rate": 1.0386452469752159e-08, "loss": 0.0058, "step": 20699 }, { "epoch": 4.709897610921502, "grad_norm": 0.7132777381175418, "learning_rate": 1.0370236998320238e-08, "loss": 0.0068, "step": 20700 }, { "epoch": 4.710125142207054, "grad_norm": 0.11046510030777695, "learning_rate": 1.035403408879497e-08, "loss": 0.0004, "step": 20701 }, { "epoch": 4.710352673492605, "grad_norm": 0.4670535961545176, "learning_rate": 1.033784374150769e-08, "loss": 0.0036, "step": 20702 }, { "epoch": 4.710580204778157, "grad_norm": 0.9853296412265695, "learning_rate": 1.0321665956789032e-08, "loss": 0.0075, "step": 20703 }, { "epoch": 4.710807736063709, "grad_norm": 0.3242990576573104, "learning_rate": 1.0305500734969846e-08, "loss": 0.0008, "step": 20704 }, { "epoch": 4.711035267349261, "grad_norm": 0.3909067271632503, "learning_rate": 1.028934807638049e-08, "loss": 0.0024, "step": 20705 }, { "epoch": 4.711262798634812, "grad_norm": 0.5278127458313318, "learning_rate": 1.027320798135105e-08, "loss": 0.0076, "step": 20706 }, { "epoch": 4.711490329920364, "grad_norm": 0.2259491614194741, "learning_rate": 1.0257080450211468e-08, "loss": 0.0013, "step": 20707 }, { "epoch": 4.711717861205916, "grad_norm": 0.2808716780612509, "learning_rate": 1.0240965483291274e-08, "loss": 0.0016, "step": 20708 }, { "epoch": 4.711945392491468, "grad_norm": 0.0452929117074373, "learning_rate": 1.0224863080919855e-08, "loss": 0.0001, "step": 20709 }, { "epoch": 4.712172923777019, "grad_norm": 0.038020599220247205, "learning_rate": 1.0208773243426326e-08, "loss": 0.0001, "step": 20710 }, { "epoch": 4.712400455062571, "grad_norm": 0.13267804664006178, "learning_rate": 1.019269597113952e-08, "loss": 0.0008, "step": 20711 }, { "epoch": 4.712627986348123, "grad_norm": 0.5682961737865041, "learning_rate": 1.0176631264388128e-08, "loss": 0.011, "step": 20712 }, { "epoch": 4.712855517633675, "grad_norm": 0.03737427743897267, "learning_rate": 1.0160579123500298e-08, "loss": 0.0, "step": 20713 }, { "epoch": 4.713083048919226, "grad_norm": 0.19409323837858378, "learning_rate": 1.0144539548804233e-08, "loss": 0.0011, "step": 20714 }, { "epoch": 4.713310580204778, "grad_norm": 0.06944973327925817, "learning_rate": 1.012851254062773e-08, "loss": 0.0002, "step": 20715 }, { "epoch": 4.71353811149033, "grad_norm": 0.21237452140707197, "learning_rate": 1.0112498099298374e-08, "loss": 0.0002, "step": 20716 }, { "epoch": 4.713765642775882, "grad_norm": 0.248719751699955, "learning_rate": 1.0096496225143401e-08, "loss": 0.0013, "step": 20717 }, { "epoch": 4.713993174061433, "grad_norm": 0.6669041184048099, "learning_rate": 1.0080506918489913e-08, "loss": 0.0027, "step": 20718 }, { "epoch": 4.714220705346985, "grad_norm": 0.44369526595181547, "learning_rate": 1.0064530179664731e-08, "loss": 0.0014, "step": 20719 }, { "epoch": 4.714448236632537, "grad_norm": 0.0991782142638519, "learning_rate": 1.0048566008994329e-08, "loss": 0.0004, "step": 20720 }, { "epoch": 4.714675767918089, "grad_norm": 0.2924764636278976, "learning_rate": 1.0032614406805044e-08, "loss": 0.0022, "step": 20721 }, { "epoch": 4.71490329920364, "grad_norm": 0.376940393457991, "learning_rate": 1.0016675373422796e-08, "loss": 0.0011, "step": 20722 }, { "epoch": 4.715130830489192, "grad_norm": 0.5919987315304284, "learning_rate": 1.0000748909173435e-08, "loss": 0.0021, "step": 20723 }, { "epoch": 4.715358361774744, "grad_norm": 0.2832430684551865, "learning_rate": 9.984835014382466e-09, "loss": 0.0007, "step": 20724 }, { "epoch": 4.715585893060296, "grad_norm": 0.6022374464429985, "learning_rate": 9.968933689375044e-09, "loss": 0.0037, "step": 20725 }, { "epoch": 4.715813424345848, "grad_norm": 0.12252560993103866, "learning_rate": 9.953044934476325e-09, "loss": 0.0005, "step": 20726 }, { "epoch": 4.716040955631399, "grad_norm": 0.42010555199333155, "learning_rate": 9.937168750010912e-09, "loss": 0.0017, "step": 20727 }, { "epoch": 4.716268486916951, "grad_norm": 0.6662645929953557, "learning_rate": 9.921305136303405e-09, "loss": 0.0031, "step": 20728 }, { "epoch": 4.716496018202503, "grad_norm": 0.4208742712901909, "learning_rate": 9.90545409367792e-09, "loss": 0.0052, "step": 20729 }, { "epoch": 4.716723549488055, "grad_norm": 0.5447255002497753, "learning_rate": 9.889615622458507e-09, "loss": 0.0051, "step": 20730 }, { "epoch": 4.716951080773606, "grad_norm": 0.2524291838933747, "learning_rate": 9.873789722968722e-09, "loss": 0.0007, "step": 20731 }, { "epoch": 4.717178612059158, "grad_norm": 0.6236314047446339, "learning_rate": 9.857976395532196e-09, "loss": 0.0087, "step": 20732 }, { "epoch": 4.71740614334471, "grad_norm": 0.36086479731530663, "learning_rate": 9.842175640472074e-09, "loss": 0.0032, "step": 20733 }, { "epoch": 4.717633674630262, "grad_norm": 0.2525496554364515, "learning_rate": 9.826387458111153e-09, "loss": 0.0011, "step": 20734 }, { "epoch": 4.717861205915813, "grad_norm": 0.07381858273415323, "learning_rate": 9.81061184877237e-09, "loss": 0.0002, "step": 20735 }, { "epoch": 4.718088737201366, "grad_norm": 0.3241879886448631, "learning_rate": 9.794848812778035e-09, "loss": 0.0008, "step": 20736 }, { "epoch": 4.718316268486917, "grad_norm": 0.04289587091561783, "learning_rate": 9.779098350450183e-09, "loss": 0.0001, "step": 20737 }, { "epoch": 4.718543799772469, "grad_norm": 2.0877786575751287, "learning_rate": 9.763360462110848e-09, "loss": 0.044, "step": 20738 }, { "epoch": 4.71877133105802, "grad_norm": 0.3015209439406196, "learning_rate": 9.74763514808172e-09, "loss": 0.001, "step": 20739 }, { "epoch": 4.718998862343573, "grad_norm": 0.3582665699084012, "learning_rate": 9.731922408684133e-09, "loss": 0.0024, "step": 20740 }, { "epoch": 4.719226393629124, "grad_norm": 0.2172239339137989, "learning_rate": 9.716222244239223e-09, "loss": 0.0009, "step": 20741 }, { "epoch": 4.719453924914676, "grad_norm": 0.18586062666352907, "learning_rate": 9.700534655067914e-09, "loss": 0.0004, "step": 20742 }, { "epoch": 4.719681456200227, "grad_norm": 0.7211594989886224, "learning_rate": 9.684859641490852e-09, "loss": 0.0049, "step": 20743 }, { "epoch": 4.71990898748578, "grad_norm": 0.25469269908331016, "learning_rate": 9.669197203828265e-09, "loss": 0.0024, "step": 20744 }, { "epoch": 4.720136518771331, "grad_norm": 0.13369379472808715, "learning_rate": 9.653547342400454e-09, "loss": 0.0005, "step": 20745 }, { "epoch": 4.720364050056883, "grad_norm": 1.2184066041246036, "learning_rate": 9.637910057527094e-09, "loss": 0.0085, "step": 20746 }, { "epoch": 4.720591581342434, "grad_norm": 0.324721819219466, "learning_rate": 9.622285349527929e-09, "loss": 0.0011, "step": 20747 }, { "epoch": 4.720819112627987, "grad_norm": 0.024278697042429787, "learning_rate": 9.606673218722217e-09, "loss": 0.0001, "step": 20748 }, { "epoch": 4.721046643913538, "grad_norm": 0.30297571307585713, "learning_rate": 9.59107366542908e-09, "loss": 0.0052, "step": 20749 }, { "epoch": 4.72127417519909, "grad_norm": 0.4021906448505913, "learning_rate": 9.575486689967356e-09, "loss": 0.002, "step": 20750 }, { "epoch": 4.721501706484641, "grad_norm": 0.33110643218476693, "learning_rate": 9.559912292655474e-09, "loss": 0.0007, "step": 20751 }, { "epoch": 4.721729237770194, "grad_norm": 0.07871871614748972, "learning_rate": 9.544350473811998e-09, "loss": 0.0003, "step": 20752 }, { "epoch": 4.721956769055745, "grad_norm": 0.14503329844300905, "learning_rate": 9.528801233754797e-09, "loss": 0.0004, "step": 20753 }, { "epoch": 4.722184300341297, "grad_norm": 0.24964738976833922, "learning_rate": 9.513264572801675e-09, "loss": 0.0019, "step": 20754 }, { "epoch": 4.722411831626848, "grad_norm": 1.0649984867682882, "learning_rate": 9.497740491270293e-09, "loss": 0.0088, "step": 20755 }, { "epoch": 4.722639362912401, "grad_norm": 0.44911823715314053, "learning_rate": 9.48222898947783e-09, "loss": 0.0024, "step": 20756 }, { "epoch": 4.722866894197952, "grad_norm": 0.21238975227637885, "learning_rate": 9.466730067741251e-09, "loss": 0.0009, "step": 20757 }, { "epoch": 4.723094425483504, "grad_norm": 0.11519176046462165, "learning_rate": 9.451243726377458e-09, "loss": 0.0005, "step": 20758 }, { "epoch": 4.723321956769055, "grad_norm": 0.060997533217770655, "learning_rate": 9.435769965703001e-09, "loss": 0.0002, "step": 20759 }, { "epoch": 4.723549488054608, "grad_norm": 0.7516926040935684, "learning_rate": 9.420308786033949e-09, "loss": 0.0049, "step": 20760 }, { "epoch": 4.723777019340159, "grad_norm": 0.13848124508678333, "learning_rate": 9.404860187686507e-09, "loss": 0.0006, "step": 20761 }, { "epoch": 4.724004550625711, "grad_norm": 0.22608069596016242, "learning_rate": 9.389424170976256e-09, "loss": 0.0008, "step": 20762 }, { "epoch": 4.724232081911262, "grad_norm": 0.33920165544948205, "learning_rate": 9.374000736218706e-09, "loss": 0.0013, "step": 20763 }, { "epoch": 4.724459613196815, "grad_norm": 0.38104226571500327, "learning_rate": 9.35858988372916e-09, "loss": 0.0022, "step": 20764 }, { "epoch": 4.724687144482367, "grad_norm": 0.39948593798864274, "learning_rate": 9.343191613822509e-09, "loss": 0.0018, "step": 20765 }, { "epoch": 4.724914675767918, "grad_norm": 0.1384411023666138, "learning_rate": 9.327805926813566e-09, "loss": 0.0005, "step": 20766 }, { "epoch": 4.725142207053469, "grad_norm": 0.10130096204966679, "learning_rate": 9.312432823016595e-09, "loss": 0.0004, "step": 20767 }, { "epoch": 4.725369738339022, "grad_norm": 0.1397321557252125, "learning_rate": 9.297072302746068e-09, "loss": 0.0005, "step": 20768 }, { "epoch": 4.725597269624574, "grad_norm": 0.1298798245539465, "learning_rate": 9.281724366315692e-09, "loss": 0.0003, "step": 20769 }, { "epoch": 4.725824800910125, "grad_norm": 1.005652648712713, "learning_rate": 9.266389014039242e-09, "loss": 0.0062, "step": 20770 }, { "epoch": 4.726052332195676, "grad_norm": 0.31761870139849013, "learning_rate": 9.251066246230151e-09, "loss": 0.0014, "step": 20771 }, { "epoch": 4.726279863481229, "grad_norm": 0.054909731179589645, "learning_rate": 9.235756063201498e-09, "loss": 0.0001, "step": 20772 }, { "epoch": 4.726507394766781, "grad_norm": 0.488403979316915, "learning_rate": 9.220458465266368e-09, "loss": 0.005, "step": 20773 }, { "epoch": 4.726734926052332, "grad_norm": 0.4905046275672793, "learning_rate": 9.20517345273736e-09, "loss": 0.0012, "step": 20774 }, { "epoch": 4.726962457337884, "grad_norm": 0.026035413538866632, "learning_rate": 9.18990102592672e-09, "loss": 0.0001, "step": 20775 }, { "epoch": 4.727189988623436, "grad_norm": 0.06533164370901294, "learning_rate": 9.17464118514677e-09, "loss": 0.0001, "step": 20776 }, { "epoch": 4.727417519908988, "grad_norm": 0.8400489713870187, "learning_rate": 9.159393930709276e-09, "loss": 0.0027, "step": 20777 }, { "epoch": 4.727645051194539, "grad_norm": 1.5288888869289712, "learning_rate": 9.14415926292593e-09, "loss": 0.0011, "step": 20778 }, { "epoch": 4.727872582480091, "grad_norm": 0.48855018862749916, "learning_rate": 9.128937182108083e-09, "loss": 0.0022, "step": 20779 }, { "epoch": 4.728100113765643, "grad_norm": 0.36142735477578125, "learning_rate": 9.113727688566872e-09, "loss": 0.0021, "step": 20780 }, { "epoch": 4.728327645051195, "grad_norm": 0.3640460754981053, "learning_rate": 9.098530782613094e-09, "loss": 0.0009, "step": 20781 }, { "epoch": 4.728555176336746, "grad_norm": 0.030453133233608137, "learning_rate": 9.083346464557399e-09, "loss": 0.0001, "step": 20782 }, { "epoch": 4.728782707622298, "grad_norm": 1.2244509848582974, "learning_rate": 9.068174734710097e-09, "loss": 0.0079, "step": 20783 }, { "epoch": 4.72901023890785, "grad_norm": 0.24601759172908003, "learning_rate": 9.053015593381286e-09, "loss": 0.0007, "step": 20784 }, { "epoch": 4.729237770193402, "grad_norm": 0.36622751206995924, "learning_rate": 9.037869040880721e-09, "loss": 0.0027, "step": 20785 }, { "epoch": 4.729465301478953, "grad_norm": 0.3897263602047981, "learning_rate": 9.02273507751808e-09, "loss": 0.0025, "step": 20786 }, { "epoch": 4.729692832764505, "grad_norm": 0.0790475763252533, "learning_rate": 9.007613703602633e-09, "loss": 0.0002, "step": 20787 }, { "epoch": 4.729920364050057, "grad_norm": 0.059703450482043986, "learning_rate": 8.992504919443437e-09, "loss": 0.0002, "step": 20788 }, { "epoch": 4.730147895335609, "grad_norm": 0.5117927711302025, "learning_rate": 8.977408725349204e-09, "loss": 0.0021, "step": 20789 }, { "epoch": 4.73037542662116, "grad_norm": 0.4726235703670547, "learning_rate": 8.962325121628646e-09, "loss": 0.0024, "step": 20790 }, { "epoch": 4.730602957906712, "grad_norm": 0.47727786924235804, "learning_rate": 8.947254108589848e-09, "loss": 0.0008, "step": 20791 }, { "epoch": 4.730830489192264, "grad_norm": 0.3687070840832346, "learning_rate": 8.932195686540967e-09, "loss": 0.0018, "step": 20792 }, { "epoch": 4.731058020477816, "grad_norm": 0.1380893279634358, "learning_rate": 8.917149855789745e-09, "loss": 0.0005, "step": 20793 }, { "epoch": 4.731285551763367, "grad_norm": 0.3038609414760209, "learning_rate": 8.902116616643711e-09, "loss": 0.0022, "step": 20794 }, { "epoch": 4.731513083048919, "grad_norm": 0.4769674660466118, "learning_rate": 8.88709596941005e-09, "loss": 0.0034, "step": 20795 }, { "epoch": 4.731740614334471, "grad_norm": 0.4742954586466615, "learning_rate": 8.872087914395742e-09, "loss": 0.0031, "step": 20796 }, { "epoch": 4.731968145620023, "grad_norm": 0.21411845981686076, "learning_rate": 8.85709245190762e-09, "loss": 0.0006, "step": 20797 }, { "epoch": 4.732195676905574, "grad_norm": 0.8290743530154907, "learning_rate": 8.84210958225211e-09, "loss": 0.0035, "step": 20798 }, { "epoch": 4.732423208191126, "grad_norm": 0.16305928853620832, "learning_rate": 8.827139305735422e-09, "loss": 0.0007, "step": 20799 }, { "epoch": 4.732650739476678, "grad_norm": 0.06276862842970607, "learning_rate": 8.812181622663562e-09, "loss": 0.0001, "step": 20800 }, { "epoch": 4.73287827076223, "grad_norm": 0.062086779479286376, "learning_rate": 8.79723653334219e-09, "loss": 0.0001, "step": 20801 }, { "epoch": 4.733105802047781, "grad_norm": 0.13073402821758287, "learning_rate": 8.782304038076824e-09, "loss": 0.0007, "step": 20802 }, { "epoch": 4.733333333333333, "grad_norm": 1.3714935107970823, "learning_rate": 8.767384137172569e-09, "loss": 0.0059, "step": 20803 }, { "epoch": 4.733560864618886, "grad_norm": 0.25061040135374996, "learning_rate": 8.752476830934457e-09, "loss": 0.0011, "step": 20804 }, { "epoch": 4.733788395904437, "grad_norm": 0.06808253735105005, "learning_rate": 8.737582119667034e-09, "loss": 0.0003, "step": 20805 }, { "epoch": 4.734015927189988, "grad_norm": 0.08495959446128694, "learning_rate": 8.722700003674853e-09, "loss": 0.0003, "step": 20806 }, { "epoch": 4.73424345847554, "grad_norm": 0.16033978903928392, "learning_rate": 8.707830483262042e-09, "loss": 0.0006, "step": 20807 }, { "epoch": 4.734470989761093, "grad_norm": 0.2864204782675777, "learning_rate": 8.692973558732456e-09, "loss": 0.0011, "step": 20808 }, { "epoch": 4.734698521046644, "grad_norm": 0.21625781331581473, "learning_rate": 8.678129230389812e-09, "loss": 0.0008, "step": 20809 }, { "epoch": 4.734926052332195, "grad_norm": 0.5028735428625126, "learning_rate": 8.663297498537407e-09, "loss": 0.0024, "step": 20810 }, { "epoch": 4.735153583617747, "grad_norm": 0.05106026887643513, "learning_rate": 8.648478363478541e-09, "loss": 0.0001, "step": 20811 }, { "epoch": 4.7353811149033, "grad_norm": 0.20367648696032956, "learning_rate": 8.633671825515888e-09, "loss": 0.0011, "step": 20812 }, { "epoch": 4.735608646188851, "grad_norm": 0.18678776229536773, "learning_rate": 8.618877884952192e-09, "loss": 0.0006, "step": 20813 }, { "epoch": 4.735836177474403, "grad_norm": 0.20658926386052703, "learning_rate": 8.604096542089782e-09, "loss": 0.0005, "step": 20814 }, { "epoch": 4.736063708759954, "grad_norm": 0.3997403395590268, "learning_rate": 8.589327797230707e-09, "loss": 0.0021, "step": 20815 }, { "epoch": 4.736291240045507, "grad_norm": 0.1696145956727619, "learning_rate": 8.574571650676947e-09, "loss": 0.0009, "step": 20816 }, { "epoch": 4.736518771331058, "grad_norm": 0.2985748656617097, "learning_rate": 8.559828102729997e-09, "loss": 0.002, "step": 20817 }, { "epoch": 4.73674630261661, "grad_norm": 0.027386963974234377, "learning_rate": 8.545097153691145e-09, "loss": 0.0001, "step": 20818 }, { "epoch": 4.736973833902161, "grad_norm": 0.8064165712434415, "learning_rate": 8.530378803861608e-09, "loss": 0.0033, "step": 20819 }, { "epoch": 4.737201365187714, "grad_norm": 0.06653060711366575, "learning_rate": 8.515673053542048e-09, "loss": 0.0002, "step": 20820 }, { "epoch": 4.737428896473265, "grad_norm": 0.37137147626398287, "learning_rate": 8.500979903033197e-09, "loss": 0.0025, "step": 20821 }, { "epoch": 4.737656427758817, "grad_norm": 1.2553080474119003, "learning_rate": 8.48629935263516e-09, "loss": 0.0101, "step": 20822 }, { "epoch": 4.737883959044368, "grad_norm": 0.10533886126632284, "learning_rate": 8.471631402648117e-09, "loss": 0.0003, "step": 20823 }, { "epoch": 4.738111490329921, "grad_norm": 0.5790391661001423, "learning_rate": 8.456976053371758e-09, "loss": 0.0069, "step": 20824 }, { "epoch": 4.738339021615472, "grad_norm": 0.04072219949811572, "learning_rate": 8.442333305105702e-09, "loss": 0.0001, "step": 20825 }, { "epoch": 4.738566552901024, "grad_norm": 0.4812111119554687, "learning_rate": 8.427703158149155e-09, "loss": 0.0049, "step": 20826 }, { "epoch": 4.738794084186575, "grad_norm": 0.3342286502685867, "learning_rate": 8.413085612801186e-09, "loss": 0.001, "step": 20827 }, { "epoch": 4.739021615472128, "grad_norm": 0.2545589545661375, "learning_rate": 8.398480669360512e-09, "loss": 0.0006, "step": 20828 }, { "epoch": 4.739249146757679, "grad_norm": 0.03913502546808314, "learning_rate": 8.383888328125645e-09, "loss": 0.0001, "step": 20829 }, { "epoch": 4.739476678043231, "grad_norm": 0.5322565203378635, "learning_rate": 8.369308589394818e-09, "loss": 0.0018, "step": 20830 }, { "epoch": 4.7397042093287824, "grad_norm": 0.1255291731433865, "learning_rate": 8.354741453465987e-09, "loss": 0.0003, "step": 20831 }, { "epoch": 4.739931740614335, "grad_norm": 0.7137530589675201, "learning_rate": 8.3401869206369e-09, "loss": 0.0032, "step": 20832 }, { "epoch": 4.740159271899886, "grad_norm": 0.37479615504911384, "learning_rate": 8.325644991205099e-09, "loss": 0.0015, "step": 20833 }, { "epoch": 4.740386803185438, "grad_norm": 0.047255670490603335, "learning_rate": 8.311115665467704e-09, "loss": 0.0002, "step": 20834 }, { "epoch": 4.7406143344709895, "grad_norm": 0.2852875128598326, "learning_rate": 8.296598943721701e-09, "loss": 0.0007, "step": 20835 }, { "epoch": 4.740841865756542, "grad_norm": 1.5492499958195864, "learning_rate": 8.282094826263729e-09, "loss": 0.0089, "step": 20836 }, { "epoch": 4.741069397042093, "grad_norm": 0.07070731111796356, "learning_rate": 8.267603313390354e-09, "loss": 0.0002, "step": 20837 }, { "epoch": 4.741296928327645, "grad_norm": 0.1829733511141776, "learning_rate": 8.253124405397591e-09, "loss": 0.001, "step": 20838 }, { "epoch": 4.7415244596131965, "grad_norm": 0.06257475723625548, "learning_rate": 8.238658102581454e-09, "loss": 0.0002, "step": 20839 }, { "epoch": 4.741751990898749, "grad_norm": 0.06771030817473202, "learning_rate": 8.224204405237677e-09, "loss": 0.0002, "step": 20840 }, { "epoch": 4.7419795221843, "grad_norm": 0.3995187339373487, "learning_rate": 8.20976331366151e-09, "loss": 0.0037, "step": 20841 }, { "epoch": 4.742207053469852, "grad_norm": 0.3598101467663611, "learning_rate": 8.195334828148272e-09, "loss": 0.0018, "step": 20842 }, { "epoch": 4.742434584755404, "grad_norm": 0.11012146006542868, "learning_rate": 8.180918948992728e-09, "loss": 0.0004, "step": 20843 }, { "epoch": 4.742662116040956, "grad_norm": 0.6453079972365221, "learning_rate": 8.166515676489503e-09, "loss": 0.0031, "step": 20844 }, { "epoch": 4.742889647326507, "grad_norm": 0.01117864736065881, "learning_rate": 8.152125010933016e-09, "loss": 0.0001, "step": 20845 }, { "epoch": 4.743117178612059, "grad_norm": 0.3333437482387164, "learning_rate": 8.137746952617404e-09, "loss": 0.0003, "step": 20846 }, { "epoch": 4.743344709897611, "grad_norm": 0.21067568236268638, "learning_rate": 8.1233815018366e-09, "loss": 0.0006, "step": 20847 }, { "epoch": 4.743572241183163, "grad_norm": 0.39411305339832364, "learning_rate": 8.109028658884049e-09, "loss": 0.0025, "step": 20848 }, { "epoch": 4.743799772468714, "grad_norm": 1.2424071807148944, "learning_rate": 8.094688424053199e-09, "loss": 0.006, "step": 20849 }, { "epoch": 4.744027303754266, "grad_norm": 0.3880040072838964, "learning_rate": 8.080360797637077e-09, "loss": 0.0023, "step": 20850 }, { "epoch": 4.744254835039818, "grad_norm": 0.2970944639787894, "learning_rate": 8.066045779928574e-09, "loss": 0.0005, "step": 20851 }, { "epoch": 4.74448236632537, "grad_norm": 0.11362502563858934, "learning_rate": 8.051743371220167e-09, "loss": 0.0004, "step": 20852 }, { "epoch": 4.744709897610922, "grad_norm": 0.16045589544181849, "learning_rate": 8.037453571804257e-09, "loss": 0.0006, "step": 20853 }, { "epoch": 4.744937428896473, "grad_norm": 0.09399911413091983, "learning_rate": 8.023176381972975e-09, "loss": 0.0004, "step": 20854 }, { "epoch": 4.745164960182025, "grad_norm": 0.5331482792568395, "learning_rate": 8.008911802017891e-09, "loss": 0.003, "step": 20855 }, { "epoch": 4.745392491467577, "grad_norm": 0.3007143213772626, "learning_rate": 7.994659832230785e-09, "loss": 0.0021, "step": 20856 }, { "epoch": 4.745620022753129, "grad_norm": 0.2650031023911042, "learning_rate": 7.980420472902886e-09, "loss": 0.0009, "step": 20857 }, { "epoch": 4.74584755403868, "grad_norm": 0.08257805725608999, "learning_rate": 7.96619372432507e-09, "loss": 0.0004, "step": 20858 }, { "epoch": 4.746075085324232, "grad_norm": 0.11821353315056078, "learning_rate": 7.951979586788214e-09, "loss": 0.0002, "step": 20859 }, { "epoch": 4.746302616609784, "grad_norm": 0.32416131543040055, "learning_rate": 7.937778060582852e-09, "loss": 0.0013, "step": 20860 }, { "epoch": 4.746530147895336, "grad_norm": 0.06561530026784142, "learning_rate": 7.923589145999236e-09, "loss": 0.0002, "step": 20861 }, { "epoch": 4.746757679180887, "grad_norm": 0.3944774508964515, "learning_rate": 7.909412843327344e-09, "loss": 0.0015, "step": 20862 }, { "epoch": 4.746985210466439, "grad_norm": 0.03766442598642422, "learning_rate": 7.895249152856804e-09, "loss": 0.0002, "step": 20863 }, { "epoch": 4.747212741751991, "grad_norm": 0.3725203310188034, "learning_rate": 7.881098074877317e-09, "loss": 0.0008, "step": 20864 }, { "epoch": 4.747440273037543, "grad_norm": 0.19860114982579655, "learning_rate": 7.866959609677885e-09, "loss": 0.0012, "step": 20865 }, { "epoch": 4.747667804323094, "grad_norm": 0.26587353029511296, "learning_rate": 7.852833757547654e-09, "loss": 0.0011, "step": 20866 }, { "epoch": 4.747895335608646, "grad_norm": 0.2419375735420785, "learning_rate": 7.838720518775142e-09, "loss": 0.0005, "step": 20867 }, { "epoch": 4.748122866894198, "grad_norm": 0.089817829226783, "learning_rate": 7.824619893649008e-09, "loss": 0.0006, "step": 20868 }, { "epoch": 4.74835039817975, "grad_norm": 0.48905316026521345, "learning_rate": 7.810531882457353e-09, "loss": 0.0018, "step": 20869 }, { "epoch": 4.748577929465301, "grad_norm": 0.3374003569632744, "learning_rate": 7.796456485488075e-09, "loss": 0.0021, "step": 20870 }, { "epoch": 4.748805460750853, "grad_norm": 0.10755121082918428, "learning_rate": 7.782393703028857e-09, "loss": 0.0002, "step": 20871 }, { "epoch": 4.749032992036405, "grad_norm": 0.1830674166095397, "learning_rate": 7.76834353536704e-09, "loss": 0.0009, "step": 20872 }, { "epoch": 4.749260523321957, "grad_norm": 0.06060946915664978, "learning_rate": 7.754305982790034e-09, "loss": 0.0002, "step": 20873 }, { "epoch": 4.749488054607508, "grad_norm": 0.008396511230255272, "learning_rate": 7.740281045584483e-09, "loss": 0.0, "step": 20874 }, { "epoch": 4.74971558589306, "grad_norm": 1.0851561030684547, "learning_rate": 7.726268724037173e-09, "loss": 0.0032, "step": 20875 }, { "epoch": 4.749943117178612, "grad_norm": 0.4656519039518487, "learning_rate": 7.71226901843447e-09, "loss": 0.0023, "step": 20876 }, { "epoch": 4.750170648464164, "grad_norm": 0.2519449849098866, "learning_rate": 7.698281929062398e-09, "loss": 0.001, "step": 20877 }, { "epoch": 4.750398179749715, "grad_norm": 0.29879242944507733, "learning_rate": 7.684307456206908e-09, "loss": 0.0016, "step": 20878 }, { "epoch": 4.750625711035267, "grad_norm": 0.39656251615095417, "learning_rate": 7.670345600153673e-09, "loss": 0.0009, "step": 20879 }, { "epoch": 4.750853242320819, "grad_norm": 0.09361831765293475, "learning_rate": 7.656396361187951e-09, "loss": 0.0003, "step": 20880 }, { "epoch": 4.751080773606371, "grad_norm": 0.23272958543004818, "learning_rate": 7.642459739594931e-09, "loss": 0.0007, "step": 20881 }, { "epoch": 4.751308304891923, "grad_norm": 0.24018138353541554, "learning_rate": 7.628535735659387e-09, "loss": 0.0012, "step": 20882 }, { "epoch": 4.751535836177474, "grad_norm": 0.8410377069590887, "learning_rate": 7.614624349665881e-09, "loss": 0.004, "step": 20883 }, { "epoch": 4.751763367463026, "grad_norm": 0.9977917174083155, "learning_rate": 7.600725581898769e-09, "loss": 0.0036, "step": 20884 }, { "epoch": 4.751990898748578, "grad_norm": 0.40289248535703576, "learning_rate": 7.58683943264206e-09, "loss": 0.0029, "step": 20885 }, { "epoch": 4.75221843003413, "grad_norm": 0.23413054822190704, "learning_rate": 7.572965902179694e-09, "loss": 0.0005, "step": 20886 }, { "epoch": 4.7524459613196814, "grad_norm": 0.4831204780978216, "learning_rate": 7.559104990795125e-09, "loss": 0.0041, "step": 20887 }, { "epoch": 4.752673492605233, "grad_norm": 0.9611650989920273, "learning_rate": 7.545256698771666e-09, "loss": 0.0042, "step": 20888 }, { "epoch": 4.752901023890785, "grad_norm": 0.06585773515364175, "learning_rate": 7.531421026392288e-09, "loss": 0.0002, "step": 20889 }, { "epoch": 4.753128555176337, "grad_norm": 0.2268500524680299, "learning_rate": 7.517597973939889e-09, "loss": 0.0014, "step": 20890 }, { "epoch": 4.7533560864618885, "grad_norm": 0.03817079850144627, "learning_rate": 7.50378754169688e-09, "loss": 0.0001, "step": 20891 }, { "epoch": 4.753583617747441, "grad_norm": 0.40276481347131743, "learning_rate": 7.489989729945538e-09, "loss": 0.0014, "step": 20892 }, { "epoch": 4.753811149032992, "grad_norm": 0.2689149770287437, "learning_rate": 7.476204538967927e-09, "loss": 0.001, "step": 20893 }, { "epoch": 4.754038680318544, "grad_norm": 1.3165721785593971, "learning_rate": 7.462431969045766e-09, "loss": 0.0058, "step": 20894 }, { "epoch": 4.7542662116040955, "grad_norm": 0.38408572049793877, "learning_rate": 7.448672020460568e-09, "loss": 0.0006, "step": 20895 }, { "epoch": 4.754493742889648, "grad_norm": 0.16621209818619398, "learning_rate": 7.434924693493495e-09, "loss": 0.0006, "step": 20896 }, { "epoch": 4.754721274175199, "grad_norm": 1.5202929110576313, "learning_rate": 7.421189988425503e-09, "loss": 0.0074, "step": 20897 }, { "epoch": 4.754948805460751, "grad_norm": 0.2302834622080307, "learning_rate": 7.4074679055373414e-09, "loss": 0.0007, "step": 20898 }, { "epoch": 4.7551763367463025, "grad_norm": 0.28518114173444875, "learning_rate": 7.393758445109478e-09, "loss": 0.0026, "step": 20899 }, { "epoch": 4.755403868031855, "grad_norm": 0.44643777275158586, "learning_rate": 7.380061607422176e-09, "loss": 0.0013, "step": 20900 }, { "epoch": 4.755631399317406, "grad_norm": 0.29836616206030364, "learning_rate": 7.366377392755211e-09, "loss": 0.0015, "step": 20901 }, { "epoch": 4.755858930602958, "grad_norm": 0.37026765212817514, "learning_rate": 7.35270580138836e-09, "loss": 0.0025, "step": 20902 }, { "epoch": 4.7560864618885095, "grad_norm": 0.6680168704016158, "learning_rate": 7.339046833601051e-09, "loss": 0.003, "step": 20903 }, { "epoch": 4.756313993174062, "grad_norm": 0.19443079676539407, "learning_rate": 7.325400489672438e-09, "loss": 0.0013, "step": 20904 }, { "epoch": 4.756541524459613, "grad_norm": 0.2574609823737156, "learning_rate": 7.311766769881392e-09, "loss": 0.0014, "step": 20905 }, { "epoch": 4.756769055745165, "grad_norm": 0.10763168311961312, "learning_rate": 7.298145674506651e-09, "loss": 0.0003, "step": 20906 }, { "epoch": 4.7569965870307165, "grad_norm": 1.1546886818625832, "learning_rate": 7.2845372038265335e-09, "loss": 0.0043, "step": 20907 }, { "epoch": 4.757224118316269, "grad_norm": 0.7029071749566809, "learning_rate": 7.270941358119079e-09, "loss": 0.0044, "step": 20908 }, { "epoch": 4.75745164960182, "grad_norm": 0.6842198537834296, "learning_rate": 7.257358137662401e-09, "loss": 0.0014, "step": 20909 }, { "epoch": 4.757679180887372, "grad_norm": 0.08110165599438313, "learning_rate": 7.243787542733915e-09, "loss": 0.0003, "step": 20910 }, { "epoch": 4.7579067121729235, "grad_norm": 0.026795110732719826, "learning_rate": 7.230229573611039e-09, "loss": 0.0001, "step": 20911 }, { "epoch": 4.758134243458476, "grad_norm": 0.1611609217694517, "learning_rate": 7.216684230570911e-09, "loss": 0.0007, "step": 20912 }, { "epoch": 4.758361774744027, "grad_norm": 0.08728548223302471, "learning_rate": 7.203151513890325e-09, "loss": 0.0003, "step": 20913 }, { "epoch": 4.758589306029579, "grad_norm": 0.1573095707356906, "learning_rate": 7.189631423845864e-09, "loss": 0.0005, "step": 20914 }, { "epoch": 4.7588168373151305, "grad_norm": 0.31605987312002215, "learning_rate": 7.1761239607139045e-09, "loss": 0.0019, "step": 20915 }, { "epoch": 4.759044368600683, "grad_norm": 0.4800858001961839, "learning_rate": 7.162629124770476e-09, "loss": 0.0033, "step": 20916 }, { "epoch": 4.759271899886234, "grad_norm": 0.09348973465901267, "learning_rate": 7.14914691629133e-09, "loss": 0.0002, "step": 20917 }, { "epoch": 4.759499431171786, "grad_norm": 0.11254346107217307, "learning_rate": 7.135677335552149e-09, "loss": 0.0006, "step": 20918 }, { "epoch": 4.7597269624573375, "grad_norm": 0.23280902792469707, "learning_rate": 7.122220382828129e-09, "loss": 0.0003, "step": 20919 }, { "epoch": 4.75995449374289, "grad_norm": 0.4319241710551947, "learning_rate": 7.108776058394329e-09, "loss": 0.0021, "step": 20920 }, { "epoch": 4.760182025028442, "grad_norm": 0.059478095140875924, "learning_rate": 7.095344362525528e-09, "loss": 0.0001, "step": 20921 }, { "epoch": 4.760409556313993, "grad_norm": 0.15948434302086278, "learning_rate": 7.081925295496297e-09, "loss": 0.0004, "step": 20922 }, { "epoch": 4.7606370875995445, "grad_norm": 0.7090918153147184, "learning_rate": 7.068518857580794e-09, "loss": 0.003, "step": 20923 }, { "epoch": 4.760864618885097, "grad_norm": 0.4886470145862555, "learning_rate": 7.055125049053105e-09, "loss": 0.002, "step": 20924 }, { "epoch": 4.761092150170649, "grad_norm": 0.1218501180797022, "learning_rate": 7.041743870186968e-09, "loss": 0.0005, "step": 20925 }, { "epoch": 4.7613196814562, "grad_norm": 0.8637874667022776, "learning_rate": 7.028375321255845e-09, "loss": 0.002, "step": 20926 }, { "epoch": 4.7615472127417515, "grad_norm": 0.32279314498066103, "learning_rate": 7.015019402532921e-09, "loss": 0.0013, "step": 20927 }, { "epoch": 4.761774744027304, "grad_norm": 0.21381894836593374, "learning_rate": 7.001676114291242e-09, "loss": 0.001, "step": 20928 }, { "epoch": 4.762002275312856, "grad_norm": 0.7790708339426434, "learning_rate": 6.9883454568035055e-09, "loss": 0.005, "step": 20929 }, { "epoch": 4.762229806598407, "grad_norm": 0.30434284307831133, "learning_rate": 6.975027430342132e-09, "loss": 0.0016, "step": 20930 }, { "epoch": 4.762457337883959, "grad_norm": 0.08578255316939114, "learning_rate": 6.961722035179336e-09, "loss": 0.0003, "step": 20931 }, { "epoch": 4.762684869169511, "grad_norm": 0.19613396320491674, "learning_rate": 6.94842927158712e-09, "loss": 0.0007, "step": 20932 }, { "epoch": 4.762912400455063, "grad_norm": 0.3851015625359936, "learning_rate": 6.935149139837005e-09, "loss": 0.0017, "step": 20933 }, { "epoch": 4.763139931740614, "grad_norm": 0.2754406801711009, "learning_rate": 6.921881640200509e-09, "loss": 0.002, "step": 20934 }, { "epoch": 4.763367463026166, "grad_norm": 1.064699196036219, "learning_rate": 6.908626772948873e-09, "loss": 0.0034, "step": 20935 }, { "epoch": 4.763594994311718, "grad_norm": 0.05500363231963031, "learning_rate": 6.895384538352923e-09, "loss": 0.0002, "step": 20936 }, { "epoch": 4.76382252559727, "grad_norm": 0.07027476518773479, "learning_rate": 6.8821549366832745e-09, "loss": 0.0003, "step": 20937 }, { "epoch": 4.764050056882821, "grad_norm": 0.3320345049494743, "learning_rate": 6.868937968210407e-09, "loss": 0.0023, "step": 20938 }, { "epoch": 4.764277588168373, "grad_norm": 0.8625535585143517, "learning_rate": 6.855733633204242e-09, "loss": 0.0053, "step": 20939 }, { "epoch": 4.764505119453925, "grad_norm": 0.8439055550137289, "learning_rate": 6.842541931934912e-09, "loss": 0.0052, "step": 20940 }, { "epoch": 4.764732650739477, "grad_norm": 0.1302759083781576, "learning_rate": 6.8293628646719235e-09, "loss": 0.0004, "step": 20941 }, { "epoch": 4.764960182025028, "grad_norm": 0.1935634140033281, "learning_rate": 6.816196431684643e-09, "loss": 0.0005, "step": 20942 }, { "epoch": 4.76518771331058, "grad_norm": 0.2552024134097519, "learning_rate": 6.8030426332420935e-09, "loss": 0.002, "step": 20943 }, { "epoch": 4.765415244596132, "grad_norm": 0.24880443382676398, "learning_rate": 6.789901469613294e-09, "loss": 0.0011, "step": 20944 }, { "epoch": 4.765642775881684, "grad_norm": 0.11650488700778612, "learning_rate": 6.776772941066573e-09, "loss": 0.0002, "step": 20945 }, { "epoch": 4.765870307167235, "grad_norm": 0.41460791345893805, "learning_rate": 6.763657047870464e-09, "loss": 0.0026, "step": 20946 }, { "epoch": 4.7660978384527874, "grad_norm": 0.15136480763574772, "learning_rate": 6.750553790292949e-09, "loss": 0.0006, "step": 20947 }, { "epoch": 4.766325369738339, "grad_norm": 0.21760060213823582, "learning_rate": 6.737463168601868e-09, "loss": 0.001, "step": 20948 }, { "epoch": 4.766552901023891, "grad_norm": 0.4177096938654218, "learning_rate": 6.724385183064716e-09, "loss": 0.0024, "step": 20949 }, { "epoch": 4.766780432309442, "grad_norm": 0.9182785084659726, "learning_rate": 6.711319833948848e-09, "loss": 0.0048, "step": 20950 }, { "epoch": 4.7670079635949945, "grad_norm": 0.23073289708831504, "learning_rate": 6.6982671215212044e-09, "loss": 0.001, "step": 20951 }, { "epoch": 4.767235494880546, "grad_norm": 0.338906484441219, "learning_rate": 6.685227046048654e-09, "loss": 0.002, "step": 20952 }, { "epoch": 4.767463026166098, "grad_norm": 0.07984616229474678, "learning_rate": 6.67219960779765e-09, "loss": 0.0003, "step": 20953 }, { "epoch": 4.767690557451649, "grad_norm": 0.4748217211822993, "learning_rate": 6.659184807034508e-09, "loss": 0.0027, "step": 20954 }, { "epoch": 4.7679180887372015, "grad_norm": 0.4315386495826598, "learning_rate": 6.646182644025126e-09, "loss": 0.0008, "step": 20955 }, { "epoch": 4.768145620022753, "grad_norm": 0.1609377043981229, "learning_rate": 6.633193119035403e-09, "loss": 0.0005, "step": 20956 }, { "epoch": 4.768373151308305, "grad_norm": 0.8262100282364756, "learning_rate": 6.620216232330681e-09, "loss": 0.0036, "step": 20957 }, { "epoch": 4.768600682593856, "grad_norm": 0.8867823995739806, "learning_rate": 6.6072519841761665e-09, "loss": 0.0156, "step": 20958 }, { "epoch": 4.7688282138794085, "grad_norm": 0.2367572571873835, "learning_rate": 6.594300374836923e-09, "loss": 0.0008, "step": 20959 }, { "epoch": 4.769055745164961, "grad_norm": 0.17807676034485792, "learning_rate": 6.581361404577671e-09, "loss": 0.0007, "step": 20960 }, { "epoch": 4.769283276450512, "grad_norm": 0.2080257601517507, "learning_rate": 6.568435073662782e-09, "loss": 0.0005, "step": 20961 }, { "epoch": 4.769510807736063, "grad_norm": 0.023811570181297908, "learning_rate": 6.555521382356489e-09, "loss": 0.0, "step": 20962 }, { "epoch": 4.7697383390216155, "grad_norm": 0.48906890022569344, "learning_rate": 6.542620330922677e-09, "loss": 0.0021, "step": 20963 }, { "epoch": 4.769965870307168, "grad_norm": 0.24709683384128517, "learning_rate": 6.529731919625165e-09, "loss": 0.001, "step": 20964 }, { "epoch": 4.770193401592719, "grad_norm": 0.6281397457723042, "learning_rate": 6.516856148727144e-09, "loss": 0.0059, "step": 20965 }, { "epoch": 4.77042093287827, "grad_norm": 0.2010780156167244, "learning_rate": 6.503993018491875e-09, "loss": 0.0008, "step": 20966 }, { "epoch": 4.7706484641638225, "grad_norm": 0.34736149252483584, "learning_rate": 6.4911425291822735e-09, "loss": 0.0018, "step": 20967 }, { "epoch": 4.770875995449375, "grad_norm": 0.7847820142892468, "learning_rate": 6.478304681061045e-09, "loss": 0.0028, "step": 20968 }, { "epoch": 4.771103526734926, "grad_norm": 0.46464176811038005, "learning_rate": 6.465479474390482e-09, "loss": 0.0044, "step": 20969 }, { "epoch": 4.771331058020478, "grad_norm": 0.12000908286917074, "learning_rate": 6.4526669094326644e-09, "loss": 0.0005, "step": 20970 }, { "epoch": 4.7715585893060295, "grad_norm": 0.17409676613347655, "learning_rate": 6.439866986449605e-09, "loss": 0.0008, "step": 20971 }, { "epoch": 4.771786120591582, "grad_norm": 0.2548347507088588, "learning_rate": 6.427079705702763e-09, "loss": 0.0013, "step": 20972 }, { "epoch": 4.772013651877133, "grad_norm": 0.1989927406121376, "learning_rate": 6.414305067453524e-09, "loss": 0.0004, "step": 20973 }, { "epoch": 4.772241183162685, "grad_norm": 0.2135930509959862, "learning_rate": 6.4015430719630704e-09, "loss": 0.0007, "step": 20974 }, { "epoch": 4.7724687144482365, "grad_norm": 0.3167687137086354, "learning_rate": 6.388793719492164e-09, "loss": 0.0013, "step": 20975 }, { "epoch": 4.772696245733789, "grad_norm": 0.13222188474647853, "learning_rate": 6.376057010301362e-09, "loss": 0.0004, "step": 20976 }, { "epoch": 4.77292377701934, "grad_norm": 0.20015867024643974, "learning_rate": 6.3633329446509405e-09, "loss": 0.0011, "step": 20977 }, { "epoch": 4.773151308304892, "grad_norm": 0.38755782214048984, "learning_rate": 6.350621522801109e-09, "loss": 0.0021, "step": 20978 }, { "epoch": 4.7733788395904435, "grad_norm": 0.28194634511878675, "learning_rate": 6.337922745011521e-09, "loss": 0.0016, "step": 20979 }, { "epoch": 4.773606370875996, "grad_norm": 0.29890963412230553, "learning_rate": 6.32523661154176e-09, "loss": 0.0012, "step": 20980 }, { "epoch": 4.773833902161547, "grad_norm": 0.5117148731860683, "learning_rate": 6.312563122651133e-09, "loss": 0.0019, "step": 20981 }, { "epoch": 4.774061433447099, "grad_norm": 0.2872926601281823, "learning_rate": 6.299902278598599e-09, "loss": 0.0016, "step": 20982 }, { "epoch": 4.7742889647326505, "grad_norm": 0.15277781032147392, "learning_rate": 6.287254079643049e-09, "loss": 0.0005, "step": 20983 }, { "epoch": 4.774516496018203, "grad_norm": 0.30332231256172304, "learning_rate": 6.274618526042886e-09, "loss": 0.0005, "step": 20984 }, { "epoch": 4.774744027303754, "grad_norm": 0.10067957226284346, "learning_rate": 6.261995618056377e-09, "loss": 0.0003, "step": 20985 }, { "epoch": 4.774971558589306, "grad_norm": 0.28926169265599133, "learning_rate": 6.24938535594144e-09, "loss": 0.0017, "step": 20986 }, { "epoch": 4.7751990898748575, "grad_norm": 0.01766780526603568, "learning_rate": 6.236787739955924e-09, "loss": 0.0, "step": 20987 }, { "epoch": 4.77542662116041, "grad_norm": 1.056060224411013, "learning_rate": 6.2242027703572624e-09, "loss": 0.001, "step": 20988 }, { "epoch": 4.775654152445961, "grad_norm": 0.3707841235788522, "learning_rate": 6.21163044740268e-09, "loss": 0.0027, "step": 20989 }, { "epoch": 4.775881683731513, "grad_norm": 0.07890595185756714, "learning_rate": 6.199070771349055e-09, "loss": 0.0005, "step": 20990 }, { "epoch": 4.7761092150170645, "grad_norm": 0.33176092078333447, "learning_rate": 6.186523742453196e-09, "loss": 0.001, "step": 20991 }, { "epoch": 4.776336746302617, "grad_norm": 0.6199182871608194, "learning_rate": 6.173989360971494e-09, "loss": 0.0122, "step": 20992 }, { "epoch": 4.776564277588168, "grad_norm": 0.12395364801916607, "learning_rate": 6.161467627160064e-09, "loss": 0.0003, "step": 20993 }, { "epoch": 4.77679180887372, "grad_norm": 0.5023983035538028, "learning_rate": 6.148958541274952e-09, "loss": 0.0026, "step": 20994 }, { "epoch": 4.7770193401592715, "grad_norm": 0.505927229932517, "learning_rate": 6.136462103571717e-09, "loss": 0.0035, "step": 20995 }, { "epoch": 4.777246871444824, "grad_norm": 0.35685066008553196, "learning_rate": 6.123978314305778e-09, "loss": 0.0012, "step": 20996 }, { "epoch": 4.777474402730375, "grad_norm": 0.12974357775269119, "learning_rate": 6.11150717373242e-09, "loss": 0.0004, "step": 20997 }, { "epoch": 4.777701934015927, "grad_norm": 0.4963560223260798, "learning_rate": 6.099048682106299e-09, "loss": 0.0016, "step": 20998 }, { "epoch": 4.777929465301479, "grad_norm": 0.1684334328122935, "learning_rate": 6.0866028396821404e-09, "loss": 0.0005, "step": 20999 }, { "epoch": 4.778156996587031, "grad_norm": 0.32969770446552643, "learning_rate": 6.074169646714395e-09, "loss": 0.0061, "step": 21000 }, { "epoch": 4.778384527872582, "grad_norm": 0.27125413831701883, "learning_rate": 6.061749103457165e-09, "loss": 0.0011, "step": 21001 }, { "epoch": 4.778612059158134, "grad_norm": 0.35020333750146104, "learning_rate": 6.049341210164206e-09, "loss": 0.0016, "step": 21002 }, { "epoch": 4.7788395904436864, "grad_norm": 0.09559936332998101, "learning_rate": 6.036945967089134e-09, "loss": 0.0002, "step": 21003 }, { "epoch": 4.779067121729238, "grad_norm": 1.138811218570902, "learning_rate": 6.024563374485287e-09, "loss": 0.0069, "step": 21004 }, { "epoch": 4.779294653014789, "grad_norm": 0.13046209044480514, "learning_rate": 6.012193432605798e-09, "loss": 0.0003, "step": 21005 }, { "epoch": 4.779522184300341, "grad_norm": 1.1117004555834415, "learning_rate": 5.9998361417034494e-09, "loss": 0.0058, "step": 21006 }, { "epoch": 4.7797497155858935, "grad_norm": 0.293154930476873, "learning_rate": 5.987491502030817e-09, "loss": 0.0007, "step": 21007 }, { "epoch": 4.779977246871445, "grad_norm": 0.5814348920613375, "learning_rate": 5.975159513840131e-09, "loss": 0.0018, "step": 21008 }, { "epoch": 4.780204778156997, "grad_norm": 0.17558766700077075, "learning_rate": 5.9628401773836195e-09, "loss": 0.0012, "step": 21009 }, { "epoch": 4.780432309442548, "grad_norm": 0.6228964770297157, "learning_rate": 5.950533492912816e-09, "loss": 0.0019, "step": 21010 }, { "epoch": 4.7806598407281005, "grad_norm": 0.2855536320586434, "learning_rate": 5.938239460679465e-09, "loss": 0.0013, "step": 21011 }, { "epoch": 4.780887372013652, "grad_norm": 0.2904292509472201, "learning_rate": 5.925958080934685e-09, "loss": 0.0019, "step": 21012 }, { "epoch": 4.781114903299204, "grad_norm": 0.2573692480064223, "learning_rate": 5.913689353929525e-09, "loss": 0.0011, "step": 21013 }, { "epoch": 4.781342434584755, "grad_norm": 0.35214898490523855, "learning_rate": 5.901433279914826e-09, "loss": 0.0025, "step": 21014 }, { "epoch": 4.7815699658703075, "grad_norm": 0.5261004111090508, "learning_rate": 5.889189859140942e-09, "loss": 0.001, "step": 21015 }, { "epoch": 4.781797497155859, "grad_norm": 0.12017576241478346, "learning_rate": 5.876959091858162e-09, "loss": 0.0004, "step": 21016 }, { "epoch": 4.782025028441411, "grad_norm": 0.5058719903516358, "learning_rate": 5.864740978316491e-09, "loss": 0.012, "step": 21017 }, { "epoch": 4.782252559726962, "grad_norm": 0.049748698945690215, "learning_rate": 5.852535518765662e-09, "loss": 0.0001, "step": 21018 }, { "epoch": 4.7824800910125145, "grad_norm": 0.6945939210620415, "learning_rate": 5.840342713455058e-09, "loss": 0.0015, "step": 21019 }, { "epoch": 4.782707622298066, "grad_norm": 0.21893935013787252, "learning_rate": 5.8281625626338544e-09, "loss": 0.0016, "step": 21020 }, { "epoch": 4.782935153583618, "grad_norm": 0.3404964141013097, "learning_rate": 5.8159950665511585e-09, "loss": 0.0022, "step": 21021 }, { "epoch": 4.783162684869169, "grad_norm": 0.35695015825539544, "learning_rate": 5.803840225455451e-09, "loss": 0.0027, "step": 21022 }, { "epoch": 4.7833902161547215, "grad_norm": 1.0438478309738641, "learning_rate": 5.791698039595283e-09, "loss": 0.0124, "step": 21023 }, { "epoch": 4.783617747440273, "grad_norm": 0.8331557041569864, "learning_rate": 5.7795685092187915e-09, "loss": 0.0075, "step": 21024 }, { "epoch": 4.783845278725825, "grad_norm": 0.5650099881521035, "learning_rate": 5.7674516345738315e-09, "loss": 0.0061, "step": 21025 }, { "epoch": 4.784072810011376, "grad_norm": 0.26387200228888974, "learning_rate": 5.755347415908122e-09, "loss": 0.0017, "step": 21026 }, { "epoch": 4.7843003412969285, "grad_norm": 0.3887539897421927, "learning_rate": 5.743255853468965e-09, "loss": 0.0006, "step": 21027 }, { "epoch": 4.78452787258248, "grad_norm": 0.11483109312215822, "learning_rate": 5.7311769475036635e-09, "loss": 0.0004, "step": 21028 }, { "epoch": 4.784755403868032, "grad_norm": 0.5173532328855083, "learning_rate": 5.719110698258826e-09, "loss": 0.0024, "step": 21029 }, { "epoch": 4.784982935153583, "grad_norm": 0.08282912021516164, "learning_rate": 5.707057105981337e-09, "loss": 0.0002, "step": 21030 }, { "epoch": 4.7852104664391355, "grad_norm": 0.3649995058033555, "learning_rate": 5.69501617091732e-09, "loss": 0.0017, "step": 21031 }, { "epoch": 4.785437997724687, "grad_norm": 0.4999588854541789, "learning_rate": 5.682987893313035e-09, "loss": 0.0036, "step": 21032 }, { "epoch": 4.785665529010239, "grad_norm": 0.01896272147064039, "learning_rate": 5.670972273414191e-09, "loss": 0.0001, "step": 21033 }, { "epoch": 4.78589306029579, "grad_norm": 0.12113241790654988, "learning_rate": 5.658969311466422e-09, "loss": 0.0008, "step": 21034 }, { "epoch": 4.7861205915813425, "grad_norm": 0.06257391359558004, "learning_rate": 5.646979007715159e-09, "loss": 0.0002, "step": 21035 }, { "epoch": 4.786348122866894, "grad_norm": 0.3446280490692452, "learning_rate": 5.635001362405273e-09, "loss": 0.0006, "step": 21036 }, { "epoch": 4.786575654152446, "grad_norm": 0.6371930221053598, "learning_rate": 5.62303637578164e-09, "loss": 0.0027, "step": 21037 }, { "epoch": 4.786803185437998, "grad_norm": 0.03595898814006096, "learning_rate": 5.611084048088786e-09, "loss": 0.0001, "step": 21038 }, { "epoch": 4.7870307167235495, "grad_norm": 0.46381158676366846, "learning_rate": 5.599144379571097e-09, "loss": 0.0044, "step": 21039 }, { "epoch": 4.787258248009101, "grad_norm": 0.4149258882810027, "learning_rate": 5.587217370472478e-09, "loss": 0.0018, "step": 21040 }, { "epoch": 4.787485779294653, "grad_norm": 0.24449129434924957, "learning_rate": 5.57530302103669e-09, "loss": 0.0015, "step": 21041 }, { "epoch": 4.787713310580205, "grad_norm": 0.27251011254178586, "learning_rate": 5.56340133150736e-09, "loss": 0.0014, "step": 21042 }, { "epoch": 4.7879408418657565, "grad_norm": 0.09569913342867617, "learning_rate": 5.551512302127623e-09, "loss": 0.0002, "step": 21043 }, { "epoch": 4.788168373151308, "grad_norm": 0.4659804817627514, "learning_rate": 5.539635933140483e-09, "loss": 0.0008, "step": 21044 }, { "epoch": 4.78839590443686, "grad_norm": 0.5899014067796011, "learning_rate": 5.527772224788799e-09, "loss": 0.0104, "step": 21045 }, { "epoch": 4.788623435722412, "grad_norm": 0.3499755288290906, "learning_rate": 5.515921177314809e-09, "loss": 0.0018, "step": 21046 }, { "epoch": 4.7888509670079635, "grad_norm": 0.10904830264056023, "learning_rate": 5.504082790960888e-09, "loss": 0.0003, "step": 21047 }, { "epoch": 4.789078498293516, "grad_norm": 0.4662183937646046, "learning_rate": 5.492257065968995e-09, "loss": 0.0026, "step": 21048 }, { "epoch": 4.789306029579067, "grad_norm": 0.18939336071076127, "learning_rate": 5.480444002580812e-09, "loss": 0.0009, "step": 21049 }, { "epoch": 4.789533560864619, "grad_norm": 1.063730742371293, "learning_rate": 5.468643601037743e-09, "loss": 0.0038, "step": 21050 }, { "epoch": 4.7897610921501705, "grad_norm": 0.14081178593737068, "learning_rate": 5.456855861580984e-09, "loss": 0.0006, "step": 21051 }, { "epoch": 4.789988623435723, "grad_norm": 0.29498948447920553, "learning_rate": 5.445080784451454e-09, "loss": 0.0008, "step": 21052 }, { "epoch": 4.790216154721274, "grad_norm": 0.30732359490170225, "learning_rate": 5.433318369889792e-09, "loss": 0.0014, "step": 21053 }, { "epoch": 4.790443686006826, "grad_norm": 0.35392882227444317, "learning_rate": 5.421568618136364e-09, "loss": 0.0023, "step": 21054 }, { "epoch": 4.7906712172923775, "grad_norm": 0.22641216597565364, "learning_rate": 5.409831529431461e-09, "loss": 0.0004, "step": 21055 }, { "epoch": 4.79089874857793, "grad_norm": 0.9701548545118645, "learning_rate": 5.3981071040148245e-09, "loss": 0.0056, "step": 21056 }, { "epoch": 4.791126279863481, "grad_norm": 0.4174362920675647, "learning_rate": 5.386395342126191e-09, "loss": 0.002, "step": 21057 }, { "epoch": 4.791353811149033, "grad_norm": 0.7300504942743699, "learning_rate": 5.374696244004815e-09, "loss": 0.002, "step": 21058 }, { "epoch": 4.791581342434585, "grad_norm": 0.41075057280860783, "learning_rate": 5.363009809889879e-09, "loss": 0.0016, "step": 21059 }, { "epoch": 4.791808873720137, "grad_norm": 0.22174651894296002, "learning_rate": 5.351336040020153e-09, "loss": 0.0006, "step": 21060 }, { "epoch": 4.792036405005688, "grad_norm": 1.033004914706574, "learning_rate": 5.3396749346344004e-09, "loss": 0.0028, "step": 21061 }, { "epoch": 4.79226393629124, "grad_norm": 0.023632540521347892, "learning_rate": 5.328026493970698e-09, "loss": 0.0, "step": 21062 }, { "epoch": 4.792491467576792, "grad_norm": 0.2283675734319424, "learning_rate": 5.3163907182673945e-09, "loss": 0.0008, "step": 21063 }, { "epoch": 4.792718998862344, "grad_norm": 0.6916489774210605, "learning_rate": 5.3047676077621484e-09, "loss": 0.0024, "step": 21064 }, { "epoch": 4.792946530147895, "grad_norm": 1.3022909540037775, "learning_rate": 5.293157162692478e-09, "loss": 0.0081, "step": 21065 }, { "epoch": 4.793174061433447, "grad_norm": 0.13195380193192666, "learning_rate": 5.281559383295692e-09, "loss": 0.0004, "step": 21066 }, { "epoch": 4.793401592718999, "grad_norm": 0.16221776215276243, "learning_rate": 5.269974269808895e-09, "loss": 0.0007, "step": 21067 }, { "epoch": 4.793629124004551, "grad_norm": 0.1528888931554142, "learning_rate": 5.2584018224689085e-09, "loss": 0.0006, "step": 21068 }, { "epoch": 4.793856655290102, "grad_norm": 0.09349630346946436, "learning_rate": 5.246842041512143e-09, "loss": 0.0003, "step": 21069 }, { "epoch": 4.794084186575654, "grad_norm": 0.12119282847888867, "learning_rate": 5.235294927174936e-09, "loss": 0.0004, "step": 21070 }, { "epoch": 4.794311717861206, "grad_norm": 0.07733973569034602, "learning_rate": 5.223760479693279e-09, "loss": 0.0002, "step": 21071 }, { "epoch": 4.794539249146758, "grad_norm": 0.5178682664333556, "learning_rate": 5.212238699302818e-09, "loss": 0.0012, "step": 21072 }, { "epoch": 4.794766780432309, "grad_norm": 0.5538998246960667, "learning_rate": 5.200729586239128e-09, "loss": 0.001, "step": 21073 }, { "epoch": 4.794994311717861, "grad_norm": 0.16578598008058257, "learning_rate": 5.189233140737368e-09, "loss": 0.0007, "step": 21074 }, { "epoch": 4.795221843003413, "grad_norm": 0.5054943366944454, "learning_rate": 5.177749363032627e-09, "loss": 0.0029, "step": 21075 }, { "epoch": 4.795449374288965, "grad_norm": 0.0594668328558537, "learning_rate": 5.16627825335958e-09, "loss": 0.0001, "step": 21076 }, { "epoch": 4.795676905574517, "grad_norm": 0.21697023868417628, "learning_rate": 5.154819811952552e-09, "loss": 0.0003, "step": 21077 }, { "epoch": 4.795904436860068, "grad_norm": 0.16763639761732965, "learning_rate": 5.14337403904587e-09, "loss": 0.0006, "step": 21078 }, { "epoch": 4.79613196814562, "grad_norm": 0.21545403373931873, "learning_rate": 5.131940934873375e-09, "loss": 0.0009, "step": 21079 }, { "epoch": 4.796359499431172, "grad_norm": 0.029965573810190098, "learning_rate": 5.1205204996686994e-09, "loss": 0.0001, "step": 21080 }, { "epoch": 4.796587030716724, "grad_norm": 0.16779666664299067, "learning_rate": 5.1091127336654055e-09, "loss": 0.0006, "step": 21081 }, { "epoch": 4.796814562002275, "grad_norm": 0.5039782673278903, "learning_rate": 5.097717637096572e-09, "loss": 0.0021, "step": 21082 }, { "epoch": 4.797042093287827, "grad_norm": 0.3202552338245643, "learning_rate": 5.086335210195137e-09, "loss": 0.0009, "step": 21083 }, { "epoch": 4.797269624573379, "grad_norm": 0.075343318926855, "learning_rate": 5.0749654531936224e-09, "loss": 0.0002, "step": 21084 }, { "epoch": 4.797497155858931, "grad_norm": 0.10545375225931483, "learning_rate": 5.063608366324552e-09, "loss": 0.0004, "step": 21085 }, { "epoch": 4.797724687144482, "grad_norm": 1.6258570779858788, "learning_rate": 5.0522639498198915e-09, "loss": 0.0062, "step": 21086 }, { "epoch": 4.7979522184300345, "grad_norm": 0.06805110095083282, "learning_rate": 5.040932203911539e-09, "loss": 0.0001, "step": 21087 }, { "epoch": 4.798179749715586, "grad_norm": 0.197603945693206, "learning_rate": 5.029613128831184e-09, "loss": 0.0014, "step": 21088 }, { "epoch": 4.798407281001138, "grad_norm": 0.2263285367304076, "learning_rate": 5.018306724810171e-09, "loss": 0.0017, "step": 21089 }, { "epoch": 4.798634812286689, "grad_norm": 0.09666645839211362, "learning_rate": 5.007012992079494e-09, "loss": 0.0002, "step": 21090 }, { "epoch": 4.7988623435722415, "grad_norm": 0.3137486220829206, "learning_rate": 4.995731930869941e-09, "loss": 0.0019, "step": 21091 }, { "epoch": 4.799089874857793, "grad_norm": 0.14970988468335603, "learning_rate": 4.98446354141223e-09, "loss": 0.0008, "step": 21092 }, { "epoch": 4.799317406143345, "grad_norm": 0.07266990123131484, "learning_rate": 4.973207823936524e-09, "loss": 0.0003, "step": 21093 }, { "epoch": 4.799544937428896, "grad_norm": 0.25481076168944156, "learning_rate": 4.961964778672918e-09, "loss": 0.0022, "step": 21094 }, { "epoch": 4.7997724687144485, "grad_norm": 0.46129767624655543, "learning_rate": 4.950734405851226e-09, "loss": 0.0026, "step": 21095 }, { "epoch": 4.8, "grad_norm": 0.607389103088619, "learning_rate": 4.939516705700919e-09, "loss": 0.0061, "step": 21096 }, { "epoch": 4.800227531285552, "grad_norm": 0.09634845000728648, "learning_rate": 4.928311678451397e-09, "loss": 0.0003, "step": 21097 }, { "epoch": 4.800455062571103, "grad_norm": 0.07156483045084491, "learning_rate": 4.917119324331504e-09, "loss": 0.0003, "step": 21098 }, { "epoch": 4.8006825938566555, "grad_norm": 0.3934430038948414, "learning_rate": 4.905939643570084e-09, "loss": 0.0013, "step": 21099 }, { "epoch": 4.800910125142207, "grad_norm": 0.35748787129269666, "learning_rate": 4.894772636395567e-09, "loss": 0.0013, "step": 21100 }, { "epoch": 4.801137656427759, "grad_norm": 0.3471573997934073, "learning_rate": 4.883618303036242e-09, "loss": 0.001, "step": 21101 }, { "epoch": 4.80136518771331, "grad_norm": 0.46876906046219413, "learning_rate": 4.8724766437201195e-09, "loss": 0.0058, "step": 21102 }, { "epoch": 4.8015927189988625, "grad_norm": 0.11653642799265257, "learning_rate": 4.861347658674867e-09, "loss": 0.0004, "step": 21103 }, { "epoch": 4.801820250284414, "grad_norm": 0.2961881998916184, "learning_rate": 4.85023134812794e-09, "loss": 0.001, "step": 21104 }, { "epoch": 4.802047781569966, "grad_norm": 0.04114926908348866, "learning_rate": 4.839127712306449e-09, "loss": 0.0001, "step": 21105 }, { "epoch": 4.802275312855517, "grad_norm": 0.14306959918201145, "learning_rate": 4.828036751437504e-09, "loss": 0.0004, "step": 21106 }, { "epoch": 4.8025028441410695, "grad_norm": 0.1224709526630489, "learning_rate": 4.8169584657476596e-09, "loss": 0.0005, "step": 21107 }, { "epoch": 4.802730375426621, "grad_norm": 0.022162066467685232, "learning_rate": 4.805892855463332e-09, "loss": 0.0001, "step": 21108 }, { "epoch": 4.802957906712173, "grad_norm": 0.31998519996581143, "learning_rate": 4.794839920810798e-09, "loss": 0.0018, "step": 21109 }, { "epoch": 4.803185437997724, "grad_norm": 0.37975995938376966, "learning_rate": 4.7837996620158504e-09, "loss": 0.0027, "step": 21110 }, { "epoch": 4.8034129692832765, "grad_norm": 0.714053451517022, "learning_rate": 4.772772079304211e-09, "loss": 0.0044, "step": 21111 }, { "epoch": 4.803640500568828, "grad_norm": 0.5225221026953475, "learning_rate": 4.761757172901116e-09, "loss": 0.0011, "step": 21112 }, { "epoch": 4.80386803185438, "grad_norm": 0.44409445961924066, "learning_rate": 4.750754943031871e-09, "loss": 0.0023, "step": 21113 }, { "epoch": 4.804095563139931, "grad_norm": 0.30519028899959777, "learning_rate": 4.739765389921227e-09, "loss": 0.0071, "step": 21114 }, { "epoch": 4.8043230944254836, "grad_norm": 0.42947421315040796, "learning_rate": 4.728788513793866e-09, "loss": 0.0015, "step": 21115 }, { "epoch": 4.804550625711036, "grad_norm": 0.6721206473369337, "learning_rate": 4.717824314874053e-09, "loss": 0.0029, "step": 21116 }, { "epoch": 4.804778156996587, "grad_norm": 0.05453224017331992, "learning_rate": 4.706872793385914e-09, "loss": 0.0001, "step": 21117 }, { "epoch": 4.805005688282138, "grad_norm": 0.7031669230807158, "learning_rate": 4.695933949553297e-09, "loss": 0.0026, "step": 21118 }, { "epoch": 4.805233219567691, "grad_norm": 0.17080567879601896, "learning_rate": 4.685007783599774e-09, "loss": 0.0005, "step": 21119 }, { "epoch": 4.805460750853243, "grad_norm": 0.05450375980993997, "learning_rate": 4.674094295748638e-09, "loss": 0.0001, "step": 21120 }, { "epoch": 4.805688282138794, "grad_norm": 0.03851774265237298, "learning_rate": 4.6631934862229055e-09, "loss": 0.0001, "step": 21121 }, { "epoch": 4.805915813424345, "grad_norm": 0.21585930845515008, "learning_rate": 4.652305355245454e-09, "loss": 0.0015, "step": 21122 }, { "epoch": 4.806143344709898, "grad_norm": 0.05613694342641176, "learning_rate": 4.641429903038744e-09, "loss": 0.0002, "step": 21123 }, { "epoch": 4.80637087599545, "grad_norm": 0.0380527061968016, "learning_rate": 4.630567129825028e-09, "loss": 0.0002, "step": 21124 }, { "epoch": 4.806598407281001, "grad_norm": 0.1491968475308214, "learning_rate": 4.61971703582649e-09, "loss": 0.0005, "step": 21125 }, { "epoch": 4.806825938566553, "grad_norm": 0.2599925725147413, "learning_rate": 4.60887962126462e-09, "loss": 0.0014, "step": 21126 }, { "epoch": 4.807053469852105, "grad_norm": 0.31582988906262854, "learning_rate": 4.598054886361114e-09, "loss": 0.0016, "step": 21127 }, { "epoch": 4.807281001137657, "grad_norm": 0.598933002874671, "learning_rate": 4.587242831337185e-09, "loss": 0.0037, "step": 21128 }, { "epoch": 4.807508532423208, "grad_norm": 0.6238676599185561, "learning_rate": 4.576443456413768e-09, "loss": 0.0102, "step": 21129 }, { "epoch": 4.80773606370876, "grad_norm": 0.17431054533338025, "learning_rate": 4.565656761811587e-09, "loss": 0.0005, "step": 21130 }, { "epoch": 4.807963594994312, "grad_norm": 0.797051997958576, "learning_rate": 4.5548827477510935e-09, "loss": 0.0058, "step": 21131 }, { "epoch": 4.808191126279864, "grad_norm": 0.26441150496145643, "learning_rate": 4.544121414452527e-09, "loss": 0.0022, "step": 21132 }, { "epoch": 4.808418657565415, "grad_norm": 0.06944933546474849, "learning_rate": 4.533372762135779e-09, "loss": 0.0002, "step": 21133 }, { "epoch": 4.808646188850967, "grad_norm": 0.029692766036263942, "learning_rate": 4.522636791020607e-09, "loss": 0.0001, "step": 21134 }, { "epoch": 4.808873720136519, "grad_norm": 0.20922950624812342, "learning_rate": 4.511913501326348e-09, "loss": 0.0009, "step": 21135 }, { "epoch": 4.809101251422071, "grad_norm": 0.4101010098946777, "learning_rate": 4.501202893272202e-09, "loss": 0.0034, "step": 21136 }, { "epoch": 4.809328782707622, "grad_norm": 0.6998896564121836, "learning_rate": 4.490504967077091e-09, "loss": 0.0086, "step": 21137 }, { "epoch": 4.809556313993174, "grad_norm": 0.133614736501298, "learning_rate": 4.479819722959661e-09, "loss": 0.0003, "step": 21138 }, { "epoch": 4.809783845278726, "grad_norm": 0.2430296252583457, "learning_rate": 4.469147161138276e-09, "loss": 0.0011, "step": 21139 }, { "epoch": 4.810011376564278, "grad_norm": 0.0933997553525092, "learning_rate": 4.458487281831098e-09, "loss": 0.0002, "step": 21140 }, { "epoch": 4.810238907849829, "grad_norm": 0.691678145891936, "learning_rate": 4.447840085255936e-09, "loss": 0.0051, "step": 21141 }, { "epoch": 4.810466439135381, "grad_norm": 0.04652456672296527, "learning_rate": 4.437205571630465e-09, "loss": 0.0001, "step": 21142 }, { "epoch": 4.810693970420933, "grad_norm": 0.910700744190428, "learning_rate": 4.426583741172011e-09, "loss": 0.0037, "step": 21143 }, { "epoch": 4.810921501706485, "grad_norm": 0.19586450517294327, "learning_rate": 4.415974594097622e-09, "loss": 0.0017, "step": 21144 }, { "epoch": 4.811149032992036, "grad_norm": 0.1387548974245607, "learning_rate": 4.4053781306242075e-09, "loss": 0.0002, "step": 21145 }, { "epoch": 4.811376564277588, "grad_norm": 1.629397023391977, "learning_rate": 4.3947943509682625e-09, "loss": 0.0041, "step": 21146 }, { "epoch": 4.81160409556314, "grad_norm": 0.12044390251342314, "learning_rate": 4.38422325534614e-09, "loss": 0.0008, "step": 21147 }, { "epoch": 4.811831626848692, "grad_norm": 0.4097673835310387, "learning_rate": 4.373664843973918e-09, "loss": 0.0017, "step": 21148 }, { "epoch": 4.812059158134243, "grad_norm": 0.7801374751141602, "learning_rate": 4.3631191170673274e-09, "loss": 0.0022, "step": 21149 }, { "epoch": 4.812286689419795, "grad_norm": 0.18529513640510695, "learning_rate": 4.352586074841958e-09, "loss": 0.0005, "step": 21150 }, { "epoch": 4.812514220705347, "grad_norm": 0.4012154820893169, "learning_rate": 4.342065717513055e-09, "loss": 0.0021, "step": 21151 }, { "epoch": 4.812741751990899, "grad_norm": 0.10327443259204824, "learning_rate": 4.3315580452957255e-09, "loss": 0.0005, "step": 21152 }, { "epoch": 4.81296928327645, "grad_norm": 0.14183032000393203, "learning_rate": 4.321063058404518e-09, "loss": 0.0008, "step": 21153 }, { "epoch": 4.813196814562002, "grad_norm": 0.5284612674217058, "learning_rate": 4.310580757054123e-09, "loss": 0.0013, "step": 21154 }, { "epoch": 4.8134243458475545, "grad_norm": 0.35165867769241527, "learning_rate": 4.3001111414587434e-09, "loss": 0.0018, "step": 21155 }, { "epoch": 4.813651877133106, "grad_norm": 0.18552301517260553, "learning_rate": 4.289654211832306e-09, "loss": 0.001, "step": 21156 }, { "epoch": 4.813879408418657, "grad_norm": 0.5593220607798935, "learning_rate": 4.279209968388528e-09, "loss": 0.0033, "step": 21157 }, { "epoch": 4.814106939704209, "grad_norm": 0.2922045877340786, "learning_rate": 4.26877841134099e-09, "loss": 0.001, "step": 21158 }, { "epoch": 4.8143344709897615, "grad_norm": 0.1990281587793922, "learning_rate": 4.258359540902785e-09, "loss": 0.0013, "step": 21159 }, { "epoch": 4.814562002275313, "grad_norm": 0.05492385775101845, "learning_rate": 4.247953357286796e-09, "loss": 0.0001, "step": 21160 }, { "epoch": 4.814789533560864, "grad_norm": 0.39896416397372636, "learning_rate": 4.237559860705842e-09, "loss": 0.0013, "step": 21161 }, { "epoch": 4.815017064846416, "grad_norm": 0.015697017221326656, "learning_rate": 4.227179051372252e-09, "loss": 0.0, "step": 21162 }, { "epoch": 4.8152445961319685, "grad_norm": 0.9255377988670966, "learning_rate": 4.2168109294982854e-09, "loss": 0.008, "step": 21163 }, { "epoch": 4.81547212741752, "grad_norm": 0.5564044640600946, "learning_rate": 4.2064554952957885e-09, "loss": 0.0031, "step": 21164 }, { "epoch": 4.815699658703072, "grad_norm": 0.5498830688950561, "learning_rate": 4.196112748976397e-09, "loss": 0.0029, "step": 21165 }, { "epoch": 4.815927189988623, "grad_norm": 0.38405592275077466, "learning_rate": 4.185782690751539e-09, "loss": 0.0011, "step": 21166 }, { "epoch": 4.8161547212741755, "grad_norm": 0.1307798978063842, "learning_rate": 4.175465320832295e-09, "loss": 0.0004, "step": 21167 }, { "epoch": 4.816382252559727, "grad_norm": 0.5407202999457429, "learning_rate": 4.165160639429469e-09, "loss": 0.0037, "step": 21168 }, { "epoch": 4.816609783845279, "grad_norm": 0.20176975109543321, "learning_rate": 4.1548686467538655e-09, "loss": 0.001, "step": 21169 }, { "epoch": 4.81683731513083, "grad_norm": 0.08105055578535926, "learning_rate": 4.144589343015662e-09, "loss": 0.0002, "step": 21170 }, { "epoch": 4.8170648464163826, "grad_norm": 0.5558174188357216, "learning_rate": 4.134322728425108e-09, "loss": 0.0032, "step": 21171 }, { "epoch": 4.817292377701934, "grad_norm": 0.10564154583153083, "learning_rate": 4.124068803191828e-09, "loss": 0.0004, "step": 21172 }, { "epoch": 4.817519908987486, "grad_norm": 0.5434650679594291, "learning_rate": 4.113827567525514e-09, "loss": 0.0032, "step": 21173 }, { "epoch": 4.817747440273037, "grad_norm": 0.030523649918808213, "learning_rate": 4.103599021635512e-09, "loss": 0.0001, "step": 21174 }, { "epoch": 4.81797497155859, "grad_norm": 0.33642583803098364, "learning_rate": 4.093383165730755e-09, "loss": 0.0021, "step": 21175 }, { "epoch": 4.818202502844141, "grad_norm": 0.2769858532862082, "learning_rate": 4.08318000002017e-09, "loss": 0.0033, "step": 21176 }, { "epoch": 4.818430034129693, "grad_norm": 0.10710285889970825, "learning_rate": 4.072989524712204e-09, "loss": 0.0003, "step": 21177 }, { "epoch": 4.818657565415244, "grad_norm": 0.07186345211353538, "learning_rate": 4.0628117400152295e-09, "loss": 0.0002, "step": 21178 }, { "epoch": 4.818885096700797, "grad_norm": 0.4638404900883963, "learning_rate": 4.0526466461371375e-09, "loss": 0.0031, "step": 21179 }, { "epoch": 4.819112627986348, "grad_norm": 0.8595792001662982, "learning_rate": 4.042494243285746e-09, "loss": 0.0057, "step": 21180 }, { "epoch": 4.8193401592719, "grad_norm": 0.03542643194356572, "learning_rate": 4.0323545316684615e-09, "loss": 0.0001, "step": 21181 }, { "epoch": 4.819567690557451, "grad_norm": 0.48937814732718843, "learning_rate": 4.022227511492685e-09, "loss": 0.0032, "step": 21182 }, { "epoch": 4.819795221843004, "grad_norm": 0.5967963334849179, "learning_rate": 4.012113182965336e-09, "loss": 0.006, "step": 21183 }, { "epoch": 4.820022753128555, "grad_norm": 0.21147255476973992, "learning_rate": 4.0020115462930525e-09, "loss": 0.0008, "step": 21184 }, { "epoch": 4.820250284414107, "grad_norm": 0.06395471445046211, "learning_rate": 3.9919226016823385e-09, "loss": 0.0002, "step": 21185 }, { "epoch": 4.820477815699658, "grad_norm": 0.63807518099916, "learning_rate": 3.981846349339416e-09, "loss": 0.008, "step": 21186 }, { "epoch": 4.820705346985211, "grad_norm": 0.7516645085165391, "learning_rate": 3.9717827894702325e-09, "loss": 0.0041, "step": 21187 }, { "epoch": 4.820932878270762, "grad_norm": 0.07147558096526276, "learning_rate": 3.961731922280388e-09, "loss": 0.0003, "step": 21188 }, { "epoch": 4.821160409556314, "grad_norm": 0.14886385632075988, "learning_rate": 3.95169374797548e-09, "loss": 0.0004, "step": 21189 }, { "epoch": 4.821387940841865, "grad_norm": 0.15809602878071916, "learning_rate": 3.941668266760484e-09, "loss": 0.0011, "step": 21190 }, { "epoch": 4.821615472127418, "grad_norm": 0.7519488510050887, "learning_rate": 3.931655478840377e-09, "loss": 0.003, "step": 21191 }, { "epoch": 4.821843003412969, "grad_norm": 0.6915178994645718, "learning_rate": 3.9216553844198535e-09, "loss": 0.0037, "step": 21192 }, { "epoch": 4.822070534698521, "grad_norm": 0.046211487414587825, "learning_rate": 3.911667983703127e-09, "loss": 0.0001, "step": 21193 }, { "epoch": 4.822298065984073, "grad_norm": 0.8036694112607701, "learning_rate": 3.901693276894547e-09, "loss": 0.0087, "step": 21194 }, { "epoch": 4.822525597269625, "grad_norm": 0.38793300258234864, "learning_rate": 3.891731264197771e-09, "loss": 0.0033, "step": 21195 }, { "epoch": 4.822753128555176, "grad_norm": 0.1771297766857538, "learning_rate": 3.881781945816593e-09, "loss": 0.0007, "step": 21196 }, { "epoch": 4.822980659840728, "grad_norm": 0.061757873990122274, "learning_rate": 3.871845321954254e-09, "loss": 0.0002, "step": 21197 }, { "epoch": 4.82320819112628, "grad_norm": 0.04845119259907067, "learning_rate": 3.861921392813856e-09, "loss": 0.0002, "step": 21198 }, { "epoch": 4.823435722411832, "grad_norm": 0.09407502493133786, "learning_rate": 3.8520101585981515e-09, "loss": 0.0004, "step": 21199 }, { "epoch": 4.823663253697383, "grad_norm": 0.4954789770015994, "learning_rate": 3.842111619509828e-09, "loss": 0.0013, "step": 21200 }, { "epoch": 4.823890784982935, "grad_norm": 0.9665697488920716, "learning_rate": 3.832225775751152e-09, "loss": 0.0053, "step": 21201 }, { "epoch": 4.824118316268487, "grad_norm": 0.07859252848483671, "learning_rate": 3.822352627524187e-09, "loss": 0.0003, "step": 21202 }, { "epoch": 4.824345847554039, "grad_norm": 0.09079058057336212, "learning_rate": 3.8124921750306435e-09, "loss": 0.0003, "step": 21203 }, { "epoch": 4.824573378839591, "grad_norm": 0.11412552049750783, "learning_rate": 3.8026444184721675e-09, "loss": 0.0004, "step": 21204 }, { "epoch": 4.824800910125142, "grad_norm": 0.11343617572608375, "learning_rate": 3.7928093580499175e-09, "loss": 0.0004, "step": 21205 }, { "epoch": 4.825028441410694, "grad_norm": 0.8871674122256853, "learning_rate": 3.782986993965051e-09, "loss": 0.0066, "step": 21206 }, { "epoch": 4.825255972696246, "grad_norm": 0.4978790586987893, "learning_rate": 3.7731773264181035e-09, "loss": 0.0016, "step": 21207 }, { "epoch": 4.825483503981798, "grad_norm": 0.39971751934051564, "learning_rate": 3.763380355609747e-09, "loss": 0.0029, "step": 21208 }, { "epoch": 4.825711035267349, "grad_norm": 0.21929406082842207, "learning_rate": 3.753596081740169e-09, "loss": 0.0007, "step": 21209 }, { "epoch": 4.825938566552901, "grad_norm": 0.19990100741437056, "learning_rate": 3.743824505009277e-09, "loss": 0.0009, "step": 21210 }, { "epoch": 4.826166097838453, "grad_norm": 0.08015649546596851, "learning_rate": 3.734065625616914e-09, "loss": 0.0004, "step": 21211 }, { "epoch": 4.826393629124005, "grad_norm": 0.799466083889976, "learning_rate": 3.7243194437623642e-09, "loss": 0.0052, "step": 21212 }, { "epoch": 4.826621160409556, "grad_norm": 0.1328838004810146, "learning_rate": 3.7145859596449814e-09, "loss": 0.0005, "step": 21213 }, { "epoch": 4.826848691695108, "grad_norm": 0.4358598744938509, "learning_rate": 3.7048651734636344e-09, "loss": 0.0029, "step": 21214 }, { "epoch": 4.82707622298066, "grad_norm": 0.08420990998127158, "learning_rate": 3.6951570854169144e-09, "loss": 0.0003, "step": 21215 }, { "epoch": 4.827303754266212, "grad_norm": 0.1691063652366752, "learning_rate": 3.685461695703413e-09, "loss": 0.0004, "step": 21216 }, { "epoch": 4.827531285551763, "grad_norm": 0.0755559325236426, "learning_rate": 3.6757790045210962e-09, "loss": 0.0003, "step": 21217 }, { "epoch": 4.827758816837315, "grad_norm": 0.33146456028848853, "learning_rate": 3.6661090120680696e-09, "loss": 0.0016, "step": 21218 }, { "epoch": 4.827986348122867, "grad_norm": 0.36595494819599156, "learning_rate": 3.6564517185417454e-09, "loss": 0.0022, "step": 21219 }, { "epoch": 4.828213879408419, "grad_norm": 0.10913872832622219, "learning_rate": 3.646807124139673e-09, "loss": 0.0004, "step": 21220 }, { "epoch": 4.82844141069397, "grad_norm": 0.13203466578570136, "learning_rate": 3.6371752290589175e-09, "loss": 0.0005, "step": 21221 }, { "epoch": 4.828668941979522, "grad_norm": 0.26566186293607125, "learning_rate": 3.6275560334962655e-09, "loss": 0.001, "step": 21222 }, { "epoch": 4.828896473265074, "grad_norm": 0.07603961911630373, "learning_rate": 3.6179495376484355e-09, "loss": 0.0002, "step": 21223 }, { "epoch": 4.829124004550626, "grad_norm": 0.25042487551119297, "learning_rate": 3.6083557417117276e-09, "loss": 0.0007, "step": 21224 }, { "epoch": 4.829351535836177, "grad_norm": 0.5750179941245537, "learning_rate": 3.5987746458821665e-09, "loss": 0.0043, "step": 21225 }, { "epoch": 4.829579067121729, "grad_norm": 0.2584515405743419, "learning_rate": 3.5892062503555676e-09, "loss": 0.0005, "step": 21226 }, { "epoch": 4.829806598407281, "grad_norm": 0.4775221877889006, "learning_rate": 3.579650555327607e-09, "loss": 0.006, "step": 21227 }, { "epoch": 4.830034129692833, "grad_norm": 0.46344693631307005, "learning_rate": 3.570107560993477e-09, "loss": 0.0037, "step": 21228 }, { "epoch": 4.830261660978384, "grad_norm": 0.22013746247024712, "learning_rate": 3.5605772675482294e-09, "loss": 0.0007, "step": 21229 }, { "epoch": 4.830489192263936, "grad_norm": 0.23831830684750635, "learning_rate": 3.5510596751867077e-09, "loss": 0.0007, "step": 21230 }, { "epoch": 4.830716723549488, "grad_norm": 0.377186347545091, "learning_rate": 3.54155478410341e-09, "loss": 0.0016, "step": 21231 }, { "epoch": 4.83094425483504, "grad_norm": 0.08877487548514944, "learning_rate": 3.5320625944925553e-09, "loss": 0.0003, "step": 21232 }, { "epoch": 4.831171786120592, "grad_norm": 0.3468335069799568, "learning_rate": 3.522583106548155e-09, "loss": 0.0057, "step": 21233 }, { "epoch": 4.831399317406143, "grad_norm": 0.5120851732417768, "learning_rate": 3.5131163204640133e-09, "loss": 0.0033, "step": 21234 }, { "epoch": 4.831626848691695, "grad_norm": 0.4201806069467205, "learning_rate": 3.503662236433586e-09, "loss": 0.0026, "step": 21235 }, { "epoch": 4.831854379977247, "grad_norm": 0.037526903724761966, "learning_rate": 3.494220854650052e-09, "loss": 0.0001, "step": 21236 }, { "epoch": 4.832081911262799, "grad_norm": 0.168793484538134, "learning_rate": 3.4847921753063813e-09, "loss": 0.0005, "step": 21237 }, { "epoch": 4.83230944254835, "grad_norm": 0.01494696336343356, "learning_rate": 3.475376198595337e-09, "loss": 0.0001, "step": 21238 }, { "epoch": 4.832536973833902, "grad_norm": 0.3398655890435484, "learning_rate": 3.4659729247094043e-09, "loss": 0.0015, "step": 21239 }, { "epoch": 4.832764505119454, "grad_norm": 0.38902893855290605, "learning_rate": 3.4565823538406513e-09, "loss": 0.0043, "step": 21240 }, { "epoch": 4.832992036405006, "grad_norm": 0.47668003401953307, "learning_rate": 3.4472044861809383e-09, "loss": 0.0023, "step": 21241 }, { "epoch": 4.833219567690557, "grad_norm": 0.4050967437165874, "learning_rate": 3.4378393219221958e-09, "loss": 0.0021, "step": 21242 }, { "epoch": 4.83344709897611, "grad_norm": 0.028177233801697833, "learning_rate": 3.4284868612555895e-09, "loss": 0.0001, "step": 21243 }, { "epoch": 4.833674630261661, "grad_norm": 0.306121899220536, "learning_rate": 3.419147104372425e-09, "loss": 0.0033, "step": 21244 }, { "epoch": 4.833902161547213, "grad_norm": 0.5080878332466906, "learning_rate": 3.409820051463522e-09, "loss": 0.0029, "step": 21245 }, { "epoch": 4.834129692832764, "grad_norm": 0.20408502608795948, "learning_rate": 3.400505702719492e-09, "loss": 0.0009, "step": 21246 }, { "epoch": 4.834357224118317, "grad_norm": 0.07512520586751056, "learning_rate": 3.391204058330669e-09, "loss": 0.0003, "step": 21247 }, { "epoch": 4.834584755403868, "grad_norm": 0.07808012351806093, "learning_rate": 3.381915118487247e-09, "loss": 0.0003, "step": 21248 }, { "epoch": 4.83481228668942, "grad_norm": 0.01044473224162643, "learning_rate": 3.372638883379076e-09, "loss": 0.0, "step": 21249 }, { "epoch": 4.835039817974971, "grad_norm": 0.8053381197745897, "learning_rate": 3.3633753531956553e-09, "loss": 0.0024, "step": 21250 }, { "epoch": 4.835267349260524, "grad_norm": 0.12494292752801525, "learning_rate": 3.354124528126418e-09, "loss": 0.0005, "step": 21251 }, { "epoch": 4.835494880546075, "grad_norm": 0.44648315769725094, "learning_rate": 3.3448864083603096e-09, "loss": 0.0026, "step": 21252 }, { "epoch": 4.835722411831627, "grad_norm": 0.5084280286860152, "learning_rate": 3.335660994086276e-09, "loss": 0.0031, "step": 21253 }, { "epoch": 4.835949943117178, "grad_norm": 0.7921443936262716, "learning_rate": 3.3264482854927783e-09, "loss": 0.0048, "step": 21254 }, { "epoch": 4.836177474402731, "grad_norm": 1.1751870320077682, "learning_rate": 3.3172482827682067e-09, "loss": 0.0103, "step": 21255 }, { "epoch": 4.836405005688282, "grad_norm": 0.2348690773014524, "learning_rate": 3.308060986100467e-09, "loss": 0.0009, "step": 21256 }, { "epoch": 4.836632536973834, "grad_norm": 0.43751183547276157, "learning_rate": 3.298886395677395e-09, "loss": 0.0011, "step": 21257 }, { "epoch": 4.836860068259385, "grad_norm": 0.030236166265826835, "learning_rate": 3.289724511686479e-09, "loss": 0.0001, "step": 21258 }, { "epoch": 4.837087599544938, "grad_norm": 0.4197974223424417, "learning_rate": 3.2805753343150004e-09, "loss": 0.0026, "step": 21259 }, { "epoch": 4.837315130830489, "grad_norm": 0.09177840625312667, "learning_rate": 3.271438863749893e-09, "loss": 0.0003, "step": 21260 }, { "epoch": 4.837542662116041, "grad_norm": 0.35141048879131886, "learning_rate": 3.2623151001779514e-09, "loss": 0.0042, "step": 21261 }, { "epoch": 4.837770193401592, "grad_norm": 0.16464121036108653, "learning_rate": 3.253204043785693e-09, "loss": 0.0007, "step": 21262 }, { "epoch": 4.837997724687145, "grad_norm": 0.2857757333743138, "learning_rate": 3.24410569475922e-09, "loss": 0.0009, "step": 21263 }, { "epoch": 4.838225255972696, "grad_norm": 0.20584613241955488, "learning_rate": 3.2350200532844934e-09, "loss": 0.0019, "step": 21264 }, { "epoch": 4.838452787258248, "grad_norm": 0.2824072036333179, "learning_rate": 3.2259471195473374e-09, "loss": 0.0006, "step": 21265 }, { "epoch": 4.838680318543799, "grad_norm": 0.0910912662715444, "learning_rate": 3.21688689373309e-09, "loss": 0.0002, "step": 21266 }, { "epoch": 4.838907849829352, "grad_norm": 0.07212766808734566, "learning_rate": 3.2078393760268804e-09, "loss": 0.0003, "step": 21267 }, { "epoch": 4.839135381114903, "grad_norm": 0.051100614312972824, "learning_rate": 3.198804566613631e-09, "loss": 0.0001, "step": 21268 }, { "epoch": 4.839362912400455, "grad_norm": 0.24356874276662996, "learning_rate": 3.1897824656781233e-09, "loss": 0.0013, "step": 21269 }, { "epoch": 4.839590443686006, "grad_norm": 0.18156361323891723, "learning_rate": 3.1807730734046553e-09, "loss": 0.0004, "step": 21270 }, { "epoch": 4.839817974971559, "grad_norm": 0.2578840837535456, "learning_rate": 3.171776389977385e-09, "loss": 0.0016, "step": 21271 }, { "epoch": 4.840045506257111, "grad_norm": 0.4039415223686769, "learning_rate": 3.1627924155801238e-09, "loss": 0.0037, "step": 21272 }, { "epoch": 4.840273037542662, "grad_norm": 0.3609803505370701, "learning_rate": 3.1538211503966134e-09, "loss": 0.0015, "step": 21273 }, { "epoch": 4.8405005688282134, "grad_norm": 0.7128294489938208, "learning_rate": 3.1448625946101096e-09, "loss": 0.0099, "step": 21274 }, { "epoch": 4.840728100113766, "grad_norm": 0.2725884570570177, "learning_rate": 3.1359167484037307e-09, "loss": 0.0009, "step": 21275 }, { "epoch": 4.840955631399318, "grad_norm": 0.6874088441969075, "learning_rate": 3.126983611960316e-09, "loss": 0.0027, "step": 21276 }, { "epoch": 4.841183162684869, "grad_norm": 0.053961770956924345, "learning_rate": 3.1180631854624973e-09, "loss": 0.0001, "step": 21277 }, { "epoch": 4.8414106939704205, "grad_norm": 0.3651808141647588, "learning_rate": 3.1091554690925592e-09, "loss": 0.0021, "step": 21278 }, { "epoch": 4.841638225255973, "grad_norm": 0.4887381846769322, "learning_rate": 3.1002604630324394e-09, "loss": 0.0019, "step": 21279 }, { "epoch": 4.841865756541525, "grad_norm": 0.16566431153561204, "learning_rate": 3.091378167464146e-09, "loss": 0.0007, "step": 21280 }, { "epoch": 4.842093287827076, "grad_norm": 0.7285103159591267, "learning_rate": 3.0825085825690603e-09, "loss": 0.0034, "step": 21281 }, { "epoch": 4.842320819112628, "grad_norm": 0.383534663830692, "learning_rate": 3.0736517085284966e-09, "loss": 0.0017, "step": 21282 }, { "epoch": 4.84254835039818, "grad_norm": 0.14408549244690522, "learning_rate": 3.0648075455234904e-09, "loss": 0.0011, "step": 21283 }, { "epoch": 4.842775881683732, "grad_norm": 0.4255793648928252, "learning_rate": 3.055976093734869e-09, "loss": 0.0037, "step": 21284 }, { "epoch": 4.843003412969283, "grad_norm": 0.058414333474381776, "learning_rate": 3.047157353342975e-09, "loss": 0.0001, "step": 21285 }, { "epoch": 4.843230944254835, "grad_norm": 0.07305094695421711, "learning_rate": 3.0383513245281493e-09, "loss": 0.0002, "step": 21286 }, { "epoch": 4.843458475540387, "grad_norm": 0.34348845469521305, "learning_rate": 3.0295580074703874e-09, "loss": 0.0009, "step": 21287 }, { "epoch": 4.843686006825939, "grad_norm": 0.16360134731444548, "learning_rate": 3.020777402349337e-09, "loss": 0.0009, "step": 21288 }, { "epoch": 4.84391353811149, "grad_norm": 0.16864792876644072, "learning_rate": 3.0120095093445073e-09, "loss": 0.0005, "step": 21289 }, { "epoch": 4.844141069397042, "grad_norm": 0.4331070550536426, "learning_rate": 3.00325432863513e-09, "loss": 0.0048, "step": 21290 }, { "epoch": 4.844368600682594, "grad_norm": 0.33720696150116347, "learning_rate": 2.9945118604000207e-09, "loss": 0.0012, "step": 21291 }, { "epoch": 4.844596131968146, "grad_norm": 1.1229279249705635, "learning_rate": 2.985782104817994e-09, "loss": 0.0019, "step": 21292 }, { "epoch": 4.844823663253697, "grad_norm": 0.37080164530822635, "learning_rate": 2.9770650620674497e-09, "loss": 0.0013, "step": 21293 }, { "epoch": 4.845051194539249, "grad_norm": 0.1940632530420708, "learning_rate": 2.968360732326439e-09, "loss": 0.001, "step": 21294 }, { "epoch": 4.845278725824801, "grad_norm": 1.108260193191357, "learning_rate": 2.959669115772945e-09, "loss": 0.0027, "step": 21295 }, { "epoch": 4.845506257110353, "grad_norm": 0.15163427208467525, "learning_rate": 2.950990212584673e-09, "loss": 0.0008, "step": 21296 }, { "epoch": 4.845733788395904, "grad_norm": 0.06456564995301978, "learning_rate": 2.9423240229388416e-09, "loss": 0.0004, "step": 21297 }, { "epoch": 4.845961319681456, "grad_norm": 0.4152851135453963, "learning_rate": 2.9336705470127402e-09, "loss": 0.0028, "step": 21298 }, { "epoch": 4.846188850967008, "grad_norm": 0.03871083472231935, "learning_rate": 2.9250297849831023e-09, "loss": 0.0002, "step": 21299 }, { "epoch": 4.84641638225256, "grad_norm": 0.6435382893822191, "learning_rate": 2.9164017370265916e-09, "loss": 0.0023, "step": 21300 }, { "epoch": 4.846643913538111, "grad_norm": 0.07018542604805189, "learning_rate": 2.9077864033195263e-09, "loss": 0.0002, "step": 21301 }, { "epoch": 4.846871444823663, "grad_norm": 0.41276980057249263, "learning_rate": 2.899183784038015e-09, "loss": 0.0014, "step": 21302 }, { "epoch": 4.847098976109215, "grad_norm": 0.6359223504687057, "learning_rate": 2.890593879357889e-09, "loss": 0.0033, "step": 21303 }, { "epoch": 4.847326507394767, "grad_norm": 0.13280459680238763, "learning_rate": 2.8820166894546335e-09, "loss": 0.0006, "step": 21304 }, { "epoch": 4.847554038680318, "grad_norm": 0.6348525425149876, "learning_rate": 2.8734522145035943e-09, "loss": 0.0044, "step": 21305 }, { "epoch": 4.84778156996587, "grad_norm": 0.12275412749774717, "learning_rate": 2.8649004546799088e-09, "loss": 0.0006, "step": 21306 }, { "epoch": 4.848009101251422, "grad_norm": 0.13051148810041627, "learning_rate": 2.8563614101581595e-09, "loss": 0.0006, "step": 21307 }, { "epoch": 4.848236632536974, "grad_norm": 0.1270005663683027, "learning_rate": 2.847835081113068e-09, "loss": 0.0002, "step": 21308 }, { "epoch": 4.848464163822525, "grad_norm": 0.39630713029956954, "learning_rate": 2.8393214677188003e-09, "loss": 0.0015, "step": 21309 }, { "epoch": 4.848691695108077, "grad_norm": 0.12555212023557594, "learning_rate": 2.8308205701493146e-09, "loss": 0.0005, "step": 21310 }, { "epoch": 4.84891922639363, "grad_norm": 0.13283176133745617, "learning_rate": 2.8223323885784304e-09, "loss": 0.0005, "step": 21311 }, { "epoch": 4.849146757679181, "grad_norm": 0.07810167313545763, "learning_rate": 2.8138569231796198e-09, "loss": 0.0001, "step": 21312 }, { "epoch": 4.849374288964732, "grad_norm": 0.5029367170650946, "learning_rate": 2.8053941741260775e-09, "loss": 0.0033, "step": 21313 }, { "epoch": 4.849601820250284, "grad_norm": 0.09153083665562431, "learning_rate": 2.7969441415907906e-09, "loss": 0.0002, "step": 21314 }, { "epoch": 4.849829351535837, "grad_norm": 6.332037360836823, "learning_rate": 2.7885068257464678e-09, "loss": 0.0575, "step": 21315 }, { "epoch": 4.850056882821388, "grad_norm": 0.768852881340002, "learning_rate": 2.78008222676554e-09, "loss": 0.0063, "step": 21316 }, { "epoch": 4.850284414106939, "grad_norm": 0.6169524984825525, "learning_rate": 2.7716703448201627e-09, "loss": 0.0049, "step": 21317 }, { "epoch": 4.850511945392491, "grad_norm": 0.3783928923525154, "learning_rate": 2.7632711800822805e-09, "loss": 0.0034, "step": 21318 }, { "epoch": 4.850739476678044, "grad_norm": 0.3188265146496144, "learning_rate": 2.7548847327235617e-09, "loss": 0.0009, "step": 21319 }, { "epoch": 4.850967007963595, "grad_norm": 0.1545497640426315, "learning_rate": 2.746511002915467e-09, "loss": 0.0004, "step": 21320 }, { "epoch": 4.851194539249147, "grad_norm": 0.496766605012271, "learning_rate": 2.738149990829039e-09, "loss": 0.0041, "step": 21321 }, { "epoch": 4.851422070534698, "grad_norm": 0.17105996387275957, "learning_rate": 2.7298016966351843e-09, "loss": 0.0007, "step": 21322 }, { "epoch": 4.851649601820251, "grad_norm": 0.20584492846899086, "learning_rate": 2.721466120504598e-09, "loss": 0.001, "step": 21323 }, { "epoch": 4.851877133105802, "grad_norm": 0.37358494308417145, "learning_rate": 2.713143262607562e-09, "loss": 0.0011, "step": 21324 }, { "epoch": 4.852104664391354, "grad_norm": 0.14506603102919105, "learning_rate": 2.7048331231142865e-09, "loss": 0.0006, "step": 21325 }, { "epoch": 4.852332195676905, "grad_norm": 0.07674329355300052, "learning_rate": 2.6965357021944276e-09, "loss": 0.0003, "step": 21326 }, { "epoch": 4.852559726962458, "grad_norm": 0.05140138558416586, "learning_rate": 2.6882510000177803e-09, "loss": 0.0001, "step": 21327 }, { "epoch": 4.852787258248009, "grad_norm": 0.18889090371651684, "learning_rate": 2.6799790167535145e-09, "loss": 0.0005, "step": 21328 }, { "epoch": 4.853014789533561, "grad_norm": 0.513039267749354, "learning_rate": 2.6717197525707312e-09, "loss": 0.0031, "step": 21329 }, { "epoch": 4.853242320819112, "grad_norm": 0.2911658761170837, "learning_rate": 2.663473207638323e-09, "loss": 0.0019, "step": 21330 }, { "epoch": 4.853469852104665, "grad_norm": 0.4731724914659026, "learning_rate": 2.655239382124766e-09, "loss": 0.0033, "step": 21331 }, { "epoch": 4.853697383390216, "grad_norm": 0.6265224935007325, "learning_rate": 2.647018276198329e-09, "loss": 0.007, "step": 21332 }, { "epoch": 4.853924914675768, "grad_norm": 0.04351025461119477, "learning_rate": 2.6388098900270025e-09, "loss": 0.0001, "step": 21333 }, { "epoch": 4.8541524459613195, "grad_norm": 0.5318702527587658, "learning_rate": 2.630614223778638e-09, "loss": 0.0029, "step": 21334 }, { "epoch": 4.854379977246872, "grad_norm": 0.12182846097633036, "learning_rate": 2.6224312776207406e-09, "loss": 0.0006, "step": 21335 }, { "epoch": 4.854607508532423, "grad_norm": 0.3502859169510412, "learning_rate": 2.6142610517204682e-09, "loss": 0.0027, "step": 21336 }, { "epoch": 4.854835039817975, "grad_norm": 0.03650324606297097, "learning_rate": 2.6061035462448397e-09, "loss": 0.0001, "step": 21337 }, { "epoch": 4.8550625711035265, "grad_norm": 0.4626145850410716, "learning_rate": 2.5979587613606665e-09, "loss": 0.0058, "step": 21338 }, { "epoch": 4.855290102389079, "grad_norm": 0.19398783888973914, "learning_rate": 2.5898266972342738e-09, "loss": 0.0009, "step": 21339 }, { "epoch": 4.85551763367463, "grad_norm": 0.3564883012006367, "learning_rate": 2.581707354031987e-09, "loss": 0.0007, "step": 21340 }, { "epoch": 4.855745164960182, "grad_norm": 0.19266843160486163, "learning_rate": 2.5736007319196455e-09, "loss": 0.0021, "step": 21341 }, { "epoch": 4.8559726962457335, "grad_norm": 1.523390035138123, "learning_rate": 2.5655068310630192e-09, "loss": 0.0061, "step": 21342 }, { "epoch": 4.856200227531286, "grad_norm": 0.1591938976992027, "learning_rate": 2.5574256516274627e-09, "loss": 0.0006, "step": 21343 }, { "epoch": 4.856427758816837, "grad_norm": 0.04976661500514073, "learning_rate": 2.54935719377819e-09, "loss": 0.0001, "step": 21344 }, { "epoch": 4.856655290102389, "grad_norm": 0.17188265398875477, "learning_rate": 2.5413014576800704e-09, "loss": 0.0005, "step": 21345 }, { "epoch": 4.8568828213879405, "grad_norm": 0.3725108319206827, "learning_rate": 2.5332584434978326e-09, "loss": 0.0008, "step": 21346 }, { "epoch": 4.857110352673493, "grad_norm": 0.23081218981065835, "learning_rate": 2.5252281513957893e-09, "loss": 0.0012, "step": 21347 }, { "epoch": 4.857337883959044, "grad_norm": 0.2215505132587786, "learning_rate": 2.517210581538046e-09, "loss": 0.0007, "step": 21348 }, { "epoch": 4.857565415244596, "grad_norm": 0.17973373087542083, "learning_rate": 2.509205734088499e-09, "loss": 0.0009, "step": 21349 }, { "epoch": 4.857792946530148, "grad_norm": 0.5924617440682277, "learning_rate": 2.501213609210698e-09, "loss": 0.0018, "step": 21350 }, { "epoch": 4.8580204778157, "grad_norm": 0.05113564457229709, "learning_rate": 2.4932342070681237e-09, "loss": 0.0002, "step": 21351 }, { "epoch": 4.858248009101251, "grad_norm": 0.2323112627346064, "learning_rate": 2.485267527823701e-09, "loss": 0.0007, "step": 21352 }, { "epoch": 4.858475540386803, "grad_norm": 0.35780181125180843, "learning_rate": 2.4773135716404245e-09, "loss": 0.0011, "step": 21353 }, { "epoch": 4.858703071672355, "grad_norm": 0.3078561617353071, "learning_rate": 2.469372338680734e-09, "loss": 0.0015, "step": 21354 }, { "epoch": 4.858930602957907, "grad_norm": 0.549560686349855, "learning_rate": 2.4614438291068605e-09, "loss": 0.0026, "step": 21355 }, { "epoch": 4.859158134243458, "grad_norm": 0.26039247357611434, "learning_rate": 2.4535280430810355e-09, "loss": 0.0014, "step": 21356 }, { "epoch": 4.85938566552901, "grad_norm": 0.016014506151638117, "learning_rate": 2.445624980764935e-09, "loss": 0.0001, "step": 21357 }, { "epoch": 4.859613196814562, "grad_norm": 0.5559334637292579, "learning_rate": 2.437734642320097e-09, "loss": 0.003, "step": 21358 }, { "epoch": 4.859840728100114, "grad_norm": 0.02444117321303475, "learning_rate": 2.4298570279078505e-09, "loss": 0.0001, "step": 21359 }, { "epoch": 4.860068259385666, "grad_norm": 0.23451543807065348, "learning_rate": 2.4219921376891083e-09, "loss": 0.0012, "step": 21360 }, { "epoch": 4.860295790671217, "grad_norm": 0.5315947070852108, "learning_rate": 2.4141399718246444e-09, "loss": 0.0018, "step": 21361 }, { "epoch": 4.860523321956769, "grad_norm": 0.09183187220079948, "learning_rate": 2.4063005304748864e-09, "loss": 0.0003, "step": 21362 }, { "epoch": 4.860750853242321, "grad_norm": 0.46057651933891275, "learning_rate": 2.3984738138001923e-09, "loss": 0.0041, "step": 21363 }, { "epoch": 4.860978384527873, "grad_norm": 0.8699227293940427, "learning_rate": 2.390659821960434e-09, "loss": 0.0031, "step": 21364 }, { "epoch": 4.861205915813424, "grad_norm": 0.22357794116397586, "learning_rate": 2.382858555115344e-09, "loss": 0.0011, "step": 21365 }, { "epoch": 4.861433447098976, "grad_norm": 0.09195700558613931, "learning_rate": 2.3750700134243797e-09, "loss": 0.0002, "step": 21366 }, { "epoch": 4.861660978384528, "grad_norm": 0.33441446215747656, "learning_rate": 2.367294197046649e-09, "loss": 0.0016, "step": 21367 }, { "epoch": 4.86188850967008, "grad_norm": 0.14628776608467367, "learning_rate": 2.3595311061411915e-09, "loss": 0.0007, "step": 21368 }, { "epoch": 4.862116040955631, "grad_norm": 0.3977098679502957, "learning_rate": 2.3517807408665605e-09, "loss": 0.0028, "step": 21369 }, { "epoch": 4.862343572241183, "grad_norm": 0.2278986025244656, "learning_rate": 2.3440431013811714e-09, "loss": 0.0009, "step": 21370 }, { "epoch": 4.862571103526735, "grad_norm": 0.9920374677187803, "learning_rate": 2.3363181878432312e-09, "loss": 0.0025, "step": 21371 }, { "epoch": 4.862798634812287, "grad_norm": 0.09489292287042188, "learning_rate": 2.328606000410599e-09, "loss": 0.0004, "step": 21372 }, { "epoch": 4.863026166097838, "grad_norm": 0.1004484901886649, "learning_rate": 2.3209065392409273e-09, "loss": 0.0004, "step": 21373 }, { "epoch": 4.86325369738339, "grad_norm": 0.48814195387529363, "learning_rate": 2.3132198044915204e-09, "loss": 0.0047, "step": 21374 }, { "epoch": 4.863481228668942, "grad_norm": 0.22237107782815568, "learning_rate": 2.305545796319475e-09, "loss": 0.0011, "step": 21375 }, { "epoch": 4.863708759954494, "grad_norm": 0.035606096860910125, "learning_rate": 2.297884514881679e-09, "loss": 0.0001, "step": 21376 }, { "epoch": 4.863936291240045, "grad_norm": 0.2894627415159703, "learning_rate": 2.2902359603346747e-09, "loss": 0.0009, "step": 21377 }, { "epoch": 4.864163822525597, "grad_norm": 0.32934072869995745, "learning_rate": 2.282600132834864e-09, "loss": 0.0027, "step": 21378 }, { "epoch": 4.864391353811149, "grad_norm": 0.33616540393167843, "learning_rate": 2.2749770325382336e-09, "loss": 0.0006, "step": 21379 }, { "epoch": 4.864618885096701, "grad_norm": 0.8594531233633897, "learning_rate": 2.2673666596005617e-09, "loss": 0.0032, "step": 21380 }, { "epoch": 4.864846416382252, "grad_norm": 0.7157858576981146, "learning_rate": 2.2597690141774874e-09, "loss": 0.0037, "step": 21381 }, { "epoch": 4.865073947667804, "grad_norm": 0.3267385575473171, "learning_rate": 2.2521840964242343e-09, "loss": 0.0026, "step": 21382 }, { "epoch": 4.865301478953356, "grad_norm": 0.7602030034318087, "learning_rate": 2.244611906495747e-09, "loss": 0.0041, "step": 21383 }, { "epoch": 4.865529010238908, "grad_norm": 0.3504207236051818, "learning_rate": 2.237052444546972e-09, "loss": 0.0014, "step": 21384 }, { "epoch": 4.865756541524459, "grad_norm": 0.20630010767969204, "learning_rate": 2.229505710732299e-09, "loss": 0.0009, "step": 21385 }, { "epoch": 4.865984072810011, "grad_norm": 0.040266407516577396, "learning_rate": 2.221971705205911e-09, "loss": 0.0002, "step": 21386 }, { "epoch": 4.866211604095563, "grad_norm": 0.06847532035499883, "learning_rate": 2.21445042812192e-09, "loss": 0.0002, "step": 21387 }, { "epoch": 4.866439135381115, "grad_norm": 0.357291616731083, "learning_rate": 2.206941879633953e-09, "loss": 0.0024, "step": 21388 }, { "epoch": 4.866666666666667, "grad_norm": 0.47246309994716157, "learning_rate": 2.199446059895499e-09, "loss": 0.0027, "step": 21389 }, { "epoch": 4.8668941979522184, "grad_norm": 0.08046607953845084, "learning_rate": 2.1919629690597688e-09, "loss": 0.0003, "step": 21390 }, { "epoch": 4.86712172923777, "grad_norm": 0.2011276937368147, "learning_rate": 2.184492607279695e-09, "loss": 0.001, "step": 21391 }, { "epoch": 4.867349260523322, "grad_norm": 0.2220143959533949, "learning_rate": 2.1770349747080034e-09, "loss": 0.0009, "step": 21392 }, { "epoch": 4.867576791808874, "grad_norm": 0.6870401853362487, "learning_rate": 2.169590071497002e-09, "loss": 0.0033, "step": 21393 }, { "epoch": 4.8678043230944255, "grad_norm": 0.1590240693464828, "learning_rate": 2.162157897799e-09, "loss": 0.0007, "step": 21394 }, { "epoch": 4.868031854379977, "grad_norm": 0.6325320140719689, "learning_rate": 2.1547384537657508e-09, "loss": 0.0021, "step": 21395 }, { "epoch": 4.868259385665529, "grad_norm": 0.05664425795158954, "learning_rate": 2.147331739549008e-09, "loss": 0.0001, "step": 21396 }, { "epoch": 4.868486916951081, "grad_norm": 0.24303190674024008, "learning_rate": 2.1399377553000395e-09, "loss": 0.0005, "step": 21397 }, { "epoch": 4.8687144482366325, "grad_norm": 0.5449650430466004, "learning_rate": 2.1325565011700437e-09, "loss": 0.0023, "step": 21398 }, { "epoch": 4.868941979522185, "grad_norm": 0.05339957220043379, "learning_rate": 2.125187977309942e-09, "loss": 0.0001, "step": 21399 }, { "epoch": 4.869169510807736, "grad_norm": 0.15995032614675642, "learning_rate": 2.1178321838701693e-09, "loss": 0.0007, "step": 21400 }, { "epoch": 4.869397042093288, "grad_norm": 0.5232382939199927, "learning_rate": 2.11048912100123e-09, "loss": 0.0051, "step": 21401 }, { "epoch": 4.8696245733788395, "grad_norm": 0.04627016991494156, "learning_rate": 2.1031587888530047e-09, "loss": 0.0001, "step": 21402 }, { "epoch": 4.869852104664392, "grad_norm": 0.28261617981731346, "learning_rate": 2.095841187575512e-09, "loss": 0.0009, "step": 21403 }, { "epoch": 4.870079635949943, "grad_norm": 0.13719741432493046, "learning_rate": 2.088536317318285e-09, "loss": 0.0006, "step": 21404 }, { "epoch": 4.870307167235495, "grad_norm": 0.5242610201963858, "learning_rate": 2.0812441782304404e-09, "loss": 0.0039, "step": 21405 }, { "epoch": 4.8705346985210465, "grad_norm": 0.08665063273250163, "learning_rate": 2.0739647704612344e-09, "loss": 0.0003, "step": 21406 }, { "epoch": 4.870762229806599, "grad_norm": 0.42668041724419686, "learning_rate": 2.0666980941592973e-09, "loss": 0.0035, "step": 21407 }, { "epoch": 4.87098976109215, "grad_norm": 0.47984843622436546, "learning_rate": 2.059444149473261e-09, "loss": 0.002, "step": 21408 }, { "epoch": 4.871217292377702, "grad_norm": 0.5928033444620374, "learning_rate": 2.05220293655127e-09, "loss": 0.0018, "step": 21409 }, { "epoch": 4.8714448236632535, "grad_norm": 0.46323109728965306, "learning_rate": 2.044974455541401e-09, "loss": 0.0022, "step": 21410 }, { "epoch": 4.871672354948806, "grad_norm": 0.29193611957750104, "learning_rate": 2.0377587065913835e-09, "loss": 0.0021, "step": 21411 }, { "epoch": 4.871899886234357, "grad_norm": 0.4538368026893101, "learning_rate": 2.030555689848668e-09, "loss": 0.0031, "step": 21412 }, { "epoch": 4.872127417519909, "grad_norm": 0.24236141811603654, "learning_rate": 2.023365405460498e-09, "loss": 0.0016, "step": 21413 }, { "epoch": 4.8723549488054605, "grad_norm": 0.2520533632376691, "learning_rate": 2.01618785357377e-09, "loss": 0.0011, "step": 21414 }, { "epoch": 4.872582480091013, "grad_norm": 1.1018105000708012, "learning_rate": 2.009023034335242e-09, "loss": 0.011, "step": 21415 }, { "epoch": 4.872810011376564, "grad_norm": 0.17381378546591875, "learning_rate": 2.001870947891324e-09, "loss": 0.0005, "step": 21416 }, { "epoch": 4.873037542662116, "grad_norm": 0.06282396257071393, "learning_rate": 1.9947315943881494e-09, "loss": 0.0002, "step": 21417 }, { "epoch": 4.8732650739476675, "grad_norm": 0.24619102565637893, "learning_rate": 1.9876049739717822e-09, "loss": 0.0017, "step": 21418 }, { "epoch": 4.87349260523322, "grad_norm": 0.8214957110008129, "learning_rate": 1.9804910867877307e-09, "loss": 0.0032, "step": 21419 }, { "epoch": 4.873720136518771, "grad_norm": 0.08959040106869333, "learning_rate": 1.9733899329814342e-09, "loss": 0.0004, "step": 21420 }, { "epoch": 4.873947667804323, "grad_norm": 0.40247281062272716, "learning_rate": 1.9663015126979853e-09, "loss": 0.0025, "step": 21421 }, { "epoch": 4.8741751990898745, "grad_norm": 0.41927036651718436, "learning_rate": 1.9592258260823364e-09, "loss": 0.0011, "step": 21422 }, { "epoch": 4.874402730375427, "grad_norm": 0.5658801187534543, "learning_rate": 1.9521628732790955e-09, "loss": 0.0025, "step": 21423 }, { "epoch": 4.874630261660978, "grad_norm": 0.07154196233793748, "learning_rate": 1.945112654432521e-09, "loss": 0.0002, "step": 21424 }, { "epoch": 4.87485779294653, "grad_norm": 0.07680702747074104, "learning_rate": 1.938075169686804e-09, "loss": 0.0003, "step": 21425 }, { "epoch": 4.8750853242320815, "grad_norm": 0.08520739932797297, "learning_rate": 1.9310504191857177e-09, "loss": 0.0004, "step": 21426 }, { "epoch": 4.875312855517634, "grad_norm": 0.636842700037433, "learning_rate": 1.924038403072967e-09, "loss": 0.0046, "step": 21427 }, { "epoch": 4.875540386803186, "grad_norm": 0.09784834467250032, "learning_rate": 1.9170391214916317e-09, "loss": 0.0006, "step": 21428 }, { "epoch": 4.875767918088737, "grad_norm": 0.48250051610327754, "learning_rate": 1.9100525745850002e-09, "loss": 0.0065, "step": 21429 }, { "epoch": 4.8759954493742885, "grad_norm": 0.17765210802482376, "learning_rate": 1.9030787624956666e-09, "loss": 0.0005, "step": 21430 }, { "epoch": 4.876222980659841, "grad_norm": 0.05082156016661, "learning_rate": 1.8961176853662944e-09, "loss": 0.0001, "step": 21431 }, { "epoch": 4.876450511945393, "grad_norm": 0.13561536896122553, "learning_rate": 1.8891693433391312e-09, "loss": 0.0009, "step": 21432 }, { "epoch": 4.876678043230944, "grad_norm": 0.1266554612332527, "learning_rate": 1.882233736556216e-09, "loss": 0.0005, "step": 21433 }, { "epoch": 4.8769055745164955, "grad_norm": 0.1178602850925001, "learning_rate": 1.8753108651591717e-09, "loss": 0.0002, "step": 21434 }, { "epoch": 4.877133105802048, "grad_norm": 0.3940044735816887, "learning_rate": 1.868400729289621e-09, "loss": 0.0012, "step": 21435 }, { "epoch": 4.8773606370876, "grad_norm": 0.40827054892257547, "learning_rate": 1.8615033290887013e-09, "loss": 0.0013, "step": 21436 }, { "epoch": 4.877588168373151, "grad_norm": 0.1650577465442219, "learning_rate": 1.85461866469748e-09, "loss": 0.0004, "step": 21437 }, { "epoch": 4.877815699658703, "grad_norm": 0.7236240806349119, "learning_rate": 1.8477467362566087e-09, "loss": 0.0036, "step": 21438 }, { "epoch": 4.878043230944255, "grad_norm": 0.39463151622367854, "learning_rate": 1.8408875439065305e-09, "loss": 0.0023, "step": 21439 }, { "epoch": 4.878270762229807, "grad_norm": 0.266807791376999, "learning_rate": 1.8340410877874804e-09, "loss": 0.0013, "step": 21440 }, { "epoch": 4.878498293515358, "grad_norm": 0.585826897046924, "learning_rate": 1.8272073680393464e-09, "loss": 0.0025, "step": 21441 }, { "epoch": 4.87872582480091, "grad_norm": 0.8988847813108026, "learning_rate": 1.8203863848017394e-09, "loss": 0.0115, "step": 21442 }, { "epoch": 4.878953356086462, "grad_norm": 0.20492802552749703, "learning_rate": 1.8135781382142004e-09, "loss": 0.0009, "step": 21443 }, { "epoch": 4.879180887372014, "grad_norm": 0.5017755297560434, "learning_rate": 1.8067826284157847e-09, "loss": 0.0076, "step": 21444 }, { "epoch": 4.879408418657565, "grad_norm": 0.25059392758222127, "learning_rate": 1.799999855545409e-09, "loss": 0.0008, "step": 21445 }, { "epoch": 4.8796359499431174, "grad_norm": 0.039257168215697774, "learning_rate": 1.7932298197417125e-09, "loss": 0.0001, "step": 21446 }, { "epoch": 4.879863481228669, "grad_norm": 0.07796107334056707, "learning_rate": 1.7864725211430567e-09, "loss": 0.0002, "step": 21447 }, { "epoch": 4.880091012514221, "grad_norm": 0.402573048567612, "learning_rate": 1.7797279598875256e-09, "loss": 0.0009, "step": 21448 }, { "epoch": 4.880318543799772, "grad_norm": 0.4881298718889043, "learning_rate": 1.772996136112995e-09, "loss": 0.0013, "step": 21449 }, { "epoch": 4.8805460750853245, "grad_norm": 0.30399741918934836, "learning_rate": 1.766277049957063e-09, "loss": 0.0014, "step": 21450 }, { "epoch": 4.880773606370876, "grad_norm": 0.019254759013511, "learning_rate": 1.7595707015569814e-09, "loss": 0.0, "step": 21451 }, { "epoch": 4.881001137656428, "grad_norm": 0.6479311453787154, "learning_rate": 1.752877091049862e-09, "loss": 0.0023, "step": 21452 }, { "epoch": 4.881228668941979, "grad_norm": 0.7113798665499381, "learning_rate": 1.7461962185725406e-09, "loss": 0.0051, "step": 21453 }, { "epoch": 4.8814562002275315, "grad_norm": 0.20516481667189984, "learning_rate": 1.7395280842615047e-09, "loss": 0.0009, "step": 21454 }, { "epoch": 4.881683731513083, "grad_norm": 0.326586069435691, "learning_rate": 1.7328726882531038e-09, "loss": 0.0032, "step": 21455 }, { "epoch": 4.881911262798635, "grad_norm": 0.762997219799854, "learning_rate": 1.7262300306832707e-09, "loss": 0.0068, "step": 21456 }, { "epoch": 4.882138794084186, "grad_norm": 0.020096684935153698, "learning_rate": 1.719600111687869e-09, "loss": 0.0, "step": 21457 }, { "epoch": 4.8823663253697385, "grad_norm": 0.23086967656223029, "learning_rate": 1.712982931402346e-09, "loss": 0.0006, "step": 21458 }, { "epoch": 4.88259385665529, "grad_norm": 0.32125342073838586, "learning_rate": 1.7063784899619407e-09, "loss": 0.0014, "step": 21459 }, { "epoch": 4.882821387940842, "grad_norm": 0.484331560664325, "learning_rate": 1.6997867875016837e-09, "loss": 0.0019, "step": 21460 }, { "epoch": 4.883048919226393, "grad_norm": 1.0568621687007258, "learning_rate": 1.6932078241562594e-09, "loss": 0.0047, "step": 21461 }, { "epoch": 4.8832764505119455, "grad_norm": 0.47663436479258553, "learning_rate": 1.6866416000601432e-09, "loss": 0.0066, "step": 21462 }, { "epoch": 4.883503981797497, "grad_norm": 0.09742152937845014, "learning_rate": 1.6800881153474642e-09, "loss": 0.0003, "step": 21463 }, { "epoch": 4.883731513083049, "grad_norm": 0.6605510992031558, "learning_rate": 1.6735473701522814e-09, "loss": 0.0031, "step": 21464 }, { "epoch": 4.8839590443686, "grad_norm": 0.3819842528720097, "learning_rate": 1.6670193646082382e-09, "loss": 0.0007, "step": 21465 }, { "epoch": 4.8841865756541525, "grad_norm": 0.18315065377229042, "learning_rate": 1.6605040988487003e-09, "loss": 0.0004, "step": 21466 }, { "epoch": 4.884414106939705, "grad_norm": 0.5621085088917365, "learning_rate": 1.654001573006825e-09, "loss": 0.0017, "step": 21467 }, { "epoch": 4.884641638225256, "grad_norm": 0.20576196684423834, "learning_rate": 1.6475117872156304e-09, "loss": 0.0004, "step": 21468 }, { "epoch": 4.884869169510807, "grad_norm": 0.18369532943247094, "learning_rate": 1.64103474160765e-09, "loss": 0.001, "step": 21469 }, { "epoch": 4.8850967007963595, "grad_norm": 0.4632162221569648, "learning_rate": 1.6345704363152776e-09, "loss": 0.0052, "step": 21470 }, { "epoch": 4.885324232081912, "grad_norm": 0.07878360130904737, "learning_rate": 1.628118871470699e-09, "loss": 0.0003, "step": 21471 }, { "epoch": 4.885551763367463, "grad_norm": 0.19035788321519836, "learning_rate": 1.6216800472056844e-09, "loss": 0.0009, "step": 21472 }, { "epoch": 4.885779294653014, "grad_norm": 0.0742033365725708, "learning_rate": 1.6152539636518639e-09, "loss": 0.0001, "step": 21473 }, { "epoch": 4.8860068259385665, "grad_norm": 0.08920475665957311, "learning_rate": 1.608840620940591e-09, "loss": 0.0004, "step": 21474 }, { "epoch": 4.886234357224119, "grad_norm": 0.2671622620441501, "learning_rate": 1.602440019202872e-09, "loss": 0.001, "step": 21475 }, { "epoch": 4.88646188850967, "grad_norm": 0.32178726754299414, "learning_rate": 1.5960521585696442e-09, "loss": 0.0017, "step": 21476 }, { "epoch": 4.886689419795222, "grad_norm": 0.27437515065857354, "learning_rate": 1.5896770391713585e-09, "loss": 0.0016, "step": 21477 }, { "epoch": 4.8869169510807735, "grad_norm": 0.07549261717686373, "learning_rate": 1.5833146611383964e-09, "loss": 0.0003, "step": 21478 }, { "epoch": 4.887144482366326, "grad_norm": 0.390838756894558, "learning_rate": 1.576965024600724e-09, "loss": 0.0028, "step": 21479 }, { "epoch": 4.887372013651877, "grad_norm": 0.39387950621551293, "learning_rate": 1.5706281296881675e-09, "loss": 0.0014, "step": 21480 }, { "epoch": 4.887599544937429, "grad_norm": 0.12474563814859439, "learning_rate": 1.5643039765301376e-09, "loss": 0.0003, "step": 21481 }, { "epoch": 4.8878270762229805, "grad_norm": 0.02629900904362342, "learning_rate": 1.5579925652560445e-09, "loss": 0.0001, "step": 21482 }, { "epoch": 4.888054607508533, "grad_norm": 0.09752007552591899, "learning_rate": 1.5516938959947435e-09, "loss": 0.0003, "step": 21483 }, { "epoch": 4.888282138794084, "grad_norm": 0.23563809600263846, "learning_rate": 1.54540796887509e-09, "loss": 0.0013, "step": 21484 }, { "epoch": 4.888509670079636, "grad_norm": 0.06365898712861587, "learning_rate": 1.5391347840254534e-09, "loss": 0.0001, "step": 21485 }, { "epoch": 4.8887372013651875, "grad_norm": 0.18985726513305326, "learning_rate": 1.5328743415741338e-09, "loss": 0.0014, "step": 21486 }, { "epoch": 4.88896473265074, "grad_norm": 0.18097590617457116, "learning_rate": 1.5266266416489455e-09, "loss": 0.001, "step": 21487 }, { "epoch": 4.889192263936291, "grad_norm": 0.29303957470454045, "learning_rate": 1.5203916843777727e-09, "loss": 0.0019, "step": 21488 }, { "epoch": 4.889419795221843, "grad_norm": 0.059647415425345185, "learning_rate": 1.5141694698878745e-09, "loss": 0.0003, "step": 21489 }, { "epoch": 4.8896473265073945, "grad_norm": 0.25088476610748495, "learning_rate": 1.5079599983065103e-09, "loss": 0.0012, "step": 21490 }, { "epoch": 4.889874857792947, "grad_norm": 0.0636076910937644, "learning_rate": 1.501763269760592e-09, "loss": 0.0002, "step": 21491 }, { "epoch": 4.890102389078498, "grad_norm": 0.14461554394731657, "learning_rate": 1.4955792843767553e-09, "loss": 0.0006, "step": 21492 }, { "epoch": 4.89032992036405, "grad_norm": 0.3921962546223937, "learning_rate": 1.4894080422813568e-09, "loss": 0.0022, "step": 21493 }, { "epoch": 4.8905574516496015, "grad_norm": 0.25793282135605766, "learning_rate": 1.4832495436005456e-09, "loss": 0.0018, "step": 21494 }, { "epoch": 4.890784982935154, "grad_norm": 0.2859357024333565, "learning_rate": 1.4771037884601935e-09, "loss": 0.0014, "step": 21495 }, { "epoch": 4.891012514220705, "grad_norm": 0.042977489744294106, "learning_rate": 1.4709707769859638e-09, "loss": 0.0001, "step": 21496 }, { "epoch": 4.891240045506257, "grad_norm": 0.0933637170533471, "learning_rate": 1.4648505093031034e-09, "loss": 0.0003, "step": 21497 }, { "epoch": 4.8914675767918085, "grad_norm": 5.452632788480302, "learning_rate": 1.4587429855367207e-09, "loss": 0.0289, "step": 21498 }, { "epoch": 4.891695108077361, "grad_norm": 0.26282863085990904, "learning_rate": 1.4526482058117852e-09, "loss": 0.0026, "step": 21499 }, { "epoch": 4.891922639362912, "grad_norm": 0.29974547715846167, "learning_rate": 1.4465661702526418e-09, "loss": 0.0022, "step": 21500 }, { "epoch": 4.892150170648464, "grad_norm": 0.15025898756479375, "learning_rate": 1.440496878983705e-09, "loss": 0.0003, "step": 21501 }, { "epoch": 4.8923777019340156, "grad_norm": 0.3359621577013396, "learning_rate": 1.4344403321290418e-09, "loss": 0.0019, "step": 21502 }, { "epoch": 4.892605233219568, "grad_norm": 0.33717233137200164, "learning_rate": 1.4283965298124424e-09, "loss": 0.003, "step": 21503 }, { "epoch": 4.892832764505119, "grad_norm": 0.15089919003320704, "learning_rate": 1.42236547215735e-09, "loss": 0.0004, "step": 21504 }, { "epoch": 4.893060295790671, "grad_norm": 0.24650750443472053, "learning_rate": 1.4163471592871375e-09, "loss": 0.0009, "step": 21505 }, { "epoch": 4.8932878270762235, "grad_norm": 0.06749272125144422, "learning_rate": 1.4103415913246931e-09, "loss": 0.0002, "step": 21506 }, { "epoch": 4.893515358361775, "grad_norm": 1.2902579029198717, "learning_rate": 1.4043487683928353e-09, "loss": 0.0074, "step": 21507 }, { "epoch": 4.893742889647326, "grad_norm": 0.39666309213412715, "learning_rate": 1.3983686906140354e-09, "loss": 0.0009, "step": 21508 }, { "epoch": 4.893970420932878, "grad_norm": 0.20571803523941962, "learning_rate": 1.3924013581104872e-09, "loss": 0.0007, "step": 21509 }, { "epoch": 4.8941979522184305, "grad_norm": 0.45384264228225984, "learning_rate": 1.3864467710041768e-09, "loss": 0.0022, "step": 21510 }, { "epoch": 4.894425483503982, "grad_norm": 0.5941926650850076, "learning_rate": 1.3805049294168122e-09, "loss": 0.003, "step": 21511 }, { "epoch": 4.894653014789533, "grad_norm": 0.4484169796651249, "learning_rate": 1.374575833469824e-09, "loss": 0.0015, "step": 21512 }, { "epoch": 4.894880546075085, "grad_norm": 0.314833469904633, "learning_rate": 1.3686594832843654e-09, "loss": 0.0009, "step": 21513 }, { "epoch": 4.8951080773606375, "grad_norm": 0.4213100032800285, "learning_rate": 1.362755878981381e-09, "loss": 0.0012, "step": 21514 }, { "epoch": 4.895335608646189, "grad_norm": 0.3028436211348417, "learning_rate": 1.356865020681608e-09, "loss": 0.0009, "step": 21515 }, { "epoch": 4.895563139931741, "grad_norm": 0.5230467192521754, "learning_rate": 1.3509869085052974e-09, "loss": 0.0026, "step": 21516 }, { "epoch": 4.895790671217292, "grad_norm": 0.4675011847620908, "learning_rate": 1.3451215425726306e-09, "loss": 0.0027, "step": 21517 }, { "epoch": 4.8960182025028445, "grad_norm": 0.1430707565139182, "learning_rate": 1.3392689230035811e-09, "loss": 0.0003, "step": 21518 }, { "epoch": 4.896245733788396, "grad_norm": 0.028830838722711246, "learning_rate": 1.3334290499176367e-09, "loss": 0.0001, "step": 21519 }, { "epoch": 4.896473265073948, "grad_norm": 0.16693935449186523, "learning_rate": 1.3276019234342158e-09, "loss": 0.0004, "step": 21520 }, { "epoch": 4.896700796359499, "grad_norm": 0.41838574830803493, "learning_rate": 1.3217875436723898e-09, "loss": 0.0026, "step": 21521 }, { "epoch": 4.8969283276450515, "grad_norm": 0.5754331678173911, "learning_rate": 1.3159859107510915e-09, "loss": 0.0032, "step": 21522 }, { "epoch": 4.897155858930603, "grad_norm": 0.2474248298671071, "learning_rate": 1.3101970247887674e-09, "loss": 0.0011, "step": 21523 }, { "epoch": 4.897383390216155, "grad_norm": 0.11617155450408657, "learning_rate": 1.3044208859037954e-09, "loss": 0.0004, "step": 21524 }, { "epoch": 4.897610921501706, "grad_norm": 0.058871534272022005, "learning_rate": 1.298657494214206e-09, "loss": 0.0001, "step": 21525 }, { "epoch": 4.8978384527872585, "grad_norm": 1.0183606982782316, "learning_rate": 1.2929068498377523e-09, "loss": 0.005, "step": 21526 }, { "epoch": 4.89806598407281, "grad_norm": 0.5402647286653992, "learning_rate": 1.2871689528921178e-09, "loss": 0.0046, "step": 21527 }, { "epoch": 4.898293515358362, "grad_norm": 0.16872488780756048, "learning_rate": 1.2814438034943617e-09, "loss": 0.0006, "step": 21528 }, { "epoch": 4.898521046643913, "grad_norm": 0.6619105800256817, "learning_rate": 1.2757314017616818e-09, "loss": 0.0041, "step": 21529 }, { "epoch": 4.8987485779294655, "grad_norm": 0.766475598036962, "learning_rate": 1.2700317478107904e-09, "loss": 0.0033, "step": 21530 }, { "epoch": 4.898976109215017, "grad_norm": 0.5165752506215311, "learning_rate": 1.264344841758053e-09, "loss": 0.0027, "step": 21531 }, { "epoch": 4.899203640500569, "grad_norm": 0.4788148737063385, "learning_rate": 1.2586706837198342e-09, "loss": 0.0018, "step": 21532 }, { "epoch": 4.89943117178612, "grad_norm": 0.17930961905503243, "learning_rate": 1.2530092738120836e-09, "loss": 0.0007, "step": 21533 }, { "epoch": 4.8996587030716725, "grad_norm": 0.33036962087841, "learning_rate": 1.2473606121504723e-09, "loss": 0.0021, "step": 21534 }, { "epoch": 4.899886234357224, "grad_norm": 0.5031436540413855, "learning_rate": 1.2417246988503939e-09, "loss": 0.0017, "step": 21535 }, { "epoch": 4.900113765642776, "grad_norm": 0.039472274250274095, "learning_rate": 1.2361015340271732e-09, "loss": 0.0001, "step": 21536 }, { "epoch": 4.900341296928327, "grad_norm": 0.16285530987468563, "learning_rate": 1.2304911177956486e-09, "loss": 0.0006, "step": 21537 }, { "epoch": 4.9005688282138795, "grad_norm": 0.37653459130436556, "learning_rate": 1.22489345027052e-09, "loss": 0.0057, "step": 21538 }, { "epoch": 4.900796359499431, "grad_norm": 0.035269874005065004, "learning_rate": 1.2193085315661406e-09, "loss": 0.0001, "step": 21539 }, { "epoch": 4.901023890784983, "grad_norm": 0.16574576664837523, "learning_rate": 1.2137363617967247e-09, "loss": 0.001, "step": 21540 }, { "epoch": 4.901251422070534, "grad_norm": 0.0712632534270357, "learning_rate": 1.208176941076139e-09, "loss": 0.0002, "step": 21541 }, { "epoch": 4.9014789533560865, "grad_norm": 0.47809848904052266, "learning_rate": 1.2026302695179737e-09, "loss": 0.0035, "step": 21542 }, { "epoch": 4.901706484641638, "grad_norm": 0.16387078639321703, "learning_rate": 1.1970963472356102e-09, "loss": 0.0005, "step": 21543 }, { "epoch": 4.90193401592719, "grad_norm": 0.16053347760593556, "learning_rate": 1.1915751743421522e-09, "loss": 0.0004, "step": 21544 }, { "epoch": 4.902161547212742, "grad_norm": 1.125169669345952, "learning_rate": 1.1860667509504959e-09, "loss": 0.0031, "step": 21545 }, { "epoch": 4.9023890784982935, "grad_norm": 0.03062820069795028, "learning_rate": 1.1805710771731204e-09, "loss": 0.0001, "step": 21546 }, { "epoch": 4.902616609783845, "grad_norm": 0.11194999650808324, "learning_rate": 1.1750881531224362e-09, "loss": 0.0004, "step": 21547 }, { "epoch": 4.902844141069397, "grad_norm": 0.30453911893571284, "learning_rate": 1.1696179789104367e-09, "loss": 0.0024, "step": 21548 }, { "epoch": 4.903071672354949, "grad_norm": 0.028682408103305402, "learning_rate": 1.164160554648977e-09, "loss": 0.0001, "step": 21549 }, { "epoch": 4.9032992036405005, "grad_norm": 0.058610434855581976, "learning_rate": 1.1587158804495652e-09, "loss": 0.0002, "step": 21550 }, { "epoch": 4.903526734926052, "grad_norm": 0.14689744445276368, "learning_rate": 1.153283956423501e-09, "loss": 0.0002, "step": 21551 }, { "epoch": 4.903754266211604, "grad_norm": 0.10367802972153393, "learning_rate": 1.1478647826817373e-09, "loss": 0.0003, "step": 21552 }, { "epoch": 4.903981797497156, "grad_norm": 0.20318915135241336, "learning_rate": 1.1424583593351579e-09, "loss": 0.0005, "step": 21553 }, { "epoch": 4.9042093287827075, "grad_norm": 0.3828827787975051, "learning_rate": 1.1370646864941604e-09, "loss": 0.001, "step": 21554 }, { "epoch": 4.90443686006826, "grad_norm": 0.4247299761751869, "learning_rate": 1.1316837642690036e-09, "loss": 0.0015, "step": 21555 }, { "epoch": 4.904664391353811, "grad_norm": 0.43901275982780597, "learning_rate": 1.1263155927696694e-09, "loss": 0.0022, "step": 21556 }, { "epoch": 4.904891922639363, "grad_norm": 0.28404171623501007, "learning_rate": 1.1209601721058616e-09, "loss": 0.002, "step": 21557 }, { "epoch": 4.9051194539249146, "grad_norm": 0.9845255969975512, "learning_rate": 1.1156175023870757e-09, "loss": 0.0085, "step": 21558 }, { "epoch": 4.905346985210467, "grad_norm": 0.4480354823170128, "learning_rate": 1.110287583722461e-09, "loss": 0.0018, "step": 21559 }, { "epoch": 4.905574516496018, "grad_norm": 2.605993177902986, "learning_rate": 1.1049704162209573e-09, "loss": 0.0151, "step": 21560 }, { "epoch": 4.90580204778157, "grad_norm": 0.6083887272841336, "learning_rate": 1.0996659999912978e-09, "loss": 0.0042, "step": 21561 }, { "epoch": 4.906029579067122, "grad_norm": 0.36662271858585216, "learning_rate": 1.0943743351417984e-09, "loss": 0.0016, "step": 21562 }, { "epoch": 4.906257110352674, "grad_norm": 0.1963302271403175, "learning_rate": 1.089095421780706e-09, "loss": 0.0009, "step": 21563 }, { "epoch": 4.906484641638225, "grad_norm": 0.6172446093812417, "learning_rate": 1.0838292600157813e-09, "loss": 0.0015, "step": 21564 }, { "epoch": 4.906712172923777, "grad_norm": 0.046985135612179076, "learning_rate": 1.0785758499548549e-09, "loss": 0.0002, "step": 21565 }, { "epoch": 4.906939704209329, "grad_norm": 0.15167948456354793, "learning_rate": 1.0733351917050633e-09, "loss": 0.0011, "step": 21566 }, { "epoch": 4.907167235494881, "grad_norm": 0.5157877573600502, "learning_rate": 1.0681072853736819e-09, "loss": 0.0041, "step": 21567 }, { "epoch": 4.907394766780432, "grad_norm": 0.16488345516055583, "learning_rate": 1.0628921310675694e-09, "loss": 0.0007, "step": 21568 }, { "epoch": 4.907622298065984, "grad_norm": 0.11756857445115365, "learning_rate": 1.0576897288931686e-09, "loss": 0.0009, "step": 21569 }, { "epoch": 4.907849829351536, "grad_norm": 0.4033426920870241, "learning_rate": 1.0525000789569223e-09, "loss": 0.0029, "step": 21570 }, { "epoch": 4.908077360637088, "grad_norm": 0.6962695958831311, "learning_rate": 1.0473231813648567e-09, "loss": 0.0073, "step": 21571 }, { "epoch": 4.908304891922639, "grad_norm": 0.15743414647767237, "learning_rate": 1.0421590362227897e-09, "loss": 0.0009, "step": 21572 }, { "epoch": 4.908532423208191, "grad_norm": 0.8390414935708549, "learning_rate": 1.037007643636262e-09, "loss": 0.0052, "step": 21573 }, { "epoch": 4.908759954493743, "grad_norm": 0.19974163684285243, "learning_rate": 1.031869003710606e-09, "loss": 0.0009, "step": 21574 }, { "epoch": 4.908987485779295, "grad_norm": 0.19969218080027207, "learning_rate": 1.0267431165508073e-09, "loss": 0.0013, "step": 21575 }, { "epoch": 4.909215017064846, "grad_norm": 0.15314123657596468, "learning_rate": 1.0216299822615739e-09, "loss": 0.0006, "step": 21576 }, { "epoch": 4.909442548350398, "grad_norm": 0.19833184878562787, "learning_rate": 1.0165296009474745e-09, "loss": 0.0008, "step": 21577 }, { "epoch": 4.90967007963595, "grad_norm": 0.4157824162162483, "learning_rate": 1.0114419727127316e-09, "loss": 0.0019, "step": 21578 }, { "epoch": 4.909897610921502, "grad_norm": 0.3600128205382567, "learning_rate": 1.0063670976613592e-09, "loss": 0.0014, "step": 21579 }, { "epoch": 4.910125142207053, "grad_norm": 0.4590872181619299, "learning_rate": 1.001304975897094e-09, "loss": 0.0037, "step": 21580 }, { "epoch": 4.910352673492605, "grad_norm": 0.09429088330892361, "learning_rate": 9.962556075232555e-10, "loss": 0.0003, "step": 21581 }, { "epoch": 4.910580204778157, "grad_norm": 0.18881353341574497, "learning_rate": 9.912189926432336e-10, "loss": 0.001, "step": 21582 }, { "epoch": 4.910807736063709, "grad_norm": 0.07746886556596089, "learning_rate": 9.861951313597933e-10, "loss": 0.0002, "step": 21583 }, { "epoch": 4.911035267349261, "grad_norm": 0.38410749666769306, "learning_rate": 9.811840237757691e-10, "loss": 0.0008, "step": 21584 }, { "epoch": 4.911262798634812, "grad_norm": 0.5910088081525278, "learning_rate": 9.761856699934403e-10, "loss": 0.0021, "step": 21585 }, { "epoch": 4.911490329920364, "grad_norm": 0.013231644806485011, "learning_rate": 9.712000701150859e-10, "loss": 0.0, "step": 21586 }, { "epoch": 4.911717861205916, "grad_norm": 0.2956244509986345, "learning_rate": 9.662272242425691e-10, "loss": 0.0019, "step": 21587 }, { "epoch": 4.911945392491468, "grad_norm": 0.38318774306712916, "learning_rate": 9.612671324774752e-10, "loss": 0.0014, "step": 21588 }, { "epoch": 4.912172923777019, "grad_norm": 0.1284705523220628, "learning_rate": 9.563197949211816e-10, "loss": 0.0004, "step": 21589 }, { "epoch": 4.912400455062571, "grad_norm": 0.2186299081157726, "learning_rate": 9.513852116748572e-10, "loss": 0.0009, "step": 21590 }, { "epoch": 4.912627986348123, "grad_norm": 0.31404442405176713, "learning_rate": 9.464633828393244e-10, "loss": 0.0014, "step": 21591 }, { "epoch": 4.912855517633675, "grad_norm": 0.06478821904764008, "learning_rate": 9.415543085151273e-10, "loss": 0.0002, "step": 21592 }, { "epoch": 4.913083048919226, "grad_norm": 0.5204964828748947, "learning_rate": 9.366579888027416e-10, "loss": 0.0039, "step": 21593 }, { "epoch": 4.9133105802047785, "grad_norm": 0.4152579831981076, "learning_rate": 9.317744238020871e-10, "loss": 0.0051, "step": 21594 }, { "epoch": 4.91353811149033, "grad_norm": 0.4144774339639756, "learning_rate": 9.269036136130144e-10, "loss": 0.0048, "step": 21595 }, { "epoch": 4.913765642775882, "grad_norm": 0.20707815852064856, "learning_rate": 9.220455583351662e-10, "loss": 0.0009, "step": 21596 }, { "epoch": 4.913993174061433, "grad_norm": 0.9635824086699499, "learning_rate": 9.172002580677686e-10, "loss": 0.0038, "step": 21597 }, { "epoch": 4.9142207053469855, "grad_norm": 0.14221510002487409, "learning_rate": 9.123677129097702e-10, "loss": 0.0004, "step": 21598 }, { "epoch": 4.914448236632537, "grad_norm": 0.10494297229620841, "learning_rate": 9.075479229600504e-10, "loss": 0.0004, "step": 21599 }, { "epoch": 4.914675767918089, "grad_norm": 0.5359069159853818, "learning_rate": 9.027408883170718e-10, "loss": 0.0019, "step": 21600 }, { "epoch": 4.91490329920364, "grad_norm": 0.5269765894902128, "learning_rate": 8.979466090790895e-10, "loss": 0.002, "step": 21601 }, { "epoch": 4.9151308304891925, "grad_norm": 0.058570327956431745, "learning_rate": 8.931650853440804e-10, "loss": 0.0003, "step": 21602 }, { "epoch": 4.915358361774744, "grad_norm": 0.22109233283651755, "learning_rate": 8.883963172098137e-10, "loss": 0.0005, "step": 21603 }, { "epoch": 4.915585893060296, "grad_norm": 0.11847640959547989, "learning_rate": 8.836403047737113e-10, "loss": 0.0005, "step": 21604 }, { "epoch": 4.915813424345847, "grad_norm": 0.32457666612259245, "learning_rate": 8.788970481329873e-10, "loss": 0.0019, "step": 21605 }, { "epoch": 4.9160409556313995, "grad_norm": 0.9702935171075162, "learning_rate": 8.741665473845779e-10, "loss": 0.0056, "step": 21606 }, { "epoch": 4.916268486916951, "grad_norm": 0.30422416671179814, "learning_rate": 8.694488026251419e-10, "loss": 0.0023, "step": 21607 }, { "epoch": 4.916496018202503, "grad_norm": 0.27416998980225565, "learning_rate": 8.647438139511993e-10, "loss": 0.0022, "step": 21608 }, { "epoch": 4.916723549488054, "grad_norm": 0.020161383596675964, "learning_rate": 8.600515814587846e-10, "loss": 0.0001, "step": 21609 }, { "epoch": 4.9169510807736065, "grad_norm": 0.22034153548241334, "learning_rate": 8.553721052438623e-10, "loss": 0.0007, "step": 21610 }, { "epoch": 4.917178612059158, "grad_norm": 0.48115515809626486, "learning_rate": 8.507053854020508e-10, "loss": 0.0036, "step": 21611 }, { "epoch": 4.91740614334471, "grad_norm": 0.10652044994919314, "learning_rate": 8.460514220287597e-10, "loss": 0.0008, "step": 21612 }, { "epoch": 4.917633674630261, "grad_norm": 0.18854243067014065, "learning_rate": 8.414102152191211e-10, "loss": 0.0005, "step": 21613 }, { "epoch": 4.9178612059158135, "grad_norm": 0.05996129735872425, "learning_rate": 8.367817650679899e-10, "loss": 0.0001, "step": 21614 }, { "epoch": 4.918088737201365, "grad_norm": 1.0164048933380054, "learning_rate": 8.321660716698738e-10, "loss": 0.0022, "step": 21615 }, { "epoch": 4.918316268486917, "grad_norm": 0.3260734490965167, "learning_rate": 8.27563135119211e-10, "loss": 0.0017, "step": 21616 }, { "epoch": 4.918543799772468, "grad_norm": 0.08303509409361703, "learning_rate": 8.229729555100236e-10, "loss": 0.0001, "step": 21617 }, { "epoch": 4.918771331058021, "grad_norm": 0.21324927885371608, "learning_rate": 8.183955329361254e-10, "loss": 0.0006, "step": 21618 }, { "epoch": 4.918998862343572, "grad_norm": 0.3414559662247054, "learning_rate": 8.138308674911221e-10, "loss": 0.0019, "step": 21619 }, { "epoch": 4.919226393629124, "grad_norm": 0.9378160028852106, "learning_rate": 8.092789592682726e-10, "loss": 0.0113, "step": 21620 }, { "epoch": 4.919453924914675, "grad_norm": 1.2219455996857214, "learning_rate": 8.047398083605578e-10, "loss": 0.0066, "step": 21621 }, { "epoch": 4.919681456200228, "grad_norm": 0.27915901074211275, "learning_rate": 8.002134148608898e-10, "loss": 0.0006, "step": 21622 }, { "epoch": 4.91990898748578, "grad_norm": 0.367087885468058, "learning_rate": 7.956997788616249e-10, "loss": 0.0016, "step": 21623 }, { "epoch": 4.920136518771331, "grad_norm": 0.09312931382992881, "learning_rate": 7.911989004551201e-10, "loss": 0.0003, "step": 21624 }, { "epoch": 4.920364050056882, "grad_norm": 0.21693795021426332, "learning_rate": 7.867107797333157e-10, "loss": 0.0007, "step": 21625 }, { "epoch": 4.920591581342435, "grad_norm": 0.18191927283560477, "learning_rate": 7.822354167878743e-10, "loss": 0.0005, "step": 21626 }, { "epoch": 4.920819112627987, "grad_norm": 0.3365353646151496, "learning_rate": 7.777728117104589e-10, "loss": 0.0025, "step": 21627 }, { "epoch": 4.921046643913538, "grad_norm": 0.6156022323548598, "learning_rate": 7.733229645921076e-10, "loss": 0.004, "step": 21628 }, { "epoch": 4.921274175199089, "grad_norm": 0.30098193028004583, "learning_rate": 7.688858755237893e-10, "loss": 0.0012, "step": 21629 }, { "epoch": 4.921501706484642, "grad_norm": 0.27082041897134373, "learning_rate": 7.64461544596265e-10, "loss": 0.0005, "step": 21630 }, { "epoch": 4.921729237770194, "grad_norm": 0.22773135667596442, "learning_rate": 7.600499718998788e-10, "loss": 0.0012, "step": 21631 }, { "epoch": 4.921956769055745, "grad_norm": 0.25184038656355506, "learning_rate": 7.556511575248365e-10, "loss": 0.0007, "step": 21632 }, { "epoch": 4.922184300341297, "grad_norm": 0.26498606527535146, "learning_rate": 7.512651015610661e-10, "loss": 0.0009, "step": 21633 }, { "epoch": 4.922411831626849, "grad_norm": 0.49128379418256696, "learning_rate": 7.468918040981487e-10, "loss": 0.0021, "step": 21634 }, { "epoch": 4.922639362912401, "grad_norm": 1.6398294224643895, "learning_rate": 7.425312652254573e-10, "loss": 0.0087, "step": 21635 }, { "epoch": 4.922866894197952, "grad_norm": 0.24164455365014065, "learning_rate": 7.381834850322262e-10, "loss": 0.0036, "step": 21636 }, { "epoch": 4.923094425483504, "grad_norm": 0.019576011795376862, "learning_rate": 7.338484636072035e-10, "loss": 0.0001, "step": 21637 }, { "epoch": 4.923321956769056, "grad_norm": 0.04052166120886244, "learning_rate": 7.295262010390686e-10, "loss": 0.0001, "step": 21638 }, { "epoch": 4.923549488054608, "grad_norm": 0.27478616830860003, "learning_rate": 7.25216697416084e-10, "loss": 0.0014, "step": 21639 }, { "epoch": 4.923777019340159, "grad_norm": 0.7724650783981153, "learning_rate": 7.209199528263738e-10, "loss": 0.0025, "step": 21640 }, { "epoch": 4.924004550625711, "grad_norm": 0.378634049545702, "learning_rate": 7.16635967357715e-10, "loss": 0.002, "step": 21641 }, { "epoch": 4.924232081911263, "grad_norm": 1.194385138276711, "learning_rate": 7.123647410977457e-10, "loss": 0.001, "step": 21642 }, { "epoch": 4.924459613196815, "grad_norm": 0.5672947561966812, "learning_rate": 7.081062741336881e-10, "loss": 0.0033, "step": 21643 }, { "epoch": 4.924687144482366, "grad_norm": 0.45719430195828115, "learning_rate": 7.03860566552625e-10, "loss": 0.0016, "step": 21644 }, { "epoch": 4.924914675767918, "grad_norm": 0.32952434300870803, "learning_rate": 6.996276184412925e-10, "loss": 0.0008, "step": 21645 }, { "epoch": 4.92514220705347, "grad_norm": 0.632385841855353, "learning_rate": 6.95407429886219e-10, "loss": 0.0042, "step": 21646 }, { "epoch": 4.925369738339022, "grad_norm": 0.3526802208478668, "learning_rate": 6.912000009736547e-10, "loss": 0.0025, "step": 21647 }, { "epoch": 4.925597269624573, "grad_norm": 0.20950862942206727, "learning_rate": 6.870053317895725e-10, "loss": 0.0013, "step": 21648 }, { "epoch": 4.925824800910125, "grad_norm": 0.12214186670006608, "learning_rate": 6.828234224198066e-10, "loss": 0.0006, "step": 21649 }, { "epoch": 4.926052332195677, "grad_norm": 0.3630004716803572, "learning_rate": 6.786542729496359e-10, "loss": 0.0016, "step": 21650 }, { "epoch": 4.926279863481229, "grad_norm": 0.5099490878959247, "learning_rate": 6.744978834644784e-10, "loss": 0.0079, "step": 21651 }, { "epoch": 4.92650739476678, "grad_norm": 0.25943985279896753, "learning_rate": 6.70354254049127e-10, "loss": 0.0006, "step": 21652 }, { "epoch": 4.926734926052332, "grad_norm": 0.32062762609517764, "learning_rate": 6.66223384788306e-10, "loss": 0.0009, "step": 21653 }, { "epoch": 4.926962457337884, "grad_norm": 0.38915050074322316, "learning_rate": 6.621052757665308e-10, "loss": 0.0027, "step": 21654 }, { "epoch": 4.927189988623436, "grad_norm": 0.1348035787197575, "learning_rate": 6.579999270679011e-10, "loss": 0.0004, "step": 21655 }, { "epoch": 4.927417519908987, "grad_norm": 1.2641519177470828, "learning_rate": 6.539073387763079e-10, "loss": 0.0049, "step": 21656 }, { "epoch": 4.927645051194539, "grad_norm": 0.6317281967586785, "learning_rate": 6.498275109753649e-10, "loss": 0.0024, "step": 21657 }, { "epoch": 4.927872582480091, "grad_norm": 0.2334422862632627, "learning_rate": 6.45760443748547e-10, "loss": 0.0035, "step": 21658 }, { "epoch": 4.928100113765643, "grad_norm": 0.3859157408821618, "learning_rate": 6.41706137178913e-10, "loss": 0.0017, "step": 21659 }, { "epoch": 4.928327645051194, "grad_norm": 0.23666910982346498, "learning_rate": 6.376645913493823e-10, "loss": 0.0009, "step": 21660 }, { "epoch": 4.928555176336746, "grad_norm": 0.5863872208708665, "learning_rate": 6.33635806342528e-10, "loss": 0.0016, "step": 21661 }, { "epoch": 4.9287827076222985, "grad_norm": 0.46690015469570195, "learning_rate": 6.296197822406452e-10, "loss": 0.0023, "step": 21662 }, { "epoch": 4.92901023890785, "grad_norm": 0.09908744242878396, "learning_rate": 6.256165191258906e-10, "loss": 0.0002, "step": 21663 }, { "epoch": 4.929237770193401, "grad_norm": 0.2544215340390182, "learning_rate": 6.216260170800043e-10, "loss": 0.0018, "step": 21664 }, { "epoch": 4.929465301478953, "grad_norm": 1.1909446961834722, "learning_rate": 6.17648276184657e-10, "loss": 0.0053, "step": 21665 }, { "epoch": 4.9296928327645055, "grad_norm": 0.42966134841037, "learning_rate": 6.136832965209644e-10, "loss": 0.0018, "step": 21666 }, { "epoch": 4.929920364050057, "grad_norm": 0.3384507949218928, "learning_rate": 6.097310781701809e-10, "loss": 0.0016, "step": 21667 }, { "epoch": 4.930147895335608, "grad_norm": 0.039165956471103505, "learning_rate": 6.057916212129366e-10, "loss": 0.0001, "step": 21668 }, { "epoch": 4.93037542662116, "grad_norm": 0.07859842790435521, "learning_rate": 6.01864925729792e-10, "loss": 0.0003, "step": 21669 }, { "epoch": 4.9306029579067125, "grad_norm": 0.13889159008012908, "learning_rate": 5.979509918010301e-10, "loss": 0.0009, "step": 21670 }, { "epoch": 4.930830489192264, "grad_norm": 0.20811353003489078, "learning_rate": 5.940498195065175e-10, "loss": 0.0014, "step": 21671 }, { "epoch": 4.931058020477816, "grad_norm": 0.0654527794958919, "learning_rate": 5.901614089261904e-10, "loss": 0.0002, "step": 21672 }, { "epoch": 4.931285551763367, "grad_norm": 0.5092080116298918, "learning_rate": 5.862857601393602e-10, "loss": 0.0059, "step": 21673 }, { "epoch": 4.9315130830489196, "grad_norm": 0.020660220443364873, "learning_rate": 5.82422873225269e-10, "loss": 0.0, "step": 21674 }, { "epoch": 4.931740614334471, "grad_norm": 0.028340653557443345, "learning_rate": 5.785727482628817e-10, "loss": 0.0001, "step": 21675 }, { "epoch": 4.931968145620023, "grad_norm": 0.43319691345836875, "learning_rate": 5.747353853309545e-10, "loss": 0.0017, "step": 21676 }, { "epoch": 4.932195676905574, "grad_norm": 0.19082576362931897, "learning_rate": 5.709107845078277e-10, "loss": 0.001, "step": 21677 }, { "epoch": 4.932423208191127, "grad_norm": 0.2703521937686772, "learning_rate": 5.670989458716331e-10, "loss": 0.0011, "step": 21678 }, { "epoch": 4.932650739476678, "grad_norm": 0.19776991085509524, "learning_rate": 5.632998695004332e-10, "loss": 0.0005, "step": 21679 }, { "epoch": 4.93287827076223, "grad_norm": 0.2634032942080241, "learning_rate": 5.595135554717357e-10, "loss": 0.002, "step": 21680 }, { "epoch": 4.933105802047781, "grad_norm": 0.7476059883006896, "learning_rate": 5.557400038629784e-10, "loss": 0.0066, "step": 21681 }, { "epoch": 4.933333333333334, "grad_norm": 0.5988318360587137, "learning_rate": 5.51979214751322e-10, "loss": 0.0055, "step": 21682 }, { "epoch": 4.933560864618885, "grad_norm": 0.5026053699272354, "learning_rate": 5.482311882135105e-10, "loss": 0.0039, "step": 21683 }, { "epoch": 4.933788395904437, "grad_norm": 0.3431470717371673, "learning_rate": 5.444959243262881e-10, "loss": 0.0028, "step": 21684 }, { "epoch": 4.934015927189988, "grad_norm": 0.08212526127612821, "learning_rate": 5.407734231659135e-10, "loss": 0.0002, "step": 21685 }, { "epoch": 4.934243458475541, "grad_norm": 0.1545662413485209, "learning_rate": 5.370636848085059e-10, "loss": 0.0004, "step": 21686 }, { "epoch": 4.934470989761092, "grad_norm": 0.10622158871347732, "learning_rate": 5.333667093298383e-10, "loss": 0.0002, "step": 21687 }, { "epoch": 4.934698521046644, "grad_norm": 0.17761924972405252, "learning_rate": 5.296824968054754e-10, "loss": 0.0011, "step": 21688 }, { "epoch": 4.934926052332195, "grad_norm": 0.28286571698065655, "learning_rate": 5.260110473107732e-10, "loss": 0.0012, "step": 21689 }, { "epoch": 4.935153583617748, "grad_norm": 0.07401666508622805, "learning_rate": 5.223523609207415e-10, "loss": 0.0003, "step": 21690 }, { "epoch": 4.935381114903299, "grad_norm": 0.43278734189085705, "learning_rate": 5.18706437710112e-10, "loss": 0.0032, "step": 21691 }, { "epoch": 4.935608646188851, "grad_norm": 0.7437706177686083, "learning_rate": 5.150732777534778e-10, "loss": 0.0043, "step": 21692 }, { "epoch": 4.935836177474402, "grad_norm": 0.09290383410533334, "learning_rate": 5.114528811250158e-10, "loss": 0.0002, "step": 21693 }, { "epoch": 4.936063708759955, "grad_norm": 0.3691378456101134, "learning_rate": 5.078452478988332e-10, "loss": 0.0031, "step": 21694 }, { "epoch": 4.936291240045506, "grad_norm": 0.05168022963109401, "learning_rate": 5.04250378148552e-10, "loss": 0.0002, "step": 21695 }, { "epoch": 4.936518771331058, "grad_norm": 0.2891939454042776, "learning_rate": 5.006682719476547e-10, "loss": 0.0024, "step": 21696 }, { "epoch": 4.936746302616609, "grad_norm": 0.3829050511064802, "learning_rate": 4.970989293694162e-10, "loss": 0.0021, "step": 21697 }, { "epoch": 4.936973833902162, "grad_norm": 0.29877977223480157, "learning_rate": 4.935423504866948e-10, "loss": 0.002, "step": 21698 }, { "epoch": 4.937201365187713, "grad_norm": 0.7265948568031029, "learning_rate": 4.899985353722797e-10, "loss": 0.004, "step": 21699 }, { "epoch": 4.937428896473265, "grad_norm": 0.06316776038129292, "learning_rate": 4.864674840986127e-10, "loss": 0.0002, "step": 21700 }, { "epoch": 4.937656427758817, "grad_norm": 0.5768185015254635, "learning_rate": 4.829491967377197e-10, "loss": 0.0017, "step": 21701 }, { "epoch": 4.937883959044369, "grad_norm": 0.125957656192256, "learning_rate": 4.794436733616264e-10, "loss": 0.0005, "step": 21702 }, { "epoch": 4.93811149032992, "grad_norm": 0.24957792231157516, "learning_rate": 4.759509140420116e-10, "loss": 0.0011, "step": 21703 }, { "epoch": 4.938339021615472, "grad_norm": 0.12580468740601453, "learning_rate": 4.724709188501376e-10, "loss": 0.0006, "step": 21704 }, { "epoch": 4.938566552901024, "grad_norm": 0.06834673766881871, "learning_rate": 4.690036878571974e-10, "loss": 0.0002, "step": 21705 }, { "epoch": 4.938794084186576, "grad_norm": 0.6455062809189583, "learning_rate": 4.655492211340373e-10, "loss": 0.0034, "step": 21706 }, { "epoch": 4.939021615472127, "grad_norm": 0.08558284128849537, "learning_rate": 4.621075187512952e-10, "loss": 0.0004, "step": 21707 }, { "epoch": 4.939249146757679, "grad_norm": 0.04144724560846205, "learning_rate": 4.586785807792621e-10, "loss": 0.0001, "step": 21708 }, { "epoch": 4.939476678043231, "grad_norm": 0.13785456928534254, "learning_rate": 4.5526240728802076e-10, "loss": 0.0002, "step": 21709 }, { "epoch": 4.939704209328783, "grad_norm": 0.23656194690212923, "learning_rate": 4.518589983474458e-10, "loss": 0.0014, "step": 21710 }, { "epoch": 4.939931740614335, "grad_norm": 0.24912704002702327, "learning_rate": 4.484683540270651e-10, "loss": 0.0007, "step": 21711 }, { "epoch": 4.940159271899886, "grad_norm": 0.3837080564962255, "learning_rate": 4.450904743961981e-10, "loss": 0.0019, "step": 21712 }, { "epoch": 4.940386803185438, "grad_norm": 0.6732936230060036, "learning_rate": 4.4172535952388683e-10, "loss": 0.0046, "step": 21713 }, { "epoch": 4.94061433447099, "grad_norm": 0.05949565662397861, "learning_rate": 4.3837300947882635e-10, "loss": 0.0003, "step": 21714 }, { "epoch": 4.940841865756542, "grad_norm": 0.11226887410389849, "learning_rate": 4.3503342432964224e-10, "loss": 0.0006, "step": 21715 }, { "epoch": 4.941069397042093, "grad_norm": 0.4062163438101342, "learning_rate": 4.317066041444745e-10, "loss": 0.0025, "step": 21716 }, { "epoch": 4.941296928327645, "grad_norm": 0.3654128673100633, "learning_rate": 4.283925489913937e-10, "loss": 0.0043, "step": 21717 }, { "epoch": 4.941524459613197, "grad_norm": 0.776605721697431, "learning_rate": 4.250912589381928e-10, "loss": 0.0088, "step": 21718 }, { "epoch": 4.941751990898749, "grad_norm": 0.12361809874671809, "learning_rate": 4.218027340521791e-10, "loss": 0.0003, "step": 21719 }, { "epoch": 4.9419795221843, "grad_norm": 0.2300716940963105, "learning_rate": 4.185269744007292e-10, "loss": 0.0009, "step": 21720 }, { "epoch": 4.942207053469852, "grad_norm": 0.2116200009975446, "learning_rate": 4.1526398005066473e-10, "loss": 0.001, "step": 21721 }, { "epoch": 4.942434584755404, "grad_norm": 0.2826355786946275, "learning_rate": 4.1201375106873773e-10, "loss": 0.001, "step": 21722 }, { "epoch": 4.942662116040956, "grad_norm": 0.4099206496971517, "learning_rate": 4.0877628752142296e-10, "loss": 0.0021, "step": 21723 }, { "epoch": 4.942889647326507, "grad_norm": 0.47692657682229916, "learning_rate": 4.055515894747786e-10, "loss": 0.0025, "step": 21724 }, { "epoch": 4.943117178612059, "grad_norm": 0.8207575917586942, "learning_rate": 4.023396569947935e-10, "loss": 0.005, "step": 21725 }, { "epoch": 4.943344709897611, "grad_norm": 0.5824656118927991, "learning_rate": 3.9914049014710967e-10, "loss": 0.0033, "step": 21726 }, { "epoch": 4.943572241183163, "grad_norm": 0.057628794625552246, "learning_rate": 3.959540889970914e-10, "loss": 0.0003, "step": 21727 }, { "epoch": 4.943799772468714, "grad_norm": 0.6836026498999395, "learning_rate": 3.9278045360982566e-10, "loss": 0.0029, "step": 21728 }, { "epoch": 4.944027303754266, "grad_norm": 0.2308788489611822, "learning_rate": 3.8961958405026033e-10, "loss": 0.0009, "step": 21729 }, { "epoch": 4.944254835039818, "grad_norm": 0.21676210105998028, "learning_rate": 3.8647148038285784e-10, "loss": 0.0014, "step": 21730 }, { "epoch": 4.94448236632537, "grad_norm": 0.07995965358136874, "learning_rate": 3.833361426721499e-10, "loss": 0.0003, "step": 21731 }, { "epoch": 4.944709897610921, "grad_norm": 0.5411598138922457, "learning_rate": 3.802135709821131e-10, "loss": 0.0036, "step": 21732 }, { "epoch": 4.944937428896473, "grad_norm": 0.22212326183957062, "learning_rate": 3.771037653765158e-10, "loss": 0.0009, "step": 21733 }, { "epoch": 4.945164960182025, "grad_norm": 0.15564564332360573, "learning_rate": 3.740067259189878e-10, "loss": 0.0009, "step": 21734 }, { "epoch": 4.945392491467577, "grad_norm": 0.18821458968366192, "learning_rate": 3.7092245267288107e-10, "loss": 0.0005, "step": 21735 }, { "epoch": 4.945620022753128, "grad_norm": 0.07126788793000119, "learning_rate": 3.6785094570106204e-10, "loss": 0.0002, "step": 21736 }, { "epoch": 4.94584755403868, "grad_norm": 0.15224377672466693, "learning_rate": 3.647922050664665e-10, "loss": 0.0005, "step": 21737 }, { "epoch": 4.946075085324232, "grad_norm": 0.636182231130051, "learning_rate": 3.617462308315445e-10, "loss": 0.0049, "step": 21738 }, { "epoch": 4.946302616609784, "grad_norm": 0.2276706621280368, "learning_rate": 3.5871302305860723e-10, "loss": 0.0019, "step": 21739 }, { "epoch": 4.946530147895336, "grad_norm": 0.7286539277835197, "learning_rate": 3.5569258180954967e-10, "loss": 0.0062, "step": 21740 }, { "epoch": 4.946757679180887, "grad_norm": 0.03375885548902128, "learning_rate": 3.52684907146128e-10, "loss": 0.0001, "step": 21741 }, { "epoch": 4.946985210466439, "grad_norm": 0.1449084162843057, "learning_rate": 3.496899991298902e-10, "loss": 0.0008, "step": 21742 }, { "epoch": 4.947212741751991, "grad_norm": 0.10102459028177721, "learning_rate": 3.467078578219679e-10, "loss": 0.0002, "step": 21743 }, { "epoch": 4.947440273037543, "grad_norm": 0.2252127488279534, "learning_rate": 3.43738483283354e-10, "loss": 0.0006, "step": 21744 }, { "epoch": 4.947667804323094, "grad_norm": 0.16799260850862882, "learning_rate": 3.4078187557469445e-10, "loss": 0.0008, "step": 21745 }, { "epoch": 4.947895335608646, "grad_norm": 0.26291057115647337, "learning_rate": 3.3783803475649636e-10, "loss": 0.001, "step": 21746 }, { "epoch": 4.948122866894198, "grad_norm": 0.11870959229854605, "learning_rate": 3.3490696088885053e-10, "loss": 0.0005, "step": 21747 }, { "epoch": 4.94835039817975, "grad_norm": 1.2303987400595018, "learning_rate": 3.319886540317091e-10, "loss": 0.0026, "step": 21748 }, { "epoch": 4.948577929465301, "grad_norm": 0.3609633193348525, "learning_rate": 3.290831142446077e-10, "loss": 0.0055, "step": 21749 }, { "epoch": 4.948805460750854, "grad_norm": 0.24193280340785805, "learning_rate": 3.261903415870821e-10, "loss": 0.0006, "step": 21750 }, { "epoch": 4.949032992036405, "grad_norm": 0.2838230916703595, "learning_rate": 3.2331033611818217e-10, "loss": 0.001, "step": 21751 }, { "epoch": 4.949260523321957, "grad_norm": 0.7305726736092816, "learning_rate": 3.2044309789681925e-10, "loss": 0.0051, "step": 21752 }, { "epoch": 4.949488054607508, "grad_norm": 0.14835244944732381, "learning_rate": 3.1758862698148816e-10, "loss": 0.0006, "step": 21753 }, { "epoch": 4.949715585893061, "grad_norm": 0.29901518099773905, "learning_rate": 3.147469234306144e-10, "loss": 0.0012, "step": 21754 }, { "epoch": 4.949943117178612, "grad_norm": 0.8247496478574948, "learning_rate": 3.119179873022765e-10, "loss": 0.0066, "step": 21755 }, { "epoch": 4.950170648464164, "grad_norm": 0.31761854820974667, "learning_rate": 3.091018186542061e-10, "loss": 0.002, "step": 21756 }, { "epoch": 4.950398179749715, "grad_norm": 0.19769996622230696, "learning_rate": 3.062984175441347e-10, "loss": 0.0005, "step": 21757 }, { "epoch": 4.950625711035268, "grad_norm": 0.18852703216871672, "learning_rate": 3.0350778402916947e-10, "loss": 0.0012, "step": 21758 }, { "epoch": 4.950853242320819, "grad_norm": 0.33078811855648577, "learning_rate": 3.007299181664175e-10, "loss": 0.0005, "step": 21759 }, { "epoch": 4.951080773606371, "grad_norm": 0.48308488723478227, "learning_rate": 2.9796482001270833e-10, "loss": 0.0026, "step": 21760 }, { "epoch": 4.951308304891922, "grad_norm": 0.1341802864901695, "learning_rate": 2.9521248962452456e-10, "loss": 0.0007, "step": 21761 }, { "epoch": 4.951535836177475, "grad_norm": 0.028811683723319763, "learning_rate": 2.924729270580712e-10, "loss": 0.0001, "step": 21762 }, { "epoch": 4.951763367463026, "grad_norm": 0.32438066480928274, "learning_rate": 2.897461323693451e-10, "loss": 0.0017, "step": 21763 }, { "epoch": 4.951990898748578, "grad_norm": 0.3628701982160309, "learning_rate": 2.870321056141351e-10, "loss": 0.0072, "step": 21764 }, { "epoch": 4.952218430034129, "grad_norm": 0.2581567638366368, "learning_rate": 2.843308468478828e-10, "loss": 0.0004, "step": 21765 }, { "epoch": 4.952445961319682, "grad_norm": 0.40391285997430987, "learning_rate": 2.8164235612582194e-10, "loss": 0.0016, "step": 21766 }, { "epoch": 4.952673492605233, "grad_norm": 0.494415076414527, "learning_rate": 2.7896663350283904e-10, "loss": 0.0023, "step": 21767 }, { "epoch": 4.952901023890785, "grad_norm": 0.17813795524239343, "learning_rate": 2.7630367903368205e-10, "loss": 0.0016, "step": 21768 }, { "epoch": 4.953128555176336, "grad_norm": 0.16262494596452998, "learning_rate": 2.736534927727519e-10, "loss": 0.0005, "step": 21769 }, { "epoch": 4.953356086461889, "grad_norm": 2.1221176704743976, "learning_rate": 2.710160747741719e-10, "loss": 0.0171, "step": 21770 }, { "epoch": 4.95358361774744, "grad_norm": 0.25983176499392147, "learning_rate": 2.6839142509192673e-10, "loss": 0.0033, "step": 21771 }, { "epoch": 4.953811149032992, "grad_norm": 0.3795209618400166, "learning_rate": 2.657795437795846e-10, "loss": 0.0021, "step": 21772 }, { "epoch": 4.954038680318543, "grad_norm": 5.5000178943365725, "learning_rate": 2.63180430890575e-10, "loss": 0.0194, "step": 21773 }, { "epoch": 4.954266211604096, "grad_norm": 0.070531786850832, "learning_rate": 2.605940864780498e-10, "loss": 0.0003, "step": 21774 }, { "epoch": 4.954493742889647, "grad_norm": 0.4393180007816095, "learning_rate": 2.580205105947448e-10, "loss": 0.0059, "step": 21775 }, { "epoch": 4.954721274175199, "grad_norm": 0.45640965806572903, "learning_rate": 2.5545970329339533e-10, "loss": 0.001, "step": 21776 }, { "epoch": 4.9549488054607504, "grad_norm": 0.21889742981647375, "learning_rate": 2.529116646262514e-10, "loss": 0.0007, "step": 21777 }, { "epoch": 4.955176336746303, "grad_norm": 0.1398129105103625, "learning_rate": 2.503763946454935e-10, "loss": 0.0006, "step": 21778 }, { "epoch": 4.955403868031855, "grad_norm": 0.2581264219745696, "learning_rate": 2.478538934028163e-10, "loss": 0.0011, "step": 21779 }, { "epoch": 4.955631399317406, "grad_norm": 1.5554178627530866, "learning_rate": 2.453441609497759e-10, "loss": 0.0066, "step": 21780 }, { "epoch": 4.9558589306029575, "grad_norm": 0.14530847753737955, "learning_rate": 2.428471973377894e-10, "loss": 0.0004, "step": 21781 }, { "epoch": 4.95608646188851, "grad_norm": 0.36464040442823586, "learning_rate": 2.403630026177883e-10, "loss": 0.0023, "step": 21782 }, { "epoch": 4.956313993174062, "grad_norm": 0.982403667952695, "learning_rate": 2.378915768405654e-10, "loss": 0.0062, "step": 21783 }, { "epoch": 4.956541524459613, "grad_norm": 0.179897382250973, "learning_rate": 2.354329200566358e-10, "loss": 0.0004, "step": 21784 }, { "epoch": 4.9567690557451645, "grad_norm": 0.46973771890659444, "learning_rate": 2.3298703231630635e-10, "loss": 0.0023, "step": 21785 }, { "epoch": 4.956996587030717, "grad_norm": 0.11394722984897272, "learning_rate": 2.305539136694679e-10, "loss": 0.0004, "step": 21786 }, { "epoch": 4.957224118316269, "grad_norm": 0.32783691058348063, "learning_rate": 2.2813356416594156e-10, "loss": 0.0015, "step": 21787 }, { "epoch": 4.95745164960182, "grad_norm": 0.24942092076079708, "learning_rate": 2.2572598385513233e-10, "loss": 0.0008, "step": 21788 }, { "epoch": 4.957679180887372, "grad_norm": 0.27350701654647713, "learning_rate": 2.2333117278623695e-10, "loss": 0.0008, "step": 21789 }, { "epoch": 4.957906712172924, "grad_norm": 0.28378467554440334, "learning_rate": 2.20949131008244e-10, "loss": 0.0016, "step": 21790 }, { "epoch": 4.958134243458476, "grad_norm": 0.05033647564355511, "learning_rate": 2.1857985856979514e-10, "loss": 0.0001, "step": 21791 }, { "epoch": 4.958361774744027, "grad_norm": 0.20920358675786696, "learning_rate": 2.1622335551939323e-10, "loss": 0.0012, "step": 21792 }, { "epoch": 4.958589306029579, "grad_norm": 0.057379808957869156, "learning_rate": 2.1387962190512478e-10, "loss": 0.0001, "step": 21793 }, { "epoch": 4.958816837315131, "grad_norm": 0.2720072877760544, "learning_rate": 2.1154865777486821e-10, "loss": 0.0014, "step": 21794 }, { "epoch": 4.959044368600683, "grad_norm": 0.77597804971137, "learning_rate": 2.0923046317636307e-10, "loss": 0.0069, "step": 21795 }, { "epoch": 4.959271899886234, "grad_norm": 0.3243878201858694, "learning_rate": 2.0692503815693265e-10, "loss": 0.0019, "step": 21796 }, { "epoch": 4.959499431171786, "grad_norm": 0.4078721874502804, "learning_rate": 2.04632382763692e-10, "loss": 0.0014, "step": 21797 }, { "epoch": 4.959726962457338, "grad_norm": 0.22200382534335733, "learning_rate": 2.023524970434787e-10, "loss": 0.0006, "step": 21798 }, { "epoch": 4.95995449374289, "grad_norm": 0.5789022545807424, "learning_rate": 2.0008538104299147e-10, "loss": 0.0014, "step": 21799 }, { "epoch": 4.960182025028441, "grad_norm": 0.19281664385679914, "learning_rate": 1.9783103480844334e-10, "loss": 0.0007, "step": 21800 }, { "epoch": 4.960409556313993, "grad_norm": 0.15244653097924232, "learning_rate": 1.95589458385978e-10, "loss": 0.0005, "step": 21801 }, { "epoch": 4.960637087599545, "grad_norm": 0.378530253796837, "learning_rate": 1.9336065182139207e-10, "loss": 0.0017, "step": 21802 }, { "epoch": 4.960864618885097, "grad_norm": 0.24564365230476568, "learning_rate": 1.9114461516020478e-10, "loss": 0.001, "step": 21803 }, { "epoch": 4.961092150170648, "grad_norm": 0.07847993171779467, "learning_rate": 1.8894134844772704e-10, "loss": 0.0002, "step": 21804 }, { "epoch": 4.9613196814562, "grad_norm": 0.07725551883389865, "learning_rate": 1.8675085172906172e-10, "loss": 0.0003, "step": 21805 }, { "epoch": 4.961547212741752, "grad_norm": 0.06781936580178305, "learning_rate": 1.8457312504882586e-10, "loss": 0.0003, "step": 21806 }, { "epoch": 4.961774744027304, "grad_norm": 0.06925673289476589, "learning_rate": 1.8240816845170596e-10, "loss": 0.0002, "step": 21807 }, { "epoch": 4.962002275312855, "grad_norm": 0.3908855620283534, "learning_rate": 1.8025598198183336e-10, "loss": 0.0028, "step": 21808 }, { "epoch": 4.962229806598407, "grad_norm": 0.5880518170068746, "learning_rate": 1.7811656568320068e-10, "loss": 0.0018, "step": 21809 }, { "epoch": 4.962457337883959, "grad_norm": 0.672598180332212, "learning_rate": 1.7598991959959233e-10, "loss": 0.0036, "step": 21810 }, { "epoch": 4.962684869169511, "grad_norm": 0.1554706150008289, "learning_rate": 1.7387604377444578e-10, "loss": 0.0004, "step": 21811 }, { "epoch": 4.962912400455062, "grad_norm": 0.307737057997673, "learning_rate": 1.7177493825092096e-10, "loss": 0.0015, "step": 21812 }, { "epoch": 4.963139931740614, "grad_norm": 0.07563547879418793, "learning_rate": 1.69686603072039e-10, "loss": 0.0004, "step": 21813 }, { "epoch": 4.963367463026166, "grad_norm": 0.10955259955431906, "learning_rate": 1.676110382804047e-10, "loss": 0.0002, "step": 21814 }, { "epoch": 4.963594994311718, "grad_norm": 0.3215416946333697, "learning_rate": 1.6554824391848412e-10, "loss": 0.0013, "step": 21815 }, { "epoch": 4.963822525597269, "grad_norm": 0.3172119463051519, "learning_rate": 1.6349822002846573e-10, "loss": 0.0009, "step": 21816 }, { "epoch": 4.964050056882821, "grad_norm": 0.10784132550524426, "learning_rate": 1.6146096665212162e-10, "loss": 0.0003, "step": 21817 }, { "epoch": 4.964277588168374, "grad_norm": 0.2159852973154908, "learning_rate": 1.5943648383122401e-10, "loss": 0.001, "step": 21818 }, { "epoch": 4.964505119453925, "grad_norm": 0.4173653728976672, "learning_rate": 1.5742477160712865e-10, "loss": 0.0022, "step": 21819 }, { "epoch": 4.964732650739476, "grad_norm": 0.5470027223936652, "learning_rate": 1.5542583002091382e-10, "loss": 0.0026, "step": 21820 }, { "epoch": 4.964960182025028, "grad_norm": 0.3881228859113475, "learning_rate": 1.534396591134496e-10, "loss": 0.0017, "step": 21821 }, { "epoch": 4.965187713310581, "grad_norm": 0.1452014572259893, "learning_rate": 1.514662589253285e-10, "loss": 0.0008, "step": 21822 }, { "epoch": 4.965415244596132, "grad_norm": 0.12874537692317617, "learning_rate": 1.4950562949686554e-10, "loss": 0.0007, "step": 21823 }, { "epoch": 4.965642775881683, "grad_norm": 0.014174941443484062, "learning_rate": 1.4755777086816746e-10, "loss": 0.0, "step": 21824 }, { "epoch": 4.965870307167235, "grad_norm": 0.23613307363728278, "learning_rate": 1.4562268307899419e-10, "loss": 0.0015, "step": 21825 }, { "epoch": 4.966097838452788, "grad_norm": 0.36866584088107973, "learning_rate": 1.4370036616889738e-10, "loss": 0.003, "step": 21826 }, { "epoch": 4.966325369738339, "grad_norm": 0.33657626900650245, "learning_rate": 1.4179082017728995e-10, "loss": 0.0046, "step": 21827 }, { "epoch": 4.966552901023891, "grad_norm": 0.5947749639094747, "learning_rate": 1.3989404514302974e-10, "loss": 0.0079, "step": 21828 }, { "epoch": 4.966780432309442, "grad_norm": 0.964597288473139, "learning_rate": 1.380100411049745e-10, "loss": 0.0044, "step": 21829 }, { "epoch": 4.967007963594995, "grad_norm": 0.3670636761719434, "learning_rate": 1.3613880810163516e-10, "loss": 0.0013, "step": 21830 }, { "epoch": 4.967235494880546, "grad_norm": 0.35363863149233626, "learning_rate": 1.3428034617124498e-10, "loss": 0.0026, "step": 21831 }, { "epoch": 4.967463026166098, "grad_norm": 0.5777013340962724, "learning_rate": 1.3243465535169032e-10, "loss": 0.0019, "step": 21832 }, { "epoch": 4.9676905574516494, "grad_norm": 1.098372584877333, "learning_rate": 1.3060173568085755e-10, "loss": 0.007, "step": 21833 }, { "epoch": 4.967918088737202, "grad_norm": 0.02300647639860876, "learning_rate": 1.287815871961473e-10, "loss": 0.0001, "step": 21834 }, { "epoch": 4.968145620022753, "grad_norm": 0.4290076412701639, "learning_rate": 1.2697420993468268e-10, "loss": 0.0011, "step": 21835 }, { "epoch": 4.968373151308305, "grad_norm": 0.40003692354491105, "learning_rate": 1.2517960393351735e-10, "loss": 0.0022, "step": 21836 }, { "epoch": 4.9686006825938565, "grad_norm": 0.22793843556591628, "learning_rate": 1.233977692292193e-10, "loss": 0.0011, "step": 21837 }, { "epoch": 4.968828213879409, "grad_norm": 0.7334508077185156, "learning_rate": 1.216287058583565e-10, "loss": 0.0024, "step": 21838 }, { "epoch": 4.96905574516496, "grad_norm": 0.9661716158051209, "learning_rate": 1.1987241385687243e-10, "loss": 0.0125, "step": 21839 }, { "epoch": 4.969283276450512, "grad_norm": 0.7214382045733048, "learning_rate": 1.181288932608493e-10, "loss": 0.0016, "step": 21840 }, { "epoch": 4.9695108077360635, "grad_norm": 0.25574594956044794, "learning_rate": 1.1639814410588368e-10, "loss": 0.0011, "step": 21841 }, { "epoch": 4.969738339021616, "grad_norm": 0.4607544315815796, "learning_rate": 1.1468016642729451e-10, "loss": 0.0039, "step": 21842 }, { "epoch": 4.969965870307167, "grad_norm": 1.202175849364283, "learning_rate": 1.1297496026019261e-10, "loss": 0.0061, "step": 21843 }, { "epoch": 4.970193401592719, "grad_norm": 0.05653718842149494, "learning_rate": 1.1128252563948061e-10, "loss": 0.0001, "step": 21844 }, { "epoch": 4.9704209328782705, "grad_norm": 0.48143899504914933, "learning_rate": 1.096028625997142e-10, "loss": 0.0017, "step": 21845 }, { "epoch": 4.970648464163823, "grad_norm": 0.042692927605627345, "learning_rate": 1.079359711752409e-10, "loss": 0.0001, "step": 21846 }, { "epoch": 4.970875995449374, "grad_norm": 1.6918149299313534, "learning_rate": 1.0628185140006131e-10, "loss": 0.0116, "step": 21847 }, { "epoch": 4.971103526734926, "grad_norm": 0.5512707801070456, "learning_rate": 1.046405033081066e-10, "loss": 0.0091, "step": 21848 }, { "epoch": 4.9713310580204775, "grad_norm": 0.11197077464714263, "learning_rate": 1.0301192693289163e-10, "loss": 0.0004, "step": 21849 }, { "epoch": 4.97155858930603, "grad_norm": 0.3414996226756123, "learning_rate": 1.0139612230758434e-10, "loss": 0.0016, "step": 21850 }, { "epoch": 4.971786120591581, "grad_norm": 0.3095104862904181, "learning_rate": 9.97930894653526e-11, "loss": 0.0023, "step": 21851 }, { "epoch": 4.972013651877133, "grad_norm": 1.5176694081469666, "learning_rate": 9.820282843887862e-11, "loss": 0.0072, "step": 21852 }, { "epoch": 4.9722411831626845, "grad_norm": 0.19631421102600635, "learning_rate": 9.662533926070583e-11, "loss": 0.0009, "step": 21853 }, { "epoch": 4.972468714448237, "grad_norm": 0.3592904781251559, "learning_rate": 9.506062196303068e-11, "loss": 0.0018, "step": 21854 }, { "epoch": 4.972696245733788, "grad_norm": 0.6293097152540528, "learning_rate": 9.350867657784146e-11, "loss": 0.0049, "step": 21855 }, { "epoch": 4.97292377701934, "grad_norm": 0.3812293417170532, "learning_rate": 9.196950313684894e-11, "loss": 0.0022, "step": 21856 }, { "epoch": 4.973151308304892, "grad_norm": 0.3040073952993717, "learning_rate": 9.044310167162507e-11, "loss": 0.0008, "step": 21857 }, { "epoch": 4.973378839590444, "grad_norm": 0.31122451184209304, "learning_rate": 8.89294722131867e-11, "loss": 0.0013, "step": 21858 }, { "epoch": 4.973606370875995, "grad_norm": 0.5451165597433609, "learning_rate": 8.74286147926201e-11, "loss": 0.0046, "step": 21859 }, { "epoch": 4.973833902161547, "grad_norm": 0.022983474531562718, "learning_rate": 8.594052944052578e-11, "loss": 0.0, "step": 21860 }, { "epoch": 4.974061433447099, "grad_norm": 0.22746433732193885, "learning_rate": 8.446521618729609e-11, "loss": 0.0005, "step": 21861 }, { "epoch": 4.974288964732651, "grad_norm": 0.2382478923678566, "learning_rate": 8.30026750631846e-11, "loss": 0.0041, "step": 21862 }, { "epoch": 4.974516496018202, "grad_norm": 2.607537872268214, "learning_rate": 8.155290609795919e-11, "loss": 0.0615, "step": 21863 }, { "epoch": 4.974744027303754, "grad_norm": 0.9908701231765097, "learning_rate": 8.01159093213183e-11, "loss": 0.003, "step": 21864 }, { "epoch": 4.974971558589306, "grad_norm": 0.3180383395459392, "learning_rate": 7.869168476268285e-11, "loss": 0.0015, "step": 21865 }, { "epoch": 4.975199089874858, "grad_norm": 0.6393063064057367, "learning_rate": 7.728023245098803e-11, "loss": 0.005, "step": 21866 }, { "epoch": 4.97542662116041, "grad_norm": 0.7817074099675083, "learning_rate": 7.588155241530781e-11, "loss": 0.0026, "step": 21867 }, { "epoch": 4.975654152445961, "grad_norm": 0.42669982544705265, "learning_rate": 7.449564468402227e-11, "loss": 0.0034, "step": 21868 }, { "epoch": 4.975881683731513, "grad_norm": 0.3103355417494269, "learning_rate": 7.312250928558085e-11, "loss": 0.0014, "step": 21869 }, { "epoch": 4.976109215017065, "grad_norm": 0.0645708039012626, "learning_rate": 7.17621462480167e-11, "loss": 0.0003, "step": 21870 }, { "epoch": 4.976336746302617, "grad_norm": 1.7058944698576095, "learning_rate": 7.041455559915478e-11, "loss": 0.0077, "step": 21871 }, { "epoch": 4.976564277588168, "grad_norm": 0.11010246957972349, "learning_rate": 6.90797373665425e-11, "loss": 0.0004, "step": 21872 }, { "epoch": 4.97679180887372, "grad_norm": 1.6948187394680896, "learning_rate": 6.775769157738033e-11, "loss": 0.005, "step": 21873 }, { "epoch": 4.977019340159272, "grad_norm": 0.15625872512942698, "learning_rate": 6.644841825872994e-11, "loss": 0.001, "step": 21874 }, { "epoch": 4.977246871444824, "grad_norm": 0.23494367150538462, "learning_rate": 6.515191743737548e-11, "loss": 0.0009, "step": 21875 }, { "epoch": 4.977474402730375, "grad_norm": 0.3375745635316706, "learning_rate": 6.386818913982351e-11, "loss": 0.0014, "step": 21876 }, { "epoch": 4.977701934015927, "grad_norm": 0.022100390570908218, "learning_rate": 6.259723339230305e-11, "loss": 0.0, "step": 21877 }, { "epoch": 4.977929465301479, "grad_norm": 0.4360812012012897, "learning_rate": 6.13390502206962e-11, "loss": 0.0041, "step": 21878 }, { "epoch": 4.978156996587031, "grad_norm": 0.16899905920069025, "learning_rate": 6.009363965088499e-11, "loss": 0.0009, "step": 21879 }, { "epoch": 4.978384527872582, "grad_norm": 0.31096858642270686, "learning_rate": 5.886100170819641e-11, "loss": 0.0016, "step": 21880 }, { "epoch": 4.978612059158134, "grad_norm": 1.145802275850203, "learning_rate": 5.7641136417888043e-11, "loss": 0.005, "step": 21881 }, { "epoch": 4.978839590443686, "grad_norm": 0.22981523377872035, "learning_rate": 5.6434043804801106e-11, "loss": 0.0007, "step": 21882 }, { "epoch": 4.979067121729238, "grad_norm": 0.1600563845046016, "learning_rate": 5.523972389370746e-11, "loss": 0.0012, "step": 21883 }, { "epoch": 4.979294653014789, "grad_norm": 0.5039824656782148, "learning_rate": 5.405817670903202e-11, "loss": 0.0016, "step": 21884 }, { "epoch": 4.979522184300341, "grad_norm": 0.28082667646173454, "learning_rate": 5.288940227485273e-11, "loss": 0.0006, "step": 21885 }, { "epoch": 4.979749715585893, "grad_norm": 0.6713604002637186, "learning_rate": 5.1733400615039394e-11, "loss": 0.0018, "step": 21886 }, { "epoch": 4.979977246871445, "grad_norm": 0.08353528330857826, "learning_rate": 5.0590171753253644e-11, "loss": 0.0004, "step": 21887 }, { "epoch": 4.980204778156996, "grad_norm": 0.3679122783932147, "learning_rate": 4.9459715712879555e-11, "loss": 0.0039, "step": 21888 }, { "epoch": 4.9804323094425484, "grad_norm": 0.10115508325209585, "learning_rate": 4.834203251702363e-11, "loss": 0.0002, "step": 21889 }, { "epoch": 4.9806598407281, "grad_norm": 0.6438795476978569, "learning_rate": 4.723712218851484e-11, "loss": 0.0016, "step": 21890 }, { "epoch": 4.980887372013652, "grad_norm": 1.1367316236485348, "learning_rate": 4.614498474990459e-11, "loss": 0.0052, "step": 21891 }, { "epoch": 4.981114903299203, "grad_norm": 0.5940293383449562, "learning_rate": 4.506562022353611e-11, "loss": 0.004, "step": 21892 }, { "epoch": 4.9813424345847555, "grad_norm": 0.2982081326884642, "learning_rate": 4.3999028631544486e-11, "loss": 0.0028, "step": 21893 }, { "epoch": 4.981569965870307, "grad_norm": 0.04271196784392789, "learning_rate": 4.2945209995579054e-11, "loss": 0.0002, "step": 21894 }, { "epoch": 4.981797497155859, "grad_norm": 0.15850049531207103, "learning_rate": 4.190416433728917e-11, "loss": 0.0003, "step": 21895 }, { "epoch": 4.982025028441411, "grad_norm": 0.03523856142072903, "learning_rate": 4.087589167790784e-11, "loss": 0.0001, "step": 21896 }, { "epoch": 4.9822525597269625, "grad_norm": 0.43522604125729025, "learning_rate": 3.9860392038529316e-11, "loss": 0.0025, "step": 21897 }, { "epoch": 4.982480091012514, "grad_norm": 0.2942269495889321, "learning_rate": 3.8857665439762105e-11, "loss": 0.0011, "step": 21898 }, { "epoch": 4.982707622298066, "grad_norm": 0.560410909786968, "learning_rate": 3.786771190221472e-11, "loss": 0.006, "step": 21899 }, { "epoch": 4.982935153583618, "grad_norm": 0.07927513051407901, "learning_rate": 3.689053144607935e-11, "loss": 0.0002, "step": 21900 }, { "epoch": 4.9831626848691695, "grad_norm": 0.1042085139633552, "learning_rate": 3.5926124091339995e-11, "loss": 0.0003, "step": 21901 }, { "epoch": 4.983390216154721, "grad_norm": 0.5369354669097183, "learning_rate": 3.4974489857703134e-11, "loss": 0.002, "step": 21902 }, { "epoch": 4.983617747440273, "grad_norm": 0.5767009875812025, "learning_rate": 3.403562876459765e-11, "loss": 0.0055, "step": 21903 }, { "epoch": 4.983845278725825, "grad_norm": 0.16045692673277923, "learning_rate": 3.310954083131368e-11, "loss": 0.0005, "step": 21904 }, { "epoch": 4.9840728100113765, "grad_norm": 0.6437213191296822, "learning_rate": 3.219622607658623e-11, "loss": 0.0075, "step": 21905 }, { "epoch": 4.984300341296929, "grad_norm": 0.02683792333396913, "learning_rate": 3.1295684519289107e-11, "loss": 0.0001, "step": 21906 }, { "epoch": 4.98452787258248, "grad_norm": 0.4933967835574327, "learning_rate": 3.040791617767158e-11, "loss": 0.0017, "step": 21907 }, { "epoch": 4.984755403868032, "grad_norm": 0.8730511320510521, "learning_rate": 2.9532921069982956e-11, "loss": 0.0046, "step": 21908 }, { "epoch": 4.9849829351535835, "grad_norm": 23.083492879373765, "learning_rate": 2.8670699213986796e-11, "loss": 0.034, "step": 21909 }, { "epoch": 4.985210466439136, "grad_norm": 0.031727119798473875, "learning_rate": 2.7821250627446672e-11, "loss": 0.0001, "step": 21910 }, { "epoch": 4.985437997724687, "grad_norm": 1.0138665818469168, "learning_rate": 2.698457532764043e-11, "loss": 0.0112, "step": 21911 }, { "epoch": 4.985665529010239, "grad_norm": 0.1150360553581566, "learning_rate": 2.6160673331707133e-11, "loss": 0.0003, "step": 21912 }, { "epoch": 4.9858930602957905, "grad_norm": 0.3981051429256963, "learning_rate": 2.534954465643891e-11, "loss": 0.0016, "step": 21913 }, { "epoch": 4.986120591581343, "grad_norm": 2.8666637303668887, "learning_rate": 2.455118931841971e-11, "loss": 0.0073, "step": 21914 }, { "epoch": 4.986348122866894, "grad_norm": 0.8356393510433815, "learning_rate": 2.376560733402533e-11, "loss": 0.0048, "step": 21915 }, { "epoch": 4.986575654152446, "grad_norm": 0.28344044590809353, "learning_rate": 2.2992798719284614e-11, "loss": 0.0009, "step": 21916 }, { "epoch": 4.9868031854379975, "grad_norm": 0.15951623608348126, "learning_rate": 2.223276348994885e-11, "loss": 0.0008, "step": 21917 }, { "epoch": 4.98703071672355, "grad_norm": 0.3386684700235153, "learning_rate": 2.148550166156116e-11, "loss": 0.0022, "step": 21918 }, { "epoch": 4.987258248009101, "grad_norm": 0.28882622397796076, "learning_rate": 2.0751013249456498e-11, "loss": 0.0012, "step": 21919 }, { "epoch": 4.987485779294653, "grad_norm": 0.1586541574343037, "learning_rate": 2.0029298268553488e-11, "loss": 0.0006, "step": 21920 }, { "epoch": 4.9877133105802045, "grad_norm": 0.6074355569638791, "learning_rate": 1.932035673370136e-11, "loss": 0.0054, "step": 21921 }, { "epoch": 4.987940841865757, "grad_norm": 0.28023176787014586, "learning_rate": 1.862418865933302e-11, "loss": 0.0021, "step": 21922 }, { "epoch": 4.988168373151308, "grad_norm": 0.35235474710058207, "learning_rate": 1.7940794059673195e-11, "loss": 0.0023, "step": 21923 }, { "epoch": 4.98839590443686, "grad_norm": 0.38330129475143637, "learning_rate": 1.727017294873845e-11, "loss": 0.0022, "step": 21924 }, { "epoch": 4.9886234357224115, "grad_norm": 0.08656622964259729, "learning_rate": 1.661232534019841e-11, "loss": 0.0002, "step": 21925 }, { "epoch": 4.988850967007964, "grad_norm": 0.19419207851596904, "learning_rate": 1.5967251247445135e-11, "loss": 0.0006, "step": 21926 }, { "epoch": 4.989078498293515, "grad_norm": 0.1351397094882124, "learning_rate": 1.5334950683731918e-11, "loss": 0.0005, "step": 21927 }, { "epoch": 4.989306029579067, "grad_norm": 1.0922679465042182, "learning_rate": 1.471542366203449e-11, "loss": 0.0057, "step": 21928 }, { "epoch": 4.9895335608646185, "grad_norm": 0.15911247730415012, "learning_rate": 1.4108670194842855e-11, "loss": 0.0006, "step": 21929 }, { "epoch": 4.989761092150171, "grad_norm": 0.00912518342647751, "learning_rate": 1.3514690294716415e-11, "loss": 0.0, "step": 21930 }, { "epoch": 4.989988623435722, "grad_norm": 0.19079392549930652, "learning_rate": 1.2933483973728845e-11, "loss": 0.0016, "step": 21931 }, { "epoch": 4.990216154721274, "grad_norm": 0.5411351025693507, "learning_rate": 1.2365051243815041e-11, "loss": 0.0063, "step": 21932 }, { "epoch": 4.9904436860068255, "grad_norm": 0.28246362762340405, "learning_rate": 1.1809392116493568e-11, "loss": 0.0013, "step": 21933 }, { "epoch": 4.990671217292378, "grad_norm": 0.09195872192435191, "learning_rate": 1.12665066032136e-11, "loss": 0.0003, "step": 21934 }, { "epoch": 4.99089874857793, "grad_norm": 0.03372447541846176, "learning_rate": 1.073639471500798e-11, "loss": 0.0001, "step": 21935 }, { "epoch": 4.991126279863481, "grad_norm": 0.1735185278902204, "learning_rate": 1.0219056462770771e-11, "loss": 0.0007, "step": 21936 }, { "epoch": 4.9913538111490325, "grad_norm": 0.3570637504070858, "learning_rate": 9.71449185704909e-12, "loss": 0.0015, "step": 21937 }, { "epoch": 4.991581342434585, "grad_norm": 0.7429457255368466, "learning_rate": 9.222700908112503e-12, "loss": 0.0059, "step": 21938 }, { "epoch": 4.991808873720137, "grad_norm": 0.5372664358422938, "learning_rate": 8.743683626022403e-12, "loss": 0.0026, "step": 21939 }, { "epoch": 4.992036405005688, "grad_norm": 0.11242742280629132, "learning_rate": 8.277440020632022e-12, "loss": 0.0002, "step": 21940 }, { "epoch": 4.99226393629124, "grad_norm": 0.2974423326685251, "learning_rate": 7.823970101447641e-12, "loss": 0.001, "step": 21941 }, { "epoch": 4.992491467576792, "grad_norm": 0.4181309496701355, "learning_rate": 7.383273877697993e-12, "loss": 0.0026, "step": 21942 }, { "epoch": 4.992718998862344, "grad_norm": 0.46792235942518895, "learning_rate": 6.955351358403639e-12, "loss": 0.0016, "step": 21943 }, { "epoch": 4.992946530147895, "grad_norm": 0.643118284782613, "learning_rate": 6.540202552307584e-12, "loss": 0.0069, "step": 21944 }, { "epoch": 4.993174061433447, "grad_norm": 0.7253461552704301, "learning_rate": 6.1378274679446684e-12, "loss": 0.0027, "step": 21945 }, { "epoch": 4.993401592718999, "grad_norm": 0.2254476980561995, "learning_rate": 5.748226113502786e-12, "loss": 0.0008, "step": 21946 }, { "epoch": 4.993629124004551, "grad_norm": 0.6176690729495912, "learning_rate": 5.371398496892277e-12, "loss": 0.0136, "step": 21947 }, { "epoch": 4.993856655290102, "grad_norm": 0.06265960659496693, "learning_rate": 5.007344625954092e-12, "loss": 0.0002, "step": 21948 }, { "epoch": 4.9940841865756544, "grad_norm": 0.2076496381421376, "learning_rate": 4.6560645079740676e-12, "loss": 0.0004, "step": 21949 }, { "epoch": 4.994311717861206, "grad_norm": 0.1463890084308325, "learning_rate": 4.317558150238044e-12, "loss": 0.0004, "step": 21950 }, { "epoch": 4.994539249146758, "grad_norm": 0.1220720300963023, "learning_rate": 3.991825559615526e-12, "loss": 0.0002, "step": 21951 }, { "epoch": 4.994766780432309, "grad_norm": 0.09625036720038956, "learning_rate": 3.678866742767851e-12, "loss": 0.0003, "step": 21952 }, { "epoch": 4.9949943117178615, "grad_norm": 0.07738257376760913, "learning_rate": 3.3786817060788014e-12, "loss": 0.0003, "step": 21953 }, { "epoch": 4.995221843003413, "grad_norm": 0.1366648968414785, "learning_rate": 3.0912704557239936e-12, "loss": 0.0003, "step": 21954 }, { "epoch": 4.995449374288965, "grad_norm": 0.07052156404958118, "learning_rate": 2.816632997532098e-12, "loss": 0.0003, "step": 21955 }, { "epoch": 4.995676905574516, "grad_norm": 0.03855199252789031, "learning_rate": 2.5547693371930082e-12, "loss": 0.0001, "step": 21956 }, { "epoch": 4.9959044368600685, "grad_norm": 0.7884214528269075, "learning_rate": 2.305679479980283e-12, "loss": 0.0019, "step": 21957 }, { "epoch": 4.99613196814562, "grad_norm": 0.04523718945872072, "learning_rate": 2.069363430959315e-12, "loss": 0.0002, "step": 21958 }, { "epoch": 4.996359499431172, "grad_norm": 0.6185059986697276, "learning_rate": 1.8458211950567184e-12, "loss": 0.0024, "step": 21959 }, { "epoch": 4.996587030716723, "grad_norm": 0.06775786199937232, "learning_rate": 1.6350527767827751e-12, "loss": 0.0002, "step": 21960 }, { "epoch": 4.9968145620022755, "grad_norm": 0.4112864504429127, "learning_rate": 1.4370581805089878e-12, "loss": 0.0025, "step": 21961 }, { "epoch": 4.997042093287827, "grad_norm": 0.6037889796207432, "learning_rate": 1.2518374101905262e-12, "loss": 0.0035, "step": 21962 }, { "epoch": 4.997269624573379, "grad_norm": 0.38240414108610854, "learning_rate": 1.0793904696437818e-12, "loss": 0.0015, "step": 21963 }, { "epoch": 4.99749715585893, "grad_norm": 0.04482302372575851, "learning_rate": 9.197173624075906e-13, "loss": 0.0001, "step": 21964 }, { "epoch": 4.9977246871444825, "grad_norm": 0.04611357277174689, "learning_rate": 7.728180917432326e-13, "loss": 0.0001, "step": 21965 }, { "epoch": 4.997952218430034, "grad_norm": 0.9320741466331328, "learning_rate": 6.386926606344324e-13, "loss": 0.0051, "step": 21966 }, { "epoch": 4.998179749715586, "grad_norm": 0.8462568865244944, "learning_rate": 5.173410718567473e-13, "loss": 0.0064, "step": 21967 }, { "epoch": 4.998407281001137, "grad_norm": 0.34139639782997455, "learning_rate": 4.087633278387904e-13, "loss": 0.002, "step": 21968 }, { "epoch": 4.9986348122866895, "grad_norm": 0.20877987878508777, "learning_rate": 3.129594308703965e-13, "loss": 0.0017, "step": 21969 }, { "epoch": 4.998862343572241, "grad_norm": 0.6843669410316169, "learning_rate": 2.2992938289445597e-13, "loss": 0.0064, "step": 21970 }, { "epoch": 4.999089874857793, "grad_norm": 0.10989633868154672, "learning_rate": 1.596731855763034e-13, "loss": 0.0003, "step": 21971 }, { "epoch": 4.999317406143344, "grad_norm": 0.034969826488561694, "learning_rate": 1.0219084030371751e-13, "loss": 0.0001, "step": 21972 }, { "epoch": 4.9995449374288965, "grad_norm": 0.6958343056777389, "learning_rate": 5.748234839508815e-14, "loss": 0.0054, "step": 21973 }, { "epoch": 4.999772468714449, "grad_norm": 0.33571360647230875, "learning_rate": 2.5547710613693654e-14, "loss": 0.0004, "step": 21974 }, { "epoch": 5.0, "grad_norm": 0.03646151181613518, "learning_rate": 6.386927653423414e-15, "loss": 0.0001, "step": 21975 }, { "epoch": 5.0, "step": 21975, "total_flos": 72305340162048.0, "train_loss": 0.02156848646505916, "train_runtime": 27527.2242, "train_samples_per_second": 1.596, "train_steps_per_second": 0.798 } ], "logging_steps": 1, "max_steps": 21975, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 1110, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 72305340162048.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }