{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 21975, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00022753128555176336, "grad_norm": 12.3071932545212, "learning_rate": 1.25e-06, "loss": 0.3186, "step": 1 }, { "epoch": 0.0004550625711035267, "grad_norm": 11.959825961880057, "learning_rate": 1.2499999936130725e-06, "loss": 0.3776, "step": 2 }, { "epoch": 0.0006825938566552901, "grad_norm": 5.4315221034586365, "learning_rate": 1.2499999744522896e-06, "loss": 0.4755, "step": 3 }, { "epoch": 0.0009101251422070534, "grad_norm": 21.003860231065644, "learning_rate": 1.2499999425176518e-06, "loss": 0.3334, "step": 4 }, { "epoch": 0.0011376564277588168, "grad_norm": 9.549170994835775, "learning_rate": 1.2499998978091598e-06, "loss": 0.375, "step": 5 }, { "epoch": 0.0013651877133105802, "grad_norm": 3.400827392368318, "learning_rate": 1.2499998403268147e-06, "loss": 0.2286, "step": 6 }, { "epoch": 0.0015927189988623437, "grad_norm": 8.451175634489234, "learning_rate": 1.2499997700706173e-06, "loss": 0.3216, "step": 7 }, { "epoch": 0.0018202502844141069, "grad_norm": 7.494987211346803, "learning_rate": 1.2499996870405692e-06, "loss": 0.2339, "step": 8 }, { "epoch": 0.0020477815699658703, "grad_norm": 9.138399201835718, "learning_rate": 1.2499995912366722e-06, "loss": 0.326, "step": 9 }, { "epoch": 0.0022753128555176336, "grad_norm": 3.2188295955534123, "learning_rate": 1.2499994826589282e-06, "loss": 0.2514, "step": 10 }, { "epoch": 0.002502844141069397, "grad_norm": 22.66663249526738, "learning_rate": 1.2499993613073393e-06, "loss": 0.4005, "step": 11 }, { "epoch": 0.0027303754266211604, "grad_norm": 21.954799290073527, "learning_rate": 1.2499992271819083e-06, "loss": 0.1492, "step": 12 }, { "epoch": 0.0029579067121729237, "grad_norm": 6.624298118045555, "learning_rate": 1.2499990802826377e-06, "loss": 0.3024, "step": 13 }, { "epoch": 0.0031854379977246873, "grad_norm": 3.923975454400113, "learning_rate": 1.2499989206095304e-06, "loss": 0.2411, "step": 14 }, { "epoch": 0.0034129692832764505, "grad_norm": 1.9398605915746092, "learning_rate": 1.2499987481625899e-06, "loss": 0.1849, "step": 15 }, { "epoch": 0.0036405005688282138, "grad_norm": 5.482695785208493, "learning_rate": 1.2499985629418195e-06, "loss": 0.3122, "step": 16 }, { "epoch": 0.0038680318543799774, "grad_norm": 5.552109300872593, "learning_rate": 1.2499983649472233e-06, "loss": 0.3393, "step": 17 }, { "epoch": 0.004095563139931741, "grad_norm": 4.610318891370888, "learning_rate": 1.249998154178805e-06, "loss": 0.3, "step": 18 }, { "epoch": 0.004323094425483504, "grad_norm": 8.793267315718285, "learning_rate": 1.2499979306365692e-06, "loss": 0.2266, "step": 19 }, { "epoch": 0.004550625711035267, "grad_norm": 11.785540460314868, "learning_rate": 1.2499976943205202e-06, "loss": 0.258, "step": 20 }, { "epoch": 0.00477815699658703, "grad_norm": 7.848333910468807, "learning_rate": 1.249997445230663e-06, "loss": 0.3733, "step": 21 }, { "epoch": 0.005005688282138794, "grad_norm": 11.509651474854413, "learning_rate": 1.2499971833670026e-06, "loss": 0.3606, "step": 22 }, { "epoch": 0.005233219567690558, "grad_norm": 8.662973783002895, "learning_rate": 1.2499969087295443e-06, "loss": 0.3884, "step": 23 }, { "epoch": 0.005460750853242321, "grad_norm": 5.341258812295752, "learning_rate": 1.249996621318294e-06, "loss": 0.2677, "step": 24 }, { "epoch": 0.005688282138794084, "grad_norm": 4.742018594757072, "learning_rate": 1.2499963211332573e-06, "loss": 0.3253, "step": 25 }, { "epoch": 0.005915813424345847, "grad_norm": 2.4536573603250624, "learning_rate": 1.2499960081744405e-06, "loss": 0.2393, "step": 26 }, { "epoch": 0.0061433447098976105, "grad_norm": 6.34705088291597, "learning_rate": 1.24999568244185e-06, "loss": 0.4326, "step": 27 }, { "epoch": 0.006370875995449375, "grad_norm": 9.775833264439491, "learning_rate": 1.249995343935492e-06, "loss": 0.4252, "step": 28 }, { "epoch": 0.006598407281001138, "grad_norm": 6.064212735225404, "learning_rate": 1.2499949926553743e-06, "loss": 0.2988, "step": 29 }, { "epoch": 0.006825938566552901, "grad_norm": 4.4254830237015845, "learning_rate": 1.2499946286015032e-06, "loss": 0.2988, "step": 30 }, { "epoch": 0.007053469852104664, "grad_norm": 4.883047495609927, "learning_rate": 1.2499942517738867e-06, "loss": 0.2285, "step": 31 }, { "epoch": 0.0072810011376564275, "grad_norm": 8.135398866699179, "learning_rate": 1.2499938621725322e-06, "loss": 0.1529, "step": 32 }, { "epoch": 0.007508532423208191, "grad_norm": 2.973365765084456, "learning_rate": 1.2499934597974478e-06, "loss": 0.2436, "step": 33 }, { "epoch": 0.007736063708759955, "grad_norm": 5.612693729952574, "learning_rate": 1.2499930446486416e-06, "loss": 0.3466, "step": 34 }, { "epoch": 0.007963594994311717, "grad_norm": 3.022290156639827, "learning_rate": 1.2499926167261224e-06, "loss": 0.2728, "step": 35 }, { "epoch": 0.008191126279863481, "grad_norm": 3.1279992715224467, "learning_rate": 1.2499921760298987e-06, "loss": 0.2469, "step": 36 }, { "epoch": 0.008418657565415245, "grad_norm": 14.845448376418034, "learning_rate": 1.2499917225599796e-06, "loss": 0.5145, "step": 37 }, { "epoch": 0.008646188850967008, "grad_norm": 14.138433401115075, "learning_rate": 1.2499912563163742e-06, "loss": 0.2705, "step": 38 }, { "epoch": 0.008873720136518772, "grad_norm": 4.324563647824762, "learning_rate": 1.249990777299092e-06, "loss": 0.1563, "step": 39 }, { "epoch": 0.009101251422070534, "grad_norm": 11.315529959215173, "learning_rate": 1.249990285508143e-06, "loss": 0.4123, "step": 40 }, { "epoch": 0.009328782707622298, "grad_norm": 6.3112839729366765, "learning_rate": 1.2499897809435374e-06, "loss": 0.1742, "step": 41 }, { "epoch": 0.00955631399317406, "grad_norm": 8.25726966946455, "learning_rate": 1.249989263605285e-06, "loss": 0.3229, "step": 42 }, { "epoch": 0.009783845278725825, "grad_norm": 6.3545712967505334, "learning_rate": 1.249988733493397e-06, "loss": 0.3055, "step": 43 }, { "epoch": 0.010011376564277589, "grad_norm": 5.356373706603287, "learning_rate": 1.2499881906078836e-06, "loss": 0.2601, "step": 44 }, { "epoch": 0.010238907849829351, "grad_norm": 1.9215795165819936, "learning_rate": 1.2499876349487564e-06, "loss": 0.1517, "step": 45 }, { "epoch": 0.010466439135381115, "grad_norm": 8.506503892761648, "learning_rate": 1.2499870665160262e-06, "loss": 0.2831, "step": 46 }, { "epoch": 0.010693970420932878, "grad_norm": 5.909503420571465, "learning_rate": 1.2499864853097054e-06, "loss": 0.2252, "step": 47 }, { "epoch": 0.010921501706484642, "grad_norm": 5.488265194188453, "learning_rate": 1.2499858913298053e-06, "loss": 0.3466, "step": 48 }, { "epoch": 0.011149032992036406, "grad_norm": 12.162427245650075, "learning_rate": 1.249985284576338e-06, "loss": 0.2426, "step": 49 }, { "epoch": 0.011376564277588168, "grad_norm": 9.969211407495816, "learning_rate": 1.2499846650493164e-06, "loss": 0.2801, "step": 50 }, { "epoch": 0.011604095563139932, "grad_norm": 5.741578552447352, "learning_rate": 1.2499840327487528e-06, "loss": 0.2664, "step": 51 }, { "epoch": 0.011831626848691695, "grad_norm": 2.937767840084915, "learning_rate": 1.24998338767466e-06, "loss": 0.1834, "step": 52 }, { "epoch": 0.012059158134243459, "grad_norm": 4.130655112830682, "learning_rate": 1.2499827298270515e-06, "loss": 0.2675, "step": 53 }, { "epoch": 0.012286689419795221, "grad_norm": 4.5227789119131625, "learning_rate": 1.2499820592059405e-06, "loss": 0.3205, "step": 54 }, { "epoch": 0.012514220705346985, "grad_norm": 4.653850683576537, "learning_rate": 1.2499813758113409e-06, "loss": 0.1921, "step": 55 }, { "epoch": 0.01274175199089875, "grad_norm": 6.204991552012506, "learning_rate": 1.2499806796432665e-06, "loss": 0.1989, "step": 56 }, { "epoch": 0.012969283276450512, "grad_norm": 7.81696538748595, "learning_rate": 1.2499799707017315e-06, "loss": 0.1301, "step": 57 }, { "epoch": 0.013196814562002276, "grad_norm": 6.427887275035889, "learning_rate": 1.2499792489867508e-06, "loss": 0.3376, "step": 58 }, { "epoch": 0.013424345847554038, "grad_norm": 4.713573539887475, "learning_rate": 1.2499785144983386e-06, "loss": 0.1673, "step": 59 }, { "epoch": 0.013651877133105802, "grad_norm": 6.7169275734426055, "learning_rate": 1.24997776723651e-06, "loss": 0.2501, "step": 60 }, { "epoch": 0.013879408418657566, "grad_norm": 11.702392641770421, "learning_rate": 1.2499770072012809e-06, "loss": 0.293, "step": 61 }, { "epoch": 0.014106939704209329, "grad_norm": 5.86563350345107, "learning_rate": 1.2499762343926661e-06, "loss": 0.2346, "step": 62 }, { "epoch": 0.014334470989761093, "grad_norm": 4.562933746130791, "learning_rate": 1.2499754488106817e-06, "loss": 0.1349, "step": 63 }, { "epoch": 0.014562002275312855, "grad_norm": 16.935870758573948, "learning_rate": 1.2499746504553436e-06, "loss": 0.2869, "step": 64 }, { "epoch": 0.01478953356086462, "grad_norm": 3.252674290241083, "learning_rate": 1.2499738393266684e-06, "loss": 0.2125, "step": 65 }, { "epoch": 0.015017064846416382, "grad_norm": 3.767321260449828, "learning_rate": 1.2499730154246726e-06, "loss": 0.2049, "step": 66 }, { "epoch": 0.015244596131968146, "grad_norm": 7.264091175555215, "learning_rate": 1.2499721787493726e-06, "loss": 0.2521, "step": 67 }, { "epoch": 0.01547212741751991, "grad_norm": 2.846384337735166, "learning_rate": 1.2499713293007862e-06, "loss": 0.1745, "step": 68 }, { "epoch": 0.015699658703071672, "grad_norm": 30.829215228751778, "learning_rate": 1.2499704670789301e-06, "loss": 0.1514, "step": 69 }, { "epoch": 0.015927189988623434, "grad_norm": 7.168923083631056, "learning_rate": 1.2499695920838225e-06, "loss": 0.2393, "step": 70 }, { "epoch": 0.0161547212741752, "grad_norm": 3.418723817035884, "learning_rate": 1.2499687043154809e-06, "loss": 0.1342, "step": 71 }, { "epoch": 0.016382252559726963, "grad_norm": 6.316537441364383, "learning_rate": 1.2499678037739235e-06, "loss": 0.1698, "step": 72 }, { "epoch": 0.016609783845278725, "grad_norm": 3.8561981086650596, "learning_rate": 1.2499668904591688e-06, "loss": 0.3104, "step": 73 }, { "epoch": 0.01683731513083049, "grad_norm": 4.679806938064617, "learning_rate": 1.2499659643712356e-06, "loss": 0.2139, "step": 74 }, { "epoch": 0.017064846416382253, "grad_norm": 4.26137230837329, "learning_rate": 1.2499650255101425e-06, "loss": 0.2433, "step": 75 }, { "epoch": 0.017292377701934016, "grad_norm": 3.7227188471827914, "learning_rate": 1.2499640738759088e-06, "loss": 0.2334, "step": 76 }, { "epoch": 0.017519908987485778, "grad_norm": 6.044525591826923, "learning_rate": 1.249963109468554e-06, "loss": 0.3106, "step": 77 }, { "epoch": 0.017747440273037544, "grad_norm": 6.248705646938244, "learning_rate": 1.2499621322880979e-06, "loss": 0.2025, "step": 78 }, { "epoch": 0.017974971558589306, "grad_norm": 2.8368621495357313, "learning_rate": 1.2499611423345604e-06, "loss": 0.1492, "step": 79 }, { "epoch": 0.01820250284414107, "grad_norm": 5.049736361542706, "learning_rate": 1.2499601396079617e-06, "loss": 0.1341, "step": 80 }, { "epoch": 0.018430034129692834, "grad_norm": 6.760221850362585, "learning_rate": 1.2499591241083222e-06, "loss": 0.2092, "step": 81 }, { "epoch": 0.018657565415244597, "grad_norm": 6.630540720646431, "learning_rate": 1.2499580958356628e-06, "loss": 0.2181, "step": 82 }, { "epoch": 0.01888509670079636, "grad_norm": 3.8482585047631863, "learning_rate": 1.2499570547900045e-06, "loss": 0.1613, "step": 83 }, { "epoch": 0.01911262798634812, "grad_norm": 6.605304588968454, "learning_rate": 1.2499560009713684e-06, "loss": 0.2959, "step": 84 }, { "epoch": 0.019340159271899887, "grad_norm": 6.012809221970948, "learning_rate": 1.2499549343797764e-06, "loss": 0.2393, "step": 85 }, { "epoch": 0.01956769055745165, "grad_norm": 6.254621323206641, "learning_rate": 1.24995385501525e-06, "loss": 0.2285, "step": 86 }, { "epoch": 0.019795221843003412, "grad_norm": 3.4046999226542733, "learning_rate": 1.2499527628778116e-06, "loss": 0.1187, "step": 87 }, { "epoch": 0.020022753128555178, "grad_norm": 7.419781715158706, "learning_rate": 1.2499516579674831e-06, "loss": 0.2817, "step": 88 }, { "epoch": 0.02025028441410694, "grad_norm": 21.819719933471735, "learning_rate": 1.2499505402842872e-06, "loss": 0.2469, "step": 89 }, { "epoch": 0.020477815699658702, "grad_norm": 2.8418419055080766, "learning_rate": 1.2499494098282469e-06, "loss": 0.2955, "step": 90 }, { "epoch": 0.020705346985210465, "grad_norm": 7.066317637431583, "learning_rate": 1.2499482665993851e-06, "loss": 0.2044, "step": 91 }, { "epoch": 0.02093287827076223, "grad_norm": 5.925737098985834, "learning_rate": 1.2499471105977252e-06, "loss": 0.2335, "step": 92 }, { "epoch": 0.021160409556313993, "grad_norm": 3.0480275776898473, "learning_rate": 1.249945941823291e-06, "loss": 0.3633, "step": 93 }, { "epoch": 0.021387940841865755, "grad_norm": 2.946352549362824, "learning_rate": 1.2499447602761063e-06, "loss": 0.2011, "step": 94 }, { "epoch": 0.02161547212741752, "grad_norm": 6.07129225638081, "learning_rate": 1.2499435659561954e-06, "loss": 0.2585, "step": 95 }, { "epoch": 0.021843003412969283, "grad_norm": 4.592794032374342, "learning_rate": 1.2499423588635823e-06, "loss": 0.2336, "step": 96 }, { "epoch": 0.022070534698521046, "grad_norm": 19.61835193566366, "learning_rate": 1.2499411389982919e-06, "loss": 0.2438, "step": 97 }, { "epoch": 0.02229806598407281, "grad_norm": 4.697964666160796, "learning_rate": 1.2499399063603492e-06, "loss": 0.26, "step": 98 }, { "epoch": 0.022525597269624574, "grad_norm": 6.831528796415563, "learning_rate": 1.2499386609497793e-06, "loss": 0.1291, "step": 99 }, { "epoch": 0.022753128555176336, "grad_norm": 3.3770537551655653, "learning_rate": 1.2499374027666078e-06, "loss": 0.1919, "step": 100 }, { "epoch": 0.0229806598407281, "grad_norm": 10.54402988548413, "learning_rate": 1.2499361318108602e-06, "loss": 0.2695, "step": 101 }, { "epoch": 0.023208191126279865, "grad_norm": 6.4464740357818116, "learning_rate": 1.2499348480825627e-06, "loss": 0.1883, "step": 102 }, { "epoch": 0.023435722411831627, "grad_norm": 5.7228283849137895, "learning_rate": 1.2499335515817413e-06, "loss": 0.225, "step": 103 }, { "epoch": 0.02366325369738339, "grad_norm": 8.575195167369158, "learning_rate": 1.2499322423084226e-06, "loss": 0.1988, "step": 104 }, { "epoch": 0.023890784982935155, "grad_norm": 5.524822469569831, "learning_rate": 1.2499309202626336e-06, "loss": 0.1362, "step": 105 }, { "epoch": 0.024118316268486917, "grad_norm": 1.4259194554286314, "learning_rate": 1.249929585444401e-06, "loss": 0.1341, "step": 106 }, { "epoch": 0.02434584755403868, "grad_norm": 5.569399731315438, "learning_rate": 1.2499282378537522e-06, "loss": 0.1823, "step": 107 }, { "epoch": 0.024573378839590442, "grad_norm": 5.131038290322419, "learning_rate": 1.2499268774907144e-06, "loss": 0.1674, "step": 108 }, { "epoch": 0.024800910125142208, "grad_norm": 2.9740215362829368, "learning_rate": 1.249925504355316e-06, "loss": 0.1443, "step": 109 }, { "epoch": 0.02502844141069397, "grad_norm": 7.125610878241638, "learning_rate": 1.2499241184475848e-06, "loss": 0.1993, "step": 110 }, { "epoch": 0.025255972696245733, "grad_norm": 3.5104920582246284, "learning_rate": 1.249922719767549e-06, "loss": 0.1387, "step": 111 }, { "epoch": 0.0254835039817975, "grad_norm": 15.180689323576399, "learning_rate": 1.2499213083152374e-06, "loss": 0.1609, "step": 112 }, { "epoch": 0.02571103526734926, "grad_norm": 2.6467486780240077, "learning_rate": 1.2499198840906787e-06, "loss": 0.0766, "step": 113 }, { "epoch": 0.025938566552901023, "grad_norm": 6.947833673299234, "learning_rate": 1.249918447093902e-06, "loss": 0.1988, "step": 114 }, { "epoch": 0.026166097838452786, "grad_norm": 3.236155694827761, "learning_rate": 1.249916997324937e-06, "loss": 0.2822, "step": 115 }, { "epoch": 0.02639362912400455, "grad_norm": 4.424229361394889, "learning_rate": 1.2499155347838129e-06, "loss": 0.2639, "step": 116 }, { "epoch": 0.026621160409556314, "grad_norm": 6.7125880752306, "learning_rate": 1.2499140594705596e-06, "loss": 0.1758, "step": 117 }, { "epoch": 0.026848691695108076, "grad_norm": 12.978485247890044, "learning_rate": 1.2499125713852076e-06, "loss": 0.2966, "step": 118 }, { "epoch": 0.027076222980659842, "grad_norm": 2.4562187666064297, "learning_rate": 1.2499110705277869e-06, "loss": 0.1317, "step": 119 }, { "epoch": 0.027303754266211604, "grad_norm": 2.450514697648912, "learning_rate": 1.2499095568983284e-06, "loss": 0.2491, "step": 120 }, { "epoch": 0.027531285551763367, "grad_norm": 2.962900989508568, "learning_rate": 1.2499080304968634e-06, "loss": 0.1782, "step": 121 }, { "epoch": 0.027758816837315133, "grad_norm": 4.706451675787787, "learning_rate": 1.2499064913234222e-06, "loss": 0.2063, "step": 122 }, { "epoch": 0.027986348122866895, "grad_norm": 4.848247166198472, "learning_rate": 1.249904939378037e-06, "loss": 0.1873, "step": 123 }, { "epoch": 0.028213879408418657, "grad_norm": 5.57275566955423, "learning_rate": 1.2499033746607395e-06, "loss": 0.2362, "step": 124 }, { "epoch": 0.02844141069397042, "grad_norm": 4.528761927217566, "learning_rate": 1.2499017971715614e-06, "loss": 0.2686, "step": 125 }, { "epoch": 0.028668941979522185, "grad_norm": 7.35859467900191, "learning_rate": 1.2499002069105348e-06, "loss": 0.275, "step": 126 }, { "epoch": 0.028896473265073948, "grad_norm": 4.494727686955716, "learning_rate": 1.2498986038776926e-06, "loss": 0.1759, "step": 127 }, { "epoch": 0.02912400455062571, "grad_norm": 7.273216392666622, "learning_rate": 1.2498969880730671e-06, "loss": 0.2159, "step": 128 }, { "epoch": 0.029351535836177476, "grad_norm": 4.955227920384567, "learning_rate": 1.249895359496692e-06, "loss": 0.1888, "step": 129 }, { "epoch": 0.02957906712172924, "grad_norm": 6.321445200949685, "learning_rate": 1.2498937181486e-06, "loss": 0.3007, "step": 130 }, { "epoch": 0.029806598407281, "grad_norm": 2.76312902269676, "learning_rate": 1.2498920640288248e-06, "loss": 0.2442, "step": 131 }, { "epoch": 0.030034129692832763, "grad_norm": 56.774720129580295, "learning_rate": 1.2498903971374005e-06, "loss": 0.223, "step": 132 }, { "epoch": 0.03026166097838453, "grad_norm": 3.9468490187056324, "learning_rate": 1.2498887174743606e-06, "loss": 0.2504, "step": 133 }, { "epoch": 0.03048919226393629, "grad_norm": 3.9118814976883542, "learning_rate": 1.24988702503974e-06, "loss": 0.1939, "step": 134 }, { "epoch": 0.030716723549488054, "grad_norm": 3.7837188268010506, "learning_rate": 1.2498853198335728e-06, "loss": 0.2199, "step": 135 }, { "epoch": 0.03094425483503982, "grad_norm": 4.0297942240817175, "learning_rate": 1.2498836018558942e-06, "loss": 0.1566, "step": 136 }, { "epoch": 0.031171786120591582, "grad_norm": 3.4754550482446698, "learning_rate": 1.2498818711067392e-06, "loss": 0.2666, "step": 137 }, { "epoch": 0.031399317406143344, "grad_norm": 3.864651244769, "learning_rate": 1.2498801275861433e-06, "loss": 0.1173, "step": 138 }, { "epoch": 0.03162684869169511, "grad_norm": 8.216814820623972, "learning_rate": 1.2498783712941418e-06, "loss": 0.1879, "step": 139 }, { "epoch": 0.03185437997724687, "grad_norm": 3.637457358045326, "learning_rate": 1.2498766022307709e-06, "loss": 0.2047, "step": 140 }, { "epoch": 0.032081911262798635, "grad_norm": 2.58051980801193, "learning_rate": 1.2498748203960665e-06, "loss": 0.1008, "step": 141 }, { "epoch": 0.0323094425483504, "grad_norm": 3.8775724824241764, "learning_rate": 1.2498730257900655e-06, "loss": 0.2042, "step": 142 }, { "epoch": 0.03253697383390216, "grad_norm": 5.772591680829651, "learning_rate": 1.249871218412804e-06, "loss": 0.2352, "step": 143 }, { "epoch": 0.032764505119453925, "grad_norm": 2.210254874393301, "learning_rate": 1.2498693982643192e-06, "loss": 0.1803, "step": 144 }, { "epoch": 0.03299203640500569, "grad_norm": 6.540771980552272, "learning_rate": 1.2498675653446485e-06, "loss": 0.2304, "step": 145 }, { "epoch": 0.03321956769055745, "grad_norm": 2.904522388367919, "learning_rate": 1.249865719653829e-06, "loss": 0.1707, "step": 146 }, { "epoch": 0.033447098976109216, "grad_norm": 9.318986716894935, "learning_rate": 1.2498638611918985e-06, "loss": 0.2038, "step": 147 }, { "epoch": 0.03367463026166098, "grad_norm": 9.58516027118141, "learning_rate": 1.249861989958895e-06, "loss": 0.2357, "step": 148 }, { "epoch": 0.03390216154721274, "grad_norm": 3.559770501878285, "learning_rate": 1.2498601059548572e-06, "loss": 0.1613, "step": 149 }, { "epoch": 0.034129692832764506, "grad_norm": 3.348814329958542, "learning_rate": 1.2498582091798228e-06, "loss": 0.2016, "step": 150 }, { "epoch": 0.034357224118316265, "grad_norm": 6.375342543891093, "learning_rate": 1.2498562996338312e-06, "loss": 0.2231, "step": 151 }, { "epoch": 0.03458475540386803, "grad_norm": 7.488809251815451, "learning_rate": 1.249854377316921e-06, "loss": 0.1819, "step": 152 }, { "epoch": 0.0348122866894198, "grad_norm": 2.508487580474721, "learning_rate": 1.2498524422291319e-06, "loss": 0.182, "step": 153 }, { "epoch": 0.035039817974971556, "grad_norm": 3.656563964135558, "learning_rate": 1.2498504943705033e-06, "loss": 0.165, "step": 154 }, { "epoch": 0.03526734926052332, "grad_norm": 2.771070563762278, "learning_rate": 1.249848533741075e-06, "loss": 0.2569, "step": 155 }, { "epoch": 0.03549488054607509, "grad_norm": 5.610529774003187, "learning_rate": 1.2498465603408865e-06, "loss": 0.2873, "step": 156 }, { "epoch": 0.035722411831626846, "grad_norm": 3.6657793262286638, "learning_rate": 1.2498445741699792e-06, "loss": 0.1086, "step": 157 }, { "epoch": 0.03594994311717861, "grad_norm": 11.136381961854878, "learning_rate": 1.249842575228393e-06, "loss": 0.1653, "step": 158 }, { "epoch": 0.03617747440273038, "grad_norm": 4.607920317694178, "learning_rate": 1.249840563516169e-06, "loss": 0.1816, "step": 159 }, { "epoch": 0.03640500568828214, "grad_norm": 4.765507333684582, "learning_rate": 1.249838539033348e-06, "loss": 0.1735, "step": 160 }, { "epoch": 0.0366325369738339, "grad_norm": 3.024559515436515, "learning_rate": 1.2498365017799715e-06, "loss": 0.0997, "step": 161 }, { "epoch": 0.03686006825938567, "grad_norm": 3.0006086205585594, "learning_rate": 1.2498344517560815e-06, "loss": 0.2742, "step": 162 }, { "epoch": 0.03708759954493743, "grad_norm": 4.390575337778858, "learning_rate": 1.2498323889617198e-06, "loss": 0.2112, "step": 163 }, { "epoch": 0.03731513083048919, "grad_norm": 4.987032274568943, "learning_rate": 1.2498303133969281e-06, "loss": 0.2282, "step": 164 }, { "epoch": 0.03754266211604096, "grad_norm": 3.813775711394782, "learning_rate": 1.2498282250617492e-06, "loss": 0.1944, "step": 165 }, { "epoch": 0.03777019340159272, "grad_norm": 3.361678763128891, "learning_rate": 1.2498261239562257e-06, "loss": 0.2018, "step": 166 }, { "epoch": 0.037997724687144484, "grad_norm": 4.992072192203259, "learning_rate": 1.2498240100804005e-06, "loss": 0.2089, "step": 167 }, { "epoch": 0.03822525597269624, "grad_norm": 8.050790934059092, "learning_rate": 1.249821883434317e-06, "loss": 0.2696, "step": 168 }, { "epoch": 0.03845278725824801, "grad_norm": 2.642297340192281, "learning_rate": 1.2498197440180182e-06, "loss": 0.2691, "step": 169 }, { "epoch": 0.038680318543799774, "grad_norm": 3.35790306734272, "learning_rate": 1.2498175918315484e-06, "loss": 0.1851, "step": 170 }, { "epoch": 0.03890784982935153, "grad_norm": 3.524642269348137, "learning_rate": 1.2498154268749513e-06, "loss": 0.2276, "step": 171 }, { "epoch": 0.0391353811149033, "grad_norm": 2.188667506818875, "learning_rate": 1.249813249148271e-06, "loss": 0.1616, "step": 172 }, { "epoch": 0.039362912400455065, "grad_norm": 5.1958946099491845, "learning_rate": 1.2498110586515525e-06, "loss": 0.1987, "step": 173 }, { "epoch": 0.039590443686006824, "grad_norm": 5.09328084896296, "learning_rate": 1.2498088553848398e-06, "loss": 0.195, "step": 174 }, { "epoch": 0.03981797497155859, "grad_norm": 2.8290595777512952, "learning_rate": 1.2498066393481787e-06, "loss": 0.1568, "step": 175 }, { "epoch": 0.040045506257110355, "grad_norm": 2.360697357040943, "learning_rate": 1.249804410541614e-06, "loss": 0.2065, "step": 176 }, { "epoch": 0.040273037542662114, "grad_norm": 4.718810327826489, "learning_rate": 1.2498021689651916e-06, "loss": 0.2003, "step": 177 }, { "epoch": 0.04050056882821388, "grad_norm": 2.6458436624930237, "learning_rate": 1.249799914618957e-06, "loss": 0.1589, "step": 178 }, { "epoch": 0.040728100113765646, "grad_norm": 3.289621635927127, "learning_rate": 1.2497976475029566e-06, "loss": 0.1905, "step": 179 }, { "epoch": 0.040955631399317405, "grad_norm": 2.7547654896260028, "learning_rate": 1.2497953676172364e-06, "loss": 0.1538, "step": 180 }, { "epoch": 0.04118316268486917, "grad_norm": 4.715970073162376, "learning_rate": 1.2497930749618431e-06, "loss": 0.1297, "step": 181 }, { "epoch": 0.04141069397042093, "grad_norm": 13.147614048372157, "learning_rate": 1.2497907695368238e-06, "loss": 0.164, "step": 182 }, { "epoch": 0.041638225255972695, "grad_norm": 2.692225418023433, "learning_rate": 1.2497884513422253e-06, "loss": 0.2537, "step": 183 }, { "epoch": 0.04186575654152446, "grad_norm": 5.166049507007355, "learning_rate": 1.249786120378095e-06, "loss": 0.074, "step": 184 }, { "epoch": 0.04209328782707622, "grad_norm": 3.0648916024092596, "learning_rate": 1.2497837766444806e-06, "loss": 0.1639, "step": 185 }, { "epoch": 0.042320819112627986, "grad_norm": 4.567688921451397, "learning_rate": 1.2497814201414304e-06, "loss": 0.2905, "step": 186 }, { "epoch": 0.04254835039817975, "grad_norm": 3.970377559361967, "learning_rate": 1.249779050868992e-06, "loss": 0.2001, "step": 187 }, { "epoch": 0.04277588168373151, "grad_norm": 2.2768846909587763, "learning_rate": 1.249776668827214e-06, "loss": 0.0951, "step": 188 }, { "epoch": 0.043003412969283276, "grad_norm": 6.438142708090974, "learning_rate": 1.249774274016145e-06, "loss": 0.203, "step": 189 }, { "epoch": 0.04323094425483504, "grad_norm": 2.4175466744317977, "learning_rate": 1.2497718664358341e-06, "loss": 0.1713, "step": 190 }, { "epoch": 0.0434584755403868, "grad_norm": 4.37204480901975, "learning_rate": 1.2497694460863307e-06, "loss": 0.2986, "step": 191 }, { "epoch": 0.04368600682593857, "grad_norm": 3.2046762676937255, "learning_rate": 1.2497670129676838e-06, "loss": 0.1288, "step": 192 }, { "epoch": 0.04391353811149033, "grad_norm": 3.901472238917995, "learning_rate": 1.2497645670799436e-06, "loss": 0.1291, "step": 193 }, { "epoch": 0.04414106939704209, "grad_norm": 3.891177273974114, "learning_rate": 1.2497621084231595e-06, "loss": 0.1165, "step": 194 }, { "epoch": 0.04436860068259386, "grad_norm": 3.831124951630966, "learning_rate": 1.2497596369973823e-06, "loss": 0.175, "step": 195 }, { "epoch": 0.04459613196814562, "grad_norm": 7.137497588920377, "learning_rate": 1.2497571528026623e-06, "loss": 0.2319, "step": 196 }, { "epoch": 0.04482366325369738, "grad_norm": 2.9787063992991256, "learning_rate": 1.2497546558390503e-06, "loss": 0.2044, "step": 197 }, { "epoch": 0.04505119453924915, "grad_norm": 2.5728244375494413, "learning_rate": 1.2497521461065973e-06, "loss": 0.1395, "step": 198 }, { "epoch": 0.04527872582480091, "grad_norm": 7.102221321561537, "learning_rate": 1.2497496236053547e-06, "loss": 0.1969, "step": 199 }, { "epoch": 0.04550625711035267, "grad_norm": 2.579422809989494, "learning_rate": 1.2497470883353738e-06, "loss": 0.1019, "step": 200 }, { "epoch": 0.04573378839590444, "grad_norm": 4.340132040430137, "learning_rate": 1.2497445402967068e-06, "loss": 0.241, "step": 201 }, { "epoch": 0.0459613196814562, "grad_norm": 2.2195665044126276, "learning_rate": 1.2497419794894053e-06, "loss": 0.2059, "step": 202 }, { "epoch": 0.04618885096700796, "grad_norm": 3.274345001247324, "learning_rate": 1.249739405913522e-06, "loss": 0.1328, "step": 203 }, { "epoch": 0.04641638225255973, "grad_norm": 2.527264534705696, "learning_rate": 1.2497368195691095e-06, "loss": 0.1408, "step": 204 }, { "epoch": 0.04664391353811149, "grad_norm": 3.306757570747259, "learning_rate": 1.2497342204562205e-06, "loss": 0.2233, "step": 205 }, { "epoch": 0.046871444823663254, "grad_norm": 3.6647451852915336, "learning_rate": 1.2497316085749081e-06, "loss": 0.1239, "step": 206 }, { "epoch": 0.04709897610921502, "grad_norm": 4.68508784917087, "learning_rate": 1.249728983925226e-06, "loss": 0.1707, "step": 207 }, { "epoch": 0.04732650739476678, "grad_norm": 3.18438034976801, "learning_rate": 1.2497263465072274e-06, "loss": 0.1325, "step": 208 }, { "epoch": 0.047554038680318544, "grad_norm": 2.665536371480516, "learning_rate": 1.2497236963209663e-06, "loss": 0.247, "step": 209 }, { "epoch": 0.04778156996587031, "grad_norm": 3.6305897675111822, "learning_rate": 1.2497210333664972e-06, "loss": 0.1399, "step": 210 }, { "epoch": 0.04800910125142207, "grad_norm": 3.427786312260657, "learning_rate": 1.2497183576438743e-06, "loss": 0.1595, "step": 211 }, { "epoch": 0.048236632536973835, "grad_norm": 3.501593030667954, "learning_rate": 1.2497156691531523e-06, "loss": 0.1895, "step": 212 }, { "epoch": 0.048464163822525594, "grad_norm": 2.29399983953313, "learning_rate": 1.249712967894386e-06, "loss": 0.1273, "step": 213 }, { "epoch": 0.04869169510807736, "grad_norm": 4.248497703608046, "learning_rate": 1.2497102538676308e-06, "loss": 0.2118, "step": 214 }, { "epoch": 0.048919226393629126, "grad_norm": 5.009911727752511, "learning_rate": 1.249707527072942e-06, "loss": 0.1533, "step": 215 }, { "epoch": 0.049146757679180884, "grad_norm": 3.254064879259487, "learning_rate": 1.2497047875103757e-06, "loss": 0.3042, "step": 216 }, { "epoch": 0.04937428896473265, "grad_norm": 2.700363753095535, "learning_rate": 1.2497020351799875e-06, "loss": 0.1933, "step": 217 }, { "epoch": 0.049601820250284416, "grad_norm": 2.2159854350533763, "learning_rate": 1.2496992700818335e-06, "loss": 0.1733, "step": 218 }, { "epoch": 0.049829351535836175, "grad_norm": 6.438623712108173, "learning_rate": 1.249696492215971e-06, "loss": 0.2233, "step": 219 }, { "epoch": 0.05005688282138794, "grad_norm": 3.6403163135182552, "learning_rate": 1.249693701582456e-06, "loss": 0.1542, "step": 220 }, { "epoch": 0.05028441410693971, "grad_norm": 3.280631643810882, "learning_rate": 1.2496908981813458e-06, "loss": 0.1799, "step": 221 }, { "epoch": 0.050511945392491465, "grad_norm": 2.5684306853319687, "learning_rate": 1.2496880820126977e-06, "loss": 0.2051, "step": 222 }, { "epoch": 0.05073947667804323, "grad_norm": 2.7401430199461108, "learning_rate": 1.2496852530765695e-06, "loss": 0.1828, "step": 223 }, { "epoch": 0.050967007963595, "grad_norm": 2.95485123311806, "learning_rate": 1.2496824113730186e-06, "loss": 0.2602, "step": 224 }, { "epoch": 0.051194539249146756, "grad_norm": 2.5679914292312738, "learning_rate": 1.2496795569021033e-06, "loss": 0.1838, "step": 225 }, { "epoch": 0.05142207053469852, "grad_norm": 4.2106953289503055, "learning_rate": 1.2496766896638819e-06, "loss": 0.1831, "step": 226 }, { "epoch": 0.05164960182025029, "grad_norm": 2.4133590857510603, "learning_rate": 1.249673809658413e-06, "loss": 0.1869, "step": 227 }, { "epoch": 0.05187713310580205, "grad_norm": 2.009672236932174, "learning_rate": 1.2496709168857555e-06, "loss": 0.1297, "step": 228 }, { "epoch": 0.05210466439135381, "grad_norm": 2.57569428799923, "learning_rate": 1.2496680113459683e-06, "loss": 0.1887, "step": 229 }, { "epoch": 0.05233219567690557, "grad_norm": 3.3094428680937464, "learning_rate": 1.2496650930391113e-06, "loss": 0.2654, "step": 230 }, { "epoch": 0.05255972696245734, "grad_norm": 2.847650693015463, "learning_rate": 1.2496621619652435e-06, "loss": 0.1704, "step": 231 }, { "epoch": 0.0527872582480091, "grad_norm": 2.9888611972362167, "learning_rate": 1.2496592181244253e-06, "loss": 0.1601, "step": 232 }, { "epoch": 0.05301478953356086, "grad_norm": 2.08648737949565, "learning_rate": 1.249656261516717e-06, "loss": 0.1953, "step": 233 }, { "epoch": 0.05324232081911263, "grad_norm": 2.531082669247976, "learning_rate": 1.2496532921421781e-06, "loss": 0.1717, "step": 234 }, { "epoch": 0.053469852104664393, "grad_norm": 2.7509933573597896, "learning_rate": 1.2496503100008704e-06, "loss": 0.2469, "step": 235 }, { "epoch": 0.05369738339021615, "grad_norm": 3.5155091690123923, "learning_rate": 1.249647315092854e-06, "loss": 0.1314, "step": 236 }, { "epoch": 0.05392491467576792, "grad_norm": 3.2336581137529135, "learning_rate": 1.2496443074181905e-06, "loss": 0.1479, "step": 237 }, { "epoch": 0.054152445961319684, "grad_norm": 1.9727228995954271, "learning_rate": 1.2496412869769415e-06, "loss": 0.1072, "step": 238 }, { "epoch": 0.05437997724687144, "grad_norm": 9.030280638699303, "learning_rate": 1.2496382537691686e-06, "loss": 0.1993, "step": 239 }, { "epoch": 0.05460750853242321, "grad_norm": 2.012237999972146, "learning_rate": 1.2496352077949336e-06, "loss": 0.2021, "step": 240 }, { "epoch": 0.054835039817974975, "grad_norm": 2.875480352440569, "learning_rate": 1.249632149054299e-06, "loss": 0.1071, "step": 241 }, { "epoch": 0.05506257110352673, "grad_norm": 3.027078266755971, "learning_rate": 1.249629077547327e-06, "loss": 0.2081, "step": 242 }, { "epoch": 0.0552901023890785, "grad_norm": 3.212706521917931, "learning_rate": 1.2496259932740813e-06, "loss": 0.235, "step": 243 }, { "epoch": 0.055517633674630265, "grad_norm": 1.5899391805286471, "learning_rate": 1.2496228962346236e-06, "loss": 0.1498, "step": 244 }, { "epoch": 0.055745164960182024, "grad_norm": 2.252897408154709, "learning_rate": 1.249619786429018e-06, "loss": 0.0875, "step": 245 }, { "epoch": 0.05597269624573379, "grad_norm": 1.7851217439709355, "learning_rate": 1.2496166638573278e-06, "loss": 0.163, "step": 246 }, { "epoch": 0.05620022753128555, "grad_norm": 4.076208180076855, "learning_rate": 1.2496135285196172e-06, "loss": 0.1298, "step": 247 }, { "epoch": 0.056427758816837315, "grad_norm": 8.235783447081577, "learning_rate": 1.2496103804159497e-06, "loss": 0.1994, "step": 248 }, { "epoch": 0.05665529010238908, "grad_norm": 4.224863516307238, "learning_rate": 1.2496072195463904e-06, "loss": 0.1917, "step": 249 }, { "epoch": 0.05688282138794084, "grad_norm": 2.600108393969465, "learning_rate": 1.249604045911003e-06, "loss": 0.1728, "step": 250 }, { "epoch": 0.057110352673492605, "grad_norm": 4.193154020881599, "learning_rate": 1.249600859509853e-06, "loss": 0.1469, "step": 251 }, { "epoch": 0.05733788395904437, "grad_norm": 3.3023049454358957, "learning_rate": 1.2495976603430054e-06, "loss": 0.3015, "step": 252 }, { "epoch": 0.05756541524459613, "grad_norm": 2.1335803404002815, "learning_rate": 1.2495944484105254e-06, "loss": 0.1237, "step": 253 }, { "epoch": 0.057792946530147896, "grad_norm": 5.342229724882705, "learning_rate": 1.2495912237124787e-06, "loss": 0.1134, "step": 254 }, { "epoch": 0.05802047781569966, "grad_norm": 4.8799722775641765, "learning_rate": 1.2495879862489312e-06, "loss": 0.1865, "step": 255 }, { "epoch": 0.05824800910125142, "grad_norm": 5.731543371657422, "learning_rate": 1.2495847360199495e-06, "loss": 0.2008, "step": 256 }, { "epoch": 0.058475540386803186, "grad_norm": 2.313924736001694, "learning_rate": 1.2495814730255993e-06, "loss": 0.1361, "step": 257 }, { "epoch": 0.05870307167235495, "grad_norm": 1.3942403935107488, "learning_rate": 1.2495781972659479e-06, "loss": 0.1103, "step": 258 }, { "epoch": 0.05893060295790671, "grad_norm": 1.8635600367271647, "learning_rate": 1.2495749087410618e-06, "loss": 0.1736, "step": 259 }, { "epoch": 0.05915813424345848, "grad_norm": 3.934800507138662, "learning_rate": 1.2495716074510087e-06, "loss": 0.1706, "step": 260 }, { "epoch": 0.059385665529010236, "grad_norm": 7.067913001607123, "learning_rate": 1.2495682933958555e-06, "loss": 0.1963, "step": 261 }, { "epoch": 0.059613196814562, "grad_norm": 2.692944909371077, "learning_rate": 1.2495649665756705e-06, "loss": 0.2486, "step": 262 }, { "epoch": 0.05984072810011377, "grad_norm": 2.4930462253175305, "learning_rate": 1.2495616269905212e-06, "loss": 0.1447, "step": 263 }, { "epoch": 0.060068259385665526, "grad_norm": 1.7948148568482771, "learning_rate": 1.2495582746404762e-06, "loss": 0.0994, "step": 264 }, { "epoch": 0.06029579067121729, "grad_norm": 2.021876252112372, "learning_rate": 1.249554909525604e-06, "loss": 0.1386, "step": 265 }, { "epoch": 0.06052332195676906, "grad_norm": 2.069960058640526, "learning_rate": 1.249551531645973e-06, "loss": 0.1866, "step": 266 }, { "epoch": 0.06075085324232082, "grad_norm": 8.549797598789278, "learning_rate": 1.2495481410016527e-06, "loss": 0.3426, "step": 267 }, { "epoch": 0.06097838452787258, "grad_norm": 6.033524800668443, "learning_rate": 1.2495447375927122e-06, "loss": 0.2039, "step": 268 }, { "epoch": 0.06120591581342435, "grad_norm": 3.3984019223631656, "learning_rate": 1.2495413214192209e-06, "loss": 0.1562, "step": 269 }, { "epoch": 0.06143344709897611, "grad_norm": 2.78909231360363, "learning_rate": 1.2495378924812486e-06, "loss": 0.2056, "step": 270 }, { "epoch": 0.06166097838452787, "grad_norm": 5.781877877875473, "learning_rate": 1.2495344507788662e-06, "loss": 0.2293, "step": 271 }, { "epoch": 0.06188850967007964, "grad_norm": 2.3180826263300607, "learning_rate": 1.249530996312143e-06, "loss": 0.1489, "step": 272 }, { "epoch": 0.0621160409556314, "grad_norm": 7.2617460886104475, "learning_rate": 1.2495275290811499e-06, "loss": 0.2172, "step": 273 }, { "epoch": 0.062343572241183164, "grad_norm": 2.1316035699431173, "learning_rate": 1.2495240490859581e-06, "loss": 0.2176, "step": 274 }, { "epoch": 0.06257110352673492, "grad_norm": 2.5542857532037235, "learning_rate": 1.2495205563266384e-06, "loss": 0.1521, "step": 275 }, { "epoch": 0.06279863481228669, "grad_norm": 3.5696131149812644, "learning_rate": 1.2495170508032624e-06, "loss": 0.2817, "step": 276 }, { "epoch": 0.06302616609783845, "grad_norm": 4.055804927691344, "learning_rate": 1.2495135325159015e-06, "loss": 0.1484, "step": 277 }, { "epoch": 0.06325369738339022, "grad_norm": 2.830287596995614, "learning_rate": 1.2495100014646277e-06, "loss": 0.1714, "step": 278 }, { "epoch": 0.06348122866894199, "grad_norm": 5.2323794095215685, "learning_rate": 1.2495064576495134e-06, "loss": 0.3121, "step": 279 }, { "epoch": 0.06370875995449374, "grad_norm": 2.500465425444752, "learning_rate": 1.2495029010706306e-06, "loss": 0.1005, "step": 280 }, { "epoch": 0.0639362912400455, "grad_norm": 2.7474098845449433, "learning_rate": 1.2494993317280524e-06, "loss": 0.1755, "step": 281 }, { "epoch": 0.06416382252559727, "grad_norm": 3.1110646620479967, "learning_rate": 1.2494957496218516e-06, "loss": 0.194, "step": 282 }, { "epoch": 0.06439135381114904, "grad_norm": 1.162926170243262, "learning_rate": 1.2494921547521013e-06, "loss": 0.1667, "step": 283 }, { "epoch": 0.0646188850967008, "grad_norm": 2.034958588386092, "learning_rate": 1.249488547118875e-06, "loss": 0.1031, "step": 284 }, { "epoch": 0.06484641638225255, "grad_norm": 2.8585727096596214, "learning_rate": 1.2494849267222466e-06, "loss": 0.1199, "step": 285 }, { "epoch": 0.06507394766780432, "grad_norm": 2.3756686418598916, "learning_rate": 1.24948129356229e-06, "loss": 0.203, "step": 286 }, { "epoch": 0.06530147895335608, "grad_norm": 6.080154909085321, "learning_rate": 1.2494776476390793e-06, "loss": 0.2723, "step": 287 }, { "epoch": 0.06552901023890785, "grad_norm": 3.1578927707769684, "learning_rate": 1.2494739889526894e-06, "loss": 0.1218, "step": 288 }, { "epoch": 0.06575654152445962, "grad_norm": 2.7745317736308373, "learning_rate": 1.2494703175031946e-06, "loss": 0.194, "step": 289 }, { "epoch": 0.06598407281001138, "grad_norm": 2.872306438815133, "learning_rate": 1.2494666332906702e-06, "loss": 0.143, "step": 290 }, { "epoch": 0.06621160409556313, "grad_norm": 2.2661659384858277, "learning_rate": 1.2494629363151916e-06, "loss": 0.1497, "step": 291 }, { "epoch": 0.0664391353811149, "grad_norm": 2.7978250826969586, "learning_rate": 1.2494592265768343e-06, "loss": 0.1817, "step": 292 }, { "epoch": 0.06666666666666667, "grad_norm": 2.9435086338480496, "learning_rate": 1.2494555040756737e-06, "loss": 0.1195, "step": 293 }, { "epoch": 0.06689419795221843, "grad_norm": 2.525871560805257, "learning_rate": 1.2494517688117867e-06, "loss": 0.2054, "step": 294 }, { "epoch": 0.0671217292377702, "grad_norm": 3.3530486331117126, "learning_rate": 1.2494480207852489e-06, "loss": 0.1186, "step": 295 }, { "epoch": 0.06734926052332196, "grad_norm": 3.791549905681902, "learning_rate": 1.249444259996137e-06, "loss": 0.1616, "step": 296 }, { "epoch": 0.06757679180887372, "grad_norm": 2.3603348366809236, "learning_rate": 1.2494404864445284e-06, "loss": 0.1392, "step": 297 }, { "epoch": 0.06780432309442548, "grad_norm": 2.161901751847752, "learning_rate": 1.2494367001304996e-06, "loss": 0.1548, "step": 298 }, { "epoch": 0.06803185437997725, "grad_norm": 2.3978175716297634, "learning_rate": 1.2494329010541284e-06, "loss": 0.1634, "step": 299 }, { "epoch": 0.06825938566552901, "grad_norm": 5.413503442113624, "learning_rate": 1.2494290892154922e-06, "loss": 0.2876, "step": 300 }, { "epoch": 0.06848691695108078, "grad_norm": 1.904095426332445, "learning_rate": 1.2494252646146692e-06, "loss": 0.1942, "step": 301 }, { "epoch": 0.06871444823663253, "grad_norm": 2.0091735504190504, "learning_rate": 1.249421427251737e-06, "loss": 0.1403, "step": 302 }, { "epoch": 0.0689419795221843, "grad_norm": 2.6001586830103123, "learning_rate": 1.2494175771267748e-06, "loss": 0.2376, "step": 303 }, { "epoch": 0.06916951080773606, "grad_norm": 2.8009063420794265, "learning_rate": 1.2494137142398607e-06, "loss": 0.1877, "step": 304 }, { "epoch": 0.06939704209328783, "grad_norm": 2.0648464255318517, "learning_rate": 1.249409838591074e-06, "loss": 0.1462, "step": 305 }, { "epoch": 0.0696245733788396, "grad_norm": 2.6396516124770657, "learning_rate": 1.2494059501804937e-06, "loss": 0.256, "step": 306 }, { "epoch": 0.06985210466439136, "grad_norm": 2.9901343092043837, "learning_rate": 1.249402049008199e-06, "loss": 0.1483, "step": 307 }, { "epoch": 0.07007963594994311, "grad_norm": 3.0343546498099356, "learning_rate": 1.2493981350742704e-06, "loss": 0.1561, "step": 308 }, { "epoch": 0.07030716723549488, "grad_norm": 3.2148889672864636, "learning_rate": 1.2493942083787872e-06, "loss": 0.1856, "step": 309 }, { "epoch": 0.07053469852104664, "grad_norm": 2.795539793994042, "learning_rate": 1.2493902689218299e-06, "loss": 0.1294, "step": 310 }, { "epoch": 0.07076222980659841, "grad_norm": 2.1866434219410307, "learning_rate": 1.249386316703479e-06, "loss": 0.1789, "step": 311 }, { "epoch": 0.07098976109215017, "grad_norm": 4.93386744278198, "learning_rate": 1.2493823517238154e-06, "loss": 0.1529, "step": 312 }, { "epoch": 0.07121729237770194, "grad_norm": 2.127480030167813, "learning_rate": 1.2493783739829202e-06, "loss": 0.1593, "step": 313 }, { "epoch": 0.07144482366325369, "grad_norm": 2.565861378561538, "learning_rate": 1.2493743834808741e-06, "loss": 0.1442, "step": 314 }, { "epoch": 0.07167235494880546, "grad_norm": 3.129314599970171, "learning_rate": 1.2493703802177594e-06, "loss": 0.1936, "step": 315 }, { "epoch": 0.07189988623435722, "grad_norm": 4.26603531282599, "learning_rate": 1.2493663641936576e-06, "loss": 0.1343, "step": 316 }, { "epoch": 0.07212741751990899, "grad_norm": 1.778626655821605, "learning_rate": 1.2493623354086507e-06, "loss": 0.1751, "step": 317 }, { "epoch": 0.07235494880546076, "grad_norm": 2.576979617695665, "learning_rate": 1.2493582938628213e-06, "loss": 0.1405, "step": 318 }, { "epoch": 0.07258248009101251, "grad_norm": 2.528946823784448, "learning_rate": 1.2493542395562516e-06, "loss": 0.1207, "step": 319 }, { "epoch": 0.07281001137656427, "grad_norm": 1.7105561186222351, "learning_rate": 1.2493501724890247e-06, "loss": 0.1067, "step": 320 }, { "epoch": 0.07303754266211604, "grad_norm": 3.0021555230652144, "learning_rate": 1.249346092661224e-06, "loss": 0.1769, "step": 321 }, { "epoch": 0.0732650739476678, "grad_norm": 3.2473648686733787, "learning_rate": 1.2493420000729322e-06, "loss": 0.1797, "step": 322 }, { "epoch": 0.07349260523321957, "grad_norm": 2.9141882965376644, "learning_rate": 1.2493378947242336e-06, "loss": 0.1936, "step": 323 }, { "epoch": 0.07372013651877134, "grad_norm": 2.139000059452357, "learning_rate": 1.2493337766152119e-06, "loss": 0.1323, "step": 324 }, { "epoch": 0.07394766780432309, "grad_norm": 3.7562365963393773, "learning_rate": 1.249329645745951e-06, "loss": 0.1521, "step": 325 }, { "epoch": 0.07417519908987485, "grad_norm": 3.1427328506374343, "learning_rate": 1.2493255021165357e-06, "loss": 0.1426, "step": 326 }, { "epoch": 0.07440273037542662, "grad_norm": 2.5928821859504225, "learning_rate": 1.2493213457270504e-06, "loss": 0.1492, "step": 327 }, { "epoch": 0.07463026166097839, "grad_norm": 2.6116349350740773, "learning_rate": 1.2493171765775804e-06, "loss": 0.1079, "step": 328 }, { "epoch": 0.07485779294653015, "grad_norm": 2.5063754100070796, "learning_rate": 1.2493129946682107e-06, "loss": 0.1449, "step": 329 }, { "epoch": 0.07508532423208192, "grad_norm": 2.7029390289735247, "learning_rate": 1.2493087999990263e-06, "loss": 0.2012, "step": 330 }, { "epoch": 0.07531285551763367, "grad_norm": 3.168250561710959, "learning_rate": 1.249304592570114e-06, "loss": 0.135, "step": 331 }, { "epoch": 0.07554038680318544, "grad_norm": 3.358825282989208, "learning_rate": 1.2493003723815588e-06, "loss": 0.202, "step": 332 }, { "epoch": 0.0757679180887372, "grad_norm": 3.4712230061099367, "learning_rate": 1.2492961394334474e-06, "loss": 0.1796, "step": 333 }, { "epoch": 0.07599544937428897, "grad_norm": 2.7447934095202586, "learning_rate": 1.2492918937258663e-06, "loss": 0.1529, "step": 334 }, { "epoch": 0.07622298065984073, "grad_norm": 4.884489478774658, "learning_rate": 1.2492876352589024e-06, "loss": 0.1983, "step": 335 }, { "epoch": 0.07645051194539249, "grad_norm": 11.840111431867928, "learning_rate": 1.2492833640326424e-06, "loss": 0.1701, "step": 336 }, { "epoch": 0.07667804323094425, "grad_norm": 3.6493332372043032, "learning_rate": 1.2492790800471738e-06, "loss": 0.1894, "step": 337 }, { "epoch": 0.07690557451649602, "grad_norm": 2.2273861687776657, "learning_rate": 1.249274783302584e-06, "loss": 0.1168, "step": 338 }, { "epoch": 0.07713310580204778, "grad_norm": 3.0155968100929016, "learning_rate": 1.249270473798961e-06, "loss": 0.1877, "step": 339 }, { "epoch": 0.07736063708759955, "grad_norm": 3.6811309004263197, "learning_rate": 1.249266151536393e-06, "loss": 0.1841, "step": 340 }, { "epoch": 0.07758816837315131, "grad_norm": 3.3318670131929355, "learning_rate": 1.249261816514968e-06, "loss": 0.1425, "step": 341 }, { "epoch": 0.07781569965870307, "grad_norm": 1.542707864707429, "learning_rate": 1.2492574687347747e-06, "loss": 0.0954, "step": 342 }, { "epoch": 0.07804323094425483, "grad_norm": 5.219514434003638, "learning_rate": 1.249253108195902e-06, "loss": 0.1523, "step": 343 }, { "epoch": 0.0782707622298066, "grad_norm": 2.685054702258556, "learning_rate": 1.249248734898439e-06, "loss": 0.1932, "step": 344 }, { "epoch": 0.07849829351535836, "grad_norm": 3.782143044532345, "learning_rate": 1.2492443488424753e-06, "loss": 0.1782, "step": 345 }, { "epoch": 0.07872582480091013, "grad_norm": 2.987081909452687, "learning_rate": 1.2492399500281002e-06, "loss": 0.1174, "step": 346 }, { "epoch": 0.07895335608646188, "grad_norm": 2.4163752446451667, "learning_rate": 1.2492355384554039e-06, "loss": 0.1864, "step": 347 }, { "epoch": 0.07918088737201365, "grad_norm": 2.881696468020635, "learning_rate": 1.2492311141244764e-06, "loss": 0.1509, "step": 348 }, { "epoch": 0.07940841865756541, "grad_norm": 4.2425549257036925, "learning_rate": 1.249226677035408e-06, "loss": 0.1384, "step": 349 }, { "epoch": 0.07963594994311718, "grad_norm": 2.999886291999185, "learning_rate": 1.2492222271882896e-06, "loss": 0.1631, "step": 350 }, { "epoch": 0.07986348122866894, "grad_norm": 4.681484131322112, "learning_rate": 1.2492177645832121e-06, "loss": 0.1752, "step": 351 }, { "epoch": 0.08009101251422071, "grad_norm": 2.921704965075288, "learning_rate": 1.2492132892202668e-06, "loss": 0.1486, "step": 352 }, { "epoch": 0.08031854379977246, "grad_norm": 5.592595582830648, "learning_rate": 1.2492088010995449e-06, "loss": 0.2707, "step": 353 }, { "epoch": 0.08054607508532423, "grad_norm": 2.9440013961704823, "learning_rate": 1.2492043002211385e-06, "loss": 0.2054, "step": 354 }, { "epoch": 0.080773606370876, "grad_norm": 2.2221784159000006, "learning_rate": 1.2491997865851392e-06, "loss": 0.1373, "step": 355 }, { "epoch": 0.08100113765642776, "grad_norm": 1.7381570114572884, "learning_rate": 1.2491952601916395e-06, "loss": 0.0858, "step": 356 }, { "epoch": 0.08122866894197953, "grad_norm": 2.930524510809462, "learning_rate": 1.2491907210407319e-06, "loss": 0.2179, "step": 357 }, { "epoch": 0.08145620022753129, "grad_norm": 1.329914120982883, "learning_rate": 1.249186169132509e-06, "loss": 0.1839, "step": 358 }, { "epoch": 0.08168373151308304, "grad_norm": 4.774637200381304, "learning_rate": 1.2491816044670641e-06, "loss": 0.1266, "step": 359 }, { "epoch": 0.08191126279863481, "grad_norm": 3.0085506218930442, "learning_rate": 1.24917702704449e-06, "loss": 0.1813, "step": 360 }, { "epoch": 0.08213879408418658, "grad_norm": 2.683588571853357, "learning_rate": 1.2491724368648808e-06, "loss": 0.1182, "step": 361 }, { "epoch": 0.08236632536973834, "grad_norm": 4.142859587264675, "learning_rate": 1.2491678339283303e-06, "loss": 0.1213, "step": 362 }, { "epoch": 0.08259385665529011, "grad_norm": 2.266538556877378, "learning_rate": 1.249163218234932e-06, "loss": 0.1669, "step": 363 }, { "epoch": 0.08282138794084186, "grad_norm": 3.340308786527698, "learning_rate": 1.249158589784781e-06, "loss": 0.1449, "step": 364 }, { "epoch": 0.08304891922639362, "grad_norm": 3.600922134824311, "learning_rate": 1.2491539485779713e-06, "loss": 0.1934, "step": 365 }, { "epoch": 0.08327645051194539, "grad_norm": 2.5603148777390796, "learning_rate": 1.2491492946145981e-06, "loss": 0.1215, "step": 366 }, { "epoch": 0.08350398179749716, "grad_norm": 1.4306937563740754, "learning_rate": 1.2491446278947563e-06, "loss": 0.1218, "step": 367 }, { "epoch": 0.08373151308304892, "grad_norm": 6.514691076015768, "learning_rate": 1.2491399484185413e-06, "loss": 0.1723, "step": 368 }, { "epoch": 0.08395904436860069, "grad_norm": 2.1513333963844214, "learning_rate": 1.249135256186049e-06, "loss": 0.242, "step": 369 }, { "epoch": 0.08418657565415244, "grad_norm": 1.697947937157404, "learning_rate": 1.249130551197375e-06, "loss": 0.1045, "step": 370 }, { "epoch": 0.0844141069397042, "grad_norm": 1.4338559958770856, "learning_rate": 1.2491258334526155e-06, "loss": 0.1671, "step": 371 }, { "epoch": 0.08464163822525597, "grad_norm": 2.7532236684188773, "learning_rate": 1.2491211029518672e-06, "loss": 0.1034, "step": 372 }, { "epoch": 0.08486916951080774, "grad_norm": 2.665642318134447, "learning_rate": 1.2491163596952264e-06, "loss": 0.1737, "step": 373 }, { "epoch": 0.0850967007963595, "grad_norm": 1.5130437493435105, "learning_rate": 1.2491116036827902e-06, "loss": 0.0804, "step": 374 }, { "epoch": 0.08532423208191127, "grad_norm": 1.3642320073282543, "learning_rate": 1.2491068349146559e-06, "loss": 0.1428, "step": 375 }, { "epoch": 0.08555176336746302, "grad_norm": 2.1006895230964444, "learning_rate": 1.249102053390921e-06, "loss": 0.2759, "step": 376 }, { "epoch": 0.08577929465301479, "grad_norm": 1.5335225229109515, "learning_rate": 1.249097259111683e-06, "loss": 0.1836, "step": 377 }, { "epoch": 0.08600682593856655, "grad_norm": 4.09523641946509, "learning_rate": 1.24909245207704e-06, "loss": 0.2771, "step": 378 }, { "epoch": 0.08623435722411832, "grad_norm": 2.2658393838403477, "learning_rate": 1.2490876322870904e-06, "loss": 0.1815, "step": 379 }, { "epoch": 0.08646188850967008, "grad_norm": 3.053596441038967, "learning_rate": 1.2490827997419325e-06, "loss": 0.1183, "step": 380 }, { "epoch": 0.08668941979522184, "grad_norm": 2.9366601199125153, "learning_rate": 1.249077954441665e-06, "loss": 0.1738, "step": 381 }, { "epoch": 0.0869169510807736, "grad_norm": 1.9726593738442935, "learning_rate": 1.249073096386387e-06, "loss": 0.1427, "step": 382 }, { "epoch": 0.08714448236632537, "grad_norm": 2.8452874204285985, "learning_rate": 1.249068225576198e-06, "loss": 0.2767, "step": 383 }, { "epoch": 0.08737201365187713, "grad_norm": 4.292343700500067, "learning_rate": 1.2490633420111974e-06, "loss": 0.127, "step": 384 }, { "epoch": 0.0875995449374289, "grad_norm": 4.105827667785258, "learning_rate": 1.249058445691485e-06, "loss": 0.1639, "step": 385 }, { "epoch": 0.08782707622298067, "grad_norm": 4.310698395146462, "learning_rate": 1.2490535366171607e-06, "loss": 0.1289, "step": 386 }, { "epoch": 0.08805460750853242, "grad_norm": 3.5788743602832795, "learning_rate": 1.249048614788325e-06, "loss": 0.1804, "step": 387 }, { "epoch": 0.08828213879408418, "grad_norm": 2.6616942664445413, "learning_rate": 1.249043680205079e-06, "loss": 0.144, "step": 388 }, { "epoch": 0.08850967007963595, "grad_norm": 2.989163897960478, "learning_rate": 1.2490387328675226e-06, "loss": 0.2016, "step": 389 }, { "epoch": 0.08873720136518772, "grad_norm": 4.587176162210019, "learning_rate": 1.2490337727757576e-06, "loss": 0.2284, "step": 390 }, { "epoch": 0.08896473265073948, "grad_norm": 2.794747809075531, "learning_rate": 1.249028799929885e-06, "loss": 0.2002, "step": 391 }, { "epoch": 0.08919226393629125, "grad_norm": 2.0197262567230276, "learning_rate": 1.2490238143300066e-06, "loss": 0.1143, "step": 392 }, { "epoch": 0.089419795221843, "grad_norm": 3.184614553894442, "learning_rate": 1.2490188159762243e-06, "loss": 0.1913, "step": 393 }, { "epoch": 0.08964732650739476, "grad_norm": 2.518010477046937, "learning_rate": 1.2490138048686405e-06, "loss": 0.1981, "step": 394 }, { "epoch": 0.08987485779294653, "grad_norm": 5.010077865699377, "learning_rate": 1.249008781007357e-06, "loss": 0.1423, "step": 395 }, { "epoch": 0.0901023890784983, "grad_norm": 1.420461399090385, "learning_rate": 1.2490037443924768e-06, "loss": 0.1363, "step": 396 }, { "epoch": 0.09032992036405006, "grad_norm": 2.5810652557759863, "learning_rate": 1.2489986950241032e-06, "loss": 0.1002, "step": 397 }, { "epoch": 0.09055745164960181, "grad_norm": 1.8725706501255737, "learning_rate": 1.2489936329023387e-06, "loss": 0.1974, "step": 398 }, { "epoch": 0.09078498293515358, "grad_norm": 3.2869147678539554, "learning_rate": 1.2489885580272874e-06, "loss": 0.1629, "step": 399 }, { "epoch": 0.09101251422070535, "grad_norm": 1.7546095764098488, "learning_rate": 1.2489834703990527e-06, "loss": 0.1326, "step": 400 }, { "epoch": 0.09124004550625711, "grad_norm": 3.0930989898336407, "learning_rate": 1.2489783700177385e-06, "loss": 0.2565, "step": 401 }, { "epoch": 0.09146757679180888, "grad_norm": 4.363886237065706, "learning_rate": 1.2489732568834492e-06, "loss": 0.1425, "step": 402 }, { "epoch": 0.09169510807736064, "grad_norm": 2.141413419957395, "learning_rate": 1.2489681309962895e-06, "loss": 0.1458, "step": 403 }, { "epoch": 0.0919226393629124, "grad_norm": 4.5478526718009205, "learning_rate": 1.2489629923563637e-06, "loss": 0.1655, "step": 404 }, { "epoch": 0.09215017064846416, "grad_norm": 5.253865415098631, "learning_rate": 1.2489578409637774e-06, "loss": 0.2702, "step": 405 }, { "epoch": 0.09237770193401593, "grad_norm": 6.114423825591168, "learning_rate": 1.2489526768186352e-06, "loss": 0.1364, "step": 406 }, { "epoch": 0.09260523321956769, "grad_norm": 2.4260049242900505, "learning_rate": 1.2489474999210434e-06, "loss": 0.1573, "step": 407 }, { "epoch": 0.09283276450511946, "grad_norm": 6.696614155480106, "learning_rate": 1.2489423102711068e-06, "loss": 0.2365, "step": 408 }, { "epoch": 0.09306029579067122, "grad_norm": 3.4093511525509848, "learning_rate": 1.2489371078689326e-06, "loss": 0.1552, "step": 409 }, { "epoch": 0.09328782707622298, "grad_norm": 3.512014449058475, "learning_rate": 1.2489318927146263e-06, "loss": 0.1392, "step": 410 }, { "epoch": 0.09351535836177474, "grad_norm": 4.385040034701264, "learning_rate": 1.2489266648082951e-06, "loss": 0.1184, "step": 411 }, { "epoch": 0.09374288964732651, "grad_norm": 11.030038016242493, "learning_rate": 1.2489214241500453e-06, "loss": 0.2445, "step": 412 }, { "epoch": 0.09397042093287827, "grad_norm": 3.8160488235069487, "learning_rate": 1.2489161707399843e-06, "loss": 0.2422, "step": 413 }, { "epoch": 0.09419795221843004, "grad_norm": 2.5154081754915554, "learning_rate": 1.2489109045782194e-06, "loss": 0.1284, "step": 414 }, { "epoch": 0.09442548350398179, "grad_norm": 2.186602019326803, "learning_rate": 1.2489056256648582e-06, "loss": 0.1387, "step": 415 }, { "epoch": 0.09465301478953356, "grad_norm": 3.1244704898712223, "learning_rate": 1.2489003340000089e-06, "loss": 0.2695, "step": 416 }, { "epoch": 0.09488054607508532, "grad_norm": 1.9015703147093774, "learning_rate": 1.2488950295837792e-06, "loss": 0.2029, "step": 417 }, { "epoch": 0.09510807736063709, "grad_norm": 3.2255120343889523, "learning_rate": 1.2488897124162777e-06, "loss": 0.1708, "step": 418 }, { "epoch": 0.09533560864618885, "grad_norm": 2.4361554392110354, "learning_rate": 1.248884382497613e-06, "loss": 0.237, "step": 419 }, { "epoch": 0.09556313993174062, "grad_norm": 5.44904137240634, "learning_rate": 1.2488790398278941e-06, "loss": 0.2259, "step": 420 }, { "epoch": 0.09579067121729237, "grad_norm": 2.5542725247665725, "learning_rate": 1.2488736844072304e-06, "loss": 0.1706, "step": 421 }, { "epoch": 0.09601820250284414, "grad_norm": 3.3440828684749837, "learning_rate": 1.248868316235731e-06, "loss": 0.166, "step": 422 }, { "epoch": 0.0962457337883959, "grad_norm": 2.837980086891423, "learning_rate": 1.2488629353135059e-06, "loss": 0.1974, "step": 423 }, { "epoch": 0.09647326507394767, "grad_norm": 3.0821716156484413, "learning_rate": 1.2488575416406649e-06, "loss": 0.2029, "step": 424 }, { "epoch": 0.09670079635949944, "grad_norm": 4.11082660525738, "learning_rate": 1.2488521352173183e-06, "loss": 0.1288, "step": 425 }, { "epoch": 0.09692832764505119, "grad_norm": 2.792375492899653, "learning_rate": 1.2488467160435765e-06, "loss": 0.1318, "step": 426 }, { "epoch": 0.09715585893060295, "grad_norm": 2.54978143800456, "learning_rate": 1.2488412841195505e-06, "loss": 0.2235, "step": 427 }, { "epoch": 0.09738339021615472, "grad_norm": 1.8685713785223814, "learning_rate": 1.2488358394453512e-06, "loss": 0.1018, "step": 428 }, { "epoch": 0.09761092150170649, "grad_norm": 2.19856597261874, "learning_rate": 1.2488303820210897e-06, "loss": 0.0955, "step": 429 }, { "epoch": 0.09783845278725825, "grad_norm": 2.756460140283964, "learning_rate": 1.2488249118468776e-06, "loss": 0.161, "step": 430 }, { "epoch": 0.09806598407281002, "grad_norm": 3.1658885878432446, "learning_rate": 1.248819428922827e-06, "loss": 0.1707, "step": 431 }, { "epoch": 0.09829351535836177, "grad_norm": 3.574624372801338, "learning_rate": 1.2488139332490495e-06, "loss": 0.2412, "step": 432 }, { "epoch": 0.09852104664391353, "grad_norm": 2.63473599121384, "learning_rate": 1.248808424825658e-06, "loss": 0.1195, "step": 433 }, { "epoch": 0.0987485779294653, "grad_norm": 3.928170371490413, "learning_rate": 1.2488029036527645e-06, "loss": 0.1478, "step": 434 }, { "epoch": 0.09897610921501707, "grad_norm": 2.0459697190569583, "learning_rate": 1.2487973697304822e-06, "loss": 0.0868, "step": 435 }, { "epoch": 0.09920364050056883, "grad_norm": 2.2037192709560283, "learning_rate": 1.248791823058924e-06, "loss": 0.1911, "step": 436 }, { "epoch": 0.0994311717861206, "grad_norm": 3.549121049187713, "learning_rate": 1.2487862636382034e-06, "loss": 0.1218, "step": 437 }, { "epoch": 0.09965870307167235, "grad_norm": 1.4303061363329783, "learning_rate": 1.248780691468434e-06, "loss": 0.1116, "step": 438 }, { "epoch": 0.09988623435722412, "grad_norm": 3.8141735085769746, "learning_rate": 1.2487751065497296e-06, "loss": 0.2179, "step": 439 }, { "epoch": 0.10011376564277588, "grad_norm": 2.6329169063924986, "learning_rate": 1.2487695088822044e-06, "loss": 0.1492, "step": 440 }, { "epoch": 0.10034129692832765, "grad_norm": 2.8773216855185635, "learning_rate": 1.2487638984659729e-06, "loss": 0.0988, "step": 441 }, { "epoch": 0.10056882821387941, "grad_norm": 2.5448731857786284, "learning_rate": 1.2487582753011496e-06, "loss": 0.1023, "step": 442 }, { "epoch": 0.10079635949943117, "grad_norm": 2.4399816480891445, "learning_rate": 1.2487526393878497e-06, "loss": 0.2015, "step": 443 }, { "epoch": 0.10102389078498293, "grad_norm": 2.056202357783669, "learning_rate": 1.248746990726188e-06, "loss": 0.1376, "step": 444 }, { "epoch": 0.1012514220705347, "grad_norm": 2.489946255383071, "learning_rate": 1.2487413293162803e-06, "loss": 0.1389, "step": 445 }, { "epoch": 0.10147895335608646, "grad_norm": 2.3660691937468807, "learning_rate": 1.2487356551582421e-06, "loss": 0.2235, "step": 446 }, { "epoch": 0.10170648464163823, "grad_norm": 2.5030375037996575, "learning_rate": 1.2487299682521893e-06, "loss": 0.2156, "step": 447 }, { "epoch": 0.10193401592719, "grad_norm": 2.210721856008811, "learning_rate": 1.2487242685982384e-06, "loss": 0.1101, "step": 448 }, { "epoch": 0.10216154721274175, "grad_norm": 2.250420318734035, "learning_rate": 1.2487185561965057e-06, "loss": 0.1241, "step": 449 }, { "epoch": 0.10238907849829351, "grad_norm": 2.019413043508561, "learning_rate": 1.248712831047108e-06, "loss": 0.1217, "step": 450 }, { "epoch": 0.10261660978384528, "grad_norm": 3.2295330442493713, "learning_rate": 1.2487070931501624e-06, "loss": 0.2304, "step": 451 }, { "epoch": 0.10284414106939704, "grad_norm": 2.444299385213433, "learning_rate": 1.2487013425057858e-06, "loss": 0.2084, "step": 452 }, { "epoch": 0.10307167235494881, "grad_norm": 2.8966369631126367, "learning_rate": 1.2486955791140964e-06, "loss": 0.1838, "step": 453 }, { "epoch": 0.10329920364050058, "grad_norm": 2.0941566856763387, "learning_rate": 1.2486898029752113e-06, "loss": 0.1043, "step": 454 }, { "epoch": 0.10352673492605233, "grad_norm": 2.3019250022426925, "learning_rate": 1.248684014089249e-06, "loss": 0.1189, "step": 455 }, { "epoch": 0.1037542662116041, "grad_norm": 2.1349092143720387, "learning_rate": 1.2486782124563277e-06, "loss": 0.1708, "step": 456 }, { "epoch": 0.10398179749715586, "grad_norm": 3.101054381668985, "learning_rate": 1.2486723980765659e-06, "loss": 0.1796, "step": 457 }, { "epoch": 0.10420932878270762, "grad_norm": 1.9574694651381292, "learning_rate": 1.2486665709500826e-06, "loss": 0.1762, "step": 458 }, { "epoch": 0.10443686006825939, "grad_norm": 1.9997685220641748, "learning_rate": 1.2486607310769965e-06, "loss": 0.1626, "step": 459 }, { "epoch": 0.10466439135381114, "grad_norm": 1.4987645243428842, "learning_rate": 1.2486548784574275e-06, "loss": 0.1104, "step": 460 }, { "epoch": 0.10489192263936291, "grad_norm": 3.0056305765303857, "learning_rate": 1.2486490130914948e-06, "loss": 0.1526, "step": 461 }, { "epoch": 0.10511945392491467, "grad_norm": 1.6498658926200307, "learning_rate": 1.2486431349793185e-06, "loss": 0.1158, "step": 462 }, { "epoch": 0.10534698521046644, "grad_norm": 2.8097802744351035, "learning_rate": 1.2486372441210188e-06, "loss": 0.174, "step": 463 }, { "epoch": 0.1055745164960182, "grad_norm": 2.2295425114906955, "learning_rate": 1.248631340516716e-06, "loss": 0.0993, "step": 464 }, { "epoch": 0.10580204778156997, "grad_norm": 1.7352971105344217, "learning_rate": 1.2486254241665302e-06, "loss": 0.1799, "step": 465 }, { "epoch": 0.10602957906712172, "grad_norm": 3.37890451450669, "learning_rate": 1.2486194950705831e-06, "loss": 0.1456, "step": 466 }, { "epoch": 0.10625711035267349, "grad_norm": 4.485196875503332, "learning_rate": 1.248613553228996e-06, "loss": 0.1509, "step": 467 }, { "epoch": 0.10648464163822526, "grad_norm": 3.8128664414272833, "learning_rate": 1.2486075986418896e-06, "loss": 0.1217, "step": 468 }, { "epoch": 0.10671217292377702, "grad_norm": 1.9049325746647565, "learning_rate": 1.248601631309386e-06, "loss": 0.1973, "step": 469 }, { "epoch": 0.10693970420932879, "grad_norm": 1.9433225744575688, "learning_rate": 1.2485956512316072e-06, "loss": 0.1422, "step": 470 }, { "epoch": 0.10716723549488055, "grad_norm": 1.7542185976103952, "learning_rate": 1.2485896584086754e-06, "loss": 0.1187, "step": 471 }, { "epoch": 0.1073947667804323, "grad_norm": 0.985585738392577, "learning_rate": 1.248583652840713e-06, "loss": 0.1116, "step": 472 }, { "epoch": 0.10762229806598407, "grad_norm": 6.520293791736507, "learning_rate": 1.2485776345278427e-06, "loss": 0.1634, "step": 473 }, { "epoch": 0.10784982935153584, "grad_norm": 2.9958165676640935, "learning_rate": 1.2485716034701876e-06, "loss": 0.1468, "step": 474 }, { "epoch": 0.1080773606370876, "grad_norm": 3.496540224028896, "learning_rate": 1.2485655596678712e-06, "loss": 0.1444, "step": 475 }, { "epoch": 0.10830489192263937, "grad_norm": 2.6887910577996603, "learning_rate": 1.2485595031210164e-06, "loss": 0.2257, "step": 476 }, { "epoch": 0.10853242320819112, "grad_norm": 2.210859712757279, "learning_rate": 1.2485534338297475e-06, "loss": 0.0858, "step": 477 }, { "epoch": 0.10875995449374289, "grad_norm": 1.5912288577365465, "learning_rate": 1.2485473517941884e-06, "loss": 0.1021, "step": 478 }, { "epoch": 0.10898748577929465, "grad_norm": 2.162920899638659, "learning_rate": 1.2485412570144633e-06, "loss": 0.2051, "step": 479 }, { "epoch": 0.10921501706484642, "grad_norm": 2.3337569161162186, "learning_rate": 1.2485351494906969e-06, "loss": 0.1726, "step": 480 }, { "epoch": 0.10944254835039818, "grad_norm": 1.6587972530161754, "learning_rate": 1.2485290292230142e-06, "loss": 0.1589, "step": 481 }, { "epoch": 0.10967007963594995, "grad_norm": 2.549443212629399, "learning_rate": 1.24852289621154e-06, "loss": 0.1107, "step": 482 }, { "epoch": 0.1098976109215017, "grad_norm": 1.9600173744992218, "learning_rate": 1.2485167504563995e-06, "loss": 0.1497, "step": 483 }, { "epoch": 0.11012514220705347, "grad_norm": 2.914488733886043, "learning_rate": 1.2485105919577187e-06, "loss": 0.2242, "step": 484 }, { "epoch": 0.11035267349260523, "grad_norm": 2.4334592724633475, "learning_rate": 1.2485044207156233e-06, "loss": 0.1326, "step": 485 }, { "epoch": 0.110580204778157, "grad_norm": 2.1918094312708374, "learning_rate": 1.2484982367302395e-06, "loss": 0.1611, "step": 486 }, { "epoch": 0.11080773606370876, "grad_norm": 2.2072766100880843, "learning_rate": 1.2484920400016936e-06, "loss": 0.1402, "step": 487 }, { "epoch": 0.11103526734926053, "grad_norm": 1.6859469474720183, "learning_rate": 1.2484858305301122e-06, "loss": 0.1472, "step": 488 }, { "epoch": 0.11126279863481228, "grad_norm": 1.590244696061809, "learning_rate": 1.2484796083156222e-06, "loss": 0.0824, "step": 489 }, { "epoch": 0.11149032992036405, "grad_norm": 4.525638347888733, "learning_rate": 1.2484733733583511e-06, "loss": 0.1257, "step": 490 }, { "epoch": 0.11171786120591581, "grad_norm": 2.6721724669454723, "learning_rate": 1.248467125658426e-06, "loss": 0.2084, "step": 491 }, { "epoch": 0.11194539249146758, "grad_norm": 2.300055245713483, "learning_rate": 1.2484608652159746e-06, "loss": 0.1053, "step": 492 }, { "epoch": 0.11217292377701935, "grad_norm": 3.273977920110333, "learning_rate": 1.248454592031125e-06, "loss": 0.1176, "step": 493 }, { "epoch": 0.1124004550625711, "grad_norm": 2.101057790899636, "learning_rate": 1.2484483061040054e-06, "loss": 0.1277, "step": 494 }, { "epoch": 0.11262798634812286, "grad_norm": 3.6133620556599984, "learning_rate": 1.2484420074347441e-06, "loss": 0.1845, "step": 495 }, { "epoch": 0.11285551763367463, "grad_norm": 1.9619725915027257, "learning_rate": 1.24843569602347e-06, "loss": 0.1894, "step": 496 }, { "epoch": 0.1130830489192264, "grad_norm": 2.636905846270966, "learning_rate": 1.2484293718703119e-06, "loss": 0.1874, "step": 497 }, { "epoch": 0.11331058020477816, "grad_norm": 2.5593822043936125, "learning_rate": 1.2484230349753994e-06, "loss": 0.0927, "step": 498 }, { "epoch": 0.11353811149032993, "grad_norm": 2.2440609982402715, "learning_rate": 1.2484166853388617e-06, "loss": 0.1381, "step": 499 }, { "epoch": 0.11376564277588168, "grad_norm": 2.7232866925160506, "learning_rate": 1.2484103229608288e-06, "loss": 0.1758, "step": 500 }, { "epoch": 0.11399317406143344, "grad_norm": 2.6484317978572816, "learning_rate": 1.2484039478414305e-06, "loss": 0.1259, "step": 501 }, { "epoch": 0.11422070534698521, "grad_norm": 2.1058374053464464, "learning_rate": 1.2483975599807972e-06, "loss": 0.1369, "step": 502 }, { "epoch": 0.11444823663253698, "grad_norm": 2.1458925241645903, "learning_rate": 1.2483911593790595e-06, "loss": 0.1004, "step": 503 }, { "epoch": 0.11467576791808874, "grad_norm": 3.031837353586065, "learning_rate": 1.2483847460363482e-06, "loss": 0.154, "step": 504 }, { "epoch": 0.1149032992036405, "grad_norm": 3.1297621875057544, "learning_rate": 1.2483783199527943e-06, "loss": 0.1071, "step": 505 }, { "epoch": 0.11513083048919226, "grad_norm": 2.5407911203085787, "learning_rate": 1.2483718811285296e-06, "loss": 0.1744, "step": 506 }, { "epoch": 0.11535836177474403, "grad_norm": 3.1175064627764377, "learning_rate": 1.2483654295636848e-06, "loss": 0.1072, "step": 507 }, { "epoch": 0.11558589306029579, "grad_norm": 3.0988741009535667, "learning_rate": 1.2483589652583924e-06, "loss": 0.1753, "step": 508 }, { "epoch": 0.11581342434584756, "grad_norm": 1.8808814641931946, "learning_rate": 1.2483524882127846e-06, "loss": 0.0859, "step": 509 }, { "epoch": 0.11604095563139932, "grad_norm": 2.8937543802568158, "learning_rate": 1.2483459984269933e-06, "loss": 0.1816, "step": 510 }, { "epoch": 0.11626848691695107, "grad_norm": 2.186370885841539, "learning_rate": 1.2483394959011514e-06, "loss": 0.0819, "step": 511 }, { "epoch": 0.11649601820250284, "grad_norm": 1.8650801779387822, "learning_rate": 1.248332980635392e-06, "loss": 0.1436, "step": 512 }, { "epoch": 0.1167235494880546, "grad_norm": 2.9270321544640994, "learning_rate": 1.2483264526298478e-06, "loss": 0.1308, "step": 513 }, { "epoch": 0.11695108077360637, "grad_norm": 1.9942689645578024, "learning_rate": 1.2483199118846525e-06, "loss": 0.1656, "step": 514 }, { "epoch": 0.11717861205915814, "grad_norm": 2.8104633311436116, "learning_rate": 1.2483133583999399e-06, "loss": 0.1681, "step": 515 }, { "epoch": 0.1174061433447099, "grad_norm": 2.546169206593085, "learning_rate": 1.2483067921758439e-06, "loss": 0.0925, "step": 516 }, { "epoch": 0.11763367463026166, "grad_norm": 2.0758430805982178, "learning_rate": 1.2483002132124983e-06, "loss": 0.203, "step": 517 }, { "epoch": 0.11786120591581342, "grad_norm": 2.1497459150584386, "learning_rate": 1.2482936215100382e-06, "loss": 0.1056, "step": 518 }, { "epoch": 0.11808873720136519, "grad_norm": 2.197584956184683, "learning_rate": 1.2482870170685978e-06, "loss": 0.0933, "step": 519 }, { "epoch": 0.11831626848691695, "grad_norm": 4.944962250057973, "learning_rate": 1.2482803998883122e-06, "loss": 0.2129, "step": 520 }, { "epoch": 0.11854379977246872, "grad_norm": 1.5333537239736301, "learning_rate": 1.2482737699693168e-06, "loss": 0.1729, "step": 521 }, { "epoch": 0.11877133105802047, "grad_norm": 2.5556570479037948, "learning_rate": 1.248267127311747e-06, "loss": 0.1607, "step": 522 }, { "epoch": 0.11899886234357224, "grad_norm": 2.0949542782407398, "learning_rate": 1.2482604719157386e-06, "loss": 0.1857, "step": 523 }, { "epoch": 0.119226393629124, "grad_norm": 2.2586097350216385, "learning_rate": 1.2482538037814277e-06, "loss": 0.1258, "step": 524 }, { "epoch": 0.11945392491467577, "grad_norm": 3.036602602741407, "learning_rate": 1.2482471229089502e-06, "loss": 0.161, "step": 525 }, { "epoch": 0.11968145620022753, "grad_norm": 3.382002996482515, "learning_rate": 1.2482404292984431e-06, "loss": 0.1784, "step": 526 }, { "epoch": 0.1199089874857793, "grad_norm": 1.571226708630226, "learning_rate": 1.248233722950043e-06, "loss": 0.1605, "step": 527 }, { "epoch": 0.12013651877133105, "grad_norm": 3.0053996402943737, "learning_rate": 1.2482270038638872e-06, "loss": 0.1201, "step": 528 }, { "epoch": 0.12036405005688282, "grad_norm": 4.663906907753179, "learning_rate": 1.2482202720401128e-06, "loss": 0.203, "step": 529 }, { "epoch": 0.12059158134243458, "grad_norm": 2.107107186527039, "learning_rate": 1.248213527478857e-06, "loss": 0.1933, "step": 530 }, { "epoch": 0.12081911262798635, "grad_norm": 2.191569921182264, "learning_rate": 1.2482067701802583e-06, "loss": 0.1735, "step": 531 }, { "epoch": 0.12104664391353812, "grad_norm": 1.611611034864374, "learning_rate": 1.2482000001444547e-06, "loss": 0.1299, "step": 532 }, { "epoch": 0.12127417519908988, "grad_norm": 1.9644367618752439, "learning_rate": 1.2481932173715845e-06, "loss": 0.0868, "step": 533 }, { "epoch": 0.12150170648464163, "grad_norm": 1.7597689357542332, "learning_rate": 1.2481864218617859e-06, "loss": 0.1977, "step": 534 }, { "epoch": 0.1217292377701934, "grad_norm": 1.0455766882042379, "learning_rate": 1.2481796136151984e-06, "loss": 0.0856, "step": 535 }, { "epoch": 0.12195676905574517, "grad_norm": 3.2419347761543684, "learning_rate": 1.2481727926319609e-06, "loss": 0.2399, "step": 536 }, { "epoch": 0.12218430034129693, "grad_norm": 3.339873316715719, "learning_rate": 1.2481659589122127e-06, "loss": 0.186, "step": 537 }, { "epoch": 0.1224118316268487, "grad_norm": 3.4453888669974146, "learning_rate": 1.2481591124560934e-06, "loss": 0.2007, "step": 538 }, { "epoch": 0.12263936291240045, "grad_norm": 3.4700673703521736, "learning_rate": 1.2481522532637435e-06, "loss": 0.1632, "step": 539 }, { "epoch": 0.12286689419795221, "grad_norm": 2.355397510374851, "learning_rate": 1.2481453813353026e-06, "loss": 0.1212, "step": 540 }, { "epoch": 0.12309442548350398, "grad_norm": 5.338957920220655, "learning_rate": 1.2481384966709116e-06, "loss": 0.1592, "step": 541 }, { "epoch": 0.12332195676905575, "grad_norm": 2.990026650956376, "learning_rate": 1.2481315992707104e-06, "loss": 0.2656, "step": 542 }, { "epoch": 0.12354948805460751, "grad_norm": 1.8798810865858828, "learning_rate": 1.248124689134841e-06, "loss": 0.1125, "step": 543 }, { "epoch": 0.12377701934015928, "grad_norm": 1.6104299610891197, "learning_rate": 1.2481177662634438e-06, "loss": 0.1557, "step": 544 }, { "epoch": 0.12400455062571103, "grad_norm": 3.302283676048537, "learning_rate": 1.2481108306566609e-06, "loss": 0.1799, "step": 545 }, { "epoch": 0.1242320819112628, "grad_norm": 2.0532951352869513, "learning_rate": 1.2481038823146338e-06, "loss": 0.0815, "step": 546 }, { "epoch": 0.12445961319681456, "grad_norm": 1.4326913794879275, "learning_rate": 1.2480969212375043e-06, "loss": 0.177, "step": 547 }, { "epoch": 0.12468714448236633, "grad_norm": 3.5494676426295286, "learning_rate": 1.2480899474254151e-06, "loss": 0.136, "step": 548 }, { "epoch": 0.12491467576791809, "grad_norm": 1.3410455744599155, "learning_rate": 1.2480829608785085e-06, "loss": 0.1078, "step": 549 }, { "epoch": 0.12514220705346984, "grad_norm": 1.7709434217848017, "learning_rate": 1.2480759615969273e-06, "loss": 0.1114, "step": 550 }, { "epoch": 0.12536973833902162, "grad_norm": 1.4865770903343614, "learning_rate": 1.2480689495808144e-06, "loss": 0.1377, "step": 551 }, { "epoch": 0.12559726962457338, "grad_norm": 1.6211826207402742, "learning_rate": 1.2480619248303133e-06, "loss": 0.1873, "step": 552 }, { "epoch": 0.12582480091012513, "grad_norm": 3.1755876159758794, "learning_rate": 1.2480548873455675e-06, "loss": 0.2135, "step": 553 }, { "epoch": 0.1260523321956769, "grad_norm": 3.6986046315140952, "learning_rate": 1.248047837126721e-06, "loss": 0.3549, "step": 554 }, { "epoch": 0.12627986348122866, "grad_norm": 2.782290781984551, "learning_rate": 1.248040774173918e-06, "loss": 0.1936, "step": 555 }, { "epoch": 0.12650739476678044, "grad_norm": 2.329760734261347, "learning_rate": 1.248033698487302e-06, "loss": 0.1395, "step": 556 }, { "epoch": 0.1267349260523322, "grad_norm": 2.258554836923121, "learning_rate": 1.2480266100670189e-06, "loss": 0.1605, "step": 557 }, { "epoch": 0.12696245733788397, "grad_norm": 3.058041285297341, "learning_rate": 1.2480195089132125e-06, "loss": 0.1975, "step": 558 }, { "epoch": 0.12718998862343572, "grad_norm": 2.406042057945949, "learning_rate": 1.2480123950260284e-06, "loss": 0.1405, "step": 559 }, { "epoch": 0.12741751990898748, "grad_norm": 1.4634033865621767, "learning_rate": 1.248005268405612e-06, "loss": 0.0686, "step": 560 }, { "epoch": 0.12764505119453926, "grad_norm": 1.1470288222889338, "learning_rate": 1.2479981290521087e-06, "loss": 0.0649, "step": 561 }, { "epoch": 0.127872582480091, "grad_norm": 3.357158703331078, "learning_rate": 1.2479909769656648e-06, "loss": 0.1684, "step": 562 }, { "epoch": 0.1281001137656428, "grad_norm": 2.4363436867877595, "learning_rate": 1.2479838121464263e-06, "loss": 0.2155, "step": 563 }, { "epoch": 0.12832764505119454, "grad_norm": 4.051636355021599, "learning_rate": 1.2479766345945395e-06, "loss": 0.1853, "step": 564 }, { "epoch": 0.1285551763367463, "grad_norm": 1.6707836764627593, "learning_rate": 1.2479694443101513e-06, "loss": 0.2261, "step": 565 }, { "epoch": 0.12878270762229807, "grad_norm": 1.3008647546251737, "learning_rate": 1.2479622412934087e-06, "loss": 0.1606, "step": 566 }, { "epoch": 0.12901023890784982, "grad_norm": 3.421202381350775, "learning_rate": 1.2479550255444586e-06, "loss": 0.147, "step": 567 }, { "epoch": 0.1292377701934016, "grad_norm": 1.5157864652280186, "learning_rate": 1.2479477970634487e-06, "loss": 0.1536, "step": 568 }, { "epoch": 0.12946530147895335, "grad_norm": 3.27856184412377, "learning_rate": 1.2479405558505267e-06, "loss": 0.1931, "step": 569 }, { "epoch": 0.1296928327645051, "grad_norm": 2.5943823025048474, "learning_rate": 1.247933301905841e-06, "loss": 0.1384, "step": 570 }, { "epoch": 0.12992036405005689, "grad_norm": 4.278003846990416, "learning_rate": 1.2479260352295388e-06, "loss": 0.1771, "step": 571 }, { "epoch": 0.13014789533560864, "grad_norm": 3.446486195671729, "learning_rate": 1.2479187558217697e-06, "loss": 0.1323, "step": 572 }, { "epoch": 0.13037542662116042, "grad_norm": 1.5099352019896337, "learning_rate": 1.247911463682682e-06, "loss": 0.1444, "step": 573 }, { "epoch": 0.13060295790671217, "grad_norm": 3.798908546439363, "learning_rate": 1.2479041588124247e-06, "loss": 0.1504, "step": 574 }, { "epoch": 0.13083048919226395, "grad_norm": 3.7532424433768754, "learning_rate": 1.2478968412111471e-06, "loss": 0.1518, "step": 575 }, { "epoch": 0.1310580204778157, "grad_norm": 2.056630545760187, "learning_rate": 1.247889510878999e-06, "loss": 0.2708, "step": 576 }, { "epoch": 0.13128555176336745, "grad_norm": 2.303355999452058, "learning_rate": 1.24788216781613e-06, "loss": 0.1662, "step": 577 }, { "epoch": 0.13151308304891923, "grad_norm": 2.269104241548175, "learning_rate": 1.2478748120226902e-06, "loss": 0.1337, "step": 578 }, { "epoch": 0.13174061433447098, "grad_norm": 3.0692597907642862, "learning_rate": 1.2478674434988299e-06, "loss": 0.1326, "step": 579 }, { "epoch": 0.13196814562002276, "grad_norm": 1.6865202158454742, "learning_rate": 1.2478600622447001e-06, "loss": 0.1647, "step": 580 }, { "epoch": 0.13219567690557452, "grad_norm": 2.939283703136826, "learning_rate": 1.2478526682604512e-06, "loss": 0.1303, "step": 581 }, { "epoch": 0.13242320819112627, "grad_norm": 3.1064926411391713, "learning_rate": 1.2478452615462345e-06, "loss": 0.1409, "step": 582 }, { "epoch": 0.13265073947667805, "grad_norm": 2.5571749562826485, "learning_rate": 1.247837842102201e-06, "loss": 0.1791, "step": 583 }, { "epoch": 0.1328782707622298, "grad_norm": 2.795629539563545, "learning_rate": 1.2478304099285031e-06, "loss": 0.1567, "step": 584 }, { "epoch": 0.13310580204778158, "grad_norm": 2.0832780528771466, "learning_rate": 1.2478229650252921e-06, "loss": 0.1639, "step": 585 }, { "epoch": 0.13333333333333333, "grad_norm": 2.9969798024524117, "learning_rate": 1.2478155073927204e-06, "loss": 0.2444, "step": 586 }, { "epoch": 0.13356086461888508, "grad_norm": 1.9274087851448982, "learning_rate": 1.2478080370309404e-06, "loss": 0.105, "step": 587 }, { "epoch": 0.13378839590443686, "grad_norm": 4.021015627831867, "learning_rate": 1.2478005539401046e-06, "loss": 0.1734, "step": 588 }, { "epoch": 0.13401592718998862, "grad_norm": 2.9342976021528027, "learning_rate": 1.2477930581203663e-06, "loss": 0.1465, "step": 589 }, { "epoch": 0.1342434584755404, "grad_norm": 2.3242426333780632, "learning_rate": 1.2477855495718782e-06, "loss": 0.2241, "step": 590 }, { "epoch": 0.13447098976109215, "grad_norm": 2.957504561813871, "learning_rate": 1.2477780282947942e-06, "loss": 0.1734, "step": 591 }, { "epoch": 0.13469852104664393, "grad_norm": 1.8788696793522301, "learning_rate": 1.2477704942892677e-06, "loss": 0.1469, "step": 592 }, { "epoch": 0.13492605233219568, "grad_norm": 2.339527187323086, "learning_rate": 1.2477629475554532e-06, "loss": 0.1312, "step": 593 }, { "epoch": 0.13515358361774743, "grad_norm": 3.707567497860105, "learning_rate": 1.2477553880935043e-06, "loss": 0.1916, "step": 594 }, { "epoch": 0.1353811149032992, "grad_norm": 3.2750827489523022, "learning_rate": 1.2477478159035758e-06, "loss": 0.1774, "step": 595 }, { "epoch": 0.13560864618885096, "grad_norm": 2.777476705753077, "learning_rate": 1.2477402309858226e-06, "loss": 0.1789, "step": 596 }, { "epoch": 0.13583617747440274, "grad_norm": 2.144596195630353, "learning_rate": 1.2477326333403995e-06, "loss": 0.147, "step": 597 }, { "epoch": 0.1360637087599545, "grad_norm": 2.3685083837175935, "learning_rate": 1.2477250229674618e-06, "loss": 0.1831, "step": 598 }, { "epoch": 0.13629124004550625, "grad_norm": 1.9843295041761948, "learning_rate": 1.2477173998671653e-06, "loss": 0.178, "step": 599 }, { "epoch": 0.13651877133105803, "grad_norm": 3.434039497211011, "learning_rate": 1.2477097640396655e-06, "loss": 0.1235, "step": 600 }, { "epoch": 0.13674630261660978, "grad_norm": 1.4586285890850859, "learning_rate": 1.2477021154851185e-06, "loss": 0.0977, "step": 601 }, { "epoch": 0.13697383390216156, "grad_norm": 3.3913304667052198, "learning_rate": 1.2476944542036806e-06, "loss": 0.1786, "step": 602 }, { "epoch": 0.1372013651877133, "grad_norm": 2.667804003182341, "learning_rate": 1.2476867801955086e-06, "loss": 0.1204, "step": 603 }, { "epoch": 0.13742889647326506, "grad_norm": 2.4655446209984033, "learning_rate": 1.247679093460759e-06, "loss": 0.2298, "step": 604 }, { "epoch": 0.13765642775881684, "grad_norm": 3.1521634114958816, "learning_rate": 1.2476713939995895e-06, "loss": 0.1264, "step": 605 }, { "epoch": 0.1378839590443686, "grad_norm": 1.8219187381761075, "learning_rate": 1.2476636818121568e-06, "loss": 0.1028, "step": 606 }, { "epoch": 0.13811149032992037, "grad_norm": 2.337156447435568, "learning_rate": 1.247655956898619e-06, "loss": 0.1946, "step": 607 }, { "epoch": 0.13833902161547212, "grad_norm": 3.2562899945752966, "learning_rate": 1.2476482192591335e-06, "loss": 0.1465, "step": 608 }, { "epoch": 0.1385665529010239, "grad_norm": 1.8250022998173558, "learning_rate": 1.247640468893859e-06, "loss": 0.1467, "step": 609 }, { "epoch": 0.13879408418657566, "grad_norm": 3.5242803865119603, "learning_rate": 1.2476327058029534e-06, "loss": 0.1225, "step": 610 }, { "epoch": 0.1390216154721274, "grad_norm": 3.027013883019154, "learning_rate": 1.2476249299865757e-06, "loss": 0.1595, "step": 611 }, { "epoch": 0.1392491467576792, "grad_norm": 2.3807833370240843, "learning_rate": 1.2476171414448847e-06, "loss": 0.0984, "step": 612 }, { "epoch": 0.13947667804323094, "grad_norm": 3.1119739781274416, "learning_rate": 1.2476093401780397e-06, "loss": 0.154, "step": 613 }, { "epoch": 0.13970420932878272, "grad_norm": 3.4567643287811958, "learning_rate": 1.2476015261861998e-06, "loss": 0.1405, "step": 614 }, { "epoch": 0.13993174061433447, "grad_norm": 2.6730132596017504, "learning_rate": 1.247593699469525e-06, "loss": 0.117, "step": 615 }, { "epoch": 0.14015927189988622, "grad_norm": 2.78286071664722, "learning_rate": 1.2475858600281754e-06, "loss": 0.1504, "step": 616 }, { "epoch": 0.140386803185438, "grad_norm": 2.0905809356248803, "learning_rate": 1.247578007862311e-06, "loss": 0.1221, "step": 617 }, { "epoch": 0.14061433447098975, "grad_norm": 2.307570493464016, "learning_rate": 1.2475701429720923e-06, "loss": 0.1166, "step": 618 }, { "epoch": 0.14084186575654153, "grad_norm": 1.2783682538203782, "learning_rate": 1.24756226535768e-06, "loss": 0.1346, "step": 619 }, { "epoch": 0.1410693970420933, "grad_norm": 1.497656716954093, "learning_rate": 1.2475543750192352e-06, "loss": 0.2064, "step": 620 }, { "epoch": 0.14129692832764504, "grad_norm": 3.79056695480817, "learning_rate": 1.2475464719569192e-06, "loss": 0.2673, "step": 621 }, { "epoch": 0.14152445961319682, "grad_norm": 1.4805750856049538, "learning_rate": 1.2475385561708934e-06, "loss": 0.1992, "step": 622 }, { "epoch": 0.14175199089874857, "grad_norm": 1.6748002073239907, "learning_rate": 1.2475306276613194e-06, "loss": 0.0979, "step": 623 }, { "epoch": 0.14197952218430035, "grad_norm": 2.5674392190565736, "learning_rate": 1.2475226864283596e-06, "loss": 0.1337, "step": 624 }, { "epoch": 0.1422070534698521, "grad_norm": 2.656075374063454, "learning_rate": 1.2475147324721764e-06, "loss": 0.2501, "step": 625 }, { "epoch": 0.14243458475540388, "grad_norm": 2.03707084801983, "learning_rate": 1.2475067657929319e-06, "loss": 0.1673, "step": 626 }, { "epoch": 0.14266211604095563, "grad_norm": 2.975904435297751, "learning_rate": 1.2474987863907894e-06, "loss": 0.135, "step": 627 }, { "epoch": 0.14288964732650739, "grad_norm": 2.2205623276633295, "learning_rate": 1.2474907942659116e-06, "loss": 0.2149, "step": 628 }, { "epoch": 0.14311717861205916, "grad_norm": 2.271865927518249, "learning_rate": 1.247482789418462e-06, "loss": 0.1519, "step": 629 }, { "epoch": 0.14334470989761092, "grad_norm": 6.542697842484103, "learning_rate": 1.2474747718486044e-06, "loss": 0.1757, "step": 630 }, { "epoch": 0.1435722411831627, "grad_norm": 1.8493295758356152, "learning_rate": 1.2474667415565022e-06, "loss": 0.096, "step": 631 }, { "epoch": 0.14379977246871445, "grad_norm": 4.567549869753572, "learning_rate": 1.24745869854232e-06, "loss": 0.1745, "step": 632 }, { "epoch": 0.1440273037542662, "grad_norm": 3.104479250541457, "learning_rate": 1.2474506428062219e-06, "loss": 0.14, "step": 633 }, { "epoch": 0.14425483503981798, "grad_norm": 2.9519743566943464, "learning_rate": 1.2474425743483726e-06, "loss": 0.237, "step": 634 }, { "epoch": 0.14448236632536973, "grad_norm": 1.4814831832284159, "learning_rate": 1.2474344931689371e-06, "loss": 0.0873, "step": 635 }, { "epoch": 0.1447098976109215, "grad_norm": 2.0222816327136712, "learning_rate": 1.2474263992680805e-06, "loss": 0.155, "step": 636 }, { "epoch": 0.14493742889647326, "grad_norm": 2.0590304829666914, "learning_rate": 1.247418292645968e-06, "loss": 0.107, "step": 637 }, { "epoch": 0.14516496018202502, "grad_norm": 2.5562023131920633, "learning_rate": 1.2474101733027659e-06, "loss": 0.2256, "step": 638 }, { "epoch": 0.1453924914675768, "grad_norm": 2.3833084873555195, "learning_rate": 1.2474020412386395e-06, "loss": 0.1087, "step": 639 }, { "epoch": 0.14562002275312855, "grad_norm": 1.5076273114920544, "learning_rate": 1.2473938964537551e-06, "loss": 0.0893, "step": 640 }, { "epoch": 0.14584755403868033, "grad_norm": 2.3708066851044887, "learning_rate": 1.2473857389482797e-06, "loss": 0.1247, "step": 641 }, { "epoch": 0.14607508532423208, "grad_norm": 1.5590215080673084, "learning_rate": 1.2473775687223794e-06, "loss": 0.1504, "step": 642 }, { "epoch": 0.14630261660978386, "grad_norm": 1.6107910166409294, "learning_rate": 1.2473693857762215e-06, "loss": 0.149, "step": 643 }, { "epoch": 0.1465301478953356, "grad_norm": 1.7918533159116738, "learning_rate": 1.247361190109973e-06, "loss": 0.1104, "step": 644 }, { "epoch": 0.14675767918088736, "grad_norm": 2.8984966135096566, "learning_rate": 1.2473529817238016e-06, "loss": 0.1755, "step": 645 }, { "epoch": 0.14698521046643914, "grad_norm": 1.9091822418599347, "learning_rate": 1.2473447606178754e-06, "loss": 0.1077, "step": 646 }, { "epoch": 0.1472127417519909, "grad_norm": 4.199288030915391, "learning_rate": 1.2473365267923617e-06, "loss": 0.2124, "step": 647 }, { "epoch": 0.14744027303754267, "grad_norm": 2.331859473332942, "learning_rate": 1.2473282802474293e-06, "loss": 0.1576, "step": 648 }, { "epoch": 0.14766780432309443, "grad_norm": 3.5722786659910577, "learning_rate": 1.2473200209832465e-06, "loss": 0.2027, "step": 649 }, { "epoch": 0.14789533560864618, "grad_norm": 1.5390826591189062, "learning_rate": 1.2473117489999823e-06, "loss": 0.161, "step": 650 }, { "epoch": 0.14812286689419796, "grad_norm": 2.741044883004237, "learning_rate": 1.2473034642978057e-06, "loss": 0.1656, "step": 651 }, { "epoch": 0.1483503981797497, "grad_norm": 2.2681711762464034, "learning_rate": 1.247295166876886e-06, "loss": 0.1254, "step": 652 }, { "epoch": 0.1485779294653015, "grad_norm": 2.2254637289761194, "learning_rate": 1.2472868567373924e-06, "loss": 0.1291, "step": 653 }, { "epoch": 0.14880546075085324, "grad_norm": 2.213517163461755, "learning_rate": 1.2472785338794953e-06, "loss": 0.1541, "step": 654 }, { "epoch": 0.149032992036405, "grad_norm": 1.6789308605390307, "learning_rate": 1.247270198303365e-06, "loss": 0.1316, "step": 655 }, { "epoch": 0.14926052332195677, "grad_norm": 2.179149997459725, "learning_rate": 1.247261850009171e-06, "loss": 0.2437, "step": 656 }, { "epoch": 0.14948805460750852, "grad_norm": 2.910894270371587, "learning_rate": 1.2472534889970848e-06, "loss": 0.2038, "step": 657 }, { "epoch": 0.1497155858930603, "grad_norm": 1.751607816792672, "learning_rate": 1.2472451152672766e-06, "loss": 0.1164, "step": 658 }, { "epoch": 0.14994311717861206, "grad_norm": 1.6602009490349432, "learning_rate": 1.2472367288199177e-06, "loss": 0.1193, "step": 659 }, { "epoch": 0.15017064846416384, "grad_norm": 2.038150970938399, "learning_rate": 1.2472283296551798e-06, "loss": 0.102, "step": 660 }, { "epoch": 0.1503981797497156, "grad_norm": 2.1439804373776936, "learning_rate": 1.2472199177732346e-06, "loss": 0.1502, "step": 661 }, { "epoch": 0.15062571103526734, "grad_norm": 2.5777822840030358, "learning_rate": 1.2472114931742537e-06, "loss": 0.1168, "step": 662 }, { "epoch": 0.15085324232081912, "grad_norm": 2.4175964563163177, "learning_rate": 1.2472030558584093e-06, "loss": 0.1035, "step": 663 }, { "epoch": 0.15108077360637087, "grad_norm": 2.635267423704016, "learning_rate": 1.2471946058258742e-06, "loss": 0.1701, "step": 664 }, { "epoch": 0.15130830489192265, "grad_norm": 1.9337561786859772, "learning_rate": 1.2471861430768205e-06, "loss": 0.1075, "step": 665 }, { "epoch": 0.1515358361774744, "grad_norm": 1.7937795679496227, "learning_rate": 1.2471776676114217e-06, "loss": 0.1785, "step": 666 }, { "epoch": 0.15176336746302616, "grad_norm": 3.0588710289274816, "learning_rate": 1.2471691794298508e-06, "loss": 0.1798, "step": 667 }, { "epoch": 0.15199089874857794, "grad_norm": 2.638986072752188, "learning_rate": 1.2471606785322814e-06, "loss": 0.0878, "step": 668 }, { "epoch": 0.1522184300341297, "grad_norm": 2.732712357601826, "learning_rate": 1.247152164918887e-06, "loss": 0.1267, "step": 669 }, { "epoch": 0.15244596131968147, "grad_norm": 1.7481991977105777, "learning_rate": 1.247143638589842e-06, "loss": 0.1584, "step": 670 }, { "epoch": 0.15267349260523322, "grad_norm": 2.794672743532085, "learning_rate": 1.2471350995453203e-06, "loss": 0.1584, "step": 671 }, { "epoch": 0.15290102389078497, "grad_norm": 3.1279366528301633, "learning_rate": 1.2471265477854966e-06, "loss": 0.148, "step": 672 }, { "epoch": 0.15312855517633675, "grad_norm": 3.920575109905724, "learning_rate": 1.2471179833105454e-06, "loss": 0.1732, "step": 673 }, { "epoch": 0.1533560864618885, "grad_norm": 1.7916571238390178, "learning_rate": 1.2471094061206422e-06, "loss": 0.2336, "step": 674 }, { "epoch": 0.15358361774744028, "grad_norm": 1.7363850632393116, "learning_rate": 1.247100816215962e-06, "loss": 0.1244, "step": 675 }, { "epoch": 0.15381114903299203, "grad_norm": 2.504377712379844, "learning_rate": 1.2470922135966806e-06, "loss": 0.1674, "step": 676 }, { "epoch": 0.1540386803185438, "grad_norm": 2.43043947984636, "learning_rate": 1.2470835982629736e-06, "loss": 0.1249, "step": 677 }, { "epoch": 0.15426621160409557, "grad_norm": 3.950497364660697, "learning_rate": 1.247074970215017e-06, "loss": 0.2401, "step": 678 }, { "epoch": 0.15449374288964732, "grad_norm": 3.1492013494233846, "learning_rate": 1.2470663294529873e-06, "loss": 0.1605, "step": 679 }, { "epoch": 0.1547212741751991, "grad_norm": 1.80598204305421, "learning_rate": 1.2470576759770612e-06, "loss": 0.113, "step": 680 }, { "epoch": 0.15494880546075085, "grad_norm": 2.0454054940402506, "learning_rate": 1.2470490097874155e-06, "loss": 0.1453, "step": 681 }, { "epoch": 0.15517633674630263, "grad_norm": 3.6952564849548053, "learning_rate": 1.247040330884227e-06, "loss": 0.1581, "step": 682 }, { "epoch": 0.15540386803185438, "grad_norm": 2.3655397835651075, "learning_rate": 1.2470316392676738e-06, "loss": 0.169, "step": 683 }, { "epoch": 0.15563139931740613, "grad_norm": 3.416348712472315, "learning_rate": 1.2470229349379326e-06, "loss": 0.1347, "step": 684 }, { "epoch": 0.1558589306029579, "grad_norm": 2.618995350775909, "learning_rate": 1.2470142178951822e-06, "loss": 0.1924, "step": 685 }, { "epoch": 0.15608646188850966, "grad_norm": 1.344663220923034, "learning_rate": 1.2470054881396002e-06, "loss": 0.2013, "step": 686 }, { "epoch": 0.15631399317406144, "grad_norm": 1.1568986493989724, "learning_rate": 1.246996745671365e-06, "loss": 0.131, "step": 687 }, { "epoch": 0.1565415244596132, "grad_norm": 3.0558312091963473, "learning_rate": 1.2469879904906556e-06, "loss": 0.14, "step": 688 }, { "epoch": 0.15676905574516495, "grad_norm": 4.767157427966137, "learning_rate": 1.2469792225976507e-06, "loss": 0.156, "step": 689 }, { "epoch": 0.15699658703071673, "grad_norm": 1.9971770266956603, "learning_rate": 1.2469704419925296e-06, "loss": 0.1413, "step": 690 }, { "epoch": 0.15722411831626848, "grad_norm": 3.560138993273607, "learning_rate": 1.246961648675472e-06, "loss": 0.2274, "step": 691 }, { "epoch": 0.15745164960182026, "grad_norm": 1.8091873297743188, "learning_rate": 1.246952842646657e-06, "loss": 0.2606, "step": 692 }, { "epoch": 0.157679180887372, "grad_norm": 1.9524492716137443, "learning_rate": 1.2469440239062653e-06, "loss": 0.1888, "step": 693 }, { "epoch": 0.15790671217292376, "grad_norm": 1.978419283294589, "learning_rate": 1.2469351924544766e-06, "loss": 0.168, "step": 694 }, { "epoch": 0.15813424345847554, "grad_norm": 1.909977232991382, "learning_rate": 1.2469263482914716e-06, "loss": 0.1302, "step": 695 }, { "epoch": 0.1583617747440273, "grad_norm": 2.786836009335205, "learning_rate": 1.246917491417431e-06, "loss": 0.1603, "step": 696 }, { "epoch": 0.15858930602957907, "grad_norm": 2.700038379786115, "learning_rate": 1.246908621832536e-06, "loss": 0.2268, "step": 697 }, { "epoch": 0.15881683731513083, "grad_norm": 1.4116863857464026, "learning_rate": 1.2468997395369677e-06, "loss": 0.1761, "step": 698 }, { "epoch": 0.1590443686006826, "grad_norm": 2.8928190492615133, "learning_rate": 1.2468908445309077e-06, "loss": 0.1789, "step": 699 }, { "epoch": 0.15927189988623436, "grad_norm": 1.650749552825084, "learning_rate": 1.2468819368145376e-06, "loss": 0.1324, "step": 700 }, { "epoch": 0.1594994311717861, "grad_norm": 2.3722473947353677, "learning_rate": 1.2468730163880398e-06, "loss": 0.1116, "step": 701 }, { "epoch": 0.1597269624573379, "grad_norm": 2.879822957568519, "learning_rate": 1.2468640832515962e-06, "loss": 0.0564, "step": 702 }, { "epoch": 0.15995449374288964, "grad_norm": 2.162764734574199, "learning_rate": 1.24685513740539e-06, "loss": 0.1739, "step": 703 }, { "epoch": 0.16018202502844142, "grad_norm": 2.8968364936480206, "learning_rate": 1.2468461788496036e-06, "loss": 0.2091, "step": 704 }, { "epoch": 0.16040955631399317, "grad_norm": 1.8559610510087743, "learning_rate": 1.24683720758442e-06, "loss": 0.1533, "step": 705 }, { "epoch": 0.16063708759954493, "grad_norm": 2.184281056476426, "learning_rate": 1.2468282236100226e-06, "loss": 0.1582, "step": 706 }, { "epoch": 0.1608646188850967, "grad_norm": 1.3209438595657337, "learning_rate": 1.2468192269265955e-06, "loss": 0.1914, "step": 707 }, { "epoch": 0.16109215017064846, "grad_norm": 2.1470386790088174, "learning_rate": 1.246810217534322e-06, "loss": 0.0831, "step": 708 }, { "epoch": 0.16131968145620024, "grad_norm": 1.594792083731403, "learning_rate": 1.2468011954333864e-06, "loss": 0.1349, "step": 709 }, { "epoch": 0.161547212741752, "grad_norm": 1.9899900139983586, "learning_rate": 1.2467921606239734e-06, "loss": 0.1406, "step": 710 }, { "epoch": 0.16177474402730374, "grad_norm": 2.161056989124219, "learning_rate": 1.2467831131062672e-06, "loss": 0.1186, "step": 711 }, { "epoch": 0.16200227531285552, "grad_norm": 3.2786168252573438, "learning_rate": 1.2467740528804528e-06, "loss": 0.1525, "step": 712 }, { "epoch": 0.16222980659840727, "grad_norm": 2.152367629184536, "learning_rate": 1.2467649799467156e-06, "loss": 0.1403, "step": 713 }, { "epoch": 0.16245733788395905, "grad_norm": 2.658644939282435, "learning_rate": 1.246755894305241e-06, "loss": 0.1287, "step": 714 }, { "epoch": 0.1626848691695108, "grad_norm": 1.8320157906526173, "learning_rate": 1.2467467959562143e-06, "loss": 0.1489, "step": 715 }, { "epoch": 0.16291240045506258, "grad_norm": 3.0792158572997526, "learning_rate": 1.2467376848998221e-06, "loss": 0.1929, "step": 716 }, { "epoch": 0.16313993174061434, "grad_norm": 2.592666663523021, "learning_rate": 1.2467285611362501e-06, "loss": 0.1198, "step": 717 }, { "epoch": 0.1633674630261661, "grad_norm": 2.3270639642215123, "learning_rate": 1.2467194246656851e-06, "loss": 0.119, "step": 718 }, { "epoch": 0.16359499431171787, "grad_norm": 1.5662096056295784, "learning_rate": 1.2467102754883136e-06, "loss": 0.1488, "step": 719 }, { "epoch": 0.16382252559726962, "grad_norm": 2.0754259992407174, "learning_rate": 1.2467011136043228e-06, "loss": 0.1206, "step": 720 }, { "epoch": 0.1640500568828214, "grad_norm": 2.377809704915352, "learning_rate": 1.2466919390138995e-06, "loss": 0.2349, "step": 721 }, { "epoch": 0.16427758816837315, "grad_norm": 2.1373727350700205, "learning_rate": 1.246682751717232e-06, "loss": 0.1333, "step": 722 }, { "epoch": 0.1645051194539249, "grad_norm": 3.8601459911234697, "learning_rate": 1.2466735517145074e-06, "loss": 0.3259, "step": 723 }, { "epoch": 0.16473265073947668, "grad_norm": 2.1273982856593614, "learning_rate": 1.2466643390059138e-06, "loss": 0.199, "step": 724 }, { "epoch": 0.16496018202502843, "grad_norm": 2.274158988300012, "learning_rate": 1.2466551135916398e-06, "loss": 0.1351, "step": 725 }, { "epoch": 0.16518771331058021, "grad_norm": 2.1566789936379287, "learning_rate": 1.2466458754718737e-06, "loss": 0.219, "step": 726 }, { "epoch": 0.16541524459613197, "grad_norm": 3.388462178150055, "learning_rate": 1.2466366246468045e-06, "loss": 0.1456, "step": 727 }, { "epoch": 0.16564277588168372, "grad_norm": 2.792548754369155, "learning_rate": 1.246627361116621e-06, "loss": 0.2178, "step": 728 }, { "epoch": 0.1658703071672355, "grad_norm": 1.7787275123381943, "learning_rate": 1.246618084881513e-06, "loss": 0.2584, "step": 729 }, { "epoch": 0.16609783845278725, "grad_norm": 2.150845029279013, "learning_rate": 1.2466087959416695e-06, "loss": 0.1474, "step": 730 }, { "epoch": 0.16632536973833903, "grad_norm": 3.4162019984229213, "learning_rate": 1.2465994942972805e-06, "loss": 0.1415, "step": 731 }, { "epoch": 0.16655290102389078, "grad_norm": 3.5172418167047743, "learning_rate": 1.2465901799485366e-06, "loss": 0.2267, "step": 732 }, { "epoch": 0.16678043230944256, "grad_norm": 1.9664520821504867, "learning_rate": 1.2465808528956277e-06, "loss": 0.1027, "step": 733 }, { "epoch": 0.1670079635949943, "grad_norm": 2.053925645911197, "learning_rate": 1.2465715131387446e-06, "loss": 0.1405, "step": 734 }, { "epoch": 0.16723549488054607, "grad_norm": 1.6417683696863474, "learning_rate": 1.2465621606780778e-06, "loss": 0.1804, "step": 735 }, { "epoch": 0.16746302616609784, "grad_norm": 1.9532511665276102, "learning_rate": 1.2465527955138191e-06, "loss": 0.1438, "step": 736 }, { "epoch": 0.1676905574516496, "grad_norm": 2.7978077296538295, "learning_rate": 1.2465434176461596e-06, "loss": 0.1806, "step": 737 }, { "epoch": 0.16791808873720138, "grad_norm": 1.7861222447513503, "learning_rate": 1.2465340270752908e-06, "loss": 0.0953, "step": 738 }, { "epoch": 0.16814562002275313, "grad_norm": 1.2545980680473232, "learning_rate": 1.2465246238014047e-06, "loss": 0.0881, "step": 739 }, { "epoch": 0.16837315130830488, "grad_norm": 2.49195685975364, "learning_rate": 1.2465152078246936e-06, "loss": 0.1643, "step": 740 }, { "epoch": 0.16860068259385666, "grad_norm": 2.0211233157427637, "learning_rate": 1.24650577914535e-06, "loss": 0.1263, "step": 741 }, { "epoch": 0.1688282138794084, "grad_norm": 2.7858317155477317, "learning_rate": 1.2464963377635667e-06, "loss": 0.1547, "step": 742 }, { "epoch": 0.1690557451649602, "grad_norm": 1.7097291360774547, "learning_rate": 1.246486883679536e-06, "loss": 0.2516, "step": 743 }, { "epoch": 0.16928327645051194, "grad_norm": 3.9137648292026737, "learning_rate": 1.246477416893452e-06, "loss": 0.2036, "step": 744 }, { "epoch": 0.1695108077360637, "grad_norm": 3.005605654107358, "learning_rate": 1.2464679374055074e-06, "loss": 0.1481, "step": 745 }, { "epoch": 0.16973833902161548, "grad_norm": 3.401532765227879, "learning_rate": 1.2464584452158968e-06, "loss": 0.1841, "step": 746 }, { "epoch": 0.16996587030716723, "grad_norm": 2.843140048954733, "learning_rate": 1.2464489403248133e-06, "loss": 0.184, "step": 747 }, { "epoch": 0.170193401592719, "grad_norm": 1.515779223289782, "learning_rate": 1.246439422732452e-06, "loss": 0.1262, "step": 748 }, { "epoch": 0.17042093287827076, "grad_norm": 2.618293101772126, "learning_rate": 1.2464298924390066e-06, "loss": 0.1415, "step": 749 }, { "epoch": 0.17064846416382254, "grad_norm": 2.248269138511338, "learning_rate": 1.2464203494446725e-06, "loss": 0.185, "step": 750 }, { "epoch": 0.1708759954493743, "grad_norm": 1.3558978429200024, "learning_rate": 1.2464107937496444e-06, "loss": 0.096, "step": 751 }, { "epoch": 0.17110352673492604, "grad_norm": 1.8355286869437153, "learning_rate": 1.246401225354118e-06, "loss": 0.0936, "step": 752 }, { "epoch": 0.17133105802047782, "grad_norm": 2.611386377303649, "learning_rate": 1.2463916442582883e-06, "loss": 0.2058, "step": 753 }, { "epoch": 0.17155858930602957, "grad_norm": 1.81511526173022, "learning_rate": 1.2463820504623516e-06, "loss": 0.0722, "step": 754 }, { "epoch": 0.17178612059158135, "grad_norm": 1.6836561465138316, "learning_rate": 1.246372443966504e-06, "loss": 0.1419, "step": 755 }, { "epoch": 0.1720136518771331, "grad_norm": 3.189715404864015, "learning_rate": 1.246362824770941e-06, "loss": 0.1604, "step": 756 }, { "epoch": 0.17224118316268486, "grad_norm": 2.8556456489625193, "learning_rate": 1.2463531928758605e-06, "loss": 0.1793, "step": 757 }, { "epoch": 0.17246871444823664, "grad_norm": 2.1490228034084344, "learning_rate": 1.2463435482814585e-06, "loss": 0.1928, "step": 758 }, { "epoch": 0.1726962457337884, "grad_norm": 1.866877451814791, "learning_rate": 1.246333890987932e-06, "loss": 0.2064, "step": 759 }, { "epoch": 0.17292377701934017, "grad_norm": 2.7361601673612284, "learning_rate": 1.246324220995479e-06, "loss": 0.1024, "step": 760 }, { "epoch": 0.17315130830489192, "grad_norm": 3.6715173407277004, "learning_rate": 1.2463145383042966e-06, "loss": 0.1741, "step": 761 }, { "epoch": 0.17337883959044367, "grad_norm": 4.388914943676026, "learning_rate": 1.2463048429145832e-06, "loss": 0.2951, "step": 762 }, { "epoch": 0.17360637087599545, "grad_norm": 3.0864567661578075, "learning_rate": 1.2462951348265364e-06, "loss": 0.1681, "step": 763 }, { "epoch": 0.1738339021615472, "grad_norm": 2.2429137189515487, "learning_rate": 1.2462854140403553e-06, "loss": 0.1698, "step": 764 }, { "epoch": 0.17406143344709898, "grad_norm": 3.7655750343422487, "learning_rate": 1.2462756805562378e-06, "loss": 0.1972, "step": 765 }, { "epoch": 0.17428896473265074, "grad_norm": 1.4821109763148475, "learning_rate": 1.2462659343743832e-06, "loss": 0.1144, "step": 766 }, { "epoch": 0.17451649601820252, "grad_norm": 2.9261323093043234, "learning_rate": 1.2462561754949908e-06, "loss": 0.1354, "step": 767 }, { "epoch": 0.17474402730375427, "grad_norm": 2.021278631174851, "learning_rate": 1.2462464039182598e-06, "loss": 0.1158, "step": 768 }, { "epoch": 0.17497155858930602, "grad_norm": 2.189903163956334, "learning_rate": 1.2462366196443903e-06, "loss": 0.1587, "step": 769 }, { "epoch": 0.1751990898748578, "grad_norm": 3.7285174958892364, "learning_rate": 1.246226822673582e-06, "loss": 0.2024, "step": 770 }, { "epoch": 0.17542662116040955, "grad_norm": 1.9007743093993184, "learning_rate": 1.2462170130060351e-06, "loss": 0.1025, "step": 771 }, { "epoch": 0.17565415244596133, "grad_norm": 3.3341124392840134, "learning_rate": 1.24620719064195e-06, "loss": 0.1718, "step": 772 }, { "epoch": 0.17588168373151308, "grad_norm": 2.271177623744295, "learning_rate": 1.246197355581528e-06, "loss": 0.1713, "step": 773 }, { "epoch": 0.17610921501706484, "grad_norm": 2.631276315974309, "learning_rate": 1.2461875078249694e-06, "loss": 0.1769, "step": 774 }, { "epoch": 0.17633674630261661, "grad_norm": 2.2924143983188765, "learning_rate": 1.246177647372476e-06, "loss": 0.1155, "step": 775 }, { "epoch": 0.17656427758816837, "grad_norm": 4.145219852575127, "learning_rate": 1.246167774224249e-06, "loss": 0.1997, "step": 776 }, { "epoch": 0.17679180887372015, "grad_norm": 3.5955716696986237, "learning_rate": 1.2461578883804903e-06, "loss": 0.1434, "step": 777 }, { "epoch": 0.1770193401592719, "grad_norm": 3.5823237759342477, "learning_rate": 1.246147989841402e-06, "loss": 0.131, "step": 778 }, { "epoch": 0.17724687144482365, "grad_norm": 1.7885388560764315, "learning_rate": 1.2461380786071863e-06, "loss": 0.0755, "step": 779 }, { "epoch": 0.17747440273037543, "grad_norm": 2.362853335883513, "learning_rate": 1.246128154678046e-06, "loss": 0.1285, "step": 780 }, { "epoch": 0.17770193401592718, "grad_norm": 2.826403481752188, "learning_rate": 1.2461182180541835e-06, "loss": 0.0898, "step": 781 }, { "epoch": 0.17792946530147896, "grad_norm": 5.793503549962082, "learning_rate": 1.2461082687358022e-06, "loss": 0.0971, "step": 782 }, { "epoch": 0.1781569965870307, "grad_norm": 1.8035940463938722, "learning_rate": 1.2460983067231055e-06, "loss": 0.1105, "step": 783 }, { "epoch": 0.1783845278725825, "grad_norm": 2.3286047675537613, "learning_rate": 1.246088332016297e-06, "loss": 0.0997, "step": 784 }, { "epoch": 0.17861205915813425, "grad_norm": 2.4331158536688067, "learning_rate": 1.2460783446155802e-06, "loss": 0.2145, "step": 785 }, { "epoch": 0.178839590443686, "grad_norm": 2.4301917574272234, "learning_rate": 1.2460683445211596e-06, "loss": 0.1826, "step": 786 }, { "epoch": 0.17906712172923778, "grad_norm": 3.191042960124482, "learning_rate": 1.2460583317332395e-06, "loss": 0.2224, "step": 787 }, { "epoch": 0.17929465301478953, "grad_norm": 1.9281932990563415, "learning_rate": 1.2460483062520246e-06, "loss": 0.1012, "step": 788 }, { "epoch": 0.1795221843003413, "grad_norm": 1.9401318974845003, "learning_rate": 1.2460382680777196e-06, "loss": 0.0761, "step": 789 }, { "epoch": 0.17974971558589306, "grad_norm": 13.086161362963225, "learning_rate": 1.2460282172105298e-06, "loss": 0.2088, "step": 790 }, { "epoch": 0.1799772468714448, "grad_norm": 1.4783130702588718, "learning_rate": 1.2460181536506608e-06, "loss": 0.2126, "step": 791 }, { "epoch": 0.1802047781569966, "grad_norm": 2.4964786740518763, "learning_rate": 1.2460080773983177e-06, "loss": 0.1385, "step": 792 }, { "epoch": 0.18043230944254834, "grad_norm": 2.7778972521749545, "learning_rate": 1.2459979884537072e-06, "loss": 0.1448, "step": 793 }, { "epoch": 0.18065984072810012, "grad_norm": 2.167813491126184, "learning_rate": 1.2459878868170348e-06, "loss": 0.1379, "step": 794 }, { "epoch": 0.18088737201365188, "grad_norm": 1.9654699615947284, "learning_rate": 1.2459777724885075e-06, "loss": 0.1314, "step": 795 }, { "epoch": 0.18111490329920363, "grad_norm": 2.293952257528565, "learning_rate": 1.2459676454683318e-06, "loss": 0.1695, "step": 796 }, { "epoch": 0.1813424345847554, "grad_norm": 3.9215044200778144, "learning_rate": 1.2459575057567144e-06, "loss": 0.2204, "step": 797 }, { "epoch": 0.18156996587030716, "grad_norm": 2.8214133097210117, "learning_rate": 1.245947353353863e-06, "loss": 0.1558, "step": 798 }, { "epoch": 0.18179749715585894, "grad_norm": 5.317020653859289, "learning_rate": 1.245937188259985e-06, "loss": 0.2603, "step": 799 }, { "epoch": 0.1820250284414107, "grad_norm": 4.004955818619992, "learning_rate": 1.245927010475288e-06, "loss": 0.1196, "step": 800 }, { "epoch": 0.18225255972696247, "grad_norm": 3.792524464667178, "learning_rate": 1.24591681999998e-06, "loss": 0.1821, "step": 801 }, { "epoch": 0.18248009101251422, "grad_norm": 2.813011742342484, "learning_rate": 1.2459066168342693e-06, "loss": 0.1513, "step": 802 }, { "epoch": 0.18270762229806597, "grad_norm": 3.511510747002315, "learning_rate": 1.2458964009783646e-06, "loss": 0.2163, "step": 803 }, { "epoch": 0.18293515358361775, "grad_norm": 2.802158661308834, "learning_rate": 1.2458861724324745e-06, "loss": 0.1963, "step": 804 }, { "epoch": 0.1831626848691695, "grad_norm": 3.64850186041969, "learning_rate": 1.2458759311968084e-06, "loss": 0.303, "step": 805 }, { "epoch": 0.1833902161547213, "grad_norm": 2.6182595326596725, "learning_rate": 1.245865677271575e-06, "loss": 0.1456, "step": 806 }, { "epoch": 0.18361774744027304, "grad_norm": 2.399741320725503, "learning_rate": 1.2458554106569844e-06, "loss": 0.2288, "step": 807 }, { "epoch": 0.1838452787258248, "grad_norm": 1.252106549654472, "learning_rate": 1.2458451313532463e-06, "loss": 0.0801, "step": 808 }, { "epoch": 0.18407281001137657, "grad_norm": 3.696224132577839, "learning_rate": 1.2458348393605708e-06, "loss": 0.2059, "step": 809 }, { "epoch": 0.18430034129692832, "grad_norm": 1.3783330613855644, "learning_rate": 1.2458245346791678e-06, "loss": 0.1164, "step": 810 }, { "epoch": 0.1845278725824801, "grad_norm": 1.5623432135982267, "learning_rate": 1.2458142173092486e-06, "loss": 0.176, "step": 811 }, { "epoch": 0.18475540386803185, "grad_norm": 6.552053967433837, "learning_rate": 1.2458038872510237e-06, "loss": 0.118, "step": 812 }, { "epoch": 0.1849829351535836, "grad_norm": 3.2237210845046964, "learning_rate": 1.2457935445047042e-06, "loss": 0.1875, "step": 813 }, { "epoch": 0.18521046643913538, "grad_norm": 1.7463109516387256, "learning_rate": 1.2457831890705018e-06, "loss": 0.1945, "step": 814 }, { "epoch": 0.18543799772468714, "grad_norm": 2.8292409598595953, "learning_rate": 1.2457728209486279e-06, "loss": 0.1711, "step": 815 }, { "epoch": 0.18566552901023892, "grad_norm": 3.198074487753419, "learning_rate": 1.2457624401392943e-06, "loss": 0.2552, "step": 816 }, { "epoch": 0.18589306029579067, "grad_norm": 3.2293783551138278, "learning_rate": 1.2457520466427135e-06, "loss": 0.1955, "step": 817 }, { "epoch": 0.18612059158134245, "grad_norm": 2.5604778410965383, "learning_rate": 1.2457416404590974e-06, "loss": 0.1689, "step": 818 }, { "epoch": 0.1863481228668942, "grad_norm": 2.4475267016374427, "learning_rate": 1.2457312215886592e-06, "loss": 0.1165, "step": 819 }, { "epoch": 0.18657565415244595, "grad_norm": 1.9856047790588058, "learning_rate": 1.2457207900316115e-06, "loss": 0.195, "step": 820 }, { "epoch": 0.18680318543799773, "grad_norm": 3.030251865029441, "learning_rate": 1.245710345788168e-06, "loss": 0.2233, "step": 821 }, { "epoch": 0.18703071672354948, "grad_norm": 6.914472069589314, "learning_rate": 1.2456998888585414e-06, "loss": 0.1294, "step": 822 }, { "epoch": 0.18725824800910126, "grad_norm": 1.5392801223632877, "learning_rate": 1.245689419242946e-06, "loss": 0.1031, "step": 823 }, { "epoch": 0.18748577929465302, "grad_norm": 1.5563008585328006, "learning_rate": 1.2456789369415955e-06, "loss": 0.1233, "step": 824 }, { "epoch": 0.18771331058020477, "grad_norm": 1.5005319006316646, "learning_rate": 1.2456684419547044e-06, "loss": 0.1698, "step": 825 }, { "epoch": 0.18794084186575655, "grad_norm": 2.5311436309198245, "learning_rate": 1.245657934282487e-06, "loss": 0.1242, "step": 826 }, { "epoch": 0.1881683731513083, "grad_norm": 1.3382771790085715, "learning_rate": 1.245647413925158e-06, "loss": 0.1173, "step": 827 }, { "epoch": 0.18839590443686008, "grad_norm": 2.455502403566395, "learning_rate": 1.2456368808829327e-06, "loss": 0.0912, "step": 828 }, { "epoch": 0.18862343572241183, "grad_norm": 2.9752303589937212, "learning_rate": 1.2456263351560261e-06, "loss": 0.2599, "step": 829 }, { "epoch": 0.18885096700796358, "grad_norm": 5.043835077918359, "learning_rate": 1.2456157767446538e-06, "loss": 0.1609, "step": 830 }, { "epoch": 0.18907849829351536, "grad_norm": 2.756359704558054, "learning_rate": 1.245605205649032e-06, "loss": 0.1323, "step": 831 }, { "epoch": 0.18930602957906711, "grad_norm": 1.835440265718024, "learning_rate": 1.245594621869376e-06, "loss": 0.2094, "step": 832 }, { "epoch": 0.1895335608646189, "grad_norm": 1.2880237601014817, "learning_rate": 1.2455840254059026e-06, "loss": 0.1085, "step": 833 }, { "epoch": 0.18976109215017065, "grad_norm": 1.4808086873300856, "learning_rate": 1.2455734162588282e-06, "loss": 0.1067, "step": 834 }, { "epoch": 0.1899886234357224, "grad_norm": 2.3351983872627597, "learning_rate": 1.2455627944283697e-06, "loss": 0.1493, "step": 835 }, { "epoch": 0.19021615472127418, "grad_norm": 2.422722379821762, "learning_rate": 1.245552159914744e-06, "loss": 0.1387, "step": 836 }, { "epoch": 0.19044368600682593, "grad_norm": 2.2005548282870477, "learning_rate": 1.245541512718169e-06, "loss": 0.1047, "step": 837 }, { "epoch": 0.1906712172923777, "grad_norm": 2.379475571028047, "learning_rate": 1.245530852838862e-06, "loss": 0.1524, "step": 838 }, { "epoch": 0.19089874857792946, "grad_norm": 1.669935289366072, "learning_rate": 1.2455201802770405e-06, "loss": 0.157, "step": 839 }, { "epoch": 0.19112627986348124, "grad_norm": 2.357020791051429, "learning_rate": 1.245509495032923e-06, "loss": 0.2156, "step": 840 }, { "epoch": 0.191353811149033, "grad_norm": 3.871602599108809, "learning_rate": 1.2454987971067278e-06, "loss": 0.1557, "step": 841 }, { "epoch": 0.19158134243458474, "grad_norm": 2.5332197020943887, "learning_rate": 1.2454880864986737e-06, "loss": 0.1644, "step": 842 }, { "epoch": 0.19180887372013652, "grad_norm": 3.1286962973408596, "learning_rate": 1.2454773632089795e-06, "loss": 0.0794, "step": 843 }, { "epoch": 0.19203640500568828, "grad_norm": 2.3210649274985666, "learning_rate": 1.2454666272378644e-06, "loss": 0.129, "step": 844 }, { "epoch": 0.19226393629124006, "grad_norm": 3.000200402253768, "learning_rate": 1.2454558785855475e-06, "loss": 0.1628, "step": 845 }, { "epoch": 0.1924914675767918, "grad_norm": 2.3643323080869902, "learning_rate": 1.245445117252249e-06, "loss": 0.1345, "step": 846 }, { "epoch": 0.19271899886234356, "grad_norm": 2.532625203594351, "learning_rate": 1.2454343432381886e-06, "loss": 0.2082, "step": 847 }, { "epoch": 0.19294653014789534, "grad_norm": 1.9628657145639428, "learning_rate": 1.2454235565435862e-06, "loss": 0.0782, "step": 848 }, { "epoch": 0.1931740614334471, "grad_norm": 1.609178421923729, "learning_rate": 1.2454127571686629e-06, "loss": 0.1405, "step": 849 }, { "epoch": 0.19340159271899887, "grad_norm": 1.7728115247069527, "learning_rate": 1.245401945113639e-06, "loss": 0.203, "step": 850 }, { "epoch": 0.19362912400455062, "grad_norm": 3.2450475274049118, "learning_rate": 1.2453911203787355e-06, "loss": 0.1524, "step": 851 }, { "epoch": 0.19385665529010238, "grad_norm": 22.097060091469434, "learning_rate": 1.2453802829641736e-06, "loss": 0.2636, "step": 852 }, { "epoch": 0.19408418657565416, "grad_norm": 2.5365065820289496, "learning_rate": 1.2453694328701752e-06, "loss": 0.1019, "step": 853 }, { "epoch": 0.1943117178612059, "grad_norm": 2.090322149834491, "learning_rate": 1.2453585700969614e-06, "loss": 0.1498, "step": 854 }, { "epoch": 0.1945392491467577, "grad_norm": 2.6606765925685787, "learning_rate": 1.2453476946447547e-06, "loss": 0.1398, "step": 855 }, { "epoch": 0.19476678043230944, "grad_norm": 3.56083888144899, "learning_rate": 1.2453368065137772e-06, "loss": 0.1463, "step": 856 }, { "epoch": 0.19499431171786122, "grad_norm": 2.1276836242796793, "learning_rate": 1.2453259057042514e-06, "loss": 0.1753, "step": 857 }, { "epoch": 0.19522184300341297, "grad_norm": 2.5690977004159805, "learning_rate": 1.2453149922164003e-06, "loss": 0.1292, "step": 858 }, { "epoch": 0.19544937428896472, "grad_norm": 4.345742784369693, "learning_rate": 1.2453040660504468e-06, "loss": 0.15, "step": 859 }, { "epoch": 0.1956769055745165, "grad_norm": 3.118246879884093, "learning_rate": 1.2452931272066141e-06, "loss": 0.169, "step": 860 }, { "epoch": 0.19590443686006825, "grad_norm": 2.68254786515319, "learning_rate": 1.245282175685126e-06, "loss": 0.157, "step": 861 }, { "epoch": 0.19613196814562003, "grad_norm": 2.088476673647213, "learning_rate": 1.2452712114862063e-06, "loss": 0.1782, "step": 862 }, { "epoch": 0.19635949943117179, "grad_norm": 1.568141769132608, "learning_rate": 1.245260234610079e-06, "loss": 0.1295, "step": 863 }, { "epoch": 0.19658703071672354, "grad_norm": 2.186319656948205, "learning_rate": 1.2452492450569682e-06, "loss": 0.1734, "step": 864 }, { "epoch": 0.19681456200227532, "grad_norm": 2.7655739546712135, "learning_rate": 1.245238242827099e-06, "loss": 0.1694, "step": 865 }, { "epoch": 0.19704209328782707, "grad_norm": 3.0373302408208196, "learning_rate": 1.245227227920696e-06, "loss": 0.1356, "step": 866 }, { "epoch": 0.19726962457337885, "grad_norm": 2.1820099415146914, "learning_rate": 1.2452162003379842e-06, "loss": 0.2082, "step": 867 }, { "epoch": 0.1974971558589306, "grad_norm": 3.6721625065681827, "learning_rate": 1.2452051600791891e-06, "loss": 0.1915, "step": 868 }, { "epoch": 0.19772468714448235, "grad_norm": 6.490462296454016, "learning_rate": 1.2451941071445367e-06, "loss": 0.1815, "step": 869 }, { "epoch": 0.19795221843003413, "grad_norm": 3.246518762107006, "learning_rate": 1.2451830415342524e-06, "loss": 0.137, "step": 870 }, { "epoch": 0.19817974971558588, "grad_norm": 2.7033364330836873, "learning_rate": 1.2451719632485627e-06, "loss": 0.1317, "step": 871 }, { "epoch": 0.19840728100113766, "grad_norm": 3.30778551761739, "learning_rate": 1.2451608722876938e-06, "loss": 0.1099, "step": 872 }, { "epoch": 0.19863481228668942, "grad_norm": 2.2687509460631294, "learning_rate": 1.2451497686518722e-06, "loss": 0.1361, "step": 873 }, { "epoch": 0.1988623435722412, "grad_norm": 1.641721237453431, "learning_rate": 1.2451386523413252e-06, "loss": 0.1052, "step": 874 }, { "epoch": 0.19908987485779295, "grad_norm": 2.206444085506852, "learning_rate": 1.24512752335628e-06, "loss": 0.1018, "step": 875 }, { "epoch": 0.1993174061433447, "grad_norm": 2.210652731669232, "learning_rate": 1.2451163816969639e-06, "loss": 0.1879, "step": 876 }, { "epoch": 0.19954493742889648, "grad_norm": 2.085600222270482, "learning_rate": 1.2451052273636045e-06, "loss": 0.127, "step": 877 }, { "epoch": 0.19977246871444823, "grad_norm": 2.6309536592299705, "learning_rate": 1.24509406035643e-06, "loss": 0.1678, "step": 878 }, { "epoch": 0.2, "grad_norm": 4.158698099165945, "learning_rate": 1.2450828806756685e-06, "loss": 0.2095, "step": 879 }, { "epoch": 0.20022753128555176, "grad_norm": 2.602198490586786, "learning_rate": 1.245071688321549e-06, "loss": 0.1436, "step": 880 }, { "epoch": 0.20045506257110352, "grad_norm": 2.252594865848713, "learning_rate": 1.2450604832942991e-06, "loss": 0.1231, "step": 881 }, { "epoch": 0.2006825938566553, "grad_norm": 1.912453352899942, "learning_rate": 1.245049265594149e-06, "loss": 0.1408, "step": 882 }, { "epoch": 0.20091012514220705, "grad_norm": 3.264942350461524, "learning_rate": 1.2450380352213271e-06, "loss": 0.1697, "step": 883 }, { "epoch": 0.20113765642775883, "grad_norm": 2.415399674888119, "learning_rate": 1.2450267921760636e-06, "loss": 0.1331, "step": 884 }, { "epoch": 0.20136518771331058, "grad_norm": 2.62867521080006, "learning_rate": 1.2450155364585878e-06, "loss": 0.1217, "step": 885 }, { "epoch": 0.20159271899886233, "grad_norm": 2.3552959017058477, "learning_rate": 1.2450042680691301e-06, "loss": 0.1216, "step": 886 }, { "epoch": 0.2018202502844141, "grad_norm": 1.4369969713280852, "learning_rate": 1.2449929870079206e-06, "loss": 0.1282, "step": 887 }, { "epoch": 0.20204778156996586, "grad_norm": 2.305787931213179, "learning_rate": 1.24498169327519e-06, "loss": 0.1076, "step": 888 }, { "epoch": 0.20227531285551764, "grad_norm": 1.7868835912702514, "learning_rate": 1.2449703868711688e-06, "loss": 0.1225, "step": 889 }, { "epoch": 0.2025028441410694, "grad_norm": 2.1124657583403494, "learning_rate": 1.2449590677960886e-06, "loss": 0.1765, "step": 890 }, { "epoch": 0.20273037542662117, "grad_norm": 1.6102832172606196, "learning_rate": 1.2449477360501802e-06, "loss": 0.0719, "step": 891 }, { "epoch": 0.20295790671217293, "grad_norm": 3.8988824882283843, "learning_rate": 1.2449363916336756e-06, "loss": 0.1854, "step": 892 }, { "epoch": 0.20318543799772468, "grad_norm": 3.2116126604298882, "learning_rate": 1.2449250345468065e-06, "loss": 0.2028, "step": 893 }, { "epoch": 0.20341296928327646, "grad_norm": 2.083882159988442, "learning_rate": 1.244913664789805e-06, "loss": 0.1337, "step": 894 }, { "epoch": 0.2036405005688282, "grad_norm": 1.8394649372022975, "learning_rate": 1.2449022823629036e-06, "loss": 0.1205, "step": 895 }, { "epoch": 0.20386803185438, "grad_norm": 2.6323013014057004, "learning_rate": 1.2448908872663347e-06, "loss": 0.1133, "step": 896 }, { "epoch": 0.20409556313993174, "grad_norm": 1.8291857038844686, "learning_rate": 1.2448794795003313e-06, "loss": 0.1142, "step": 897 }, { "epoch": 0.2043230944254835, "grad_norm": 1.7184606914815217, "learning_rate": 1.2448680590651269e-06, "loss": 0.1222, "step": 898 }, { "epoch": 0.20455062571103527, "grad_norm": 2.7034652156706716, "learning_rate": 1.2448566259609543e-06, "loss": 0.1991, "step": 899 }, { "epoch": 0.20477815699658702, "grad_norm": 2.5930455129642653, "learning_rate": 1.2448451801880476e-06, "loss": 0.1085, "step": 900 }, { "epoch": 0.2050056882821388, "grad_norm": 2.44560677998223, "learning_rate": 1.2448337217466404e-06, "loss": 0.1735, "step": 901 }, { "epoch": 0.20523321956769056, "grad_norm": 2.257000828394708, "learning_rate": 1.2448222506369675e-06, "loss": 0.1118, "step": 902 }, { "epoch": 0.2054607508532423, "grad_norm": 2.5459054260546323, "learning_rate": 1.2448107668592626e-06, "loss": 0.1975, "step": 903 }, { "epoch": 0.2056882821387941, "grad_norm": 5.093888329917388, "learning_rate": 1.244799270413761e-06, "loss": 0.2277, "step": 904 }, { "epoch": 0.20591581342434584, "grad_norm": 4.116266489839909, "learning_rate": 1.2447877613006972e-06, "loss": 0.2004, "step": 905 }, { "epoch": 0.20614334470989762, "grad_norm": 1.8199951318249294, "learning_rate": 1.244776239520307e-06, "loss": 0.2131, "step": 906 }, { "epoch": 0.20637087599544937, "grad_norm": 2.7663340604707267, "learning_rate": 1.244764705072825e-06, "loss": 0.2145, "step": 907 }, { "epoch": 0.20659840728100115, "grad_norm": 1.8748872621346087, "learning_rate": 1.2447531579584878e-06, "loss": 0.1327, "step": 908 }, { "epoch": 0.2068259385665529, "grad_norm": 3.4272822632320237, "learning_rate": 1.2447415981775312e-06, "loss": 0.2198, "step": 909 }, { "epoch": 0.20705346985210465, "grad_norm": 3.1215491420073396, "learning_rate": 1.2447300257301912e-06, "loss": 0.1342, "step": 910 }, { "epoch": 0.20728100113765643, "grad_norm": 2.5239722345332396, "learning_rate": 1.2447184406167045e-06, "loss": 0.1868, "step": 911 }, { "epoch": 0.2075085324232082, "grad_norm": 1.9655955083845185, "learning_rate": 1.2447068428373077e-06, "loss": 0.1769, "step": 912 }, { "epoch": 0.20773606370875997, "grad_norm": 3.157478086474276, "learning_rate": 1.244695232392238e-06, "loss": 0.1824, "step": 913 }, { "epoch": 0.20796359499431172, "grad_norm": 1.9386984879122342, "learning_rate": 1.2446836092817328e-06, "loss": 0.1036, "step": 914 }, { "epoch": 0.20819112627986347, "grad_norm": 2.2587342441489997, "learning_rate": 1.2446719735060293e-06, "loss": 0.2175, "step": 915 }, { "epoch": 0.20841865756541525, "grad_norm": 2.3841098586953846, "learning_rate": 1.2446603250653658e-06, "loss": 0.1917, "step": 916 }, { "epoch": 0.208646188850967, "grad_norm": 2.0643080194861496, "learning_rate": 1.24464866395998e-06, "loss": 0.1276, "step": 917 }, { "epoch": 0.20887372013651878, "grad_norm": 1.1445975014034748, "learning_rate": 1.2446369901901102e-06, "loss": 0.0884, "step": 918 }, { "epoch": 0.20910125142207053, "grad_norm": 3.359267538919808, "learning_rate": 1.2446253037559952e-06, "loss": 0.1214, "step": 919 }, { "epoch": 0.20932878270762229, "grad_norm": 2.1583486474112927, "learning_rate": 1.2446136046578739e-06, "loss": 0.1093, "step": 920 }, { "epoch": 0.20955631399317406, "grad_norm": 2.692763960200507, "learning_rate": 1.2446018928959853e-06, "loss": 0.2289, "step": 921 }, { "epoch": 0.20978384527872582, "grad_norm": 2.356276890733175, "learning_rate": 1.2445901684705685e-06, "loss": 0.2222, "step": 922 }, { "epoch": 0.2100113765642776, "grad_norm": 2.596476104334523, "learning_rate": 1.2445784313818638e-06, "loss": 0.1574, "step": 923 }, { "epoch": 0.21023890784982935, "grad_norm": 2.788233818738729, "learning_rate": 1.2445666816301102e-06, "loss": 0.1303, "step": 924 }, { "epoch": 0.21046643913538113, "grad_norm": 2.3013258694625245, "learning_rate": 1.2445549192155487e-06, "loss": 0.2232, "step": 925 }, { "epoch": 0.21069397042093288, "grad_norm": 2.364410552617768, "learning_rate": 1.244543144138419e-06, "loss": 0.1967, "step": 926 }, { "epoch": 0.21092150170648463, "grad_norm": 1.4320620142185012, "learning_rate": 1.2445313563989624e-06, "loss": 0.1533, "step": 927 }, { "epoch": 0.2111490329920364, "grad_norm": 1.8979786639459473, "learning_rate": 1.2445195559974194e-06, "loss": 0.1494, "step": 928 }, { "epoch": 0.21137656427758816, "grad_norm": 2.1174466003626446, "learning_rate": 1.244507742934031e-06, "loss": 0.1973, "step": 929 }, { "epoch": 0.21160409556313994, "grad_norm": 2.164188059326067, "learning_rate": 1.2444959172090393e-06, "loss": 0.1336, "step": 930 }, { "epoch": 0.2118316268486917, "grad_norm": 1.5503789009056947, "learning_rate": 1.2444840788226854e-06, "loss": 0.1948, "step": 931 }, { "epoch": 0.21205915813424345, "grad_norm": 1.8654319466920093, "learning_rate": 1.2444722277752114e-06, "loss": 0.2043, "step": 932 }, { "epoch": 0.21228668941979523, "grad_norm": 2.020474941013341, "learning_rate": 1.2444603640668596e-06, "loss": 0.2211, "step": 933 }, { "epoch": 0.21251422070534698, "grad_norm": 2.0138343922511206, "learning_rate": 1.2444484876978725e-06, "loss": 0.1402, "step": 934 }, { "epoch": 0.21274175199089876, "grad_norm": 1.5804379894073013, "learning_rate": 1.2444365986684929e-06, "loss": 0.1311, "step": 935 }, { "epoch": 0.2129692832764505, "grad_norm": 2.2151819679335367, "learning_rate": 1.2444246969789633e-06, "loss": 0.0884, "step": 936 }, { "epoch": 0.21319681456200226, "grad_norm": 2.4707341962723834, "learning_rate": 1.2444127826295277e-06, "loss": 0.1138, "step": 937 }, { "epoch": 0.21342434584755404, "grad_norm": 2.142646726979162, "learning_rate": 1.244400855620429e-06, "loss": 0.1234, "step": 938 }, { "epoch": 0.2136518771331058, "grad_norm": 1.3461044168942922, "learning_rate": 1.2443889159519113e-06, "loss": 0.0966, "step": 939 }, { "epoch": 0.21387940841865757, "grad_norm": 2.824705608850421, "learning_rate": 1.2443769636242185e-06, "loss": 0.1736, "step": 940 }, { "epoch": 0.21410693970420933, "grad_norm": 3.3926592270656526, "learning_rate": 1.244364998637595e-06, "loss": 0.102, "step": 941 }, { "epoch": 0.2143344709897611, "grad_norm": 2.1478829302272278, "learning_rate": 1.2443530209922848e-06, "loss": 0.0958, "step": 942 }, { "epoch": 0.21456200227531286, "grad_norm": 2.084791701381943, "learning_rate": 1.2443410306885337e-06, "loss": 0.128, "step": 943 }, { "epoch": 0.2147895335608646, "grad_norm": 2.667044034523646, "learning_rate": 1.244329027726586e-06, "loss": 0.2088, "step": 944 }, { "epoch": 0.2150170648464164, "grad_norm": 1.4354076627961647, "learning_rate": 1.2443170121066872e-06, "loss": 0.1295, "step": 945 }, { "epoch": 0.21524459613196814, "grad_norm": 3.608014557262876, "learning_rate": 1.2443049838290827e-06, "loss": 0.1479, "step": 946 }, { "epoch": 0.21547212741751992, "grad_norm": 2.4907426669888424, "learning_rate": 1.2442929428940186e-06, "loss": 0.2094, "step": 947 }, { "epoch": 0.21569965870307167, "grad_norm": 1.889292577370491, "learning_rate": 1.2442808893017414e-06, "loss": 0.1182, "step": 948 }, { "epoch": 0.21592718998862342, "grad_norm": 1.295703999044032, "learning_rate": 1.2442688230524965e-06, "loss": 0.1493, "step": 949 }, { "epoch": 0.2161547212741752, "grad_norm": 3.010053578949512, "learning_rate": 1.244256744146531e-06, "loss": 0.1837, "step": 950 }, { "epoch": 0.21638225255972696, "grad_norm": 2.2542440250817357, "learning_rate": 1.244244652584092e-06, "loss": 0.2011, "step": 951 }, { "epoch": 0.21660978384527874, "grad_norm": 1.8471360091007536, "learning_rate": 1.2442325483654263e-06, "loss": 0.1529, "step": 952 }, { "epoch": 0.2168373151308305, "grad_norm": 3.360264898638295, "learning_rate": 1.2442204314907812e-06, "loss": 0.1952, "step": 953 }, { "epoch": 0.21706484641638224, "grad_norm": 2.2836983418694308, "learning_rate": 1.2442083019604047e-06, "loss": 0.2068, "step": 954 }, { "epoch": 0.21729237770193402, "grad_norm": 2.534259478561885, "learning_rate": 1.2441961597745447e-06, "loss": 0.131, "step": 955 }, { "epoch": 0.21751990898748577, "grad_norm": 2.116332324988344, "learning_rate": 1.244184004933449e-06, "loss": 0.1433, "step": 956 }, { "epoch": 0.21774744027303755, "grad_norm": 1.9239447267712195, "learning_rate": 1.2441718374373662e-06, "loss": 0.1296, "step": 957 }, { "epoch": 0.2179749715585893, "grad_norm": 3.11283517907892, "learning_rate": 1.244159657286545e-06, "loss": 0.1556, "step": 958 }, { "epoch": 0.21820250284414108, "grad_norm": 2.1030310163998, "learning_rate": 1.2441474644812345e-06, "loss": 0.1398, "step": 959 }, { "epoch": 0.21843003412969283, "grad_norm": 2.6301386027385734, "learning_rate": 1.2441352590216836e-06, "loss": 0.1328, "step": 960 }, { "epoch": 0.2186575654152446, "grad_norm": 1.6843043929069075, "learning_rate": 1.244123040908142e-06, "loss": 0.2169, "step": 961 }, { "epoch": 0.21888509670079637, "grad_norm": 2.021371056385805, "learning_rate": 1.2441108101408592e-06, "loss": 0.105, "step": 962 }, { "epoch": 0.21911262798634812, "grad_norm": 2.932640255317413, "learning_rate": 1.2440985667200853e-06, "loss": 0.1186, "step": 963 }, { "epoch": 0.2193401592718999, "grad_norm": 2.287879466073487, "learning_rate": 1.2440863106460705e-06, "loss": 0.1418, "step": 964 }, { "epoch": 0.21956769055745165, "grad_norm": 2.4323172112890807, "learning_rate": 1.2440740419190655e-06, "loss": 0.2116, "step": 965 }, { "epoch": 0.2197952218430034, "grad_norm": 2.906286752213052, "learning_rate": 1.2440617605393208e-06, "loss": 0.2029, "step": 966 }, { "epoch": 0.22002275312855518, "grad_norm": 2.420234503572233, "learning_rate": 1.2440494665070874e-06, "loss": 0.2227, "step": 967 }, { "epoch": 0.22025028441410693, "grad_norm": 2.1531642600457874, "learning_rate": 1.2440371598226165e-06, "loss": 0.1565, "step": 968 }, { "epoch": 0.2204778156996587, "grad_norm": 1.7851844835265829, "learning_rate": 1.2440248404861598e-06, "loss": 0.1132, "step": 969 }, { "epoch": 0.22070534698521047, "grad_norm": 2.2253443799094605, "learning_rate": 1.2440125084979693e-06, "loss": 0.1141, "step": 970 }, { "epoch": 0.22093287827076222, "grad_norm": 3.491367387042196, "learning_rate": 1.2440001638582965e-06, "loss": 0.1678, "step": 971 }, { "epoch": 0.221160409556314, "grad_norm": 2.6799332639547297, "learning_rate": 1.2439878065673944e-06, "loss": 0.1791, "step": 972 }, { "epoch": 0.22138794084186575, "grad_norm": 0.9028117739016462, "learning_rate": 1.2439754366255149e-06, "loss": 0.0794, "step": 973 }, { "epoch": 0.22161547212741753, "grad_norm": 1.6629358802939667, "learning_rate": 1.2439630540329111e-06, "loss": 0.1328, "step": 974 }, { "epoch": 0.22184300341296928, "grad_norm": 2.734953415687441, "learning_rate": 1.2439506587898358e-06, "loss": 0.1168, "step": 975 }, { "epoch": 0.22207053469852106, "grad_norm": 2.0986779517624745, "learning_rate": 1.243938250896543e-06, "loss": 0.1288, "step": 976 }, { "epoch": 0.2222980659840728, "grad_norm": 2.4554262769941766, "learning_rate": 1.2439258303532858e-06, "loss": 0.1545, "step": 977 }, { "epoch": 0.22252559726962456, "grad_norm": 1.7628888954012072, "learning_rate": 1.243913397160318e-06, "loss": 0.0967, "step": 978 }, { "epoch": 0.22275312855517634, "grad_norm": 1.8371409568342896, "learning_rate": 1.2439009513178938e-06, "loss": 0.1184, "step": 979 }, { "epoch": 0.2229806598407281, "grad_norm": 3.4838138279645103, "learning_rate": 1.2438884928262678e-06, "loss": 0.1686, "step": 980 }, { "epoch": 0.22320819112627988, "grad_norm": 1.743212643613601, "learning_rate": 1.2438760216856944e-06, "loss": 0.1005, "step": 981 }, { "epoch": 0.22343572241183163, "grad_norm": 2.2940811110233135, "learning_rate": 1.2438635378964284e-06, "loss": 0.1261, "step": 982 }, { "epoch": 0.22366325369738338, "grad_norm": 3.306786589733754, "learning_rate": 1.2438510414587251e-06, "loss": 0.1057, "step": 983 }, { "epoch": 0.22389078498293516, "grad_norm": 1.8312197926008273, "learning_rate": 1.24383853237284e-06, "loss": 0.1121, "step": 984 }, { "epoch": 0.2241183162684869, "grad_norm": 1.375951456745173, "learning_rate": 1.2438260106390285e-06, "loss": 0.1137, "step": 985 }, { "epoch": 0.2243458475540387, "grad_norm": 2.2850475547846507, "learning_rate": 1.2438134762575467e-06, "loss": 0.1528, "step": 986 }, { "epoch": 0.22457337883959044, "grad_norm": 1.7811601291763544, "learning_rate": 1.243800929228651e-06, "loss": 0.114, "step": 987 }, { "epoch": 0.2248009101251422, "grad_norm": 2.175503500486742, "learning_rate": 1.2437883695525974e-06, "loss": 0.2246, "step": 988 }, { "epoch": 0.22502844141069397, "grad_norm": 2.5853887611675375, "learning_rate": 1.2437757972296427e-06, "loss": 0.2126, "step": 989 }, { "epoch": 0.22525597269624573, "grad_norm": 2.4622729490723065, "learning_rate": 1.2437632122600442e-06, "loss": 0.1806, "step": 990 }, { "epoch": 0.2254835039817975, "grad_norm": 2.2336859931017794, "learning_rate": 1.2437506146440587e-06, "loss": 0.1948, "step": 991 }, { "epoch": 0.22571103526734926, "grad_norm": 2.388802906376772, "learning_rate": 1.243738004381944e-06, "loss": 0.1028, "step": 992 }, { "epoch": 0.225938566552901, "grad_norm": 2.526457136508687, "learning_rate": 1.2437253814739572e-06, "loss": 0.1394, "step": 993 }, { "epoch": 0.2261660978384528, "grad_norm": 2.282347439516019, "learning_rate": 1.2437127459203572e-06, "loss": 0.1678, "step": 994 }, { "epoch": 0.22639362912400454, "grad_norm": 1.3050466119815518, "learning_rate": 1.2437000977214015e-06, "loss": 0.0753, "step": 995 }, { "epoch": 0.22662116040955632, "grad_norm": 2.159334429482828, "learning_rate": 1.243687436877349e-06, "loss": 0.2767, "step": 996 }, { "epoch": 0.22684869169510807, "grad_norm": 2.4741243617261617, "learning_rate": 1.2436747633884583e-06, "loss": 0.167, "step": 997 }, { "epoch": 0.22707622298065985, "grad_norm": 2.522130011756034, "learning_rate": 1.2436620772549885e-06, "loss": 0.2229, "step": 998 }, { "epoch": 0.2273037542662116, "grad_norm": 2.2654639871535873, "learning_rate": 1.243649378477199e-06, "loss": 0.1376, "step": 999 }, { "epoch": 0.22753128555176336, "grad_norm": 2.737389406083516, "learning_rate": 1.2436366670553491e-06, "loss": 0.1672, "step": 1000 }, { "epoch": 0.22775881683731514, "grad_norm": 2.497999857751637, "learning_rate": 1.2436239429896988e-06, "loss": 0.2831, "step": 1001 }, { "epoch": 0.2279863481228669, "grad_norm": 2.3986139069373125, "learning_rate": 1.2436112062805081e-06, "loss": 0.1413, "step": 1002 }, { "epoch": 0.22821387940841867, "grad_norm": 1.63194618315687, "learning_rate": 1.2435984569280372e-06, "loss": 0.1509, "step": 1003 }, { "epoch": 0.22844141069397042, "grad_norm": 1.9884735218546312, "learning_rate": 1.2435856949325467e-06, "loss": 0.0909, "step": 1004 }, { "epoch": 0.22866894197952217, "grad_norm": 3.7364717574130877, "learning_rate": 1.2435729202942972e-06, "loss": 0.1362, "step": 1005 }, { "epoch": 0.22889647326507395, "grad_norm": 4.3498400339740595, "learning_rate": 1.2435601330135506e-06, "loss": 0.1364, "step": 1006 }, { "epoch": 0.2291240045506257, "grad_norm": 1.468486521047109, "learning_rate": 1.2435473330905674e-06, "loss": 0.1902, "step": 1007 }, { "epoch": 0.22935153583617748, "grad_norm": 2.602985360302298, "learning_rate": 1.2435345205256097e-06, "loss": 0.0947, "step": 1008 }, { "epoch": 0.22957906712172924, "grad_norm": 2.117002790495142, "learning_rate": 1.243521695318939e-06, "loss": 0.1228, "step": 1009 }, { "epoch": 0.229806598407281, "grad_norm": 2.0012843231226034, "learning_rate": 1.2435088574708178e-06, "loss": 0.1156, "step": 1010 }, { "epoch": 0.23003412969283277, "grad_norm": 2.490148339748286, "learning_rate": 1.2434960069815083e-06, "loss": 0.164, "step": 1011 }, { "epoch": 0.23026166097838452, "grad_norm": 2.450730689081713, "learning_rate": 1.243483143851273e-06, "loss": 0.138, "step": 1012 }, { "epoch": 0.2304891922639363, "grad_norm": 2.892744061430906, "learning_rate": 1.2434702680803751e-06, "loss": 0.1061, "step": 1013 }, { "epoch": 0.23071672354948805, "grad_norm": 2.790226387512928, "learning_rate": 1.2434573796690774e-06, "loss": 0.1957, "step": 1014 }, { "epoch": 0.23094425483503983, "grad_norm": 2.4036726186705972, "learning_rate": 1.2434444786176435e-06, "loss": 0.1544, "step": 1015 }, { "epoch": 0.23117178612059158, "grad_norm": 1.3271746602955339, "learning_rate": 1.2434315649263372e-06, "loss": 0.061, "step": 1016 }, { "epoch": 0.23139931740614333, "grad_norm": 1.4063593684445947, "learning_rate": 1.2434186385954225e-06, "loss": 0.1068, "step": 1017 }, { "epoch": 0.23162684869169511, "grad_norm": 2.9525793198909724, "learning_rate": 1.243405699625163e-06, "loss": 0.1067, "step": 1018 }, { "epoch": 0.23185437997724687, "grad_norm": 2.7846219600282747, "learning_rate": 1.243392748015824e-06, "loss": 0.1435, "step": 1019 }, { "epoch": 0.23208191126279865, "grad_norm": 1.5658061687677385, "learning_rate": 1.2433797837676694e-06, "loss": 0.1492, "step": 1020 }, { "epoch": 0.2323094425483504, "grad_norm": 4.123388323133236, "learning_rate": 1.2433668068809648e-06, "loss": 0.1699, "step": 1021 }, { "epoch": 0.23253697383390215, "grad_norm": 2.0976126762166403, "learning_rate": 1.243353817355975e-06, "loss": 0.1257, "step": 1022 }, { "epoch": 0.23276450511945393, "grad_norm": 2.4116621601065296, "learning_rate": 1.2433408151929655e-06, "loss": 0.133, "step": 1023 }, { "epoch": 0.23299203640500568, "grad_norm": 1.395623834578789, "learning_rate": 1.2433278003922026e-06, "loss": 0.0936, "step": 1024 }, { "epoch": 0.23321956769055746, "grad_norm": 1.7768669244027402, "learning_rate": 1.2433147729539514e-06, "loss": 0.1264, "step": 1025 }, { "epoch": 0.2334470989761092, "grad_norm": 2.489847520949891, "learning_rate": 1.2433017328784788e-06, "loss": 0.1714, "step": 1026 }, { "epoch": 0.23367463026166096, "grad_norm": 1.722648702759186, "learning_rate": 1.2432886801660513e-06, "loss": 0.122, "step": 1027 }, { "epoch": 0.23390216154721274, "grad_norm": 1.3061284883014919, "learning_rate": 1.2432756148169354e-06, "loss": 0.0726, "step": 1028 }, { "epoch": 0.2341296928327645, "grad_norm": 2.807955909764041, "learning_rate": 1.2432625368313983e-06, "loss": 0.1667, "step": 1029 }, { "epoch": 0.23435722411831628, "grad_norm": 1.9724601313774524, "learning_rate": 1.2432494462097072e-06, "loss": 0.1995, "step": 1030 }, { "epoch": 0.23458475540386803, "grad_norm": 2.3943947067430895, "learning_rate": 1.2432363429521295e-06, "loss": 0.1625, "step": 1031 }, { "epoch": 0.2348122866894198, "grad_norm": 1.5436408096888365, "learning_rate": 1.2432232270589335e-06, "loss": 0.076, "step": 1032 }, { "epoch": 0.23503981797497156, "grad_norm": 1.1938881747627557, "learning_rate": 1.2432100985303868e-06, "loss": 0.1002, "step": 1033 }, { "epoch": 0.2352673492605233, "grad_norm": 2.0446974564823304, "learning_rate": 1.243196957366758e-06, "loss": 0.1721, "step": 1034 }, { "epoch": 0.2354948805460751, "grad_norm": 1.079879180238331, "learning_rate": 1.2431838035683155e-06, "loss": 0.1257, "step": 1035 }, { "epoch": 0.23572241183162684, "grad_norm": 1.8378535292320874, "learning_rate": 1.2431706371353282e-06, "loss": 0.1821, "step": 1036 }, { "epoch": 0.23594994311717862, "grad_norm": 1.969855842746801, "learning_rate": 1.2431574580680653e-06, "loss": 0.1436, "step": 1037 }, { "epoch": 0.23617747440273038, "grad_norm": 3.058757707801488, "learning_rate": 1.2431442663667958e-06, "loss": 0.1605, "step": 1038 }, { "epoch": 0.23640500568828213, "grad_norm": 1.2648716547694445, "learning_rate": 1.2431310620317898e-06, "loss": 0.1614, "step": 1039 }, { "epoch": 0.2366325369738339, "grad_norm": 1.9610877034271015, "learning_rate": 1.2431178450633168e-06, "loss": 0.139, "step": 1040 }, { "epoch": 0.23686006825938566, "grad_norm": 1.5919631273318544, "learning_rate": 1.2431046154616473e-06, "loss": 0.0888, "step": 1041 }, { "epoch": 0.23708759954493744, "grad_norm": 1.791707313865184, "learning_rate": 1.2430913732270512e-06, "loss": 0.1087, "step": 1042 }, { "epoch": 0.2373151308304892, "grad_norm": 3.1377911678690666, "learning_rate": 1.2430781183597995e-06, "loss": 0.1565, "step": 1043 }, { "epoch": 0.23754266211604094, "grad_norm": 2.2837991793589607, "learning_rate": 1.243064850860163e-06, "loss": 0.1126, "step": 1044 }, { "epoch": 0.23777019340159272, "grad_norm": 2.6823412767535246, "learning_rate": 1.243051570728413e-06, "loss": 0.2083, "step": 1045 }, { "epoch": 0.23799772468714447, "grad_norm": 4.365244516577561, "learning_rate": 1.2430382779648208e-06, "loss": 0.1904, "step": 1046 }, { "epoch": 0.23822525597269625, "grad_norm": 2.434739692035364, "learning_rate": 1.243024972569658e-06, "loss": 0.1347, "step": 1047 }, { "epoch": 0.238452787258248, "grad_norm": 2.1595986496307384, "learning_rate": 1.2430116545431966e-06, "loss": 0.1926, "step": 1048 }, { "epoch": 0.23868031854379979, "grad_norm": 2.2542031412662573, "learning_rate": 1.2429983238857088e-06, "loss": 0.1667, "step": 1049 }, { "epoch": 0.23890784982935154, "grad_norm": 2.0405926385207787, "learning_rate": 1.2429849805974673e-06, "loss": 0.1872, "step": 1050 }, { "epoch": 0.2391353811149033, "grad_norm": 2.2037085916589043, "learning_rate": 1.2429716246787444e-06, "loss": 0.0775, "step": 1051 }, { "epoch": 0.23936291240045507, "grad_norm": 0.9628371959013814, "learning_rate": 1.242958256129813e-06, "loss": 0.1378, "step": 1052 }, { "epoch": 0.23959044368600682, "grad_norm": 2.1187588487355424, "learning_rate": 1.242944874950947e-06, "loss": 0.159, "step": 1053 }, { "epoch": 0.2398179749715586, "grad_norm": 1.9961766997876433, "learning_rate": 1.2429314811424192e-06, "loss": 0.1568, "step": 1054 }, { "epoch": 0.24004550625711035, "grad_norm": 1.935471261024473, "learning_rate": 1.242918074704504e-06, "loss": 0.1596, "step": 1055 }, { "epoch": 0.2402730375426621, "grad_norm": 1.4988665110908368, "learning_rate": 1.2429046556374747e-06, "loss": 0.0987, "step": 1056 }, { "epoch": 0.24050056882821388, "grad_norm": 2.4283216098462015, "learning_rate": 1.2428912239416057e-06, "loss": 0.1127, "step": 1057 }, { "epoch": 0.24072810011376564, "grad_norm": 2.3264824459084448, "learning_rate": 1.242877779617172e-06, "loss": 0.1274, "step": 1058 }, { "epoch": 0.24095563139931742, "grad_norm": 2.159687331291489, "learning_rate": 1.242864322664448e-06, "loss": 0.1399, "step": 1059 }, { "epoch": 0.24118316268486917, "grad_norm": 2.3632421336063087, "learning_rate": 1.2428508530837088e-06, "loss": 0.1751, "step": 1060 }, { "epoch": 0.24141069397042092, "grad_norm": 4.564054038887482, "learning_rate": 1.2428373708752298e-06, "loss": 0.1623, "step": 1061 }, { "epoch": 0.2416382252559727, "grad_norm": 2.913968751293169, "learning_rate": 1.2428238760392862e-06, "loss": 0.2404, "step": 1062 }, { "epoch": 0.24186575654152445, "grad_norm": 2.375864551832549, "learning_rate": 1.2428103685761543e-06, "loss": 0.1551, "step": 1063 }, { "epoch": 0.24209328782707623, "grad_norm": 2.773326434228427, "learning_rate": 1.2427968484861097e-06, "loss": 0.1129, "step": 1064 }, { "epoch": 0.24232081911262798, "grad_norm": 3.440322207371564, "learning_rate": 1.2427833157694292e-06, "loss": 0.2312, "step": 1065 }, { "epoch": 0.24254835039817976, "grad_norm": 2.09362609958651, "learning_rate": 1.2427697704263892e-06, "loss": 0.1047, "step": 1066 }, { "epoch": 0.24277588168373151, "grad_norm": 2.0696892695320432, "learning_rate": 1.2427562124572663e-06, "loss": 0.1156, "step": 1067 }, { "epoch": 0.24300341296928327, "grad_norm": 1.923568801452821, "learning_rate": 1.2427426418623377e-06, "loss": 0.1609, "step": 1068 }, { "epoch": 0.24323094425483505, "grad_norm": 1.5158781630471698, "learning_rate": 1.242729058641881e-06, "loss": 0.094, "step": 1069 }, { "epoch": 0.2434584755403868, "grad_norm": 2.2258107327352037, "learning_rate": 1.2427154627961737e-06, "loss": 0.2017, "step": 1070 }, { "epoch": 0.24368600682593858, "grad_norm": 2.3481688305100645, "learning_rate": 1.2427018543254935e-06, "loss": 0.1535, "step": 1071 }, { "epoch": 0.24391353811149033, "grad_norm": 2.148375299510445, "learning_rate": 1.2426882332301187e-06, "loss": 0.1812, "step": 1072 }, { "epoch": 0.24414106939704208, "grad_norm": 1.6816805152718777, "learning_rate": 1.2426745995103277e-06, "loss": 0.1341, "step": 1073 }, { "epoch": 0.24436860068259386, "grad_norm": 2.651811251817173, "learning_rate": 1.242660953166399e-06, "loss": 0.1318, "step": 1074 }, { "epoch": 0.2445961319681456, "grad_norm": 2.473544844662378, "learning_rate": 1.2426472941986117e-06, "loss": 0.1972, "step": 1075 }, { "epoch": 0.2448236632536974, "grad_norm": 1.3274925024741444, "learning_rate": 1.2426336226072449e-06, "loss": 0.1497, "step": 1076 }, { "epoch": 0.24505119453924915, "grad_norm": 2.1014804926130277, "learning_rate": 1.242619938392578e-06, "loss": 0.1186, "step": 1077 }, { "epoch": 0.2452787258248009, "grad_norm": 3.0260303106049973, "learning_rate": 1.2426062415548907e-06, "loss": 0.2506, "step": 1078 }, { "epoch": 0.24550625711035268, "grad_norm": 1.2327761741993546, "learning_rate": 1.2425925320944628e-06, "loss": 0.117, "step": 1079 }, { "epoch": 0.24573378839590443, "grad_norm": 3.2155457599215036, "learning_rate": 1.2425788100115747e-06, "loss": 0.1412, "step": 1080 }, { "epoch": 0.2459613196814562, "grad_norm": 1.6672046307721682, "learning_rate": 1.2425650753065065e-06, "loss": 0.148, "step": 1081 }, { "epoch": 0.24618885096700796, "grad_norm": 4.323033908726176, "learning_rate": 1.2425513279795395e-06, "loss": 0.1685, "step": 1082 }, { "epoch": 0.24641638225255974, "grad_norm": 2.4128743686143146, "learning_rate": 1.2425375680309543e-06, "loss": 0.0992, "step": 1083 }, { "epoch": 0.2466439135381115, "grad_norm": 2.0582783253443497, "learning_rate": 1.2425237954610322e-06, "loss": 0.1263, "step": 1084 }, { "epoch": 0.24687144482366324, "grad_norm": 2.5810033905990637, "learning_rate": 1.2425100102700547e-06, "loss": 0.2102, "step": 1085 }, { "epoch": 0.24709897610921502, "grad_norm": 2.269665820869707, "learning_rate": 1.2424962124583033e-06, "loss": 0.105, "step": 1086 }, { "epoch": 0.24732650739476678, "grad_norm": 2.706182109515585, "learning_rate": 1.2424824020260603e-06, "loss": 0.1596, "step": 1087 }, { "epoch": 0.24755403868031856, "grad_norm": 3.0056026517839016, "learning_rate": 1.2424685789736077e-06, "loss": 0.1809, "step": 1088 }, { "epoch": 0.2477815699658703, "grad_norm": 2.2230272708907513, "learning_rate": 1.2424547433012284e-06, "loss": 0.1187, "step": 1089 }, { "epoch": 0.24800910125142206, "grad_norm": 2.271631978747539, "learning_rate": 1.2424408950092049e-06, "loss": 0.1478, "step": 1090 }, { "epoch": 0.24823663253697384, "grad_norm": 2.485671272218175, "learning_rate": 1.2424270340978204e-06, "loss": 0.1595, "step": 1091 }, { "epoch": 0.2484641638225256, "grad_norm": 2.5242524420773087, "learning_rate": 1.2424131605673582e-06, "loss": 0.2519, "step": 1092 }, { "epoch": 0.24869169510807737, "grad_norm": 2.6439941529662025, "learning_rate": 1.2423992744181015e-06, "loss": 0.1389, "step": 1093 }, { "epoch": 0.24891922639362912, "grad_norm": 2.1610086973465417, "learning_rate": 1.2423853756503343e-06, "loss": 0.1017, "step": 1094 }, { "epoch": 0.24914675767918087, "grad_norm": 1.8954846688503157, "learning_rate": 1.2423714642643408e-06, "loss": 0.2796, "step": 1095 }, { "epoch": 0.24937428896473265, "grad_norm": 1.3124277359799683, "learning_rate": 1.2423575402604051e-06, "loss": 0.12, "step": 1096 }, { "epoch": 0.2496018202502844, "grad_norm": 2.5234695537617444, "learning_rate": 1.2423436036388122e-06, "loss": 0.1242, "step": 1097 }, { "epoch": 0.24982935153583619, "grad_norm": 2.044792039361886, "learning_rate": 1.2423296543998465e-06, "loss": 0.1743, "step": 1098 }, { "epoch": 0.25005688282138794, "grad_norm": 3.6767614291561492, "learning_rate": 1.2423156925437932e-06, "loss": 0.2584, "step": 1099 }, { "epoch": 0.2502844141069397, "grad_norm": 2.1397151355216506, "learning_rate": 1.2423017180709376e-06, "loss": 0.1586, "step": 1100 }, { "epoch": 0.25051194539249144, "grad_norm": 1.670738860931536, "learning_rate": 1.2422877309815656e-06, "loss": 0.0821, "step": 1101 }, { "epoch": 0.25073947667804325, "grad_norm": 2.3733300367714185, "learning_rate": 1.242273731275963e-06, "loss": 0.1335, "step": 1102 }, { "epoch": 0.250967007963595, "grad_norm": 2.6954093027320534, "learning_rate": 1.2422597189544155e-06, "loss": 0.1244, "step": 1103 }, { "epoch": 0.25119453924914675, "grad_norm": 2.17330712431736, "learning_rate": 1.2422456940172101e-06, "loss": 0.1799, "step": 1104 }, { "epoch": 0.2514220705346985, "grad_norm": 2.4883101223722397, "learning_rate": 1.2422316564646331e-06, "loss": 0.0881, "step": 1105 }, { "epoch": 0.25164960182025026, "grad_norm": 2.4975644528149528, "learning_rate": 1.2422176062969713e-06, "loss": 0.2376, "step": 1106 }, { "epoch": 0.25187713310580206, "grad_norm": 2.242874102497345, "learning_rate": 1.2422035435145121e-06, "loss": 0.1117, "step": 1107 }, { "epoch": 0.2521046643913538, "grad_norm": 2.1430334401000994, "learning_rate": 1.2421894681175428e-06, "loss": 0.1937, "step": 1108 }, { "epoch": 0.25233219567690557, "grad_norm": 2.8329522904929796, "learning_rate": 1.2421753801063511e-06, "loss": 0.2192, "step": 1109 }, { "epoch": 0.2525597269624573, "grad_norm": 2.7185072984242016, "learning_rate": 1.2421612794812248e-06, "loss": 0.1612, "step": 1110 }, { "epoch": 0.25278725824800913, "grad_norm": 1.3607580813979583, "learning_rate": 1.2421471662424525e-06, "loss": 0.0967, "step": 1111 }, { "epoch": 0.2530147895335609, "grad_norm": 5.202570048846043, "learning_rate": 1.2421330403903222e-06, "loss": 0.1696, "step": 1112 }, { "epoch": 0.25324232081911263, "grad_norm": 5.6196714700914585, "learning_rate": 1.2421189019251228e-06, "loss": 0.1241, "step": 1113 }, { "epoch": 0.2534698521046644, "grad_norm": 5.713500840284014, "learning_rate": 1.2421047508471433e-06, "loss": 0.1904, "step": 1114 }, { "epoch": 0.25369738339021614, "grad_norm": 3.86015124804852, "learning_rate": 1.242090587156673e-06, "loss": 0.1078, "step": 1115 }, { "epoch": 0.25392491467576794, "grad_norm": 5.247729743705811, "learning_rate": 1.242076410854001e-06, "loss": 0.209, "step": 1116 }, { "epoch": 0.2541524459613197, "grad_norm": 2.568207486664039, "learning_rate": 1.2420622219394174e-06, "loss": 0.2007, "step": 1117 }, { "epoch": 0.25437997724687145, "grad_norm": 2.258373006677131, "learning_rate": 1.2420480204132117e-06, "loss": 0.1081, "step": 1118 }, { "epoch": 0.2546075085324232, "grad_norm": 2.124182108668539, "learning_rate": 1.242033806275675e-06, "loss": 0.1156, "step": 1119 }, { "epoch": 0.25483503981797495, "grad_norm": 1.909249686410152, "learning_rate": 1.2420195795270973e-06, "loss": 0.1354, "step": 1120 }, { "epoch": 0.25506257110352676, "grad_norm": 1.530124680560141, "learning_rate": 1.2420053401677693e-06, "loss": 0.1683, "step": 1121 }, { "epoch": 0.2552901023890785, "grad_norm": 2.2436097952937595, "learning_rate": 1.241991088197982e-06, "loss": 0.1121, "step": 1122 }, { "epoch": 0.25551763367463026, "grad_norm": 1.7733643321322672, "learning_rate": 1.241976823618027e-06, "loss": 0.2215, "step": 1123 }, { "epoch": 0.255745164960182, "grad_norm": 1.9028486034678005, "learning_rate": 1.241962546428196e-06, "loss": 0.1741, "step": 1124 }, { "epoch": 0.25597269624573377, "grad_norm": 2.225065438584496, "learning_rate": 1.24194825662878e-06, "loss": 0.1927, "step": 1125 }, { "epoch": 0.2562002275312856, "grad_norm": 2.5921888880190096, "learning_rate": 1.2419339542200715e-06, "loss": 0.1912, "step": 1126 }, { "epoch": 0.2564277588168373, "grad_norm": 1.450566741299779, "learning_rate": 1.241919639202363e-06, "loss": 0.1243, "step": 1127 }, { "epoch": 0.2566552901023891, "grad_norm": 2.0976571865572895, "learning_rate": 1.2419053115759468e-06, "loss": 0.1071, "step": 1128 }, { "epoch": 0.25688282138794083, "grad_norm": 2.7303120294152476, "learning_rate": 1.2418909713411161e-06, "loss": 0.1601, "step": 1129 }, { "epoch": 0.2571103526734926, "grad_norm": 2.685114498491141, "learning_rate": 1.2418766184981634e-06, "loss": 0.1577, "step": 1130 }, { "epoch": 0.2573378839590444, "grad_norm": 2.169271608320249, "learning_rate": 1.2418622530473825e-06, "loss": 0.1876, "step": 1131 }, { "epoch": 0.25756541524459614, "grad_norm": 1.4396485167707906, "learning_rate": 1.2418478749890672e-06, "loss": 0.1051, "step": 1132 }, { "epoch": 0.2577929465301479, "grad_norm": 1.3251283927245865, "learning_rate": 1.2418334843235105e-06, "loss": 0.127, "step": 1133 }, { "epoch": 0.25802047781569964, "grad_norm": 1.7722655677157828, "learning_rate": 1.2418190810510075e-06, "loss": 0.0952, "step": 1134 }, { "epoch": 0.2582480091012514, "grad_norm": 3.5363464228733976, "learning_rate": 1.2418046651718518e-06, "loss": 0.1442, "step": 1135 }, { "epoch": 0.2584755403868032, "grad_norm": 1.4208162726414038, "learning_rate": 1.2417902366863386e-06, "loss": 0.1365, "step": 1136 }, { "epoch": 0.25870307167235496, "grad_norm": 2.6384013164080367, "learning_rate": 1.2417757955947623e-06, "loss": 0.1884, "step": 1137 }, { "epoch": 0.2589306029579067, "grad_norm": 2.0462327930247555, "learning_rate": 1.2417613418974187e-06, "loss": 0.1815, "step": 1138 }, { "epoch": 0.25915813424345846, "grad_norm": 1.8956914891680519, "learning_rate": 1.2417468755946025e-06, "loss": 0.107, "step": 1139 }, { "epoch": 0.2593856655290102, "grad_norm": 1.6865771627256954, "learning_rate": 1.2417323966866097e-06, "loss": 0.1254, "step": 1140 }, { "epoch": 0.259613196814562, "grad_norm": 3.648733556829662, "learning_rate": 1.2417179051737364e-06, "loss": 0.1763, "step": 1141 }, { "epoch": 0.25984072810011377, "grad_norm": 3.6358344752950518, "learning_rate": 1.2417034010562784e-06, "loss": 0.2671, "step": 1142 }, { "epoch": 0.2600682593856655, "grad_norm": 1.8508374458484436, "learning_rate": 1.2416888843345323e-06, "loss": 0.1058, "step": 1143 }, { "epoch": 0.2602957906712173, "grad_norm": 1.6379231265979228, "learning_rate": 1.2416743550087951e-06, "loss": 0.1959, "step": 1144 }, { "epoch": 0.2605233219567691, "grad_norm": 2.7329577702359127, "learning_rate": 1.241659813079363e-06, "loss": 0.2185, "step": 1145 }, { "epoch": 0.26075085324232083, "grad_norm": 2.1507497458601206, "learning_rate": 1.2416452585465342e-06, "loss": 0.1289, "step": 1146 }, { "epoch": 0.2609783845278726, "grad_norm": 1.5858683873624775, "learning_rate": 1.2416306914106053e-06, "loss": 0.0789, "step": 1147 }, { "epoch": 0.26120591581342434, "grad_norm": 0.9635476566506428, "learning_rate": 1.2416161116718744e-06, "loss": 0.0576, "step": 1148 }, { "epoch": 0.2614334470989761, "grad_norm": 3.1902671512005747, "learning_rate": 1.2416015193306397e-06, "loss": 0.1745, "step": 1149 }, { "epoch": 0.2616609783845279, "grad_norm": 2.4274328198153383, "learning_rate": 1.241586914387199e-06, "loss": 0.1837, "step": 1150 }, { "epoch": 0.26188850967007965, "grad_norm": 2.1338082634804647, "learning_rate": 1.2415722968418508e-06, "loss": 0.1481, "step": 1151 }, { "epoch": 0.2621160409556314, "grad_norm": 2.9280052924977973, "learning_rate": 1.2415576666948945e-06, "loss": 0.1869, "step": 1152 }, { "epoch": 0.26234357224118315, "grad_norm": 2.4870407515539315, "learning_rate": 1.2415430239466283e-06, "loss": 0.1309, "step": 1153 }, { "epoch": 0.2625711035267349, "grad_norm": 2.393155333098502, "learning_rate": 1.241528368597352e-06, "loss": 0.1292, "step": 1154 }, { "epoch": 0.2627986348122867, "grad_norm": 2.327013036086297, "learning_rate": 1.2415137006473649e-06, "loss": 0.1714, "step": 1155 }, { "epoch": 0.26302616609783847, "grad_norm": 1.3188871982753934, "learning_rate": 1.241499020096967e-06, "loss": 0.1637, "step": 1156 }, { "epoch": 0.2632536973833902, "grad_norm": 2.588916025472251, "learning_rate": 1.2414843269464579e-06, "loss": 0.1409, "step": 1157 }, { "epoch": 0.26348122866894197, "grad_norm": 3.771153920400626, "learning_rate": 1.2414696211961386e-06, "loss": 0.1883, "step": 1158 }, { "epoch": 0.2637087599544937, "grad_norm": 4.499595516640575, "learning_rate": 1.2414549028463087e-06, "loss": 0.2953, "step": 1159 }, { "epoch": 0.26393629124004553, "grad_norm": 2.2000868495180517, "learning_rate": 1.2414401718972703e-06, "loss": 0.11, "step": 1160 }, { "epoch": 0.2641638225255973, "grad_norm": 1.9915564020272047, "learning_rate": 1.2414254283493232e-06, "loss": 0.15, "step": 1161 }, { "epoch": 0.26439135381114903, "grad_norm": 1.7935865348857665, "learning_rate": 1.2414106722027694e-06, "loss": 0.1163, "step": 1162 }, { "epoch": 0.2646188850967008, "grad_norm": 1.9493113396178239, "learning_rate": 1.2413959034579104e-06, "loss": 0.0895, "step": 1163 }, { "epoch": 0.26484641638225254, "grad_norm": 2.164760508646002, "learning_rate": 1.2413811221150478e-06, "loss": 0.1221, "step": 1164 }, { "epoch": 0.26507394766780434, "grad_norm": 2.7137559336575787, "learning_rate": 1.2413663281744843e-06, "loss": 0.1702, "step": 1165 }, { "epoch": 0.2653014789533561, "grad_norm": 2.05578991760022, "learning_rate": 1.2413515216365216e-06, "loss": 0.1885, "step": 1166 }, { "epoch": 0.26552901023890785, "grad_norm": 2.347644214662539, "learning_rate": 1.2413367025014628e-06, "loss": 0.1502, "step": 1167 }, { "epoch": 0.2657565415244596, "grad_norm": 1.4755705417169032, "learning_rate": 1.2413218707696103e-06, "loss": 0.1539, "step": 1168 }, { "epoch": 0.26598407281001135, "grad_norm": 3.019338825447027, "learning_rate": 1.2413070264412677e-06, "loss": 0.19, "step": 1169 }, { "epoch": 0.26621160409556316, "grad_norm": 1.6254001938773606, "learning_rate": 1.2412921695167381e-06, "loss": 0.1523, "step": 1170 }, { "epoch": 0.2664391353811149, "grad_norm": 1.440204109732841, "learning_rate": 1.2412772999963253e-06, "loss": 0.0929, "step": 1171 }, { "epoch": 0.26666666666666666, "grad_norm": 1.8290728472456999, "learning_rate": 1.2412624178803332e-06, "loss": 0.1134, "step": 1172 }, { "epoch": 0.2668941979522184, "grad_norm": 3.0096224925263084, "learning_rate": 1.2412475231690656e-06, "loss": 0.1381, "step": 1173 }, { "epoch": 0.26712172923777017, "grad_norm": 2.199128798503439, "learning_rate": 1.2412326158628275e-06, "loss": 0.125, "step": 1174 }, { "epoch": 0.267349260523322, "grad_norm": 2.319439781311619, "learning_rate": 1.2412176959619232e-06, "loss": 0.1491, "step": 1175 }, { "epoch": 0.2675767918088737, "grad_norm": 2.941596562466676, "learning_rate": 1.2412027634666578e-06, "loss": 0.2338, "step": 1176 }, { "epoch": 0.2678043230944255, "grad_norm": 2.7311199238732913, "learning_rate": 1.2411878183773366e-06, "loss": 0.2515, "step": 1177 }, { "epoch": 0.26803185437997723, "grad_norm": 1.687064956485482, "learning_rate": 1.2411728606942647e-06, "loss": 0.0986, "step": 1178 }, { "epoch": 0.26825938566552904, "grad_norm": 1.7930618105277882, "learning_rate": 1.241157890417748e-06, "loss": 0.1244, "step": 1179 }, { "epoch": 0.2684869169510808, "grad_norm": 2.3927463592130698, "learning_rate": 1.2411429075480923e-06, "loss": 0.1089, "step": 1180 }, { "epoch": 0.26871444823663254, "grad_norm": 1.5259649282053471, "learning_rate": 1.2411279120856042e-06, "loss": 0.0967, "step": 1181 }, { "epoch": 0.2689419795221843, "grad_norm": 2.2250810785927535, "learning_rate": 1.24111290403059e-06, "loss": 0.1186, "step": 1182 }, { "epoch": 0.26916951080773605, "grad_norm": 3.3967860062269963, "learning_rate": 1.2410978833833564e-06, "loss": 0.1935, "step": 1183 }, { "epoch": 0.26939704209328785, "grad_norm": 2.1923686844506483, "learning_rate": 1.2410828501442104e-06, "loss": 0.1329, "step": 1184 }, { "epoch": 0.2696245733788396, "grad_norm": 2.149373763512951, "learning_rate": 1.2410678043134591e-06, "loss": 0.1829, "step": 1185 }, { "epoch": 0.26985210466439136, "grad_norm": 2.6757944701183, "learning_rate": 1.2410527458914103e-06, "loss": 0.2143, "step": 1186 }, { "epoch": 0.2700796359499431, "grad_norm": 1.8260214393902483, "learning_rate": 1.2410376748783714e-06, "loss": 0.165, "step": 1187 }, { "epoch": 0.27030716723549486, "grad_norm": 1.0916691334467872, "learning_rate": 1.241022591274651e-06, "loss": 0.1854, "step": 1188 }, { "epoch": 0.27053469852104667, "grad_norm": 2.642360870928487, "learning_rate": 1.2410074950805567e-06, "loss": 0.174, "step": 1189 }, { "epoch": 0.2707622298065984, "grad_norm": 2.1241327325526576, "learning_rate": 1.2409923862963973e-06, "loss": 0.1936, "step": 1190 }, { "epoch": 0.27098976109215017, "grad_norm": 1.8969035428697545, "learning_rate": 1.240977264922482e-06, "loss": 0.1241, "step": 1191 }, { "epoch": 0.2712172923777019, "grad_norm": 2.4081060354042743, "learning_rate": 1.2409621309591195e-06, "loss": 0.0908, "step": 1192 }, { "epoch": 0.2714448236632537, "grad_norm": 3.1731526033525763, "learning_rate": 1.2409469844066188e-06, "loss": 0.1297, "step": 1193 }, { "epoch": 0.2716723549488055, "grad_norm": 1.415839584125243, "learning_rate": 1.2409318252652899e-06, "loss": 0.1728, "step": 1194 }, { "epoch": 0.27189988623435724, "grad_norm": 2.5230803349903366, "learning_rate": 1.2409166535354428e-06, "loss": 0.2229, "step": 1195 }, { "epoch": 0.272127417519909, "grad_norm": 1.8982743663775448, "learning_rate": 1.2409014692173872e-06, "loss": 0.08, "step": 1196 }, { "epoch": 0.27235494880546074, "grad_norm": 2.3845532894091943, "learning_rate": 1.240886272311433e-06, "loss": 0.1233, "step": 1197 }, { "epoch": 0.2725824800910125, "grad_norm": 1.6059907271573468, "learning_rate": 1.240871062817892e-06, "loss": 0.141, "step": 1198 }, { "epoch": 0.2728100113765643, "grad_norm": 2.2253019852544647, "learning_rate": 1.240855840737074e-06, "loss": 0.1019, "step": 1199 }, { "epoch": 0.27303754266211605, "grad_norm": 1.8641328097177905, "learning_rate": 1.2408406060692909e-06, "loss": 0.1489, "step": 1200 }, { "epoch": 0.2732650739476678, "grad_norm": 2.451518484264104, "learning_rate": 1.2408253588148532e-06, "loss": 0.1228, "step": 1201 }, { "epoch": 0.27349260523321955, "grad_norm": 6.967560998726808, "learning_rate": 1.2408100989740735e-06, "loss": 0.1814, "step": 1202 }, { "epoch": 0.2737201365187713, "grad_norm": 3.37131502042136, "learning_rate": 1.2407948265472628e-06, "loss": 0.1422, "step": 1203 }, { "epoch": 0.2739476678043231, "grad_norm": 2.5918040941488, "learning_rate": 1.2407795415347336e-06, "loss": 0.1913, "step": 1204 }, { "epoch": 0.27417519908987487, "grad_norm": 2.0851599516702106, "learning_rate": 1.2407642439367986e-06, "loss": 0.1295, "step": 1205 }, { "epoch": 0.2744027303754266, "grad_norm": 2.384896134065583, "learning_rate": 1.24074893375377e-06, "loss": 0.1765, "step": 1206 }, { "epoch": 0.27463026166097837, "grad_norm": 2.2263253984010087, "learning_rate": 1.2407336109859607e-06, "loss": 0.1677, "step": 1207 }, { "epoch": 0.2748577929465301, "grad_norm": 4.0136041805844425, "learning_rate": 1.2407182756336844e-06, "loss": 0.2066, "step": 1208 }, { "epoch": 0.27508532423208193, "grad_norm": 2.4218266443283794, "learning_rate": 1.240702927697254e-06, "loss": 0.1029, "step": 1209 }, { "epoch": 0.2753128555176337, "grad_norm": 2.975498605777637, "learning_rate": 1.2406875671769837e-06, "loss": 0.2205, "step": 1210 }, { "epoch": 0.27554038680318543, "grad_norm": 2.6168945120696634, "learning_rate": 1.2406721940731866e-06, "loss": 0.1641, "step": 1211 }, { "epoch": 0.2757679180887372, "grad_norm": 2.189110929582555, "learning_rate": 1.2406568083861776e-06, "loss": 0.1369, "step": 1212 }, { "epoch": 0.27599544937428894, "grad_norm": 3.1088766651076574, "learning_rate": 1.2406414101162708e-06, "loss": 0.2137, "step": 1213 }, { "epoch": 0.27622298065984074, "grad_norm": 1.477236721867685, "learning_rate": 1.2406259992637815e-06, "loss": 0.108, "step": 1214 }, { "epoch": 0.2764505119453925, "grad_norm": 3.1813357275785275, "learning_rate": 1.240610575829024e-06, "loss": 0.1376, "step": 1215 }, { "epoch": 0.27667804323094425, "grad_norm": 2.636131080247815, "learning_rate": 1.2405951398123136e-06, "loss": 0.1964, "step": 1216 }, { "epoch": 0.276905574516496, "grad_norm": 2.580878369476001, "learning_rate": 1.2405796912139662e-06, "loss": 0.1478, "step": 1217 }, { "epoch": 0.2771331058020478, "grad_norm": 3.2969282409336564, "learning_rate": 1.240564230034297e-06, "loss": 0.1612, "step": 1218 }, { "epoch": 0.27736063708759956, "grad_norm": 1.7096976219125466, "learning_rate": 1.2405487562736226e-06, "loss": 0.0684, "step": 1219 }, { "epoch": 0.2775881683731513, "grad_norm": 1.5798461976853169, "learning_rate": 1.240533269932259e-06, "loss": 0.2019, "step": 1220 }, { "epoch": 0.27781569965870306, "grad_norm": 1.7709663885713447, "learning_rate": 1.2405177710105223e-06, "loss": 0.0644, "step": 1221 }, { "epoch": 0.2780432309442548, "grad_norm": 2.7039398210352172, "learning_rate": 1.24050225950873e-06, "loss": 0.1279, "step": 1222 }, { "epoch": 0.2782707622298066, "grad_norm": 2.064393149635238, "learning_rate": 1.2404867354271984e-06, "loss": 0.2025, "step": 1223 }, { "epoch": 0.2784982935153584, "grad_norm": 2.1454678847811506, "learning_rate": 1.2404711987662452e-06, "loss": 0.22, "step": 1224 }, { "epoch": 0.2787258248009101, "grad_norm": 3.3818448317517706, "learning_rate": 1.240455649526188e-06, "loss": 0.1667, "step": 1225 }, { "epoch": 0.2789533560864619, "grad_norm": 1.5959505587151988, "learning_rate": 1.2404400877073446e-06, "loss": 0.0904, "step": 1226 }, { "epoch": 0.27918088737201363, "grad_norm": 3.2220961668197337, "learning_rate": 1.2404245133100328e-06, "loss": 0.1864, "step": 1227 }, { "epoch": 0.27940841865756544, "grad_norm": 2.1405279662692744, "learning_rate": 1.240408926334571e-06, "loss": 0.1021, "step": 1228 }, { "epoch": 0.2796359499431172, "grad_norm": 3.3416472980915315, "learning_rate": 1.240393326781278e-06, "loss": 0.2055, "step": 1229 }, { "epoch": 0.27986348122866894, "grad_norm": 2.091589658288764, "learning_rate": 1.2403777146504722e-06, "loss": 0.126, "step": 1230 }, { "epoch": 0.2800910125142207, "grad_norm": 1.416466049772803, "learning_rate": 1.240362089942473e-06, "loss": 0.1542, "step": 1231 }, { "epoch": 0.28031854379977245, "grad_norm": 1.3058878302334813, "learning_rate": 1.2403464526575997e-06, "loss": 0.0475, "step": 1232 }, { "epoch": 0.28054607508532425, "grad_norm": 4.125917323296369, "learning_rate": 1.240330802796172e-06, "loss": 0.181, "step": 1233 }, { "epoch": 0.280773606370876, "grad_norm": 2.716562306766488, "learning_rate": 1.2403151403585093e-06, "loss": 0.202, "step": 1234 }, { "epoch": 0.28100113765642776, "grad_norm": 3.3376947997511843, "learning_rate": 1.240299465344932e-06, "loss": 0.1384, "step": 1235 }, { "epoch": 0.2812286689419795, "grad_norm": 1.2166897547300315, "learning_rate": 1.2402837777557608e-06, "loss": 0.143, "step": 1236 }, { "epoch": 0.28145620022753126, "grad_norm": 2.6970364920835115, "learning_rate": 1.240268077591316e-06, "loss": 0.1991, "step": 1237 }, { "epoch": 0.28168373151308307, "grad_norm": 1.2700401000964803, "learning_rate": 1.2402523648519184e-06, "loss": 0.0921, "step": 1238 }, { "epoch": 0.2819112627986348, "grad_norm": 3.574248956011507, "learning_rate": 1.2402366395378892e-06, "loss": 0.1592, "step": 1239 }, { "epoch": 0.2821387940841866, "grad_norm": 1.6046415887628118, "learning_rate": 1.24022090164955e-06, "loss": 0.1368, "step": 1240 }, { "epoch": 0.2823663253697383, "grad_norm": 1.853669863286635, "learning_rate": 1.240205151187222e-06, "loss": 0.1363, "step": 1241 }, { "epoch": 0.2825938566552901, "grad_norm": 2.166955108238533, "learning_rate": 1.2401893881512278e-06, "loss": 0.1211, "step": 1242 }, { "epoch": 0.2828213879408419, "grad_norm": 2.523899316800894, "learning_rate": 1.240173612541889e-06, "loss": 0.1735, "step": 1243 }, { "epoch": 0.28304891922639364, "grad_norm": 2.5063025861997046, "learning_rate": 1.2401578243595281e-06, "loss": 0.1227, "step": 1244 }, { "epoch": 0.2832764505119454, "grad_norm": 1.8829939441618075, "learning_rate": 1.2401420236044678e-06, "loss": 0.0875, "step": 1245 }, { "epoch": 0.28350398179749714, "grad_norm": 2.927257999123812, "learning_rate": 1.2401262102770314e-06, "loss": 0.2237, "step": 1246 }, { "epoch": 0.2837315130830489, "grad_norm": 2.701605298598302, "learning_rate": 1.2401103843775416e-06, "loss": 0.2524, "step": 1247 }, { "epoch": 0.2839590443686007, "grad_norm": 1.8917697620531524, "learning_rate": 1.240094545906322e-06, "loss": 0.1681, "step": 1248 }, { "epoch": 0.28418657565415245, "grad_norm": 1.790645149029892, "learning_rate": 1.2400786948636966e-06, "loss": 0.1141, "step": 1249 }, { "epoch": 0.2844141069397042, "grad_norm": 2.280105910927623, "learning_rate": 1.2400628312499892e-06, "loss": 0.1467, "step": 1250 }, { "epoch": 0.28464163822525596, "grad_norm": 1.9642089313734676, "learning_rate": 1.2400469550655239e-06, "loss": 0.1475, "step": 1251 }, { "epoch": 0.28486916951080776, "grad_norm": 2.788765935312485, "learning_rate": 1.240031066310625e-06, "loss": 0.1443, "step": 1252 }, { "epoch": 0.2850967007963595, "grad_norm": 2.7906903727756007, "learning_rate": 1.2400151649856176e-06, "loss": 0.2541, "step": 1253 }, { "epoch": 0.28532423208191127, "grad_norm": 1.7188739232283574, "learning_rate": 1.2399992510908266e-06, "loss": 0.1287, "step": 1254 }, { "epoch": 0.285551763367463, "grad_norm": 2.448125831923515, "learning_rate": 1.2399833246265772e-06, "loss": 0.1727, "step": 1255 }, { "epoch": 0.28577929465301477, "grad_norm": 1.670687696490714, "learning_rate": 1.2399673855931951e-06, "loss": 0.1783, "step": 1256 }, { "epoch": 0.2860068259385666, "grad_norm": 1.680153916469469, "learning_rate": 1.2399514339910058e-06, "loss": 0.072, "step": 1257 }, { "epoch": 0.28623435722411833, "grad_norm": 1.868751293838818, "learning_rate": 1.2399354698203353e-06, "loss": 0.2307, "step": 1258 }, { "epoch": 0.2864618885096701, "grad_norm": 1.7025005817315042, "learning_rate": 1.2399194930815103e-06, "loss": 0.1668, "step": 1259 }, { "epoch": 0.28668941979522183, "grad_norm": 2.300621243889103, "learning_rate": 1.2399035037748567e-06, "loss": 0.1402, "step": 1260 }, { "epoch": 0.2869169510807736, "grad_norm": 1.9848277562608234, "learning_rate": 1.2398875019007017e-06, "loss": 0.0945, "step": 1261 }, { "epoch": 0.2871444823663254, "grad_norm": 2.389217778082788, "learning_rate": 1.2398714874593722e-06, "loss": 0.1205, "step": 1262 }, { "epoch": 0.28737201365187715, "grad_norm": 2.3311431375314733, "learning_rate": 1.2398554604511958e-06, "loss": 0.1375, "step": 1263 }, { "epoch": 0.2875995449374289, "grad_norm": 1.6072927982231655, "learning_rate": 1.2398394208764998e-06, "loss": 0.1011, "step": 1264 }, { "epoch": 0.28782707622298065, "grad_norm": 3.4081467363917772, "learning_rate": 1.239823368735612e-06, "loss": 0.2417, "step": 1265 }, { "epoch": 0.2880546075085324, "grad_norm": 1.534207652635837, "learning_rate": 1.2398073040288605e-06, "loss": 0.102, "step": 1266 }, { "epoch": 0.2882821387940842, "grad_norm": 1.5768713894474875, "learning_rate": 1.2397912267565738e-06, "loss": 0.1132, "step": 1267 }, { "epoch": 0.28850967007963596, "grad_norm": 1.579072277023231, "learning_rate": 1.2397751369190803e-06, "loss": 0.0898, "step": 1268 }, { "epoch": 0.2887372013651877, "grad_norm": 1.3149754797376219, "learning_rate": 1.2397590345167088e-06, "loss": 0.2011, "step": 1269 }, { "epoch": 0.28896473265073946, "grad_norm": 1.6939565519433766, "learning_rate": 1.2397429195497887e-06, "loss": 0.1421, "step": 1270 }, { "epoch": 0.2891922639362912, "grad_norm": 1.356052034301606, "learning_rate": 1.239726792018649e-06, "loss": 0.0458, "step": 1271 }, { "epoch": 0.289419795221843, "grad_norm": 2.355272771080969, "learning_rate": 1.2397106519236197e-06, "loss": 0.1237, "step": 1272 }, { "epoch": 0.2896473265073948, "grad_norm": 1.8270125880467827, "learning_rate": 1.2396944992650303e-06, "loss": 0.0967, "step": 1273 }, { "epoch": 0.2898748577929465, "grad_norm": 1.6010935468127871, "learning_rate": 1.2396783340432111e-06, "loss": 0.1298, "step": 1274 }, { "epoch": 0.2901023890784983, "grad_norm": 2.7512982520907685, "learning_rate": 1.2396621562584925e-06, "loss": 0.1489, "step": 1275 }, { "epoch": 0.29032992036405003, "grad_norm": 2.7012778911705153, "learning_rate": 1.2396459659112052e-06, "loss": 0.1077, "step": 1276 }, { "epoch": 0.29055745164960184, "grad_norm": 1.942476301895168, "learning_rate": 1.23962976300168e-06, "loss": 0.1705, "step": 1277 }, { "epoch": 0.2907849829351536, "grad_norm": 1.9559220493458693, "learning_rate": 1.2396135475302478e-06, "loss": 0.194, "step": 1278 }, { "epoch": 0.29101251422070534, "grad_norm": 3.484604084621699, "learning_rate": 1.2395973194972406e-06, "loss": 0.1654, "step": 1279 }, { "epoch": 0.2912400455062571, "grad_norm": 3.078532329336957, "learning_rate": 1.2395810789029898e-06, "loss": 0.1096, "step": 1280 }, { "epoch": 0.29146757679180885, "grad_norm": 2.1019316083307005, "learning_rate": 1.2395648257478271e-06, "loss": 0.1773, "step": 1281 }, { "epoch": 0.29169510807736065, "grad_norm": 2.6622086192257646, "learning_rate": 1.239548560032085e-06, "loss": 0.1807, "step": 1282 }, { "epoch": 0.2919226393629124, "grad_norm": 2.724085742068777, "learning_rate": 1.239532281756096e-06, "loss": 0.2633, "step": 1283 }, { "epoch": 0.29215017064846416, "grad_norm": 3.476983854537522, "learning_rate": 1.2395159909201924e-06, "loss": 0.2206, "step": 1284 }, { "epoch": 0.2923777019340159, "grad_norm": 2.4597634305484304, "learning_rate": 1.2394996875247075e-06, "loss": 0.173, "step": 1285 }, { "epoch": 0.2926052332195677, "grad_norm": 2.382644730814897, "learning_rate": 1.2394833715699743e-06, "loss": 0.171, "step": 1286 }, { "epoch": 0.29283276450511947, "grad_norm": 1.0179882332573014, "learning_rate": 1.2394670430563264e-06, "loss": 0.1506, "step": 1287 }, { "epoch": 0.2930602957906712, "grad_norm": 2.0313503091521845, "learning_rate": 1.2394507019840974e-06, "loss": 0.1157, "step": 1288 }, { "epoch": 0.293287827076223, "grad_norm": 2.1563562932885394, "learning_rate": 1.2394343483536215e-06, "loss": 0.1455, "step": 1289 }, { "epoch": 0.2935153583617747, "grad_norm": 1.590464214144866, "learning_rate": 1.2394179821652326e-06, "loss": 0.1346, "step": 1290 }, { "epoch": 0.29374288964732653, "grad_norm": 2.5288590185853885, "learning_rate": 1.2394016034192654e-06, "loss": 0.1699, "step": 1291 }, { "epoch": 0.2939704209328783, "grad_norm": 1.3441395527785462, "learning_rate": 1.2393852121160548e-06, "loss": 0.1406, "step": 1292 }, { "epoch": 0.29419795221843004, "grad_norm": 2.53263687980971, "learning_rate": 1.2393688082559357e-06, "loss": 0.1629, "step": 1293 }, { "epoch": 0.2944254835039818, "grad_norm": 1.711669162319686, "learning_rate": 1.2393523918392433e-06, "loss": 0.0766, "step": 1294 }, { "epoch": 0.29465301478953354, "grad_norm": 2.2138801021106214, "learning_rate": 1.2393359628663133e-06, "loss": 0.2713, "step": 1295 }, { "epoch": 0.29488054607508535, "grad_norm": 1.7610053159614951, "learning_rate": 1.239319521337481e-06, "loss": 0.1454, "step": 1296 }, { "epoch": 0.2951080773606371, "grad_norm": 1.1164148240700726, "learning_rate": 1.2393030672530828e-06, "loss": 0.0873, "step": 1297 }, { "epoch": 0.29533560864618885, "grad_norm": 2.5581318128260264, "learning_rate": 1.239286600613455e-06, "loss": 0.1986, "step": 1298 }, { "epoch": 0.2955631399317406, "grad_norm": 2.130651315015736, "learning_rate": 1.2392701214189343e-06, "loss": 0.1141, "step": 1299 }, { "epoch": 0.29579067121729236, "grad_norm": 2.483114350460134, "learning_rate": 1.2392536296698571e-06, "loss": 0.1921, "step": 1300 }, { "epoch": 0.29601820250284416, "grad_norm": 1.4378642619116868, "learning_rate": 1.2392371253665605e-06, "loss": 0.1495, "step": 1301 }, { "epoch": 0.2962457337883959, "grad_norm": 1.4002315021023155, "learning_rate": 1.2392206085093823e-06, "loss": 0.1929, "step": 1302 }, { "epoch": 0.29647326507394767, "grad_norm": 1.8673010049645025, "learning_rate": 1.2392040790986594e-06, "loss": 0.17, "step": 1303 }, { "epoch": 0.2967007963594994, "grad_norm": 2.360098461281988, "learning_rate": 1.2391875371347303e-06, "loss": 0.1221, "step": 1304 }, { "epoch": 0.29692832764505117, "grad_norm": 1.149049669073457, "learning_rate": 1.2391709826179327e-06, "loss": 0.0648, "step": 1305 }, { "epoch": 0.297155858930603, "grad_norm": 2.7074403192946948, "learning_rate": 1.239154415548605e-06, "loss": 0.101, "step": 1306 }, { "epoch": 0.29738339021615473, "grad_norm": 2.3404087696279006, "learning_rate": 1.2391378359270859e-06, "loss": 0.169, "step": 1307 }, { "epoch": 0.2976109215017065, "grad_norm": 2.051588482824801, "learning_rate": 1.2391212437537138e-06, "loss": 0.1231, "step": 1308 }, { "epoch": 0.29783845278725823, "grad_norm": 2.242825915734783, "learning_rate": 1.2391046390288287e-06, "loss": 0.0926, "step": 1309 }, { "epoch": 0.29806598407281, "grad_norm": 1.544022609869059, "learning_rate": 1.2390880217527692e-06, "loss": 0.1112, "step": 1310 }, { "epoch": 0.2982935153583618, "grad_norm": 2.1469992174478802, "learning_rate": 1.2390713919258752e-06, "loss": 0.142, "step": 1311 }, { "epoch": 0.29852104664391355, "grad_norm": 1.7368698248867847, "learning_rate": 1.2390547495484866e-06, "loss": 0.173, "step": 1312 }, { "epoch": 0.2987485779294653, "grad_norm": 1.935573701314408, "learning_rate": 1.2390380946209436e-06, "loss": 0.1042, "step": 1313 }, { "epoch": 0.29897610921501705, "grad_norm": 2.312605974001612, "learning_rate": 1.2390214271435863e-06, "loss": 0.1058, "step": 1314 }, { "epoch": 0.2992036405005688, "grad_norm": 2.0971420605328417, "learning_rate": 1.2390047471167557e-06, "loss": 0.1131, "step": 1315 }, { "epoch": 0.2994311717861206, "grad_norm": 2.3687418924208568, "learning_rate": 1.2389880545407926e-06, "loss": 0.1197, "step": 1316 }, { "epoch": 0.29965870307167236, "grad_norm": 2.1772555556434163, "learning_rate": 1.2389713494160379e-06, "loss": 0.1362, "step": 1317 }, { "epoch": 0.2998862343572241, "grad_norm": 2.347318027018112, "learning_rate": 1.2389546317428335e-06, "loss": 0.115, "step": 1318 }, { "epoch": 0.30011376564277586, "grad_norm": 2.393814516886665, "learning_rate": 1.2389379015215208e-06, "loss": 0.1366, "step": 1319 }, { "epoch": 0.3003412969283277, "grad_norm": 3.2734975537480295, "learning_rate": 1.2389211587524416e-06, "loss": 0.1663, "step": 1320 }, { "epoch": 0.3005688282138794, "grad_norm": 2.476288177865563, "learning_rate": 1.2389044034359383e-06, "loss": 0.1585, "step": 1321 }, { "epoch": 0.3007963594994312, "grad_norm": 2.0801439225061094, "learning_rate": 1.2388876355723533e-06, "loss": 0.0896, "step": 1322 }, { "epoch": 0.30102389078498293, "grad_norm": 3.3649680946347043, "learning_rate": 1.2388708551620295e-06, "loss": 0.1927, "step": 1323 }, { "epoch": 0.3012514220705347, "grad_norm": 1.6798655722697455, "learning_rate": 1.2388540622053095e-06, "loss": 0.1514, "step": 1324 }, { "epoch": 0.3014789533560865, "grad_norm": 2.2188046109768433, "learning_rate": 1.2388372567025367e-06, "loss": 0.1886, "step": 1325 }, { "epoch": 0.30170648464163824, "grad_norm": 3.8153066915737437, "learning_rate": 1.2388204386540546e-06, "loss": 0.1965, "step": 1326 }, { "epoch": 0.30193401592719, "grad_norm": 3.157753400659548, "learning_rate": 1.238803608060207e-06, "loss": 0.1571, "step": 1327 }, { "epoch": 0.30216154721274174, "grad_norm": 3.5073308744349068, "learning_rate": 1.2387867649213376e-06, "loss": 0.1517, "step": 1328 }, { "epoch": 0.3023890784982935, "grad_norm": 2.1553373677368017, "learning_rate": 1.2387699092377908e-06, "loss": 0.2075, "step": 1329 }, { "epoch": 0.3026166097838453, "grad_norm": 2.282662404456656, "learning_rate": 1.2387530410099113e-06, "loss": 0.1414, "step": 1330 }, { "epoch": 0.30284414106939705, "grad_norm": 1.9117313188558176, "learning_rate": 1.2387361602380436e-06, "loss": 0.1266, "step": 1331 }, { "epoch": 0.3030716723549488, "grad_norm": 1.8130769390361627, "learning_rate": 1.238719266922533e-06, "loss": 0.1256, "step": 1332 }, { "epoch": 0.30329920364050056, "grad_norm": 2.8536582395130865, "learning_rate": 1.238702361063724e-06, "loss": 0.1814, "step": 1333 }, { "epoch": 0.3035267349260523, "grad_norm": 2.8179372897441057, "learning_rate": 1.2386854426619633e-06, "loss": 0.1992, "step": 1334 }, { "epoch": 0.3037542662116041, "grad_norm": 2.8198093864971274, "learning_rate": 1.2386685117175956e-06, "loss": 0.1244, "step": 1335 }, { "epoch": 0.30398179749715587, "grad_norm": 2.2011541075966514, "learning_rate": 1.2386515682309676e-06, "loss": 0.1803, "step": 1336 }, { "epoch": 0.3042093287827076, "grad_norm": 2.1936077489701513, "learning_rate": 1.2386346122024253e-06, "loss": 0.1471, "step": 1337 }, { "epoch": 0.3044368600682594, "grad_norm": 2.3414747287597586, "learning_rate": 1.2386176436323154e-06, "loss": 0.1968, "step": 1338 }, { "epoch": 0.3046643913538111, "grad_norm": 2.4409570439965265, "learning_rate": 1.2386006625209847e-06, "loss": 0.1304, "step": 1339 }, { "epoch": 0.30489192263936293, "grad_norm": 1.5611171851558396, "learning_rate": 1.2385836688687802e-06, "loss": 0.1826, "step": 1340 }, { "epoch": 0.3051194539249147, "grad_norm": 2.1495118418095758, "learning_rate": 1.2385666626760493e-06, "loss": 0.1299, "step": 1341 }, { "epoch": 0.30534698521046644, "grad_norm": 2.427709246358795, "learning_rate": 1.2385496439431395e-06, "loss": 0.2042, "step": 1342 }, { "epoch": 0.3055745164960182, "grad_norm": 2.1582025174813477, "learning_rate": 1.2385326126703986e-06, "loss": 0.2137, "step": 1343 }, { "epoch": 0.30580204778156994, "grad_norm": 2.692539956880222, "learning_rate": 1.2385155688581746e-06, "loss": 0.1421, "step": 1344 }, { "epoch": 0.30602957906712175, "grad_norm": 1.7945869672607249, "learning_rate": 1.238498512506816e-06, "loss": 0.1111, "step": 1345 }, { "epoch": 0.3062571103526735, "grad_norm": 2.4481827112208236, "learning_rate": 1.2384814436166715e-06, "loss": 0.198, "step": 1346 }, { "epoch": 0.30648464163822525, "grad_norm": 1.515871383058607, "learning_rate": 1.2384643621880898e-06, "loss": 0.1038, "step": 1347 }, { "epoch": 0.306712172923777, "grad_norm": 1.1946482530332898, "learning_rate": 1.2384472682214201e-06, "loss": 0.1369, "step": 1348 }, { "epoch": 0.30693970420932876, "grad_norm": 2.487738709975654, "learning_rate": 1.2384301617170116e-06, "loss": 0.1327, "step": 1349 }, { "epoch": 0.30716723549488056, "grad_norm": 2.2787395109301496, "learning_rate": 1.2384130426752142e-06, "loss": 0.2413, "step": 1350 }, { "epoch": 0.3073947667804323, "grad_norm": 3.009302223374968, "learning_rate": 1.2383959110963775e-06, "loss": 0.1308, "step": 1351 }, { "epoch": 0.30762229806598407, "grad_norm": 1.9025690830276472, "learning_rate": 1.2383787669808518e-06, "loss": 0.1292, "step": 1352 }, { "epoch": 0.3078498293515358, "grad_norm": 2.478845494921755, "learning_rate": 1.2383616103289871e-06, "loss": 0.1388, "step": 1353 }, { "epoch": 0.3080773606370876, "grad_norm": 2.378732608316692, "learning_rate": 1.238344441141135e-06, "loss": 0.1445, "step": 1354 }, { "epoch": 0.3083048919226394, "grad_norm": 2.7865872438029604, "learning_rate": 1.2383272594176454e-06, "loss": 0.1987, "step": 1355 }, { "epoch": 0.30853242320819113, "grad_norm": 4.941618358694522, "learning_rate": 1.23831006515887e-06, "loss": 0.2554, "step": 1356 }, { "epoch": 0.3087599544937429, "grad_norm": 2.3648132845973433, "learning_rate": 1.2382928583651601e-06, "loss": 0.1316, "step": 1357 }, { "epoch": 0.30898748577929463, "grad_norm": 4.506401473111362, "learning_rate": 1.2382756390368674e-06, "loss": 0.2064, "step": 1358 }, { "epoch": 0.30921501706484644, "grad_norm": 2.9032765876327367, "learning_rate": 1.2382584071743438e-06, "loss": 0.1722, "step": 1359 }, { "epoch": 0.3094425483503982, "grad_norm": 1.9393994140062751, "learning_rate": 1.2382411627779414e-06, "loss": 0.1198, "step": 1360 }, { "epoch": 0.30967007963594995, "grad_norm": 2.9033108515884813, "learning_rate": 1.2382239058480128e-06, "loss": 0.1898, "step": 1361 }, { "epoch": 0.3098976109215017, "grad_norm": 1.7474335493562614, "learning_rate": 1.2382066363849106e-06, "loss": 0.0873, "step": 1362 }, { "epoch": 0.31012514220705345, "grad_norm": 2.7271456687130535, "learning_rate": 1.2381893543889878e-06, "loss": 0.1164, "step": 1363 }, { "epoch": 0.31035267349260526, "grad_norm": 2.442928236122465, "learning_rate": 1.2381720598605976e-06, "loss": 0.1866, "step": 1364 }, { "epoch": 0.310580204778157, "grad_norm": 2.2683129394489336, "learning_rate": 1.2381547528000934e-06, "loss": 0.1821, "step": 1365 }, { "epoch": 0.31080773606370876, "grad_norm": 2.2249086535735665, "learning_rate": 1.238137433207829e-06, "loss": 0.1424, "step": 1366 }, { "epoch": 0.3110352673492605, "grad_norm": 2.3904987570803424, "learning_rate": 1.2381201010841585e-06, "loss": 0.1241, "step": 1367 }, { "epoch": 0.31126279863481227, "grad_norm": 3.3568266666034563, "learning_rate": 1.2381027564294359e-06, "loss": 0.2353, "step": 1368 }, { "epoch": 0.3114903299203641, "grad_norm": 1.089454840869158, "learning_rate": 1.238085399244016e-06, "loss": 0.1225, "step": 1369 }, { "epoch": 0.3117178612059158, "grad_norm": 1.9458300879080968, "learning_rate": 1.2380680295282532e-06, "loss": 0.1969, "step": 1370 }, { "epoch": 0.3119453924914676, "grad_norm": 2.205384735367907, "learning_rate": 1.2380506472825025e-06, "loss": 0.1051, "step": 1371 }, { "epoch": 0.31217292377701933, "grad_norm": 3.009662856212347, "learning_rate": 1.2380332525071194e-06, "loss": 0.1357, "step": 1372 }, { "epoch": 0.3124004550625711, "grad_norm": 1.7090482855605478, "learning_rate": 1.2380158452024595e-06, "loss": 0.1093, "step": 1373 }, { "epoch": 0.3126279863481229, "grad_norm": 1.534224124551681, "learning_rate": 1.2379984253688783e-06, "loss": 0.1668, "step": 1374 }, { "epoch": 0.31285551763367464, "grad_norm": 3.5021434401932847, "learning_rate": 1.237980993006732e-06, "loss": 0.2277, "step": 1375 }, { "epoch": 0.3130830489192264, "grad_norm": 2.262191330295691, "learning_rate": 1.2379635481163768e-06, "loss": 0.2239, "step": 1376 }, { "epoch": 0.31331058020477814, "grad_norm": 2.053106476927726, "learning_rate": 1.2379460906981692e-06, "loss": 0.1531, "step": 1377 }, { "epoch": 0.3135381114903299, "grad_norm": 1.5039843101917945, "learning_rate": 1.237928620752466e-06, "loss": 0.15, "step": 1378 }, { "epoch": 0.3137656427758817, "grad_norm": 3.906363623322993, "learning_rate": 1.2379111382796246e-06, "loss": 0.1751, "step": 1379 }, { "epoch": 0.31399317406143346, "grad_norm": 1.8885019656196067, "learning_rate": 1.2378936432800017e-06, "loss": 0.1706, "step": 1380 }, { "epoch": 0.3142207053469852, "grad_norm": 2.868561218863271, "learning_rate": 1.2378761357539554e-06, "loss": 0.2075, "step": 1381 }, { "epoch": 0.31444823663253696, "grad_norm": 3.0041888547459332, "learning_rate": 1.2378586157018434e-06, "loss": 0.1174, "step": 1382 }, { "epoch": 0.3146757679180887, "grad_norm": 4.579577850116939, "learning_rate": 1.2378410831240235e-06, "loss": 0.0989, "step": 1383 }, { "epoch": 0.3149032992036405, "grad_norm": 2.369378417403813, "learning_rate": 1.2378235380208542e-06, "loss": 0.1939, "step": 1384 }, { "epoch": 0.31513083048919227, "grad_norm": 1.3212831618328642, "learning_rate": 1.2378059803926941e-06, "loss": 0.0939, "step": 1385 }, { "epoch": 0.315358361774744, "grad_norm": 1.7303755529967304, "learning_rate": 1.2377884102399023e-06, "loss": 0.0969, "step": 1386 }, { "epoch": 0.3155858930602958, "grad_norm": 2.1687520273562058, "learning_rate": 1.2377708275628375e-06, "loss": 0.1362, "step": 1387 }, { "epoch": 0.3158134243458475, "grad_norm": 2.1690288580082115, "learning_rate": 1.2377532323618593e-06, "loss": 0.15, "step": 1388 }, { "epoch": 0.31604095563139933, "grad_norm": 2.6772029483945987, "learning_rate": 1.2377356246373271e-06, "loss": 0.156, "step": 1389 }, { "epoch": 0.3162684869169511, "grad_norm": 2.3434657796789335, "learning_rate": 1.2377180043896012e-06, "loss": 0.1605, "step": 1390 }, { "epoch": 0.31649601820250284, "grad_norm": 0.6778545782857848, "learning_rate": 1.2377003716190411e-06, "loss": 0.0678, "step": 1391 }, { "epoch": 0.3167235494880546, "grad_norm": 3.4250150700071598, "learning_rate": 1.2376827263260078e-06, "loss": 0.2121, "step": 1392 }, { "epoch": 0.3169510807736064, "grad_norm": 2.212315886742257, "learning_rate": 1.2376650685108612e-06, "loss": 0.0926, "step": 1393 }, { "epoch": 0.31717861205915815, "grad_norm": 2.544863602950953, "learning_rate": 1.2376473981739632e-06, "loss": 0.2237, "step": 1394 }, { "epoch": 0.3174061433447099, "grad_norm": 3.814819174160117, "learning_rate": 1.237629715315674e-06, "loss": 0.1803, "step": 1395 }, { "epoch": 0.31763367463026165, "grad_norm": 2.016517421139005, "learning_rate": 1.2376120199363554e-06, "loss": 0.1629, "step": 1396 }, { "epoch": 0.3178612059158134, "grad_norm": 2.2015049586409736, "learning_rate": 1.2375943120363692e-06, "loss": 0.1774, "step": 1397 }, { "epoch": 0.3180887372013652, "grad_norm": 1.4748569856435156, "learning_rate": 1.2375765916160773e-06, "loss": 0.0892, "step": 1398 }, { "epoch": 0.31831626848691696, "grad_norm": 4.396707263537578, "learning_rate": 1.2375588586758415e-06, "loss": 0.1694, "step": 1399 }, { "epoch": 0.3185437997724687, "grad_norm": 2.815879553875731, "learning_rate": 1.2375411132160245e-06, "loss": 0.1574, "step": 1400 }, { "epoch": 0.31877133105802047, "grad_norm": 2.854288805845529, "learning_rate": 1.2375233552369892e-06, "loss": 0.21, "step": 1401 }, { "epoch": 0.3189988623435722, "grad_norm": 2.4400815913032137, "learning_rate": 1.237505584739098e-06, "loss": 0.1222, "step": 1402 }, { "epoch": 0.31922639362912403, "grad_norm": 2.448908720687653, "learning_rate": 1.2374878017227147e-06, "loss": 0.1494, "step": 1403 }, { "epoch": 0.3194539249146758, "grad_norm": 1.687277139104771, "learning_rate": 1.237470006188202e-06, "loss": 0.0914, "step": 1404 }, { "epoch": 0.31968145620022753, "grad_norm": 2.2925412031811776, "learning_rate": 1.2374521981359245e-06, "loss": 0.0881, "step": 1405 }, { "epoch": 0.3199089874857793, "grad_norm": 4.072013951747903, "learning_rate": 1.2374343775662456e-06, "loss": 0.1696, "step": 1406 }, { "epoch": 0.32013651877133104, "grad_norm": 2.5836523726802545, "learning_rate": 1.2374165444795296e-06, "loss": 0.1346, "step": 1407 }, { "epoch": 0.32036405005688284, "grad_norm": 1.599365912466973, "learning_rate": 1.237398698876141e-06, "loss": 0.1517, "step": 1408 }, { "epoch": 0.3205915813424346, "grad_norm": 3.0092798969280694, "learning_rate": 1.2373808407564446e-06, "loss": 0.1388, "step": 1409 }, { "epoch": 0.32081911262798635, "grad_norm": 2.7681165920899917, "learning_rate": 1.2373629701208053e-06, "loss": 0.0903, "step": 1410 }, { "epoch": 0.3210466439135381, "grad_norm": 2.33093444363794, "learning_rate": 1.2373450869695883e-06, "loss": 0.1258, "step": 1411 }, { "epoch": 0.32127417519908985, "grad_norm": 1.3355433862599404, "learning_rate": 1.2373271913031593e-06, "loss": 0.0933, "step": 1412 }, { "epoch": 0.32150170648464166, "grad_norm": 3.6953831980698326, "learning_rate": 1.237309283121884e-06, "loss": 0.1799, "step": 1413 }, { "epoch": 0.3217292377701934, "grad_norm": 1.4551969737419015, "learning_rate": 1.237291362426128e-06, "loss": 0.132, "step": 1414 }, { "epoch": 0.32195676905574516, "grad_norm": 1.3811005049648641, "learning_rate": 1.2372734292162584e-06, "loss": 0.1393, "step": 1415 }, { "epoch": 0.3221843003412969, "grad_norm": 2.1330825424153894, "learning_rate": 1.237255483492641e-06, "loss": 0.1174, "step": 1416 }, { "epoch": 0.32241183162684867, "grad_norm": 2.487852309027526, "learning_rate": 1.2372375252556429e-06, "loss": 0.1417, "step": 1417 }, { "epoch": 0.3226393629124005, "grad_norm": 1.913453520107482, "learning_rate": 1.2372195545056308e-06, "loss": 0.0859, "step": 1418 }, { "epoch": 0.3228668941979522, "grad_norm": 2.646487035772668, "learning_rate": 1.2372015712429725e-06, "loss": 0.1506, "step": 1419 }, { "epoch": 0.323094425483504, "grad_norm": 3.245246064065534, "learning_rate": 1.237183575468035e-06, "loss": 0.1634, "step": 1420 }, { "epoch": 0.32332195676905573, "grad_norm": 2.0793785188235714, "learning_rate": 1.2371655671811866e-06, "loss": 0.0893, "step": 1421 }, { "epoch": 0.3235494880546075, "grad_norm": 3.3709416770572203, "learning_rate": 1.237147546382795e-06, "loss": 0.1863, "step": 1422 }, { "epoch": 0.3237770193401593, "grad_norm": 1.9834221905090585, "learning_rate": 1.237129513073229e-06, "loss": 0.1022, "step": 1423 }, { "epoch": 0.32400455062571104, "grad_norm": 1.5082121448900583, "learning_rate": 1.2371114672528565e-06, "loss": 0.1191, "step": 1424 }, { "epoch": 0.3242320819112628, "grad_norm": 2.1618548110562728, "learning_rate": 1.2370934089220466e-06, "loss": 0.1036, "step": 1425 }, { "epoch": 0.32445961319681454, "grad_norm": 3.0267433921571247, "learning_rate": 1.2370753380811685e-06, "loss": 0.1331, "step": 1426 }, { "epoch": 0.32468714448236635, "grad_norm": 2.328447963796751, "learning_rate": 1.2370572547305915e-06, "loss": 0.0995, "step": 1427 }, { "epoch": 0.3249146757679181, "grad_norm": 1.5043231729048063, "learning_rate": 1.237039158870685e-06, "loss": 0.0938, "step": 1428 }, { "epoch": 0.32514220705346986, "grad_norm": 1.489228734233685, "learning_rate": 1.237021050501819e-06, "loss": 0.183, "step": 1429 }, { "epoch": 0.3253697383390216, "grad_norm": 1.3957978981478696, "learning_rate": 1.2370029296243638e-06, "loss": 0.137, "step": 1430 }, { "epoch": 0.32559726962457336, "grad_norm": 1.6002182368576523, "learning_rate": 1.2369847962386893e-06, "loss": 0.1383, "step": 1431 }, { "epoch": 0.32582480091012517, "grad_norm": 1.4292210315358218, "learning_rate": 1.2369666503451665e-06, "loss": 0.1306, "step": 1432 }, { "epoch": 0.3260523321956769, "grad_norm": 3.3664684804237153, "learning_rate": 1.2369484919441662e-06, "loss": 0.1495, "step": 1433 }, { "epoch": 0.32627986348122867, "grad_norm": 2.213331961777345, "learning_rate": 1.2369303210360592e-06, "loss": 0.104, "step": 1434 }, { "epoch": 0.3265073947667804, "grad_norm": 2.7142292072973224, "learning_rate": 1.2369121376212174e-06, "loss": 0.23, "step": 1435 }, { "epoch": 0.3267349260523322, "grad_norm": 2.5989691526691905, "learning_rate": 1.236893941700012e-06, "loss": 0.164, "step": 1436 }, { "epoch": 0.326962457337884, "grad_norm": 2.7909547799726666, "learning_rate": 1.236875733272815e-06, "loss": 0.1231, "step": 1437 }, { "epoch": 0.32718998862343573, "grad_norm": 1.5171731115835614, "learning_rate": 1.236857512339999e-06, "loss": 0.1568, "step": 1438 }, { "epoch": 0.3274175199089875, "grad_norm": 2.7299542263447987, "learning_rate": 1.2368392789019356e-06, "loss": 0.192, "step": 1439 }, { "epoch": 0.32764505119453924, "grad_norm": 3.3727472288143074, "learning_rate": 1.2368210329589982e-06, "loss": 0.1387, "step": 1440 }, { "epoch": 0.327872582480091, "grad_norm": 1.9170836491353773, "learning_rate": 1.236802774511559e-06, "loss": 0.1325, "step": 1441 }, { "epoch": 0.3281001137656428, "grad_norm": 1.3065482462033158, "learning_rate": 1.2367845035599919e-06, "loss": 0.1347, "step": 1442 }, { "epoch": 0.32832764505119455, "grad_norm": 1.170941186213132, "learning_rate": 1.2367662201046698e-06, "loss": 0.0776, "step": 1443 }, { "epoch": 0.3285551763367463, "grad_norm": 2.650987144280419, "learning_rate": 1.2367479241459666e-06, "loss": 0.137, "step": 1444 }, { "epoch": 0.32878270762229805, "grad_norm": 2.607148114564764, "learning_rate": 1.2367296156842562e-06, "loss": 0.1722, "step": 1445 }, { "epoch": 0.3290102389078498, "grad_norm": 1.9899827485582995, "learning_rate": 1.2367112947199128e-06, "loss": 0.0736, "step": 1446 }, { "epoch": 0.3292377701934016, "grad_norm": 1.7939831808853237, "learning_rate": 1.2366929612533109e-06, "loss": 0.1678, "step": 1447 }, { "epoch": 0.32946530147895337, "grad_norm": 2.18373767390569, "learning_rate": 1.2366746152848249e-06, "loss": 0.1248, "step": 1448 }, { "epoch": 0.3296928327645051, "grad_norm": 1.6786238142668934, "learning_rate": 1.2366562568148301e-06, "loss": 0.113, "step": 1449 }, { "epoch": 0.32992036405005687, "grad_norm": 2.1261100071128216, "learning_rate": 1.2366378858437016e-06, "loss": 0.2123, "step": 1450 }, { "epoch": 0.3301478953356086, "grad_norm": 2.428509398466611, "learning_rate": 1.2366195023718152e-06, "loss": 0.1528, "step": 1451 }, { "epoch": 0.33037542662116043, "grad_norm": 2.0995327071550745, "learning_rate": 1.2366011063995458e-06, "loss": 0.1479, "step": 1452 }, { "epoch": 0.3306029579067122, "grad_norm": 2.7571183190896544, "learning_rate": 1.2365826979272702e-06, "loss": 0.2723, "step": 1453 }, { "epoch": 0.33083048919226393, "grad_norm": 1.37616338455632, "learning_rate": 1.2365642769553644e-06, "loss": 0.186, "step": 1454 }, { "epoch": 0.3310580204778157, "grad_norm": 3.562357493800966, "learning_rate": 1.2365458434842046e-06, "loss": 0.1935, "step": 1455 }, { "epoch": 0.33128555176336744, "grad_norm": 3.0055653446008757, "learning_rate": 1.2365273975141675e-06, "loss": 0.1349, "step": 1456 }, { "epoch": 0.33151308304891924, "grad_norm": 1.9084405856232312, "learning_rate": 1.2365089390456308e-06, "loss": 0.1579, "step": 1457 }, { "epoch": 0.331740614334471, "grad_norm": 2.509163343882828, "learning_rate": 1.236490468078971e-06, "loss": 0.1399, "step": 1458 }, { "epoch": 0.33196814562002275, "grad_norm": 2.762892695226815, "learning_rate": 1.2364719846145662e-06, "loss": 0.1123, "step": 1459 }, { "epoch": 0.3321956769055745, "grad_norm": 1.509507061278107, "learning_rate": 1.2364534886527937e-06, "loss": 0.0761, "step": 1460 }, { "epoch": 0.3324232081911263, "grad_norm": 2.125250796251379, "learning_rate": 1.2364349801940317e-06, "loss": 0.1979, "step": 1461 }, { "epoch": 0.33265073947667806, "grad_norm": 1.564090485896412, "learning_rate": 1.2364164592386588e-06, "loss": 0.1273, "step": 1462 }, { "epoch": 0.3328782707622298, "grad_norm": 2.1634983990483825, "learning_rate": 1.2363979257870528e-06, "loss": 0.1942, "step": 1463 }, { "epoch": 0.33310580204778156, "grad_norm": 3.3365080077721854, "learning_rate": 1.2363793798395932e-06, "loss": 0.1447, "step": 1464 }, { "epoch": 0.3333333333333333, "grad_norm": 3.9561125952221525, "learning_rate": 1.2363608213966588e-06, "loss": 0.2634, "step": 1465 }, { "epoch": 0.3335608646188851, "grad_norm": 2.251288332330216, "learning_rate": 1.2363422504586286e-06, "loss": 0.124, "step": 1466 }, { "epoch": 0.3337883959044369, "grad_norm": 2.6458027444889844, "learning_rate": 1.2363236670258827e-06, "loss": 0.1211, "step": 1467 }, { "epoch": 0.3340159271899886, "grad_norm": 2.256871006091209, "learning_rate": 1.2363050710988003e-06, "loss": 0.18, "step": 1468 }, { "epoch": 0.3342434584755404, "grad_norm": 2.3666680923714973, "learning_rate": 1.236286462677762e-06, "loss": 0.1146, "step": 1469 }, { "epoch": 0.33447098976109213, "grad_norm": 1.9163232744074985, "learning_rate": 1.236267841763148e-06, "loss": 0.2394, "step": 1470 }, { "epoch": 0.33469852104664394, "grad_norm": 1.0000779857222588, "learning_rate": 1.2362492083553387e-06, "loss": 0.1034, "step": 1471 }, { "epoch": 0.3349260523321957, "grad_norm": 4.037436172394108, "learning_rate": 1.236230562454715e-06, "loss": 0.1436, "step": 1472 }, { "epoch": 0.33515358361774744, "grad_norm": 2.470429929609661, "learning_rate": 1.2362119040616582e-06, "loss": 0.2313, "step": 1473 }, { "epoch": 0.3353811149032992, "grad_norm": 1.7641673534435953, "learning_rate": 1.2361932331765492e-06, "loss": 0.1131, "step": 1474 }, { "epoch": 0.33560864618885095, "grad_norm": 2.283488425453049, "learning_rate": 1.2361745497997702e-06, "loss": 0.2092, "step": 1475 }, { "epoch": 0.33583617747440275, "grad_norm": 2.44990291433213, "learning_rate": 1.2361558539317023e-06, "loss": 0.1677, "step": 1476 }, { "epoch": 0.3360637087599545, "grad_norm": 2.9170619880191664, "learning_rate": 1.2361371455727284e-06, "loss": 0.1622, "step": 1477 }, { "epoch": 0.33629124004550626, "grad_norm": 1.8904885782974936, "learning_rate": 1.2361184247232302e-06, "loss": 0.174, "step": 1478 }, { "epoch": 0.336518771331058, "grad_norm": 2.729547155076976, "learning_rate": 1.2360996913835907e-06, "loss": 0.2034, "step": 1479 }, { "epoch": 0.33674630261660976, "grad_norm": 1.5905963504881455, "learning_rate": 1.2360809455541928e-06, "loss": 0.1115, "step": 1480 }, { "epoch": 0.33697383390216157, "grad_norm": 2.0824120193071987, "learning_rate": 1.2360621872354195e-06, "loss": 0.0922, "step": 1481 }, { "epoch": 0.3372013651877133, "grad_norm": 1.7589178939868375, "learning_rate": 1.236043416427654e-06, "loss": 0.1538, "step": 1482 }, { "epoch": 0.33742889647326507, "grad_norm": 2.2945363962104595, "learning_rate": 1.2360246331312804e-06, "loss": 0.102, "step": 1483 }, { "epoch": 0.3376564277588168, "grad_norm": 2.6126607217811233, "learning_rate": 1.2360058373466821e-06, "loss": 0.1287, "step": 1484 }, { "epoch": 0.3378839590443686, "grad_norm": 3.6250866550257776, "learning_rate": 1.2359870290742437e-06, "loss": 0.2267, "step": 1485 }, { "epoch": 0.3381114903299204, "grad_norm": 1.9639418873270704, "learning_rate": 1.2359682083143494e-06, "loss": 0.1678, "step": 1486 }, { "epoch": 0.33833902161547214, "grad_norm": 1.7491629271174762, "learning_rate": 1.2359493750673835e-06, "loss": 0.0793, "step": 1487 }, { "epoch": 0.3385665529010239, "grad_norm": 2.9106462432976703, "learning_rate": 1.2359305293337316e-06, "loss": 0.1325, "step": 1488 }, { "epoch": 0.33879408418657564, "grad_norm": 1.276937248770152, "learning_rate": 1.2359116711137785e-06, "loss": 0.0878, "step": 1489 }, { "epoch": 0.3390216154721274, "grad_norm": 2.7341692722816355, "learning_rate": 1.2358928004079095e-06, "loss": 0.204, "step": 1490 }, { "epoch": 0.3392491467576792, "grad_norm": 1.938938866233754, "learning_rate": 1.2358739172165108e-06, "loss": 0.2461, "step": 1491 }, { "epoch": 0.33947667804323095, "grad_norm": 2.4567051660275325, "learning_rate": 1.235855021539968e-06, "loss": 0.108, "step": 1492 }, { "epoch": 0.3397042093287827, "grad_norm": 1.7865319339113264, "learning_rate": 1.2358361133786668e-06, "loss": 0.1137, "step": 1493 }, { "epoch": 0.33993174061433445, "grad_norm": 1.2860622511723663, "learning_rate": 1.2358171927329946e-06, "loss": 0.1066, "step": 1494 }, { "epoch": 0.34015927189988626, "grad_norm": 2.3217877744819155, "learning_rate": 1.2357982596033374e-06, "loss": 0.1195, "step": 1495 }, { "epoch": 0.340386803185438, "grad_norm": 1.405655578490845, "learning_rate": 1.2357793139900823e-06, "loss": 0.1675, "step": 1496 }, { "epoch": 0.34061433447098977, "grad_norm": 1.590690138510479, "learning_rate": 1.2357603558936168e-06, "loss": 0.0715, "step": 1497 }, { "epoch": 0.3408418657565415, "grad_norm": 3.6662652923931276, "learning_rate": 1.2357413853143282e-06, "loss": 0.2245, "step": 1498 }, { "epoch": 0.34106939704209327, "grad_norm": 2.0102829011772885, "learning_rate": 1.2357224022526041e-06, "loss": 0.1646, "step": 1499 }, { "epoch": 0.3412969283276451, "grad_norm": 2.0702589837056724, "learning_rate": 1.2357034067088327e-06, "loss": 0.1011, "step": 1500 }, { "epoch": 0.34152445961319683, "grad_norm": 2.054708483083456, "learning_rate": 1.235684398683402e-06, "loss": 0.2029, "step": 1501 }, { "epoch": 0.3417519908987486, "grad_norm": 3.1447213877676923, "learning_rate": 1.2356653781767009e-06, "loss": 0.1381, "step": 1502 }, { "epoch": 0.34197952218430033, "grad_norm": 3.052797117025516, "learning_rate": 1.2356463451891174e-06, "loss": 0.1964, "step": 1503 }, { "epoch": 0.3422070534698521, "grad_norm": 1.4103275005890912, "learning_rate": 1.2356272997210414e-06, "loss": 0.0826, "step": 1504 }, { "epoch": 0.3424345847554039, "grad_norm": 1.5146376320654487, "learning_rate": 1.2356082417728612e-06, "loss": 0.1341, "step": 1505 }, { "epoch": 0.34266211604095564, "grad_norm": 1.3688524329968725, "learning_rate": 1.2355891713449672e-06, "loss": 0.085, "step": 1506 }, { "epoch": 0.3428896473265074, "grad_norm": 1.8416893881250955, "learning_rate": 1.2355700884377485e-06, "loss": 0.078, "step": 1507 }, { "epoch": 0.34311717861205915, "grad_norm": 1.8673717383483042, "learning_rate": 1.2355509930515958e-06, "loss": 0.1891, "step": 1508 }, { "epoch": 0.3433447098976109, "grad_norm": 1.6669190438483317, "learning_rate": 1.2355318851868987e-06, "loss": 0.1358, "step": 1509 }, { "epoch": 0.3435722411831627, "grad_norm": 1.5281076865918781, "learning_rate": 1.235512764844048e-06, "loss": 0.0914, "step": 1510 }, { "epoch": 0.34379977246871446, "grad_norm": 1.8126618025798658, "learning_rate": 1.2354936320234345e-06, "loss": 0.0873, "step": 1511 }, { "epoch": 0.3440273037542662, "grad_norm": 1.5533427781258378, "learning_rate": 1.2354744867254493e-06, "loss": 0.1394, "step": 1512 }, { "epoch": 0.34425483503981796, "grad_norm": 2.7412588250152417, "learning_rate": 1.2354553289504836e-06, "loss": 0.1263, "step": 1513 }, { "epoch": 0.3444823663253697, "grad_norm": 1.9701757720367468, "learning_rate": 1.2354361586989287e-06, "loss": 0.1668, "step": 1514 }, { "epoch": 0.3447098976109215, "grad_norm": 2.234561622535395, "learning_rate": 1.235416975971177e-06, "loss": 0.2347, "step": 1515 }, { "epoch": 0.3449374288964733, "grad_norm": 1.640428050965421, "learning_rate": 1.2353977807676205e-06, "loss": 0.1271, "step": 1516 }, { "epoch": 0.345164960182025, "grad_norm": 1.6546462677875515, "learning_rate": 1.2353785730886506e-06, "loss": 0.1331, "step": 1517 }, { "epoch": 0.3453924914675768, "grad_norm": 2.5775712943835503, "learning_rate": 1.235359352934661e-06, "loss": 0.1179, "step": 1518 }, { "epoch": 0.34562002275312853, "grad_norm": 1.7488081801237039, "learning_rate": 1.235340120306044e-06, "loss": 0.1999, "step": 1519 }, { "epoch": 0.34584755403868034, "grad_norm": 2.313669007056231, "learning_rate": 1.2353208752031925e-06, "loss": 0.1494, "step": 1520 }, { "epoch": 0.3460750853242321, "grad_norm": 2.9872932892021216, "learning_rate": 1.2353016176265002e-06, "loss": 0.1356, "step": 1521 }, { "epoch": 0.34630261660978384, "grad_norm": 1.7333666870847044, "learning_rate": 1.2352823475763603e-06, "loss": 0.1156, "step": 1522 }, { "epoch": 0.3465301478953356, "grad_norm": 2.1394646023986685, "learning_rate": 1.2352630650531672e-06, "loss": 0.1081, "step": 1523 }, { "epoch": 0.34675767918088735, "grad_norm": 4.030786885073691, "learning_rate": 1.2352437700573147e-06, "loss": 0.1657, "step": 1524 }, { "epoch": 0.34698521046643915, "grad_norm": 1.1100438837053874, "learning_rate": 1.235224462589197e-06, "loss": 0.0775, "step": 1525 }, { "epoch": 0.3472127417519909, "grad_norm": 2.0720474848328188, "learning_rate": 1.2352051426492089e-06, "loss": 0.1879, "step": 1526 }, { "epoch": 0.34744027303754266, "grad_norm": 2.7187132915415995, "learning_rate": 1.2351858102377455e-06, "loss": 0.1459, "step": 1527 }, { "epoch": 0.3476678043230944, "grad_norm": 1.5373928030910593, "learning_rate": 1.2351664653552012e-06, "loss": 0.0697, "step": 1528 }, { "epoch": 0.34789533560864616, "grad_norm": 14.12497611556846, "learning_rate": 1.235147108001972e-06, "loss": 0.0612, "step": 1529 }, { "epoch": 0.34812286689419797, "grad_norm": 2.619961242647469, "learning_rate": 1.2351277381784532e-06, "loss": 0.1477, "step": 1530 }, { "epoch": 0.3483503981797497, "grad_norm": 1.410493095940194, "learning_rate": 1.235108355885041e-06, "loss": 0.1716, "step": 1531 }, { "epoch": 0.3485779294653015, "grad_norm": 1.8615160375782975, "learning_rate": 1.2350889611221315e-06, "loss": 0.1329, "step": 1532 }, { "epoch": 0.3488054607508532, "grad_norm": 1.701233603216574, "learning_rate": 1.2350695538901207e-06, "loss": 0.1123, "step": 1533 }, { "epoch": 0.34903299203640503, "grad_norm": 2.439441991830968, "learning_rate": 1.2350501341894055e-06, "loss": 0.1771, "step": 1534 }, { "epoch": 0.3492605233219568, "grad_norm": 2.203771442439418, "learning_rate": 1.235030702020383e-06, "loss": 0.2083, "step": 1535 }, { "epoch": 0.34948805460750854, "grad_norm": 1.69346918827836, "learning_rate": 1.23501125738345e-06, "loss": 0.1102, "step": 1536 }, { "epoch": 0.3497155858930603, "grad_norm": 1.5090268058773, "learning_rate": 1.2349918002790043e-06, "loss": 0.1208, "step": 1537 }, { "epoch": 0.34994311717861204, "grad_norm": 1.0946145352592496, "learning_rate": 1.2349723307074432e-06, "loss": 0.0996, "step": 1538 }, { "epoch": 0.35017064846416385, "grad_norm": 2.301142408035351, "learning_rate": 1.2349528486691648e-06, "loss": 0.1756, "step": 1539 }, { "epoch": 0.3503981797497156, "grad_norm": 2.4673785079867843, "learning_rate": 1.2349333541645672e-06, "loss": 0.1278, "step": 1540 }, { "epoch": 0.35062571103526735, "grad_norm": 1.481886020980225, "learning_rate": 1.2349138471940489e-06, "loss": 0.1291, "step": 1541 }, { "epoch": 0.3508532423208191, "grad_norm": 2.1610786496075063, "learning_rate": 1.2348943277580086e-06, "loss": 0.0983, "step": 1542 }, { "epoch": 0.35108077360637086, "grad_norm": 1.3504864412021873, "learning_rate": 1.2348747958568452e-06, "loss": 0.0784, "step": 1543 }, { "epoch": 0.35130830489192266, "grad_norm": 1.7504763814802757, "learning_rate": 1.2348552514909579e-06, "loss": 0.1448, "step": 1544 }, { "epoch": 0.3515358361774744, "grad_norm": 2.9148856498776805, "learning_rate": 1.2348356946607462e-06, "loss": 0.1094, "step": 1545 }, { "epoch": 0.35176336746302617, "grad_norm": 1.550887500201074, "learning_rate": 1.2348161253666096e-06, "loss": 0.0963, "step": 1546 }, { "epoch": 0.3519908987485779, "grad_norm": 2.3639406464976824, "learning_rate": 1.2347965436089484e-06, "loss": 0.2153, "step": 1547 }, { "epoch": 0.35221843003412967, "grad_norm": 4.095826967593627, "learning_rate": 1.2347769493881625e-06, "loss": 0.1153, "step": 1548 }, { "epoch": 0.3524459613196815, "grad_norm": 1.277515077329962, "learning_rate": 1.2347573427046527e-06, "loss": 0.1465, "step": 1549 }, { "epoch": 0.35267349260523323, "grad_norm": 1.9893288512267644, "learning_rate": 1.2347377235588193e-06, "loss": 0.267, "step": 1550 }, { "epoch": 0.352901023890785, "grad_norm": 1.8718250436694113, "learning_rate": 1.2347180919510637e-06, "loss": 0.0973, "step": 1551 }, { "epoch": 0.35312855517633673, "grad_norm": 3.36620771557597, "learning_rate": 1.234698447881787e-06, "loss": 0.1827, "step": 1552 }, { "epoch": 0.3533560864618885, "grad_norm": 2.5974374464162717, "learning_rate": 1.2346787913513904e-06, "loss": 0.0969, "step": 1553 }, { "epoch": 0.3535836177474403, "grad_norm": 2.360372812373607, "learning_rate": 1.234659122360276e-06, "loss": 0.1914, "step": 1554 }, { "epoch": 0.35381114903299204, "grad_norm": 1.5248022456878274, "learning_rate": 1.2346394409088457e-06, "loss": 0.0785, "step": 1555 }, { "epoch": 0.3540386803185438, "grad_norm": 1.2961890354523629, "learning_rate": 1.2346197469975016e-06, "loss": 0.1526, "step": 1556 }, { "epoch": 0.35426621160409555, "grad_norm": 2.592957397339881, "learning_rate": 1.2346000406266466e-06, "loss": 0.1947, "step": 1557 }, { "epoch": 0.3544937428896473, "grad_norm": 2.701062801999542, "learning_rate": 1.2345803217966829e-06, "loss": 0.1241, "step": 1558 }, { "epoch": 0.3547212741751991, "grad_norm": 1.957155061679964, "learning_rate": 1.2345605905080141e-06, "loss": 0.0938, "step": 1559 }, { "epoch": 0.35494880546075086, "grad_norm": 1.613669040940011, "learning_rate": 1.234540846761043e-06, "loss": 0.1728, "step": 1560 }, { "epoch": 0.3551763367463026, "grad_norm": 3.0998396775844705, "learning_rate": 1.2345210905561733e-06, "loss": 0.1917, "step": 1561 }, { "epoch": 0.35540386803185436, "grad_norm": 1.9882010530259928, "learning_rate": 1.2345013218938089e-06, "loss": 0.1309, "step": 1562 }, { "epoch": 0.3556313993174061, "grad_norm": 1.5264967935473548, "learning_rate": 1.2344815407743537e-06, "loss": 0.0714, "step": 1563 }, { "epoch": 0.3558589306029579, "grad_norm": 1.752943768112035, "learning_rate": 1.2344617471982119e-06, "loss": 0.1269, "step": 1564 }, { "epoch": 0.3560864618885097, "grad_norm": 2.8753549426732405, "learning_rate": 1.2344419411657883e-06, "loss": 0.1418, "step": 1565 }, { "epoch": 0.3563139931740614, "grad_norm": 2.8850586384790553, "learning_rate": 1.2344221226774874e-06, "loss": 0.1861, "step": 1566 }, { "epoch": 0.3565415244596132, "grad_norm": 2.273061381543351, "learning_rate": 1.2344022917337147e-06, "loss": 0.1687, "step": 1567 }, { "epoch": 0.356769055745165, "grad_norm": 1.6580131621970915, "learning_rate": 1.234382448334875e-06, "loss": 0.0952, "step": 1568 }, { "epoch": 0.35699658703071674, "grad_norm": 1.453031205202229, "learning_rate": 1.2343625924813741e-06, "loss": 0.0823, "step": 1569 }, { "epoch": 0.3572241183162685, "grad_norm": 2.9435349750610493, "learning_rate": 1.234342724173618e-06, "loss": 0.1503, "step": 1570 }, { "epoch": 0.35745164960182024, "grad_norm": 1.6110988488812807, "learning_rate": 1.2343228434120124e-06, "loss": 0.1525, "step": 1571 }, { "epoch": 0.357679180887372, "grad_norm": 1.6748300562704266, "learning_rate": 1.2343029501969638e-06, "loss": 0.1568, "step": 1572 }, { "epoch": 0.3579067121729238, "grad_norm": 1.6603809620526928, "learning_rate": 1.2342830445288788e-06, "loss": 0.1728, "step": 1573 }, { "epoch": 0.35813424345847555, "grad_norm": 1.1014107617300568, "learning_rate": 1.2342631264081643e-06, "loss": 0.0965, "step": 1574 }, { "epoch": 0.3583617747440273, "grad_norm": 1.2732581681154054, "learning_rate": 1.234243195835227e-06, "loss": 0.1203, "step": 1575 }, { "epoch": 0.35858930602957906, "grad_norm": 2.5995048914773053, "learning_rate": 1.234223252810475e-06, "loss": 0.1603, "step": 1576 }, { "epoch": 0.3588168373151308, "grad_norm": 2.444792569525692, "learning_rate": 1.2342032973343152e-06, "loss": 0.149, "step": 1577 }, { "epoch": 0.3590443686006826, "grad_norm": 3.7704137884261915, "learning_rate": 1.2341833294071558e-06, "loss": 0.2355, "step": 1578 }, { "epoch": 0.35927189988623437, "grad_norm": 1.4763457287663684, "learning_rate": 1.2341633490294046e-06, "loss": 0.1272, "step": 1579 }, { "epoch": 0.3594994311717861, "grad_norm": 2.020387401103254, "learning_rate": 1.2341433562014705e-06, "loss": 0.1376, "step": 1580 }, { "epoch": 0.3597269624573379, "grad_norm": 2.0277864066708378, "learning_rate": 1.2341233509237616e-06, "loss": 0.1355, "step": 1581 }, { "epoch": 0.3599544937428896, "grad_norm": 1.9258577073033267, "learning_rate": 1.234103333196687e-06, "loss": 0.1073, "step": 1582 }, { "epoch": 0.36018202502844143, "grad_norm": 1.7051549966202966, "learning_rate": 1.2340833030206558e-06, "loss": 0.0829, "step": 1583 }, { "epoch": 0.3604095563139932, "grad_norm": 3.206269578758429, "learning_rate": 1.2340632603960774e-06, "loss": 0.1355, "step": 1584 }, { "epoch": 0.36063708759954494, "grad_norm": 2.1361631881752707, "learning_rate": 1.2340432053233615e-06, "loss": 0.0976, "step": 1585 }, { "epoch": 0.3608646188850967, "grad_norm": 1.443174416772564, "learning_rate": 1.2340231378029177e-06, "loss": 0.1334, "step": 1586 }, { "epoch": 0.36109215017064844, "grad_norm": 1.6382188653330987, "learning_rate": 1.2340030578351564e-06, "loss": 0.1066, "step": 1587 }, { "epoch": 0.36131968145620025, "grad_norm": 2.198309549797981, "learning_rate": 1.2339829654204878e-06, "loss": 0.1208, "step": 1588 }, { "epoch": 0.361547212741752, "grad_norm": 1.8122457718538585, "learning_rate": 1.2339628605593229e-06, "loss": 0.0885, "step": 1589 }, { "epoch": 0.36177474402730375, "grad_norm": 2.132309804894969, "learning_rate": 1.2339427432520722e-06, "loss": 0.1064, "step": 1590 }, { "epoch": 0.3620022753128555, "grad_norm": 2.3756453959322026, "learning_rate": 1.2339226134991471e-06, "loss": 0.2288, "step": 1591 }, { "epoch": 0.36222980659840726, "grad_norm": 2.265871972514948, "learning_rate": 1.2339024713009592e-06, "loss": 0.1249, "step": 1592 }, { "epoch": 0.36245733788395906, "grad_norm": 1.243536992034958, "learning_rate": 1.2338823166579197e-06, "loss": 0.1503, "step": 1593 }, { "epoch": 0.3626848691695108, "grad_norm": 1.9724737870995313, "learning_rate": 1.2338621495704409e-06, "loss": 0.1493, "step": 1594 }, { "epoch": 0.36291240045506257, "grad_norm": 2.065008978290742, "learning_rate": 1.2338419700389349e-06, "loss": 0.1542, "step": 1595 }, { "epoch": 0.3631399317406143, "grad_norm": 2.958677711950205, "learning_rate": 1.2338217780638137e-06, "loss": 0.1661, "step": 1596 }, { "epoch": 0.36336746302616607, "grad_norm": 1.7562621032856343, "learning_rate": 1.2338015736454908e-06, "loss": 0.2308, "step": 1597 }, { "epoch": 0.3635949943117179, "grad_norm": 2.403965977937276, "learning_rate": 1.2337813567843784e-06, "loss": 0.1, "step": 1598 }, { "epoch": 0.36382252559726963, "grad_norm": 2.1245624482048955, "learning_rate": 1.2337611274808901e-06, "loss": 0.1712, "step": 1599 }, { "epoch": 0.3640500568828214, "grad_norm": 2.6993144740475326, "learning_rate": 1.2337408857354394e-06, "loss": 0.1376, "step": 1600 }, { "epoch": 0.36427758816837313, "grad_norm": 1.8364645187073159, "learning_rate": 1.2337206315484396e-06, "loss": 0.1144, "step": 1601 }, { "epoch": 0.36450511945392494, "grad_norm": 2.803014924439121, "learning_rate": 1.2337003649203049e-06, "loss": 0.2327, "step": 1602 }, { "epoch": 0.3647326507394767, "grad_norm": 3.0921523919013425, "learning_rate": 1.2336800858514498e-06, "loss": 0.186, "step": 1603 }, { "epoch": 0.36496018202502845, "grad_norm": 4.351581648162927, "learning_rate": 1.2336597943422883e-06, "loss": 0.1506, "step": 1604 }, { "epoch": 0.3651877133105802, "grad_norm": 2.507983408971188, "learning_rate": 1.2336394903932353e-06, "loss": 0.1458, "step": 1605 }, { "epoch": 0.36541524459613195, "grad_norm": 1.6768145183868413, "learning_rate": 1.233619174004706e-06, "loss": 0.137, "step": 1606 }, { "epoch": 0.36564277588168376, "grad_norm": 1.324685955900903, "learning_rate": 1.233598845177115e-06, "loss": 0.1097, "step": 1607 }, { "epoch": 0.3658703071672355, "grad_norm": 2.1338037745135443, "learning_rate": 1.2335785039108787e-06, "loss": 0.1784, "step": 1608 }, { "epoch": 0.36609783845278726, "grad_norm": 2.3694737994247217, "learning_rate": 1.233558150206412e-06, "loss": 0.1991, "step": 1609 }, { "epoch": 0.366325369738339, "grad_norm": 2.262254134148611, "learning_rate": 1.2335377840641314e-06, "loss": 0.1146, "step": 1610 }, { "epoch": 0.36655290102389076, "grad_norm": 3.1588719561640985, "learning_rate": 1.233517405484453e-06, "loss": 0.1852, "step": 1611 }, { "epoch": 0.3667804323094426, "grad_norm": 1.8006159718642254, "learning_rate": 1.2334970144677929e-06, "loss": 0.0956, "step": 1612 }, { "epoch": 0.3670079635949943, "grad_norm": 2.563120316581933, "learning_rate": 1.2334766110145684e-06, "loss": 0.1088, "step": 1613 }, { "epoch": 0.3672354948805461, "grad_norm": 2.235035261873256, "learning_rate": 1.2334561951251967e-06, "loss": 0.1607, "step": 1614 }, { "epoch": 0.36746302616609783, "grad_norm": 2.3167227182441885, "learning_rate": 1.2334357668000943e-06, "loss": 0.1144, "step": 1615 }, { "epoch": 0.3676905574516496, "grad_norm": 1.6955497804569648, "learning_rate": 1.2334153260396795e-06, "loss": 0.1868, "step": 1616 }, { "epoch": 0.3679180887372014, "grad_norm": 3.4310526929941485, "learning_rate": 1.2333948728443692e-06, "loss": 0.173, "step": 1617 }, { "epoch": 0.36814562002275314, "grad_norm": 1.5138266896493986, "learning_rate": 1.2333744072145824e-06, "loss": 0.0827, "step": 1618 }, { "epoch": 0.3683731513083049, "grad_norm": 1.4784636330255214, "learning_rate": 1.2333539291507365e-06, "loss": 0.1569, "step": 1619 }, { "epoch": 0.36860068259385664, "grad_norm": 2.704193421290089, "learning_rate": 1.2333334386532507e-06, "loss": 0.2538, "step": 1620 }, { "epoch": 0.3688282138794084, "grad_norm": 1.742774697152776, "learning_rate": 1.2333129357225434e-06, "loss": 0.1055, "step": 1621 }, { "epoch": 0.3690557451649602, "grad_norm": 2.9047485648418165, "learning_rate": 1.2332924203590341e-06, "loss": 0.1663, "step": 1622 }, { "epoch": 0.36928327645051195, "grad_norm": 1.314984403604908, "learning_rate": 1.2332718925631414e-06, "loss": 0.1806, "step": 1623 }, { "epoch": 0.3695108077360637, "grad_norm": 1.9510383304155832, "learning_rate": 1.2332513523352853e-06, "loss": 0.1242, "step": 1624 }, { "epoch": 0.36973833902161546, "grad_norm": 2.559935844387003, "learning_rate": 1.2332307996758854e-06, "loss": 0.1478, "step": 1625 }, { "epoch": 0.3699658703071672, "grad_norm": 2.88434227714402, "learning_rate": 1.233210234585362e-06, "loss": 0.2302, "step": 1626 }, { "epoch": 0.370193401592719, "grad_norm": 2.1413595772930156, "learning_rate": 1.2331896570641354e-06, "loss": 0.1838, "step": 1627 }, { "epoch": 0.37042093287827077, "grad_norm": 1.7922287054453576, "learning_rate": 1.233169067112626e-06, "loss": 0.0986, "step": 1628 }, { "epoch": 0.3706484641638225, "grad_norm": 1.3323542239151287, "learning_rate": 1.2331484647312545e-06, "loss": 0.1167, "step": 1629 }, { "epoch": 0.3708759954493743, "grad_norm": 2.46069963324228, "learning_rate": 1.2331278499204423e-06, "loss": 0.1553, "step": 1630 }, { "epoch": 0.371103526734926, "grad_norm": 2.1291335035057104, "learning_rate": 1.2331072226806107e-06, "loss": 0.1213, "step": 1631 }, { "epoch": 0.37133105802047783, "grad_norm": 2.2993804494788184, "learning_rate": 1.233086583012181e-06, "loss": 0.189, "step": 1632 }, { "epoch": 0.3715585893060296, "grad_norm": 1.1991511815123632, "learning_rate": 1.233065930915575e-06, "loss": 0.1255, "step": 1633 }, { "epoch": 0.37178612059158134, "grad_norm": 1.8590842904843217, "learning_rate": 1.2330452663912155e-06, "loss": 0.1271, "step": 1634 }, { "epoch": 0.3720136518771331, "grad_norm": 2.373387407985538, "learning_rate": 1.233024589439524e-06, "loss": 0.2041, "step": 1635 }, { "epoch": 0.3722411831626849, "grad_norm": 2.5782541251038333, "learning_rate": 1.2330039000609233e-06, "loss": 0.1329, "step": 1636 }, { "epoch": 0.37246871444823665, "grad_norm": 2.671348916705849, "learning_rate": 1.2329831982558365e-06, "loss": 0.1618, "step": 1637 }, { "epoch": 0.3726962457337884, "grad_norm": 3.5366418411008933, "learning_rate": 1.2329624840246867e-06, "loss": 0.1783, "step": 1638 }, { "epoch": 0.37292377701934015, "grad_norm": 2.0065876352581062, "learning_rate": 1.2329417573678974e-06, "loss": 0.1156, "step": 1639 }, { "epoch": 0.3731513083048919, "grad_norm": 1.975723329702582, "learning_rate": 1.2329210182858915e-06, "loss": 0.0976, "step": 1640 }, { "epoch": 0.3733788395904437, "grad_norm": 1.7544914994439575, "learning_rate": 1.2329002667790937e-06, "loss": 0.1745, "step": 1641 }, { "epoch": 0.37360637087599546, "grad_norm": 2.7660619782133193, "learning_rate": 1.2328795028479275e-06, "loss": 0.1375, "step": 1642 }, { "epoch": 0.3738339021615472, "grad_norm": 1.9338461955860702, "learning_rate": 1.2328587264928176e-06, "loss": 0.1323, "step": 1643 }, { "epoch": 0.37406143344709897, "grad_norm": 1.8716686944849417, "learning_rate": 1.2328379377141885e-06, "loss": 0.1937, "step": 1644 }, { "epoch": 0.3742889647326507, "grad_norm": 2.1413388813058014, "learning_rate": 1.2328171365124655e-06, "loss": 0.1235, "step": 1645 }, { "epoch": 0.3745164960182025, "grad_norm": 2.857793709939454, "learning_rate": 1.2327963228880733e-06, "loss": 0.1416, "step": 1646 }, { "epoch": 0.3747440273037543, "grad_norm": 1.714384370948363, "learning_rate": 1.2327754968414372e-06, "loss": 0.1492, "step": 1647 }, { "epoch": 0.37497155858930603, "grad_norm": 3.7781621231583724, "learning_rate": 1.232754658372983e-06, "loss": 0.1682, "step": 1648 }, { "epoch": 0.3751990898748578, "grad_norm": 2.0029578055155244, "learning_rate": 1.2327338074831366e-06, "loss": 0.147, "step": 1649 }, { "epoch": 0.37542662116040953, "grad_norm": 1.529791159285724, "learning_rate": 1.2327129441723242e-06, "loss": 0.1046, "step": 1650 }, { "epoch": 0.37565415244596134, "grad_norm": 2.2299976733099482, "learning_rate": 1.2326920684409724e-06, "loss": 0.1617, "step": 1651 }, { "epoch": 0.3758816837315131, "grad_norm": 3.398460731332957, "learning_rate": 1.2326711802895077e-06, "loss": 0.1554, "step": 1652 }, { "epoch": 0.37610921501706485, "grad_norm": 2.130256184128101, "learning_rate": 1.2326502797183568e-06, "loss": 0.1392, "step": 1653 }, { "epoch": 0.3763367463026166, "grad_norm": 1.741889659518372, "learning_rate": 1.2326293667279472e-06, "loss": 0.1305, "step": 1654 }, { "epoch": 0.37656427758816835, "grad_norm": 2.3236871523378024, "learning_rate": 1.232608441318706e-06, "loss": 0.1208, "step": 1655 }, { "epoch": 0.37679180887372016, "grad_norm": 1.9685344629470136, "learning_rate": 1.232587503491061e-06, "loss": 0.1238, "step": 1656 }, { "epoch": 0.3770193401592719, "grad_norm": 3.0384258350922804, "learning_rate": 1.2325665532454403e-06, "loss": 0.205, "step": 1657 }, { "epoch": 0.37724687144482366, "grad_norm": 1.997898726616711, "learning_rate": 1.2325455905822719e-06, "loss": 0.1299, "step": 1658 }, { "epoch": 0.3774744027303754, "grad_norm": 1.5727587532720877, "learning_rate": 1.2325246155019844e-06, "loss": 0.0932, "step": 1659 }, { "epoch": 0.37770193401592717, "grad_norm": 1.8064403636927258, "learning_rate": 1.2325036280050063e-06, "loss": 0.0926, "step": 1660 }, { "epoch": 0.377929465301479, "grad_norm": 1.8737817001388601, "learning_rate": 1.2324826280917664e-06, "loss": 0.1008, "step": 1661 }, { "epoch": 0.3781569965870307, "grad_norm": 2.459023941480425, "learning_rate": 1.2324616157626943e-06, "loss": 0.1157, "step": 1662 }, { "epoch": 0.3783845278725825, "grad_norm": 2.3398340438241125, "learning_rate": 1.2324405910182195e-06, "loss": 0.2354, "step": 1663 }, { "epoch": 0.37861205915813423, "grad_norm": 5.2055530727041175, "learning_rate": 1.2324195538587713e-06, "loss": 0.2025, "step": 1664 }, { "epoch": 0.378839590443686, "grad_norm": 1.413281070506484, "learning_rate": 1.23239850428478e-06, "loss": 0.1108, "step": 1665 }, { "epoch": 0.3790671217292378, "grad_norm": 1.7169043667269983, "learning_rate": 1.2323774422966756e-06, "loss": 0.0666, "step": 1666 }, { "epoch": 0.37929465301478954, "grad_norm": 2.0458621353853834, "learning_rate": 1.2323563678948885e-06, "loss": 0.1231, "step": 1667 }, { "epoch": 0.3795221843003413, "grad_norm": 1.5816448796619285, "learning_rate": 1.2323352810798498e-06, "loss": 0.0909, "step": 1668 }, { "epoch": 0.37974971558589304, "grad_norm": 1.6282393884382793, "learning_rate": 1.23231418185199e-06, "loss": 0.1704, "step": 1669 }, { "epoch": 0.3799772468714448, "grad_norm": 2.698679980786322, "learning_rate": 1.2322930702117406e-06, "loss": 0.1471, "step": 1670 }, { "epoch": 0.3802047781569966, "grad_norm": 2.5205534849870093, "learning_rate": 1.232271946159533e-06, "loss": 0.1428, "step": 1671 }, { "epoch": 0.38043230944254836, "grad_norm": 2.8663298664670402, "learning_rate": 1.2322508096957992e-06, "loss": 0.1582, "step": 1672 }, { "epoch": 0.3806598407281001, "grad_norm": 3.606634062381101, "learning_rate": 1.2322296608209709e-06, "loss": 0.2957, "step": 1673 }, { "epoch": 0.38088737201365186, "grad_norm": 3.3538078455291354, "learning_rate": 1.2322084995354805e-06, "loss": 0.1774, "step": 1674 }, { "epoch": 0.38111490329920367, "grad_norm": 2.5478844933367513, "learning_rate": 1.2321873258397602e-06, "loss": 0.1554, "step": 1675 }, { "epoch": 0.3813424345847554, "grad_norm": 1.540805779875073, "learning_rate": 1.232166139734243e-06, "loss": 0.0745, "step": 1676 }, { "epoch": 0.38156996587030717, "grad_norm": 2.9923127366387146, "learning_rate": 1.2321449412193622e-06, "loss": 0.1788, "step": 1677 }, { "epoch": 0.3817974971558589, "grad_norm": 2.6896661400495088, "learning_rate": 1.2321237302955505e-06, "loss": 0.1626, "step": 1678 }, { "epoch": 0.3820250284414107, "grad_norm": 1.648292924059167, "learning_rate": 1.2321025069632416e-06, "loss": 0.0765, "step": 1679 }, { "epoch": 0.3822525597269625, "grad_norm": 2.91462725493712, "learning_rate": 1.2320812712228694e-06, "loss": 0.2092, "step": 1680 }, { "epoch": 0.38248009101251423, "grad_norm": 1.5588683602741686, "learning_rate": 1.2320600230748677e-06, "loss": 0.1276, "step": 1681 }, { "epoch": 0.382707622298066, "grad_norm": 1.7369346142612618, "learning_rate": 1.232038762519671e-06, "loss": 0.0779, "step": 1682 }, { "epoch": 0.38293515358361774, "grad_norm": 1.6511070866583553, "learning_rate": 1.2320174895577138e-06, "loss": 0.252, "step": 1683 }, { "epoch": 0.3831626848691695, "grad_norm": 1.3968162606976187, "learning_rate": 1.2319962041894307e-06, "loss": 0.0834, "step": 1684 }, { "epoch": 0.3833902161547213, "grad_norm": 2.653016956021873, "learning_rate": 1.2319749064152569e-06, "loss": 0.2423, "step": 1685 }, { "epoch": 0.38361774744027305, "grad_norm": 3.176933366606272, "learning_rate": 1.2319535962356277e-06, "loss": 0.1773, "step": 1686 }, { "epoch": 0.3838452787258248, "grad_norm": 1.800240710088086, "learning_rate": 1.2319322736509784e-06, "loss": 0.1566, "step": 1687 }, { "epoch": 0.38407281001137655, "grad_norm": 2.2064113571623496, "learning_rate": 1.2319109386617452e-06, "loss": 0.1613, "step": 1688 }, { "epoch": 0.3843003412969283, "grad_norm": 2.3087080662459387, "learning_rate": 1.2318895912683638e-06, "loss": 0.1545, "step": 1689 }, { "epoch": 0.3845278725824801, "grad_norm": 1.837126601147182, "learning_rate": 1.2318682314712706e-06, "loss": 0.129, "step": 1690 }, { "epoch": 0.38475540386803186, "grad_norm": 4.830481371425988, "learning_rate": 1.2318468592709022e-06, "loss": 0.266, "step": 1691 }, { "epoch": 0.3849829351535836, "grad_norm": 2.441725289188966, "learning_rate": 1.2318254746676954e-06, "loss": 0.1554, "step": 1692 }, { "epoch": 0.38521046643913537, "grad_norm": 1.6831246409088816, "learning_rate": 1.2318040776620872e-06, "loss": 0.1086, "step": 1693 }, { "epoch": 0.3854379977246871, "grad_norm": 2.296389371722253, "learning_rate": 1.231782668254515e-06, "loss": 0.2497, "step": 1694 }, { "epoch": 0.3856655290102389, "grad_norm": 2.8107479812520917, "learning_rate": 1.2317612464454161e-06, "loss": 0.1653, "step": 1695 }, { "epoch": 0.3858930602957907, "grad_norm": 2.616629150374977, "learning_rate": 1.2317398122352289e-06, "loss": 0.1879, "step": 1696 }, { "epoch": 0.38612059158134243, "grad_norm": 3.376176637253404, "learning_rate": 1.2317183656243912e-06, "loss": 0.3059, "step": 1697 }, { "epoch": 0.3863481228668942, "grad_norm": 2.017884168409449, "learning_rate": 1.231696906613341e-06, "loss": 0.1138, "step": 1698 }, { "epoch": 0.38657565415244594, "grad_norm": 2.383903076889702, "learning_rate": 1.2316754352025173e-06, "loss": 0.158, "step": 1699 }, { "epoch": 0.38680318543799774, "grad_norm": 2.70005415107345, "learning_rate": 1.2316539513923585e-06, "loss": 0.1242, "step": 1700 }, { "epoch": 0.3870307167235495, "grad_norm": 1.4233225917656729, "learning_rate": 1.2316324551833042e-06, "loss": 0.1382, "step": 1701 }, { "epoch": 0.38725824800910125, "grad_norm": 1.3613270033001337, "learning_rate": 1.2316109465757934e-06, "loss": 0.0959, "step": 1702 }, { "epoch": 0.387485779294653, "grad_norm": 1.7422130731580474, "learning_rate": 1.231589425570266e-06, "loss": 0.1694, "step": 1703 }, { "epoch": 0.38771331058020475, "grad_norm": 1.1477314066351112, "learning_rate": 1.2315678921671615e-06, "loss": 0.1187, "step": 1704 }, { "epoch": 0.38794084186575656, "grad_norm": 2.0773570604590708, "learning_rate": 1.2315463463669202e-06, "loss": 0.1594, "step": 1705 }, { "epoch": 0.3881683731513083, "grad_norm": 1.3478575868276468, "learning_rate": 1.2315247881699825e-06, "loss": 0.0991, "step": 1706 }, { "epoch": 0.38839590443686006, "grad_norm": 2.354162536662197, "learning_rate": 1.2315032175767887e-06, "loss": 0.1008, "step": 1707 }, { "epoch": 0.3886234357224118, "grad_norm": 1.6538240421949093, "learning_rate": 1.23148163458778e-06, "loss": 0.1275, "step": 1708 }, { "epoch": 0.3888509670079636, "grad_norm": 1.5311610006843637, "learning_rate": 1.2314600392033974e-06, "loss": 0.0968, "step": 1709 }, { "epoch": 0.3890784982935154, "grad_norm": 2.4683314063286734, "learning_rate": 1.2314384314240824e-06, "loss": 0.1078, "step": 1710 }, { "epoch": 0.3893060295790671, "grad_norm": 2.2142084705382636, "learning_rate": 1.2314168112502765e-06, "loss": 0.2684, "step": 1711 }, { "epoch": 0.3895335608646189, "grad_norm": 1.2536342442255415, "learning_rate": 1.2313951786824213e-06, "loss": 0.1248, "step": 1712 }, { "epoch": 0.38976109215017063, "grad_norm": 2.6941191297381293, "learning_rate": 1.2313735337209593e-06, "loss": 0.2081, "step": 1713 }, { "epoch": 0.38998862343572244, "grad_norm": 2.30131500883838, "learning_rate": 1.231351876366333e-06, "loss": 0.1491, "step": 1714 }, { "epoch": 0.3902161547212742, "grad_norm": 2.3841970341168204, "learning_rate": 1.2313302066189846e-06, "loss": 0.0996, "step": 1715 }, { "epoch": 0.39044368600682594, "grad_norm": 2.6347474164019937, "learning_rate": 1.2313085244793573e-06, "loss": 0.1772, "step": 1716 }, { "epoch": 0.3906712172923777, "grad_norm": 1.824344810986505, "learning_rate": 1.2312868299478944e-06, "loss": 0.0914, "step": 1717 }, { "epoch": 0.39089874857792944, "grad_norm": 1.773023478059839, "learning_rate": 1.2312651230250387e-06, "loss": 0.182, "step": 1718 }, { "epoch": 0.39112627986348125, "grad_norm": 2.558986871552195, "learning_rate": 1.2312434037112345e-06, "loss": 0.1102, "step": 1719 }, { "epoch": 0.391353811149033, "grad_norm": 1.547999236334684, "learning_rate": 1.2312216720069251e-06, "loss": 0.1488, "step": 1720 }, { "epoch": 0.39158134243458476, "grad_norm": 2.8418021765667247, "learning_rate": 1.2311999279125552e-06, "loss": 0.1658, "step": 1721 }, { "epoch": 0.3918088737201365, "grad_norm": 5.673024628891561, "learning_rate": 1.2311781714285689e-06, "loss": 0.221, "step": 1722 }, { "epoch": 0.39203640500568826, "grad_norm": 2.3720745639494494, "learning_rate": 1.231156402555411e-06, "loss": 0.1788, "step": 1723 }, { "epoch": 0.39226393629124007, "grad_norm": 1.7411798788297947, "learning_rate": 1.2311346212935262e-06, "loss": 0.1318, "step": 1724 }, { "epoch": 0.3924914675767918, "grad_norm": 1.8671484665662437, "learning_rate": 1.23111282764336e-06, "loss": 0.145, "step": 1725 }, { "epoch": 0.39271899886234357, "grad_norm": 1.257862162167775, "learning_rate": 1.2310910216053576e-06, "loss": 0.1059, "step": 1726 }, { "epoch": 0.3929465301478953, "grad_norm": 2.2794733420992275, "learning_rate": 1.2310692031799646e-06, "loss": 0.1938, "step": 1727 }, { "epoch": 0.3931740614334471, "grad_norm": 2.1119176270114313, "learning_rate": 1.2310473723676272e-06, "loss": 0.1157, "step": 1728 }, { "epoch": 0.3934015927189989, "grad_norm": 1.3881889344482166, "learning_rate": 1.2310255291687913e-06, "loss": 0.0832, "step": 1729 }, { "epoch": 0.39362912400455063, "grad_norm": 2.023136430703965, "learning_rate": 1.2310036735839037e-06, "loss": 0.1242, "step": 1730 }, { "epoch": 0.3938566552901024, "grad_norm": 2.624591745407245, "learning_rate": 1.2309818056134108e-06, "loss": 0.1507, "step": 1731 }, { "epoch": 0.39408418657565414, "grad_norm": 1.2844255816526005, "learning_rate": 1.2309599252577593e-06, "loss": 0.074, "step": 1732 }, { "epoch": 0.3943117178612059, "grad_norm": 2.43035532680509, "learning_rate": 1.230938032517397e-06, "loss": 0.2302, "step": 1733 }, { "epoch": 0.3945392491467577, "grad_norm": 2.4967953244354817, "learning_rate": 1.2309161273927708e-06, "loss": 0.122, "step": 1734 }, { "epoch": 0.39476678043230945, "grad_norm": 2.8201501794880643, "learning_rate": 1.2308942098843289e-06, "loss": 0.1314, "step": 1735 }, { "epoch": 0.3949943117178612, "grad_norm": 1.377416323825032, "learning_rate": 1.2308722799925188e-06, "loss": 0.1068, "step": 1736 }, { "epoch": 0.39522184300341295, "grad_norm": 3.157882549207335, "learning_rate": 1.2308503377177887e-06, "loss": 0.1845, "step": 1737 }, { "epoch": 0.3954493742889647, "grad_norm": 2.8098606380126943, "learning_rate": 1.2308283830605877e-06, "loss": 0.1387, "step": 1738 }, { "epoch": 0.3956769055745165, "grad_norm": 1.773927388786271, "learning_rate": 1.230806416021364e-06, "loss": 0.059, "step": 1739 }, { "epoch": 0.39590443686006827, "grad_norm": 2.440949931918227, "learning_rate": 1.2307844366005665e-06, "loss": 0.1398, "step": 1740 }, { "epoch": 0.39613196814562, "grad_norm": 1.5057479299724394, "learning_rate": 1.2307624447986446e-06, "loss": 0.1438, "step": 1741 }, { "epoch": 0.39635949943117177, "grad_norm": 1.7016670626096497, "learning_rate": 1.2307404406160476e-06, "loss": 0.1971, "step": 1742 }, { "epoch": 0.3965870307167236, "grad_norm": 2.1741262002450465, "learning_rate": 1.2307184240532255e-06, "loss": 0.1, "step": 1743 }, { "epoch": 0.39681456200227533, "grad_norm": 1.6480951134265682, "learning_rate": 1.2306963951106283e-06, "loss": 0.144, "step": 1744 }, { "epoch": 0.3970420932878271, "grad_norm": 1.4849711465690703, "learning_rate": 1.2306743537887058e-06, "loss": 0.2173, "step": 1745 }, { "epoch": 0.39726962457337883, "grad_norm": 1.750478097654071, "learning_rate": 1.2306523000879086e-06, "loss": 0.0965, "step": 1746 }, { "epoch": 0.3974971558589306, "grad_norm": 1.3984444468696073, "learning_rate": 1.230630234008688e-06, "loss": 0.1522, "step": 1747 }, { "epoch": 0.3977246871444824, "grad_norm": 1.6859723238809896, "learning_rate": 1.2306081555514942e-06, "loss": 0.0826, "step": 1748 }, { "epoch": 0.39795221843003414, "grad_norm": 2.580780832144414, "learning_rate": 1.2305860647167792e-06, "loss": 0.1193, "step": 1749 }, { "epoch": 0.3981797497155859, "grad_norm": 2.4353217528542532, "learning_rate": 1.2305639615049938e-06, "loss": 0.1157, "step": 1750 }, { "epoch": 0.39840728100113765, "grad_norm": 4.900966801210391, "learning_rate": 1.2305418459165902e-06, "loss": 0.1967, "step": 1751 }, { "epoch": 0.3986348122866894, "grad_norm": 2.340473825012059, "learning_rate": 1.2305197179520203e-06, "loss": 0.1513, "step": 1752 }, { "epoch": 0.3988623435722412, "grad_norm": 1.6987905011562776, "learning_rate": 1.2304975776117362e-06, "loss": 0.1526, "step": 1753 }, { "epoch": 0.39908987485779296, "grad_norm": 1.6154688960203794, "learning_rate": 1.2304754248961906e-06, "loss": 0.1199, "step": 1754 }, { "epoch": 0.3993174061433447, "grad_norm": 1.8354123927057464, "learning_rate": 1.2304532598058363e-06, "loss": 0.1382, "step": 1755 }, { "epoch": 0.39954493742889646, "grad_norm": 2.2202210353934757, "learning_rate": 1.230431082341126e-06, "loss": 0.1486, "step": 1756 }, { "epoch": 0.3997724687144482, "grad_norm": 2.1595657089748337, "learning_rate": 1.2304088925025133e-06, "loss": 0.1111, "step": 1757 }, { "epoch": 0.4, "grad_norm": 2.051418292936696, "learning_rate": 1.2303866902904515e-06, "loss": 0.1125, "step": 1758 }, { "epoch": 0.4002275312855518, "grad_norm": 1.4683812530262192, "learning_rate": 1.2303644757053945e-06, "loss": 0.121, "step": 1759 }, { "epoch": 0.4004550625711035, "grad_norm": 2.9222870011841544, "learning_rate": 1.2303422487477965e-06, "loss": 0.1909, "step": 1760 }, { "epoch": 0.4006825938566553, "grad_norm": 2.4890537658855485, "learning_rate": 1.230320009418111e-06, "loss": 0.1728, "step": 1761 }, { "epoch": 0.40091012514220703, "grad_norm": 1.6296292199347002, "learning_rate": 1.2302977577167937e-06, "loss": 0.1238, "step": 1762 }, { "epoch": 0.40113765642775884, "grad_norm": 1.9686403685728797, "learning_rate": 1.2302754936442986e-06, "loss": 0.1157, "step": 1763 }, { "epoch": 0.4013651877133106, "grad_norm": 3.3666708406816315, "learning_rate": 1.2302532172010809e-06, "loss": 0.1081, "step": 1764 }, { "epoch": 0.40159271899886234, "grad_norm": 1.2014023232311797, "learning_rate": 1.2302309283875958e-06, "loss": 0.0927, "step": 1765 }, { "epoch": 0.4018202502844141, "grad_norm": 1.896842215979294, "learning_rate": 1.230208627204299e-06, "loss": 0.1408, "step": 1766 }, { "epoch": 0.40204778156996585, "grad_norm": 2.6032820946847584, "learning_rate": 1.2301863136516463e-06, "loss": 0.2446, "step": 1767 }, { "epoch": 0.40227531285551765, "grad_norm": 2.6105518283568214, "learning_rate": 1.2301639877300937e-06, "loss": 0.1439, "step": 1768 }, { "epoch": 0.4025028441410694, "grad_norm": 1.3240539738643666, "learning_rate": 1.2301416494400974e-06, "loss": 0.106, "step": 1769 }, { "epoch": 0.40273037542662116, "grad_norm": 3.0980518217773265, "learning_rate": 1.2301192987821142e-06, "loss": 0.1403, "step": 1770 }, { "epoch": 0.4029579067121729, "grad_norm": 3.422689409153713, "learning_rate": 1.2300969357566008e-06, "loss": 0.1786, "step": 1771 }, { "epoch": 0.40318543799772466, "grad_norm": 2.4443605233075725, "learning_rate": 1.230074560364014e-06, "loss": 0.1532, "step": 1772 }, { "epoch": 0.40341296928327647, "grad_norm": 1.1122594786359905, "learning_rate": 1.2300521726048114e-06, "loss": 0.1026, "step": 1773 }, { "epoch": 0.4036405005688282, "grad_norm": 2.4538948369426996, "learning_rate": 1.2300297724794506e-06, "loss": 0.1436, "step": 1774 }, { "epoch": 0.40386803185437997, "grad_norm": 1.1549269839884009, "learning_rate": 1.2300073599883892e-06, "loss": 0.1081, "step": 1775 }, { "epoch": 0.4040955631399317, "grad_norm": 2.8038987971188822, "learning_rate": 1.2299849351320854e-06, "loss": 0.1752, "step": 1776 }, { "epoch": 0.40432309442548353, "grad_norm": 1.7076470891369002, "learning_rate": 1.2299624979109976e-06, "loss": 0.1199, "step": 1777 }, { "epoch": 0.4045506257110353, "grad_norm": 1.2055567774564526, "learning_rate": 1.229940048325584e-06, "loss": 0.1149, "step": 1778 }, { "epoch": 0.40477815699658704, "grad_norm": 3.58239703526112, "learning_rate": 1.229917586376304e-06, "loss": 0.1486, "step": 1779 }, { "epoch": 0.4050056882821388, "grad_norm": 1.4724200301684793, "learning_rate": 1.2298951120636163e-06, "loss": 0.1487, "step": 1780 }, { "epoch": 0.40523321956769054, "grad_norm": 2.180162524813868, "learning_rate": 1.2298726253879802e-06, "loss": 0.1407, "step": 1781 }, { "epoch": 0.40546075085324235, "grad_norm": 2.5371024976546424, "learning_rate": 1.2298501263498557e-06, "loss": 0.2582, "step": 1782 }, { "epoch": 0.4056882821387941, "grad_norm": 2.3758196386707136, "learning_rate": 1.229827614949702e-06, "loss": 0.153, "step": 1783 }, { "epoch": 0.40591581342434585, "grad_norm": 1.7394821341749753, "learning_rate": 1.22980509118798e-06, "loss": 0.1211, "step": 1784 }, { "epoch": 0.4061433447098976, "grad_norm": 1.8213502745824992, "learning_rate": 1.2297825550651491e-06, "loss": 0.1351, "step": 1785 }, { "epoch": 0.40637087599544935, "grad_norm": 1.5366804604898765, "learning_rate": 1.2297600065816707e-06, "loss": 0.172, "step": 1786 }, { "epoch": 0.40659840728100116, "grad_norm": 2.867711224850475, "learning_rate": 1.229737445738005e-06, "loss": 0.1413, "step": 1787 }, { "epoch": 0.4068259385665529, "grad_norm": 2.8801832410299717, "learning_rate": 1.2297148725346137e-06, "loss": 0.1068, "step": 1788 }, { "epoch": 0.40705346985210467, "grad_norm": 2.26302057191021, "learning_rate": 1.229692286971958e-06, "loss": 0.2012, "step": 1789 }, { "epoch": 0.4072810011376564, "grad_norm": 2.025110259831313, "learning_rate": 1.2296696890504992e-06, "loss": 0.0936, "step": 1790 }, { "epoch": 0.40750853242320817, "grad_norm": 2.038765984004919, "learning_rate": 1.2296470787706993e-06, "loss": 0.1893, "step": 1791 }, { "epoch": 0.40773606370876, "grad_norm": 1.2472833998526156, "learning_rate": 1.2296244561330206e-06, "loss": 0.1077, "step": 1792 }, { "epoch": 0.40796359499431173, "grad_norm": 1.652590726736059, "learning_rate": 1.2296018211379253e-06, "loss": 0.1657, "step": 1793 }, { "epoch": 0.4081911262798635, "grad_norm": 1.9370986933072707, "learning_rate": 1.229579173785876e-06, "loss": 0.1483, "step": 1794 }, { "epoch": 0.40841865756541523, "grad_norm": 2.1166679197631946, "learning_rate": 1.2295565140773357e-06, "loss": 0.097, "step": 1795 }, { "epoch": 0.408646188850967, "grad_norm": 1.6183560555534648, "learning_rate": 1.2295338420127673e-06, "loss": 0.1116, "step": 1796 }, { "epoch": 0.4088737201365188, "grad_norm": 4.024689412752317, "learning_rate": 1.2295111575926344e-06, "loss": 0.1566, "step": 1797 }, { "epoch": 0.40910125142207054, "grad_norm": 1.645924937894827, "learning_rate": 1.2294884608174007e-06, "loss": 0.0893, "step": 1798 }, { "epoch": 0.4093287827076223, "grad_norm": 2.1920700631018435, "learning_rate": 1.2294657516875297e-06, "loss": 0.1111, "step": 1799 }, { "epoch": 0.40955631399317405, "grad_norm": 2.1814995815983105, "learning_rate": 1.229443030203486e-06, "loss": 0.0974, "step": 1800 }, { "epoch": 0.4097838452787258, "grad_norm": 3.4034833350789553, "learning_rate": 1.2294202963657335e-06, "loss": 0.1251, "step": 1801 }, { "epoch": 0.4100113765642776, "grad_norm": 2.072295418124302, "learning_rate": 1.2293975501747372e-06, "loss": 0.1566, "step": 1802 }, { "epoch": 0.41023890784982936, "grad_norm": 3.023299474624079, "learning_rate": 1.229374791630962e-06, "loss": 0.1615, "step": 1803 }, { "epoch": 0.4104664391353811, "grad_norm": 2.96052428349603, "learning_rate": 1.2293520207348727e-06, "loss": 0.1246, "step": 1804 }, { "epoch": 0.41069397042093286, "grad_norm": 2.3232889792011404, "learning_rate": 1.229329237486935e-06, "loss": 0.119, "step": 1805 }, { "epoch": 0.4109215017064846, "grad_norm": 1.680210318484648, "learning_rate": 1.2293064418876145e-06, "loss": 0.0759, "step": 1806 }, { "epoch": 0.4111490329920364, "grad_norm": 2.2603194082450946, "learning_rate": 1.2292836339373771e-06, "loss": 0.1255, "step": 1807 }, { "epoch": 0.4113765642775882, "grad_norm": 2.9474726641163396, "learning_rate": 1.2292608136366887e-06, "loss": 0.1411, "step": 1808 }, { "epoch": 0.4116040955631399, "grad_norm": 2.7451292492288313, "learning_rate": 1.2292379809860162e-06, "loss": 0.1366, "step": 1809 }, { "epoch": 0.4118316268486917, "grad_norm": 1.7748176376725044, "learning_rate": 1.2292151359858258e-06, "loss": 0.0838, "step": 1810 }, { "epoch": 0.4120591581342435, "grad_norm": 1.9600147202758083, "learning_rate": 1.2291922786365846e-06, "loss": 0.0611, "step": 1811 }, { "epoch": 0.41228668941979524, "grad_norm": 1.9430142401566255, "learning_rate": 1.2291694089387599e-06, "loss": 0.1047, "step": 1812 }, { "epoch": 0.412514220705347, "grad_norm": 2.10075635319286, "learning_rate": 1.2291465268928187e-06, "loss": 0.1982, "step": 1813 }, { "epoch": 0.41274175199089874, "grad_norm": 3.157474099750552, "learning_rate": 1.2291236324992291e-06, "loss": 0.2055, "step": 1814 }, { "epoch": 0.4129692832764505, "grad_norm": 2.0954118584949657, "learning_rate": 1.229100725758459e-06, "loss": 0.1466, "step": 1815 }, { "epoch": 0.4131968145620023, "grad_norm": 1.8718484159029354, "learning_rate": 1.2290778066709763e-06, "loss": 0.0949, "step": 1816 }, { "epoch": 0.41342434584755405, "grad_norm": 2.2325202956764625, "learning_rate": 1.2290548752372494e-06, "loss": 0.1135, "step": 1817 }, { "epoch": 0.4136518771331058, "grad_norm": 1.942418916883084, "learning_rate": 1.2290319314577473e-06, "loss": 0.153, "step": 1818 }, { "epoch": 0.41387940841865756, "grad_norm": 1.252464692558786, "learning_rate": 1.2290089753329386e-06, "loss": 0.0687, "step": 1819 }, { "epoch": 0.4141069397042093, "grad_norm": 2.285517486522719, "learning_rate": 1.2289860068632929e-06, "loss": 0.1432, "step": 1820 }, { "epoch": 0.4143344709897611, "grad_norm": 2.752384746112051, "learning_rate": 1.228963026049279e-06, "loss": 0.1178, "step": 1821 }, { "epoch": 0.41456200227531287, "grad_norm": 2.4041920784703175, "learning_rate": 1.228940032891367e-06, "loss": 0.1721, "step": 1822 }, { "epoch": 0.4147895335608646, "grad_norm": 1.8795669463811113, "learning_rate": 1.2289170273900272e-06, "loss": 0.0916, "step": 1823 }, { "epoch": 0.4150170648464164, "grad_norm": 1.1852055506021697, "learning_rate": 1.228894009545729e-06, "loss": 0.1252, "step": 1824 }, { "epoch": 0.4152445961319681, "grad_norm": 1.9135382951198285, "learning_rate": 1.2288709793589434e-06, "loss": 0.2363, "step": 1825 }, { "epoch": 0.41547212741751993, "grad_norm": 5.3507052059831555, "learning_rate": 1.2288479368301408e-06, "loss": 0.1991, "step": 1826 }, { "epoch": 0.4156996587030717, "grad_norm": 2.0538462354836255, "learning_rate": 1.2288248819597922e-06, "loss": 0.1979, "step": 1827 }, { "epoch": 0.41592718998862344, "grad_norm": 1.5733199096098156, "learning_rate": 1.228801814748369e-06, "loss": 0.0897, "step": 1828 }, { "epoch": 0.4161547212741752, "grad_norm": 2.1146593845270916, "learning_rate": 1.2287787351963427e-06, "loss": 0.1143, "step": 1829 }, { "epoch": 0.41638225255972694, "grad_norm": 1.9141333315182336, "learning_rate": 1.2287556433041845e-06, "loss": 0.1447, "step": 1830 }, { "epoch": 0.41660978384527875, "grad_norm": 2.665142936614018, "learning_rate": 1.2287325390723669e-06, "loss": 0.1329, "step": 1831 }, { "epoch": 0.4168373151308305, "grad_norm": 2.19183346029768, "learning_rate": 1.2287094225013618e-06, "loss": 0.2281, "step": 1832 }, { "epoch": 0.41706484641638225, "grad_norm": 2.127380551764798, "learning_rate": 1.2286862935916416e-06, "loss": 0.1784, "step": 1833 }, { "epoch": 0.417292377701934, "grad_norm": 2.3095877045434, "learning_rate": 1.2286631523436793e-06, "loss": 0.1159, "step": 1834 }, { "epoch": 0.41751990898748575, "grad_norm": 3.9175539200968386, "learning_rate": 1.2286399987579478e-06, "loss": 0.2352, "step": 1835 }, { "epoch": 0.41774744027303756, "grad_norm": 1.5984970371973604, "learning_rate": 1.2286168328349202e-06, "loss": 0.106, "step": 1836 }, { "epoch": 0.4179749715585893, "grad_norm": 1.3035202220513378, "learning_rate": 1.2285936545750698e-06, "loss": 0.09, "step": 1837 }, { "epoch": 0.41820250284414107, "grad_norm": 2.7776624038109707, "learning_rate": 1.2285704639788707e-06, "loss": 0.0956, "step": 1838 }, { "epoch": 0.4184300341296928, "grad_norm": 2.176123621315213, "learning_rate": 1.2285472610467969e-06, "loss": 0.0822, "step": 1839 }, { "epoch": 0.41865756541524457, "grad_norm": 3.1080618782037064, "learning_rate": 1.228524045779322e-06, "loss": 0.2253, "step": 1840 }, { "epoch": 0.4188850967007964, "grad_norm": 3.107268409079957, "learning_rate": 1.2285008181769212e-06, "loss": 0.1601, "step": 1841 }, { "epoch": 0.41911262798634813, "grad_norm": 2.166037873871064, "learning_rate": 1.228477578240069e-06, "loss": 0.1026, "step": 1842 }, { "epoch": 0.4193401592718999, "grad_norm": 2.4389639267330607, "learning_rate": 1.22845432596924e-06, "loss": 0.1287, "step": 1843 }, { "epoch": 0.41956769055745163, "grad_norm": 2.5484705835141583, "learning_rate": 1.22843106136491e-06, "loss": 0.1095, "step": 1844 }, { "epoch": 0.4197952218430034, "grad_norm": 2.908823301090787, "learning_rate": 1.2284077844275543e-06, "loss": 0.1573, "step": 1845 }, { "epoch": 0.4200227531285552, "grad_norm": 3.0404493249523386, "learning_rate": 1.2283844951576484e-06, "loss": 0.1426, "step": 1846 }, { "epoch": 0.42025028441410694, "grad_norm": 2.187557277461029, "learning_rate": 1.2283611935556686e-06, "loss": 0.1289, "step": 1847 }, { "epoch": 0.4204778156996587, "grad_norm": 2.0438695218752154, "learning_rate": 1.2283378796220909e-06, "loss": 0.1464, "step": 1848 }, { "epoch": 0.42070534698521045, "grad_norm": 2.973625989974048, "learning_rate": 1.228314553357392e-06, "loss": 0.1576, "step": 1849 }, { "epoch": 0.42093287827076226, "grad_norm": 2.065578760595499, "learning_rate": 1.2282912147620483e-06, "loss": 0.1228, "step": 1850 }, { "epoch": 0.421160409556314, "grad_norm": 1.087351915862162, "learning_rate": 1.2282678638365373e-06, "loss": 0.1229, "step": 1851 }, { "epoch": 0.42138794084186576, "grad_norm": 2.683413990345536, "learning_rate": 1.2282445005813359e-06, "loss": 0.175, "step": 1852 }, { "epoch": 0.4216154721274175, "grad_norm": 2.6502454120286743, "learning_rate": 1.2282211249969217e-06, "loss": 0.1437, "step": 1853 }, { "epoch": 0.42184300341296926, "grad_norm": 2.546457693607218, "learning_rate": 1.2281977370837725e-06, "loss": 0.1637, "step": 1854 }, { "epoch": 0.42207053469852107, "grad_norm": 1.7164185473669251, "learning_rate": 1.2281743368423662e-06, "loss": 0.0716, "step": 1855 }, { "epoch": 0.4222980659840728, "grad_norm": 1.1259934133581069, "learning_rate": 1.2281509242731813e-06, "loss": 0.1674, "step": 1856 }, { "epoch": 0.4225255972696246, "grad_norm": 1.824612068932172, "learning_rate": 1.228127499376696e-06, "loss": 0.1473, "step": 1857 }, { "epoch": 0.4227531285551763, "grad_norm": 3.052950993125296, "learning_rate": 1.228104062153389e-06, "loss": 0.145, "step": 1858 }, { "epoch": 0.4229806598407281, "grad_norm": 0.890302498925592, "learning_rate": 1.2280806126037396e-06, "loss": 0.0978, "step": 1859 }, { "epoch": 0.4232081911262799, "grad_norm": 1.936861534299759, "learning_rate": 1.2280571507282272e-06, "loss": 0.1774, "step": 1860 }, { "epoch": 0.42343572241183164, "grad_norm": 1.7891865799019433, "learning_rate": 1.2280336765273309e-06, "loss": 0.1108, "step": 1861 }, { "epoch": 0.4236632536973834, "grad_norm": 1.8865955624918218, "learning_rate": 1.2280101900015306e-06, "loss": 0.1417, "step": 1862 }, { "epoch": 0.42389078498293514, "grad_norm": 1.8128448248353213, "learning_rate": 1.2279866911513064e-06, "loss": 0.1428, "step": 1863 }, { "epoch": 0.4241183162684869, "grad_norm": 2.901810032716033, "learning_rate": 1.2279631799771386e-06, "loss": 0.1846, "step": 1864 }, { "epoch": 0.4243458475540387, "grad_norm": 2.262057679433472, "learning_rate": 1.2279396564795077e-06, "loss": 0.2503, "step": 1865 }, { "epoch": 0.42457337883959045, "grad_norm": 3.0660607522891485, "learning_rate": 1.2279161206588944e-06, "loss": 0.1034, "step": 1866 }, { "epoch": 0.4248009101251422, "grad_norm": 2.3354669669388852, "learning_rate": 1.2278925725157798e-06, "loss": 0.1624, "step": 1867 }, { "epoch": 0.42502844141069396, "grad_norm": 1.5871965476689007, "learning_rate": 1.2278690120506451e-06, "loss": 0.1101, "step": 1868 }, { "epoch": 0.4252559726962457, "grad_norm": 2.709885114592366, "learning_rate": 1.2278454392639722e-06, "loss": 0.1347, "step": 1869 }, { "epoch": 0.4254835039817975, "grad_norm": 1.413512267705387, "learning_rate": 1.2278218541562422e-06, "loss": 0.0734, "step": 1870 }, { "epoch": 0.42571103526734927, "grad_norm": 1.7321041587522092, "learning_rate": 1.2277982567279377e-06, "loss": 0.1911, "step": 1871 }, { "epoch": 0.425938566552901, "grad_norm": 2.066073629245216, "learning_rate": 1.2277746469795407e-06, "loss": 0.1626, "step": 1872 }, { "epoch": 0.4261660978384528, "grad_norm": 2.0535165144225056, "learning_rate": 1.227751024911534e-06, "loss": 0.0855, "step": 1873 }, { "epoch": 0.4263936291240045, "grad_norm": 3.005758093068664, "learning_rate": 1.2277273905244002e-06, "loss": 0.1643, "step": 1874 }, { "epoch": 0.42662116040955633, "grad_norm": 2.3459498044051137, "learning_rate": 1.2277037438186224e-06, "loss": 0.1279, "step": 1875 }, { "epoch": 0.4268486916951081, "grad_norm": 1.5882472504054521, "learning_rate": 1.2276800847946839e-06, "loss": 0.0926, "step": 1876 }, { "epoch": 0.42707622298065984, "grad_norm": 1.7112545971646451, "learning_rate": 1.227656413453068e-06, "loss": 0.1147, "step": 1877 }, { "epoch": 0.4273037542662116, "grad_norm": 2.282517318806134, "learning_rate": 1.227632729794259e-06, "loss": 0.1405, "step": 1878 }, { "epoch": 0.42753128555176334, "grad_norm": 2.782117068218183, "learning_rate": 1.2276090338187403e-06, "loss": 0.2318, "step": 1879 }, { "epoch": 0.42775881683731515, "grad_norm": 5.886268196659077, "learning_rate": 1.2275853255269967e-06, "loss": 0.1079, "step": 1880 }, { "epoch": 0.4279863481228669, "grad_norm": 2.3326199322014394, "learning_rate": 1.2275616049195129e-06, "loss": 0.1448, "step": 1881 }, { "epoch": 0.42821387940841865, "grad_norm": 1.8816717252042165, "learning_rate": 1.2275378719967733e-06, "loss": 0.0942, "step": 1882 }, { "epoch": 0.4284414106939704, "grad_norm": 1.7985569079604016, "learning_rate": 1.227514126759263e-06, "loss": 0.112, "step": 1883 }, { "epoch": 0.4286689419795222, "grad_norm": 3.0259876193386646, "learning_rate": 1.2274903692074674e-06, "loss": 0.1872, "step": 1884 }, { "epoch": 0.42889647326507396, "grad_norm": 3.1916760297795417, "learning_rate": 1.2274665993418722e-06, "loss": 0.1792, "step": 1885 }, { "epoch": 0.4291240045506257, "grad_norm": 2.5906641441781613, "learning_rate": 1.227442817162963e-06, "loss": 0.1644, "step": 1886 }, { "epoch": 0.42935153583617747, "grad_norm": 3.430163291900366, "learning_rate": 1.227419022671226e-06, "loss": 0.1339, "step": 1887 }, { "epoch": 0.4295790671217292, "grad_norm": 3.041561020625739, "learning_rate": 1.2273952158671472e-06, "loss": 0.1736, "step": 1888 }, { "epoch": 0.429806598407281, "grad_norm": 3.0955843388329347, "learning_rate": 1.2273713967512137e-06, "loss": 0.1665, "step": 1889 }, { "epoch": 0.4300341296928328, "grad_norm": 6.520120337578789, "learning_rate": 1.227347565323912e-06, "loss": 0.1274, "step": 1890 }, { "epoch": 0.43026166097838453, "grad_norm": 3.436953118763558, "learning_rate": 1.2273237215857293e-06, "loss": 0.1299, "step": 1891 }, { "epoch": 0.4304891922639363, "grad_norm": 3.4857387853802364, "learning_rate": 1.2272998655371526e-06, "loss": 0.1726, "step": 1892 }, { "epoch": 0.43071672354948803, "grad_norm": 3.6540817320958228, "learning_rate": 1.22727599717867e-06, "loss": 0.1921, "step": 1893 }, { "epoch": 0.43094425483503984, "grad_norm": 1.8546702242131714, "learning_rate": 1.2272521165107687e-06, "loss": 0.1954, "step": 1894 }, { "epoch": 0.4311717861205916, "grad_norm": 2.906647877087954, "learning_rate": 1.2272282235339372e-06, "loss": 0.1436, "step": 1895 }, { "epoch": 0.43139931740614335, "grad_norm": 2.2434613722644863, "learning_rate": 1.2272043182486638e-06, "loss": 0.1086, "step": 1896 }, { "epoch": 0.4316268486916951, "grad_norm": 3.00916232200625, "learning_rate": 1.227180400655437e-06, "loss": 0.1753, "step": 1897 }, { "epoch": 0.43185437997724685, "grad_norm": 2.818766317391872, "learning_rate": 1.2271564707547457e-06, "loss": 0.3096, "step": 1898 }, { "epoch": 0.43208191126279866, "grad_norm": 1.1542786147828004, "learning_rate": 1.227132528547079e-06, "loss": 0.1049, "step": 1899 }, { "epoch": 0.4323094425483504, "grad_norm": 2.4796477207464207, "learning_rate": 1.2271085740329261e-06, "loss": 0.2142, "step": 1900 }, { "epoch": 0.43253697383390216, "grad_norm": 1.9623610484322256, "learning_rate": 1.2270846072127764e-06, "loss": 0.1611, "step": 1901 }, { "epoch": 0.4327645051194539, "grad_norm": 2.554393698647313, "learning_rate": 1.2270606280871205e-06, "loss": 0.0902, "step": 1902 }, { "epoch": 0.43299203640500566, "grad_norm": 2.0537100903512364, "learning_rate": 1.2270366366564476e-06, "loss": 0.1265, "step": 1903 }, { "epoch": 0.43321956769055747, "grad_norm": 1.9963013061766377, "learning_rate": 1.2270126329212486e-06, "loss": 0.1739, "step": 1904 }, { "epoch": 0.4334470989761092, "grad_norm": 1.1842645267471326, "learning_rate": 1.2269886168820138e-06, "loss": 0.0839, "step": 1905 }, { "epoch": 0.433674630261661, "grad_norm": 1.7356609118289905, "learning_rate": 1.2269645885392342e-06, "loss": 0.1932, "step": 1906 }, { "epoch": 0.43390216154721273, "grad_norm": 2.7617703436331835, "learning_rate": 1.226940547893401e-06, "loss": 0.1339, "step": 1907 }, { "epoch": 0.4341296928327645, "grad_norm": 1.5346172373445122, "learning_rate": 1.2269164949450052e-06, "loss": 0.0955, "step": 1908 }, { "epoch": 0.4343572241183163, "grad_norm": 3.5636615948411, "learning_rate": 1.2268924296945387e-06, "loss": 0.1829, "step": 1909 }, { "epoch": 0.43458475540386804, "grad_norm": 1.67295814200305, "learning_rate": 1.2268683521424932e-06, "loss": 0.0815, "step": 1910 }, { "epoch": 0.4348122866894198, "grad_norm": 2.6841445059138054, "learning_rate": 1.226844262289361e-06, "loss": 0.1003, "step": 1911 }, { "epoch": 0.43503981797497154, "grad_norm": 2.8366861529632206, "learning_rate": 1.2268201601356342e-06, "loss": 0.1634, "step": 1912 }, { "epoch": 0.4352673492605233, "grad_norm": 1.6905197116385013, "learning_rate": 1.2267960456818054e-06, "loss": 0.0796, "step": 1913 }, { "epoch": 0.4354948805460751, "grad_norm": 3.505560593587699, "learning_rate": 1.2267719189283676e-06, "loss": 0.1547, "step": 1914 }, { "epoch": 0.43572241183162685, "grad_norm": 8.776691522016812, "learning_rate": 1.2267477798758141e-06, "loss": 0.151, "step": 1915 }, { "epoch": 0.4359499431171786, "grad_norm": 2.3591467713222807, "learning_rate": 1.2267236285246376e-06, "loss": 0.1631, "step": 1916 }, { "epoch": 0.43617747440273036, "grad_norm": 3.0422098264181887, "learning_rate": 1.2266994648753325e-06, "loss": 0.1776, "step": 1917 }, { "epoch": 0.43640500568828217, "grad_norm": 1.537478908875297, "learning_rate": 1.2266752889283923e-06, "loss": 0.1669, "step": 1918 }, { "epoch": 0.4366325369738339, "grad_norm": 1.8827801159749011, "learning_rate": 1.226651100684311e-06, "loss": 0.1161, "step": 1919 }, { "epoch": 0.43686006825938567, "grad_norm": 3.9329321893042124, "learning_rate": 1.2266269001435829e-06, "loss": 0.1344, "step": 1920 }, { "epoch": 0.4370875995449374, "grad_norm": 3.1389052206999666, "learning_rate": 1.226602687306703e-06, "loss": 0.1233, "step": 1921 }, { "epoch": 0.4373151308304892, "grad_norm": 2.998637701201584, "learning_rate": 1.226578462174166e-06, "loss": 0.1134, "step": 1922 }, { "epoch": 0.437542662116041, "grad_norm": 1.8514157151948887, "learning_rate": 1.2265542247464668e-06, "loss": 0.1484, "step": 1923 }, { "epoch": 0.43777019340159273, "grad_norm": 2.304538146026998, "learning_rate": 1.226529975024101e-06, "loss": 0.2034, "step": 1924 }, { "epoch": 0.4379977246871445, "grad_norm": 1.744388776164245, "learning_rate": 1.2265057130075641e-06, "loss": 0.1854, "step": 1925 }, { "epoch": 0.43822525597269624, "grad_norm": 2.6565426243412165, "learning_rate": 1.2264814386973523e-06, "loss": 0.1572, "step": 1926 }, { "epoch": 0.438452787258248, "grad_norm": 6.9733970699782954, "learning_rate": 1.2264571520939612e-06, "loss": 0.0721, "step": 1927 }, { "epoch": 0.4386803185437998, "grad_norm": 1.9165403272554284, "learning_rate": 1.2264328531978875e-06, "loss": 0.1644, "step": 1928 }, { "epoch": 0.43890784982935155, "grad_norm": 2.125664734049219, "learning_rate": 1.2264085420096277e-06, "loss": 0.1061, "step": 1929 }, { "epoch": 0.4391353811149033, "grad_norm": 1.905504057373527, "learning_rate": 1.226384218529679e-06, "loss": 0.153, "step": 1930 }, { "epoch": 0.43936291240045505, "grad_norm": 1.6818760874849463, "learning_rate": 1.2263598827585379e-06, "loss": 0.1771, "step": 1931 }, { "epoch": 0.4395904436860068, "grad_norm": 0.7483156859971863, "learning_rate": 1.2263355346967023e-06, "loss": 0.0657, "step": 1932 }, { "epoch": 0.4398179749715586, "grad_norm": 2.8922141342339915, "learning_rate": 1.2263111743446697e-06, "loss": 0.1127, "step": 1933 }, { "epoch": 0.44004550625711036, "grad_norm": 2.254842475988348, "learning_rate": 1.2262868017029377e-06, "loss": 0.1213, "step": 1934 }, { "epoch": 0.4402730375426621, "grad_norm": 2.5250433582131477, "learning_rate": 1.226262416772005e-06, "loss": 0.1912, "step": 1935 }, { "epoch": 0.44050056882821387, "grad_norm": 1.7654279959465413, "learning_rate": 1.2262380195523696e-06, "loss": 0.0965, "step": 1936 }, { "epoch": 0.4407281001137656, "grad_norm": 1.9288183994078796, "learning_rate": 1.2262136100445303e-06, "loss": 0.1442, "step": 1937 }, { "epoch": 0.4409556313993174, "grad_norm": 3.208052375402599, "learning_rate": 1.2261891882489855e-06, "loss": 0.1476, "step": 1938 }, { "epoch": 0.4411831626848692, "grad_norm": 1.9111076610363271, "learning_rate": 1.226164754166235e-06, "loss": 0.1038, "step": 1939 }, { "epoch": 0.44141069397042093, "grad_norm": 2.755324988033307, "learning_rate": 1.2261403077967778e-06, "loss": 0.1473, "step": 1940 }, { "epoch": 0.4416382252559727, "grad_norm": 9.71923868520851, "learning_rate": 1.2261158491411136e-06, "loss": 0.124, "step": 1941 }, { "epoch": 0.44186575654152443, "grad_norm": 2.7390582242923887, "learning_rate": 1.2260913781997425e-06, "loss": 0.1482, "step": 1942 }, { "epoch": 0.44209328782707624, "grad_norm": 2.2870752287666964, "learning_rate": 1.2260668949731644e-06, "loss": 0.1578, "step": 1943 }, { "epoch": 0.442320819112628, "grad_norm": 3.234690008586509, "learning_rate": 1.2260423994618798e-06, "loss": 0.1443, "step": 1944 }, { "epoch": 0.44254835039817975, "grad_norm": 3.0788870765568865, "learning_rate": 1.2260178916663892e-06, "loss": 0.1467, "step": 1945 }, { "epoch": 0.4427758816837315, "grad_norm": 1.1800230173128983, "learning_rate": 1.2259933715871935e-06, "loss": 0.1334, "step": 1946 }, { "epoch": 0.44300341296928325, "grad_norm": 1.9016957352422574, "learning_rate": 1.2259688392247942e-06, "loss": 0.0917, "step": 1947 }, { "epoch": 0.44323094425483506, "grad_norm": 3.0210466868569505, "learning_rate": 1.2259442945796926e-06, "loss": 0.2328, "step": 1948 }, { "epoch": 0.4434584755403868, "grad_norm": 1.5788241191031314, "learning_rate": 1.2259197376523898e-06, "loss": 0.1697, "step": 1949 }, { "epoch": 0.44368600682593856, "grad_norm": 1.998070514722676, "learning_rate": 1.2258951684433883e-06, "loss": 0.1603, "step": 1950 }, { "epoch": 0.4439135381114903, "grad_norm": 2.6869498999380674, "learning_rate": 1.2258705869531901e-06, "loss": 0.1949, "step": 1951 }, { "epoch": 0.4441410693970421, "grad_norm": 2.1632784019049325, "learning_rate": 1.2258459931822974e-06, "loss": 0.205, "step": 1952 }, { "epoch": 0.4443686006825939, "grad_norm": 2.5366287820014897, "learning_rate": 1.225821387131213e-06, "loss": 0.0819, "step": 1953 }, { "epoch": 0.4445961319681456, "grad_norm": 1.069933102487221, "learning_rate": 1.22579676880044e-06, "loss": 0.0658, "step": 1954 }, { "epoch": 0.4448236632536974, "grad_norm": 2.159006669286408, "learning_rate": 1.2257721381904811e-06, "loss": 0.1409, "step": 1955 }, { "epoch": 0.44505119453924913, "grad_norm": 2.4876680890845644, "learning_rate": 1.22574749530184e-06, "loss": 0.1376, "step": 1956 }, { "epoch": 0.44527872582480094, "grad_norm": 2.824828278975413, "learning_rate": 1.2257228401350205e-06, "loss": 0.1554, "step": 1957 }, { "epoch": 0.4455062571103527, "grad_norm": 1.021893819715831, "learning_rate": 1.2256981726905262e-06, "loss": 0.0644, "step": 1958 }, { "epoch": 0.44573378839590444, "grad_norm": 2.3996775721551034, "learning_rate": 1.2256734929688612e-06, "loss": 0.1624, "step": 1959 }, { "epoch": 0.4459613196814562, "grad_norm": 1.6628903100272334, "learning_rate": 1.2256488009705303e-06, "loss": 0.0861, "step": 1960 }, { "epoch": 0.44618885096700794, "grad_norm": 1.7833579074549226, "learning_rate": 1.225624096696038e-06, "loss": 0.0887, "step": 1961 }, { "epoch": 0.44641638225255975, "grad_norm": 0.9965946886914974, "learning_rate": 1.225599380145889e-06, "loss": 0.0664, "step": 1962 }, { "epoch": 0.4466439135381115, "grad_norm": 3.226735520877417, "learning_rate": 1.2255746513205889e-06, "loss": 0.1774, "step": 1963 }, { "epoch": 0.44687144482366326, "grad_norm": 1.7796640096245704, "learning_rate": 1.2255499102206423e-06, "loss": 0.0894, "step": 1964 }, { "epoch": 0.447098976109215, "grad_norm": 4.62578287316141, "learning_rate": 1.2255251568465558e-06, "loss": 0.179, "step": 1965 }, { "epoch": 0.44732650739476676, "grad_norm": 1.4148303981446813, "learning_rate": 1.2255003911988348e-06, "loss": 0.1264, "step": 1966 }, { "epoch": 0.44755403868031857, "grad_norm": 2.0366633980384785, "learning_rate": 1.2254756132779855e-06, "loss": 0.1721, "step": 1967 }, { "epoch": 0.4477815699658703, "grad_norm": 1.571525400347696, "learning_rate": 1.2254508230845144e-06, "loss": 0.1231, "step": 1968 }, { "epoch": 0.44800910125142207, "grad_norm": 1.8759159261783616, "learning_rate": 1.2254260206189283e-06, "loss": 0.1027, "step": 1969 }, { "epoch": 0.4482366325369738, "grad_norm": 1.3834676958640604, "learning_rate": 1.2254012058817337e-06, "loss": 0.1492, "step": 1970 }, { "epoch": 0.4484641638225256, "grad_norm": 2.6467245289779733, "learning_rate": 1.2253763788734384e-06, "loss": 0.1474, "step": 1971 }, { "epoch": 0.4486916951080774, "grad_norm": 1.5990498535799054, "learning_rate": 1.2253515395945492e-06, "loss": 0.1472, "step": 1972 }, { "epoch": 0.44891922639362913, "grad_norm": 1.437136755746043, "learning_rate": 1.2253266880455742e-06, "loss": 0.1196, "step": 1973 }, { "epoch": 0.4491467576791809, "grad_norm": 1.2239358879138267, "learning_rate": 1.225301824227021e-06, "loss": 0.0703, "step": 1974 }, { "epoch": 0.44937428896473264, "grad_norm": 1.6035752105793946, "learning_rate": 1.2252769481393979e-06, "loss": 0.0868, "step": 1975 }, { "epoch": 0.4496018202502844, "grad_norm": 3.2371327667206096, "learning_rate": 1.2252520597832132e-06, "loss": 0.1704, "step": 1976 }, { "epoch": 0.4498293515358362, "grad_norm": 1.6184063611102666, "learning_rate": 1.2252271591589759e-06, "loss": 0.1344, "step": 1977 }, { "epoch": 0.45005688282138795, "grad_norm": 2.7294883577821833, "learning_rate": 1.2252022462671947e-06, "loss": 0.1336, "step": 1978 }, { "epoch": 0.4502844141069397, "grad_norm": 1.5187257522965836, "learning_rate": 1.2251773211083789e-06, "loss": 0.1412, "step": 1979 }, { "epoch": 0.45051194539249145, "grad_norm": 1.3575191114107326, "learning_rate": 1.225152383683038e-06, "loss": 0.1915, "step": 1980 }, { "epoch": 0.4507394766780432, "grad_norm": 3.573777818440175, "learning_rate": 1.225127433991681e-06, "loss": 0.2149, "step": 1981 }, { "epoch": 0.450967007963595, "grad_norm": 1.9064246989112013, "learning_rate": 1.2251024720348186e-06, "loss": 0.131, "step": 1982 }, { "epoch": 0.45119453924914676, "grad_norm": 2.60894949438164, "learning_rate": 1.2250774978129606e-06, "loss": 0.1191, "step": 1983 }, { "epoch": 0.4514220705346985, "grad_norm": 2.0893541262163655, "learning_rate": 1.2250525113266175e-06, "loss": 0.1246, "step": 1984 }, { "epoch": 0.45164960182025027, "grad_norm": 3.058414234549828, "learning_rate": 1.2250275125763002e-06, "loss": 0.1562, "step": 1985 }, { "epoch": 0.451877133105802, "grad_norm": 2.7361733850127288, "learning_rate": 1.2250025015625194e-06, "loss": 0.1386, "step": 1986 }, { "epoch": 0.4521046643913538, "grad_norm": 3.551478539562137, "learning_rate": 1.2249774782857863e-06, "loss": 0.1592, "step": 1987 }, { "epoch": 0.4523321956769056, "grad_norm": 1.7962854205539769, "learning_rate": 1.2249524427466123e-06, "loss": 0.0993, "step": 1988 }, { "epoch": 0.45255972696245733, "grad_norm": 1.7250148293655851, "learning_rate": 1.2249273949455092e-06, "loss": 0.1935, "step": 1989 }, { "epoch": 0.4527872582480091, "grad_norm": 2.798595683002779, "learning_rate": 1.224902334882989e-06, "loss": 0.1092, "step": 1990 }, { "epoch": 0.4530147895335609, "grad_norm": 1.5382254747774728, "learning_rate": 1.2248772625595633e-06, "loss": 0.1337, "step": 1991 }, { "epoch": 0.45324232081911264, "grad_norm": 2.0009706394613227, "learning_rate": 1.2248521779757455e-06, "loss": 0.1426, "step": 1992 }, { "epoch": 0.4534698521046644, "grad_norm": 1.9370488492395648, "learning_rate": 1.2248270811320473e-06, "loss": 0.119, "step": 1993 }, { "epoch": 0.45369738339021615, "grad_norm": 2.5525945012477345, "learning_rate": 1.2248019720289826e-06, "loss": 0.1389, "step": 1994 }, { "epoch": 0.4539249146757679, "grad_norm": 1.218140356587049, "learning_rate": 1.2247768506670639e-06, "loss": 0.1386, "step": 1995 }, { "epoch": 0.4541524459613197, "grad_norm": 2.258231427971832, "learning_rate": 1.2247517170468045e-06, "loss": 0.2193, "step": 1996 }, { "epoch": 0.45437997724687146, "grad_norm": 2.1071468220391996, "learning_rate": 1.2247265711687186e-06, "loss": 0.1245, "step": 1997 }, { "epoch": 0.4546075085324232, "grad_norm": 1.873057521166509, "learning_rate": 1.2247014130333198e-06, "loss": 0.1437, "step": 1998 }, { "epoch": 0.45483503981797496, "grad_norm": 1.9807431832192035, "learning_rate": 1.2246762426411227e-06, "loss": 0.1464, "step": 1999 }, { "epoch": 0.4550625711035267, "grad_norm": 1.6280778924472747, "learning_rate": 1.2246510599926412e-06, "loss": 0.121, "step": 2000 }, { "epoch": 0.4552901023890785, "grad_norm": 2.8593046291182826, "learning_rate": 1.2246258650883904e-06, "loss": 0.1397, "step": 2001 }, { "epoch": 0.4555176336746303, "grad_norm": 2.1546576597681906, "learning_rate": 1.224600657928885e-06, "loss": 0.1574, "step": 2002 }, { "epoch": 0.455745164960182, "grad_norm": 2.023369886602771, "learning_rate": 1.2245754385146402e-06, "loss": 0.1274, "step": 2003 }, { "epoch": 0.4559726962457338, "grad_norm": 2.1812153004323, "learning_rate": 1.2245502068461718e-06, "loss": 0.0905, "step": 2004 }, { "epoch": 0.45620022753128553, "grad_norm": 2.3877035049857342, "learning_rate": 1.224524962923995e-06, "loss": 0.1017, "step": 2005 }, { "epoch": 0.45642775881683734, "grad_norm": 1.5777250547989092, "learning_rate": 1.2244997067486258e-06, "loss": 0.2139, "step": 2006 }, { "epoch": 0.4566552901023891, "grad_norm": 2.9772883997052997, "learning_rate": 1.2244744383205807e-06, "loss": 0.1471, "step": 2007 }, { "epoch": 0.45688282138794084, "grad_norm": 3.180677452250655, "learning_rate": 1.224449157640376e-06, "loss": 0.1183, "step": 2008 }, { "epoch": 0.4571103526734926, "grad_norm": 2.3851861401264145, "learning_rate": 1.2244238647085283e-06, "loss": 0.1519, "step": 2009 }, { "epoch": 0.45733788395904434, "grad_norm": 3.1889875080379615, "learning_rate": 1.2243985595255546e-06, "loss": 0.1488, "step": 2010 }, { "epoch": 0.45756541524459615, "grad_norm": 3.3499971452048825, "learning_rate": 1.224373242091972e-06, "loss": 0.1913, "step": 2011 }, { "epoch": 0.4577929465301479, "grad_norm": 2.114007694687429, "learning_rate": 1.224347912408298e-06, "loss": 0.1205, "step": 2012 }, { "epoch": 0.45802047781569966, "grad_norm": 3.372165092943733, "learning_rate": 1.2243225704750506e-06, "loss": 0.1729, "step": 2013 }, { "epoch": 0.4582480091012514, "grad_norm": 2.7776776213730234, "learning_rate": 1.2242972162927472e-06, "loss": 0.1789, "step": 2014 }, { "epoch": 0.45847554038680316, "grad_norm": 1.5378497020029431, "learning_rate": 1.2242718498619063e-06, "loss": 0.1046, "step": 2015 }, { "epoch": 0.45870307167235497, "grad_norm": 1.9245485247734166, "learning_rate": 1.2242464711830463e-06, "loss": 0.1296, "step": 2016 }, { "epoch": 0.4589306029579067, "grad_norm": 3.571578005575834, "learning_rate": 1.224221080256686e-06, "loss": 0.1493, "step": 2017 }, { "epoch": 0.45915813424345847, "grad_norm": 3.0687747713314506, "learning_rate": 1.2241956770833444e-06, "loss": 0.1298, "step": 2018 }, { "epoch": 0.4593856655290102, "grad_norm": 1.4963685838255636, "learning_rate": 1.2241702616635403e-06, "loss": 0.0692, "step": 2019 }, { "epoch": 0.459613196814562, "grad_norm": 3.5237300833320346, "learning_rate": 1.2241448339977931e-06, "loss": 0.213, "step": 2020 }, { "epoch": 0.4598407281001138, "grad_norm": 1.5853111616974174, "learning_rate": 1.2241193940866234e-06, "loss": 0.0529, "step": 2021 }, { "epoch": 0.46006825938566553, "grad_norm": 1.5166819743159363, "learning_rate": 1.22409394193055e-06, "loss": 0.1126, "step": 2022 }, { "epoch": 0.4602957906712173, "grad_norm": 2.7441332092646924, "learning_rate": 1.224068477530094e-06, "loss": 0.1397, "step": 2023 }, { "epoch": 0.46052332195676904, "grad_norm": 2.324643380132358, "learning_rate": 1.2240430008857751e-06, "loss": 0.2038, "step": 2024 }, { "epoch": 0.46075085324232085, "grad_norm": 1.0420451784612332, "learning_rate": 1.2240175119981146e-06, "loss": 0.1546, "step": 2025 }, { "epoch": 0.4609783845278726, "grad_norm": 2.3221839473002692, "learning_rate": 1.223992010867633e-06, "loss": 0.1177, "step": 2026 }, { "epoch": 0.46120591581342435, "grad_norm": 0.7809955408321576, "learning_rate": 1.223966497494852e-06, "loss": 0.0408, "step": 2027 }, { "epoch": 0.4614334470989761, "grad_norm": 2.944141184764414, "learning_rate": 1.2239409718802927e-06, "loss": 0.196, "step": 2028 }, { "epoch": 0.46166097838452785, "grad_norm": 2.485798960402695, "learning_rate": 1.2239154340244766e-06, "loss": 0.1661, "step": 2029 }, { "epoch": 0.46188850967007966, "grad_norm": 2.204789648433421, "learning_rate": 1.2238898839279262e-06, "loss": 0.1651, "step": 2030 }, { "epoch": 0.4621160409556314, "grad_norm": 1.7965983672501258, "learning_rate": 1.223864321591163e-06, "loss": 0.0958, "step": 2031 }, { "epoch": 0.46234357224118316, "grad_norm": 1.663374784429922, "learning_rate": 1.2238387470147103e-06, "loss": 0.1122, "step": 2032 }, { "epoch": 0.4625711035267349, "grad_norm": 2.7681620279158974, "learning_rate": 1.22381316019909e-06, "loss": 0.1038, "step": 2033 }, { "epoch": 0.46279863481228667, "grad_norm": 0.941266935569426, "learning_rate": 1.2237875611448254e-06, "loss": 0.1504, "step": 2034 }, { "epoch": 0.4630261660978385, "grad_norm": 1.5788004965037967, "learning_rate": 1.2237619498524397e-06, "loss": 0.0884, "step": 2035 }, { "epoch": 0.46325369738339023, "grad_norm": 1.7335921265291445, "learning_rate": 1.2237363263224564e-06, "loss": 0.1805, "step": 2036 }, { "epoch": 0.463481228668942, "grad_norm": 1.7271541406470394, "learning_rate": 1.2237106905553991e-06, "loss": 0.1099, "step": 2037 }, { "epoch": 0.46370875995449373, "grad_norm": 0.9884238734669359, "learning_rate": 1.2236850425517918e-06, "loss": 0.0713, "step": 2038 }, { "epoch": 0.4639362912400455, "grad_norm": 5.002976224886838, "learning_rate": 1.2236593823121586e-06, "loss": 0.2421, "step": 2039 }, { "epoch": 0.4641638225255973, "grad_norm": 3.663063876653759, "learning_rate": 1.223633709837024e-06, "loss": 0.126, "step": 2040 }, { "epoch": 0.46439135381114904, "grad_norm": 2.3948083765141526, "learning_rate": 1.2236080251269129e-06, "loss": 0.2088, "step": 2041 }, { "epoch": 0.4646188850967008, "grad_norm": 2.728688530304909, "learning_rate": 1.2235823281823498e-06, "loss": 0.1795, "step": 2042 }, { "epoch": 0.46484641638225255, "grad_norm": 2.7033015325769054, "learning_rate": 1.2235566190038602e-06, "loss": 0.2331, "step": 2043 }, { "epoch": 0.4650739476678043, "grad_norm": 1.4528538728854552, "learning_rate": 1.2235308975919697e-06, "loss": 0.0993, "step": 2044 }, { "epoch": 0.4653014789533561, "grad_norm": 3.747453107446523, "learning_rate": 1.2235051639472037e-06, "loss": 0.1748, "step": 2045 }, { "epoch": 0.46552901023890786, "grad_norm": 3.8624694024749027, "learning_rate": 1.2234794180700883e-06, "loss": 0.279, "step": 2046 }, { "epoch": 0.4657565415244596, "grad_norm": 2.899667813559601, "learning_rate": 1.2234536599611496e-06, "loss": 0.1747, "step": 2047 }, { "epoch": 0.46598407281001136, "grad_norm": 1.6765710152355593, "learning_rate": 1.2234278896209142e-06, "loss": 0.1285, "step": 2048 }, { "epoch": 0.4662116040955631, "grad_norm": 1.7242970700826221, "learning_rate": 1.2234021070499086e-06, "loss": 0.1298, "step": 2049 }, { "epoch": 0.4664391353811149, "grad_norm": 1.677717106231001, "learning_rate": 1.22337631224866e-06, "loss": 0.1136, "step": 2050 }, { "epoch": 0.4666666666666667, "grad_norm": 2.5864911073545533, "learning_rate": 1.2233505052176952e-06, "loss": 0.1429, "step": 2051 }, { "epoch": 0.4668941979522184, "grad_norm": 2.8514324884488667, "learning_rate": 1.223324685957542e-06, "loss": 0.1401, "step": 2052 }, { "epoch": 0.4671217292377702, "grad_norm": 1.1457675992429823, "learning_rate": 1.2232988544687282e-06, "loss": 0.1492, "step": 2053 }, { "epoch": 0.46734926052332193, "grad_norm": 1.049329465937079, "learning_rate": 1.2232730107517813e-06, "loss": 0.0931, "step": 2054 }, { "epoch": 0.46757679180887374, "grad_norm": 3.4990351755141864, "learning_rate": 1.22324715480723e-06, "loss": 0.155, "step": 2055 }, { "epoch": 0.4678043230944255, "grad_norm": 1.915743720906956, "learning_rate": 1.2232212866356022e-06, "loss": 0.1268, "step": 2056 }, { "epoch": 0.46803185437997724, "grad_norm": 2.1421674895019907, "learning_rate": 1.2231954062374272e-06, "loss": 0.1223, "step": 2057 }, { "epoch": 0.468259385665529, "grad_norm": 2.422880416406602, "learning_rate": 1.2231695136132333e-06, "loss": 0.1921, "step": 2058 }, { "epoch": 0.4684869169510808, "grad_norm": 13.197158009983157, "learning_rate": 1.2231436087635504e-06, "loss": 0.1583, "step": 2059 }, { "epoch": 0.46871444823663255, "grad_norm": 2.1847258298952767, "learning_rate": 1.2231176916889072e-06, "loss": 0.1362, "step": 2060 }, { "epoch": 0.4689419795221843, "grad_norm": 1.444519439515334, "learning_rate": 1.223091762389834e-06, "loss": 0.1375, "step": 2061 }, { "epoch": 0.46916951080773606, "grad_norm": 1.5232732099653936, "learning_rate": 1.2230658208668606e-06, "loss": 0.0855, "step": 2062 }, { "epoch": 0.4693970420932878, "grad_norm": 1.8870106277847236, "learning_rate": 1.223039867120517e-06, "loss": 0.1874, "step": 2063 }, { "epoch": 0.4696245733788396, "grad_norm": 3.196966642834261, "learning_rate": 1.2230139011513338e-06, "loss": 0.125, "step": 2064 }, { "epoch": 0.46985210466439137, "grad_norm": 1.5297510970560153, "learning_rate": 1.2229879229598416e-06, "loss": 0.0823, "step": 2065 }, { "epoch": 0.4700796359499431, "grad_norm": 2.8720469940024027, "learning_rate": 1.2229619325465717e-06, "loss": 0.1788, "step": 2066 }, { "epoch": 0.47030716723549487, "grad_norm": 1.3771574147519354, "learning_rate": 1.2229359299120548e-06, "loss": 0.1141, "step": 2067 }, { "epoch": 0.4705346985210466, "grad_norm": 3.590921354687459, "learning_rate": 1.2229099150568224e-06, "loss": 0.1523, "step": 2068 }, { "epoch": 0.47076222980659843, "grad_norm": 2.7712958332833653, "learning_rate": 1.2228838879814065e-06, "loss": 0.1356, "step": 2069 }, { "epoch": 0.4709897610921502, "grad_norm": 2.71072596202505, "learning_rate": 1.222857848686339e-06, "loss": 0.2181, "step": 2070 }, { "epoch": 0.47121729237770194, "grad_norm": 1.555950330379804, "learning_rate": 1.2228317971721518e-06, "loss": 0.1346, "step": 2071 }, { "epoch": 0.4714448236632537, "grad_norm": 1.642590700515521, "learning_rate": 1.2228057334393777e-06, "loss": 0.0998, "step": 2072 }, { "epoch": 0.47167235494880544, "grad_norm": 1.943206327307487, "learning_rate": 1.2227796574885493e-06, "loss": 0.1742, "step": 2073 }, { "epoch": 0.47189988623435725, "grad_norm": 1.8704660178513137, "learning_rate": 1.2227535693201994e-06, "loss": 0.0758, "step": 2074 }, { "epoch": 0.472127417519909, "grad_norm": 2.657426724814391, "learning_rate": 1.2227274689348612e-06, "loss": 0.1379, "step": 2075 }, { "epoch": 0.47235494880546075, "grad_norm": 4.70640052962396, "learning_rate": 1.2227013563330684e-06, "loss": 0.2874, "step": 2076 }, { "epoch": 0.4725824800910125, "grad_norm": 1.6118104022489745, "learning_rate": 1.2226752315153543e-06, "loss": 0.0888, "step": 2077 }, { "epoch": 0.47281001137656425, "grad_norm": 2.1906220939459287, "learning_rate": 1.2226490944822533e-06, "loss": 0.1162, "step": 2078 }, { "epoch": 0.47303754266211606, "grad_norm": 1.5961395424007108, "learning_rate": 1.2226229452342991e-06, "loss": 0.0706, "step": 2079 }, { "epoch": 0.4732650739476678, "grad_norm": 2.752087193860096, "learning_rate": 1.2225967837720265e-06, "loss": 0.1306, "step": 2080 }, { "epoch": 0.47349260523321957, "grad_norm": 1.519564459692136, "learning_rate": 1.2225706100959702e-06, "loss": 0.144, "step": 2081 }, { "epoch": 0.4737201365187713, "grad_norm": 2.5145870829703068, "learning_rate": 1.2225444242066648e-06, "loss": 0.1257, "step": 2082 }, { "epoch": 0.47394766780432307, "grad_norm": 2.625233347705291, "learning_rate": 1.2225182261046458e-06, "loss": 0.2017, "step": 2083 }, { "epoch": 0.4741751990898749, "grad_norm": 1.5951025219500239, "learning_rate": 1.2224920157904487e-06, "loss": 0.2029, "step": 2084 }, { "epoch": 0.47440273037542663, "grad_norm": 1.882372906280183, "learning_rate": 1.222465793264609e-06, "loss": 0.1386, "step": 2085 }, { "epoch": 0.4746302616609784, "grad_norm": 2.312787899846543, "learning_rate": 1.2224395585276626e-06, "loss": 0.21, "step": 2086 }, { "epoch": 0.47485779294653013, "grad_norm": 2.7346134087318985, "learning_rate": 1.2224133115801458e-06, "loss": 0.1635, "step": 2087 }, { "epoch": 0.4750853242320819, "grad_norm": 1.9967379262226481, "learning_rate": 1.222387052422595e-06, "loss": 0.1022, "step": 2088 }, { "epoch": 0.4753128555176337, "grad_norm": 2.567172930928881, "learning_rate": 1.222360781055547e-06, "loss": 0.1497, "step": 2089 }, { "epoch": 0.47554038680318544, "grad_norm": 1.4244534395274109, "learning_rate": 1.2223344974795386e-06, "loss": 0.2069, "step": 2090 }, { "epoch": 0.4757679180887372, "grad_norm": 2.156175658506119, "learning_rate": 1.2223082016951071e-06, "loss": 0.1239, "step": 2091 }, { "epoch": 0.47599544937428895, "grad_norm": 2.834630845876437, "learning_rate": 1.2222818937027898e-06, "loss": 0.0872, "step": 2092 }, { "epoch": 0.47622298065984076, "grad_norm": 1.8380588142136158, "learning_rate": 1.2222555735031245e-06, "loss": 0.1091, "step": 2093 }, { "epoch": 0.4764505119453925, "grad_norm": 2.5737283640174122, "learning_rate": 1.222229241096649e-06, "loss": 0.2635, "step": 2094 }, { "epoch": 0.47667804323094426, "grad_norm": 1.4566454597308833, "learning_rate": 1.2222028964839017e-06, "loss": 0.1348, "step": 2095 }, { "epoch": 0.476905574516496, "grad_norm": 1.9697307912334634, "learning_rate": 1.2221765396654209e-06, "loss": 0.1558, "step": 2096 }, { "epoch": 0.47713310580204776, "grad_norm": 1.451409908091677, "learning_rate": 1.2221501706417454e-06, "loss": 0.12, "step": 2097 }, { "epoch": 0.47736063708759957, "grad_norm": 2.303809415774566, "learning_rate": 1.2221237894134138e-06, "loss": 0.1487, "step": 2098 }, { "epoch": 0.4775881683731513, "grad_norm": 2.6908048074102426, "learning_rate": 1.222097395980966e-06, "loss": 0.2478, "step": 2099 }, { "epoch": 0.4778156996587031, "grad_norm": 1.9153149162558512, "learning_rate": 1.2220709903449403e-06, "loss": 0.1633, "step": 2100 }, { "epoch": 0.4780432309442548, "grad_norm": 1.5349309710920869, "learning_rate": 1.2220445725058775e-06, "loss": 0.111, "step": 2101 }, { "epoch": 0.4782707622298066, "grad_norm": 1.8132178697871648, "learning_rate": 1.222018142464317e-06, "loss": 0.2154, "step": 2102 }, { "epoch": 0.4784982935153584, "grad_norm": 1.7599313333363966, "learning_rate": 1.221991700220799e-06, "loss": 0.1222, "step": 2103 }, { "epoch": 0.47872582480091014, "grad_norm": 2.029241242931466, "learning_rate": 1.221965245775864e-06, "loss": 0.1307, "step": 2104 }, { "epoch": 0.4789533560864619, "grad_norm": 2.960462408034195, "learning_rate": 1.2219387791300527e-06, "loss": 0.1382, "step": 2105 }, { "epoch": 0.47918088737201364, "grad_norm": 2.347329357814124, "learning_rate": 1.2219123002839058e-06, "loss": 0.1531, "step": 2106 }, { "epoch": 0.4794084186575654, "grad_norm": 1.4488221446198866, "learning_rate": 1.2218858092379648e-06, "loss": 0.1309, "step": 2107 }, { "epoch": 0.4796359499431172, "grad_norm": 4.331492523033653, "learning_rate": 1.221859305992771e-06, "loss": 0.1518, "step": 2108 }, { "epoch": 0.47986348122866895, "grad_norm": 2.2907841977007695, "learning_rate": 1.2218327905488662e-06, "loss": 0.2048, "step": 2109 }, { "epoch": 0.4800910125142207, "grad_norm": 3.226955121067652, "learning_rate": 1.221806262906792e-06, "loss": 0.1442, "step": 2110 }, { "epoch": 0.48031854379977246, "grad_norm": 3.004082420754334, "learning_rate": 1.221779723067091e-06, "loss": 0.1558, "step": 2111 }, { "epoch": 0.4805460750853242, "grad_norm": 1.2580166705137232, "learning_rate": 1.2217531710303053e-06, "loss": 0.108, "step": 2112 }, { "epoch": 0.480773606370876, "grad_norm": 2.219396640494861, "learning_rate": 1.2217266067969778e-06, "loss": 0.2107, "step": 2113 }, { "epoch": 0.48100113765642777, "grad_norm": 1.497489945400175, "learning_rate": 1.221700030367651e-06, "loss": 0.1004, "step": 2114 }, { "epoch": 0.4812286689419795, "grad_norm": 2.1517574031007523, "learning_rate": 1.2216734417428686e-06, "loss": 0.2276, "step": 2115 }, { "epoch": 0.4814562002275313, "grad_norm": 1.3524221649994943, "learning_rate": 1.2216468409231738e-06, "loss": 0.1168, "step": 2116 }, { "epoch": 0.481683731513083, "grad_norm": 2.8589810452438913, "learning_rate": 1.2216202279091104e-06, "loss": 0.1496, "step": 2117 }, { "epoch": 0.48191126279863483, "grad_norm": 3.14821100455351, "learning_rate": 1.2215936027012221e-06, "loss": 0.1713, "step": 2118 }, { "epoch": 0.4821387940841866, "grad_norm": 3.0269143310768363, "learning_rate": 1.2215669653000532e-06, "loss": 0.1253, "step": 2119 }, { "epoch": 0.48236632536973834, "grad_norm": 1.620917497810277, "learning_rate": 1.2215403157061478e-06, "loss": 0.1045, "step": 2120 }, { "epoch": 0.4825938566552901, "grad_norm": 4.356411863912081, "learning_rate": 1.2215136539200512e-06, "loss": 0.1898, "step": 2121 }, { "epoch": 0.48282138794084184, "grad_norm": 2.0989757571956296, "learning_rate": 1.2214869799423078e-06, "loss": 0.0811, "step": 2122 }, { "epoch": 0.48304891922639365, "grad_norm": 2.7274929171067592, "learning_rate": 1.2214602937734632e-06, "loss": 0.1439, "step": 2123 }, { "epoch": 0.4832764505119454, "grad_norm": 2.424703742996382, "learning_rate": 1.2214335954140624e-06, "loss": 0.1519, "step": 2124 }, { "epoch": 0.48350398179749715, "grad_norm": 1.890465690299916, "learning_rate": 1.221406884864651e-06, "loss": 0.0998, "step": 2125 }, { "epoch": 0.4837315130830489, "grad_norm": 2.326638607730014, "learning_rate": 1.2213801621257754e-06, "loss": 0.2102, "step": 2126 }, { "epoch": 0.48395904436860065, "grad_norm": 2.066734671660341, "learning_rate": 1.2213534271979815e-06, "loss": 0.1374, "step": 2127 }, { "epoch": 0.48418657565415246, "grad_norm": 2.172208059040071, "learning_rate": 1.2213266800818158e-06, "loss": 0.1312, "step": 2128 }, { "epoch": 0.4844141069397042, "grad_norm": 1.8499994829117998, "learning_rate": 1.2212999207778246e-06, "loss": 0.1154, "step": 2129 }, { "epoch": 0.48464163822525597, "grad_norm": 2.6816295680240256, "learning_rate": 1.2212731492865552e-06, "loss": 0.0923, "step": 2130 }, { "epoch": 0.4848691695108077, "grad_norm": 3.0796575929883354, "learning_rate": 1.2212463656085548e-06, "loss": 0.1856, "step": 2131 }, { "epoch": 0.4850967007963595, "grad_norm": 1.684680537758439, "learning_rate": 1.2212195697443704e-06, "loss": 0.1398, "step": 2132 }, { "epoch": 0.4853242320819113, "grad_norm": 2.25319089168003, "learning_rate": 1.2211927616945502e-06, "loss": 0.1027, "step": 2133 }, { "epoch": 0.48555176336746303, "grad_norm": 3.0215260043450307, "learning_rate": 1.2211659414596417e-06, "loss": 0.1564, "step": 2134 }, { "epoch": 0.4857792946530148, "grad_norm": 3.420547138984465, "learning_rate": 1.2211391090401931e-06, "loss": 0.1629, "step": 2135 }, { "epoch": 0.48600682593856653, "grad_norm": 2.7151912053510223, "learning_rate": 1.2211122644367531e-06, "loss": 0.1562, "step": 2136 }, { "epoch": 0.48623435722411834, "grad_norm": 3.168254535519415, "learning_rate": 1.2210854076498699e-06, "loss": 0.2003, "step": 2137 }, { "epoch": 0.4864618885096701, "grad_norm": 2.4342182669126435, "learning_rate": 1.2210585386800927e-06, "loss": 0.142, "step": 2138 }, { "epoch": 0.48668941979522184, "grad_norm": 2.365252540519636, "learning_rate": 1.2210316575279707e-06, "loss": 0.1409, "step": 2139 }, { "epoch": 0.4869169510807736, "grad_norm": 1.1372019419508703, "learning_rate": 1.221004764194053e-06, "loss": 0.1547, "step": 2140 }, { "epoch": 0.48714448236632535, "grad_norm": 3.75609449121758, "learning_rate": 1.22097785867889e-06, "loss": 0.1512, "step": 2141 }, { "epoch": 0.48737201365187716, "grad_norm": 1.7473221878185898, "learning_rate": 1.2209509409830304e-06, "loss": 0.2868, "step": 2142 }, { "epoch": 0.4875995449374289, "grad_norm": 2.0082173427217382, "learning_rate": 1.2209240111070254e-06, "loss": 0.1812, "step": 2143 }, { "epoch": 0.48782707622298066, "grad_norm": 2.8863675080606415, "learning_rate": 1.2208970690514247e-06, "loss": 0.1066, "step": 2144 }, { "epoch": 0.4880546075085324, "grad_norm": 1.4857978026455614, "learning_rate": 1.2208701148167795e-06, "loss": 0.0811, "step": 2145 }, { "epoch": 0.48828213879408416, "grad_norm": 3.2054667060001383, "learning_rate": 1.2208431484036405e-06, "loss": 0.194, "step": 2146 }, { "epoch": 0.48850967007963597, "grad_norm": 2.7115726560617133, "learning_rate": 1.2208161698125584e-06, "loss": 0.1733, "step": 2147 }, { "epoch": 0.4887372013651877, "grad_norm": 3.272715328933079, "learning_rate": 1.2207891790440852e-06, "loss": 0.2408, "step": 2148 }, { "epoch": 0.4889647326507395, "grad_norm": 2.7359042131162714, "learning_rate": 1.2207621760987723e-06, "loss": 0.1235, "step": 2149 }, { "epoch": 0.4891922639362912, "grad_norm": 3.3510438031026646, "learning_rate": 1.2207351609771718e-06, "loss": 0.1649, "step": 2150 }, { "epoch": 0.489419795221843, "grad_norm": 1.914954577682318, "learning_rate": 1.2207081336798352e-06, "loss": 0.2042, "step": 2151 }, { "epoch": 0.4896473265073948, "grad_norm": 3.0994780231949215, "learning_rate": 1.2206810942073158e-06, "loss": 0.0953, "step": 2152 }, { "epoch": 0.48987485779294654, "grad_norm": 1.927931591193398, "learning_rate": 1.2206540425601653e-06, "loss": 0.1011, "step": 2153 }, { "epoch": 0.4901023890784983, "grad_norm": 1.23851496034413, "learning_rate": 1.2206269787389374e-06, "loss": 0.0811, "step": 2154 }, { "epoch": 0.49032992036405004, "grad_norm": 2.31064364212512, "learning_rate": 1.220599902744185e-06, "loss": 0.1341, "step": 2155 }, { "epoch": 0.4905574516496018, "grad_norm": 1.7800269493495693, "learning_rate": 1.220572814576461e-06, "loss": 0.0611, "step": 2156 }, { "epoch": 0.4907849829351536, "grad_norm": 1.4294481100641636, "learning_rate": 1.2205457142363197e-06, "loss": 0.1242, "step": 2157 }, { "epoch": 0.49101251422070535, "grad_norm": 3.32236728544913, "learning_rate": 1.2205186017243146e-06, "loss": 0.1439, "step": 2158 }, { "epoch": 0.4912400455062571, "grad_norm": 2.1161347164300737, "learning_rate": 1.2204914770409999e-06, "loss": 0.1312, "step": 2159 }, { "epoch": 0.49146757679180886, "grad_norm": 1.6760298562325886, "learning_rate": 1.22046434018693e-06, "loss": 0.1367, "step": 2160 }, { "epoch": 0.4916951080773606, "grad_norm": 2.5053031123422844, "learning_rate": 1.2204371911626597e-06, "loss": 0.1004, "step": 2161 }, { "epoch": 0.4919226393629124, "grad_norm": 1.8628349503031747, "learning_rate": 1.2204100299687436e-06, "loss": 0.2272, "step": 2162 }, { "epoch": 0.49215017064846417, "grad_norm": 2.9254446518246207, "learning_rate": 1.2203828566057368e-06, "loss": 0.1306, "step": 2163 }, { "epoch": 0.4923777019340159, "grad_norm": 2.156502914233145, "learning_rate": 1.2203556710741948e-06, "loss": 0.1165, "step": 2164 }, { "epoch": 0.4926052332195677, "grad_norm": 1.765376175489098, "learning_rate": 1.2203284733746734e-06, "loss": 0.1386, "step": 2165 }, { "epoch": 0.4928327645051195, "grad_norm": 2.680620034477845, "learning_rate": 1.2203012635077283e-06, "loss": 0.1641, "step": 2166 }, { "epoch": 0.49306029579067123, "grad_norm": 2.096581132566952, "learning_rate": 1.2202740414739153e-06, "loss": 0.1464, "step": 2167 }, { "epoch": 0.493287827076223, "grad_norm": 2.330371824149322, "learning_rate": 1.2202468072737914e-06, "loss": 0.0977, "step": 2168 }, { "epoch": 0.49351535836177474, "grad_norm": 2.736555613980781, "learning_rate": 1.2202195609079128e-06, "loss": 0.1212, "step": 2169 }, { "epoch": 0.4937428896473265, "grad_norm": 1.370889091477706, "learning_rate": 1.2201923023768365e-06, "loss": 0.1147, "step": 2170 }, { "epoch": 0.4939704209328783, "grad_norm": 1.9931953231084023, "learning_rate": 1.2201650316811194e-06, "loss": 0.1843, "step": 2171 }, { "epoch": 0.49419795221843005, "grad_norm": 1.9873229116599542, "learning_rate": 1.2201377488213195e-06, "loss": 0.2833, "step": 2172 }, { "epoch": 0.4944254835039818, "grad_norm": 2.766859520035512, "learning_rate": 1.2201104537979934e-06, "loss": 0.1065, "step": 2173 }, { "epoch": 0.49465301478953355, "grad_norm": 2.8257426123215947, "learning_rate": 1.2200831466117e-06, "loss": 0.171, "step": 2174 }, { "epoch": 0.4948805460750853, "grad_norm": 2.416755735243659, "learning_rate": 1.2200558272629967e-06, "loss": 0.1049, "step": 2175 }, { "epoch": 0.4951080773606371, "grad_norm": 3.847603230089162, "learning_rate": 1.2200284957524421e-06, "loss": 0.1543, "step": 2176 }, { "epoch": 0.49533560864618886, "grad_norm": 1.808070223158287, "learning_rate": 1.2200011520805947e-06, "loss": 0.1589, "step": 2177 }, { "epoch": 0.4955631399317406, "grad_norm": 1.4634936792045132, "learning_rate": 1.2199737962480136e-06, "loss": 0.1005, "step": 2178 }, { "epoch": 0.49579067121729237, "grad_norm": 2.0415979620186433, "learning_rate": 1.2199464282552576e-06, "loss": 0.1504, "step": 2179 }, { "epoch": 0.4960182025028441, "grad_norm": 1.8340897113272197, "learning_rate": 1.2199190481028864e-06, "loss": 0.1168, "step": 2180 }, { "epoch": 0.4962457337883959, "grad_norm": 1.9694309627394886, "learning_rate": 1.2198916557914592e-06, "loss": 0.0702, "step": 2181 }, { "epoch": 0.4964732650739477, "grad_norm": 1.757184086126526, "learning_rate": 1.2198642513215362e-06, "loss": 0.0998, "step": 2182 }, { "epoch": 0.49670079635949943, "grad_norm": 1.2539776130317066, "learning_rate": 1.2198368346936774e-06, "loss": 0.0843, "step": 2183 }, { "epoch": 0.4969283276450512, "grad_norm": 1.3188419596574201, "learning_rate": 1.219809405908443e-06, "loss": 0.0788, "step": 2184 }, { "epoch": 0.49715585893060293, "grad_norm": 1.523980855726603, "learning_rate": 1.2197819649663934e-06, "loss": 0.1011, "step": 2185 }, { "epoch": 0.49738339021615474, "grad_norm": 2.409527037495328, "learning_rate": 1.21975451186809e-06, "loss": 0.0966, "step": 2186 }, { "epoch": 0.4976109215017065, "grad_norm": 1.3037564823590608, "learning_rate": 1.2197270466140936e-06, "loss": 0.1047, "step": 2187 }, { "epoch": 0.49783845278725825, "grad_norm": 1.789706258984112, "learning_rate": 1.2196995692049656e-06, "loss": 0.072, "step": 2188 }, { "epoch": 0.49806598407281, "grad_norm": 2.4890313700185382, "learning_rate": 1.2196720796412675e-06, "loss": 0.1822, "step": 2189 }, { "epoch": 0.49829351535836175, "grad_norm": 3.43016997911828, "learning_rate": 1.219644577923561e-06, "loss": 0.1781, "step": 2190 }, { "epoch": 0.49852104664391356, "grad_norm": 1.6871311149806307, "learning_rate": 1.2196170640524087e-06, "loss": 0.1616, "step": 2191 }, { "epoch": 0.4987485779294653, "grad_norm": 2.2297408965863132, "learning_rate": 1.2195895380283725e-06, "loss": 0.2451, "step": 2192 }, { "epoch": 0.49897610921501706, "grad_norm": 1.389848256901662, "learning_rate": 1.219561999852015e-06, "loss": 0.0917, "step": 2193 }, { "epoch": 0.4992036405005688, "grad_norm": 3.041396119145658, "learning_rate": 1.2195344495238993e-06, "loss": 0.1329, "step": 2194 }, { "epoch": 0.49943117178612056, "grad_norm": 2.216904135088197, "learning_rate": 1.2195068870445878e-06, "loss": 0.1071, "step": 2195 }, { "epoch": 0.49965870307167237, "grad_norm": 1.9688477353283649, "learning_rate": 1.2194793124146447e-06, "loss": 0.0971, "step": 2196 }, { "epoch": 0.4998862343572241, "grad_norm": 2.164052002018189, "learning_rate": 1.2194517256346333e-06, "loss": 0.1602, "step": 2197 }, { "epoch": 0.5001137656427759, "grad_norm": 1.388280113458421, "learning_rate": 1.2194241267051172e-06, "loss": 0.0848, "step": 2198 }, { "epoch": 0.5003412969283276, "grad_norm": 1.9472805252357173, "learning_rate": 1.2193965156266605e-06, "loss": 0.1418, "step": 2199 }, { "epoch": 0.5005688282138794, "grad_norm": 4.465127702734097, "learning_rate": 1.2193688923998277e-06, "loss": 0.1968, "step": 2200 }, { "epoch": 0.5007963594994311, "grad_norm": 2.8204878994002147, "learning_rate": 1.219341257025183e-06, "loss": 0.1636, "step": 2201 }, { "epoch": 0.5010238907849829, "grad_norm": 4.7787213981946834, "learning_rate": 1.2193136095032918e-06, "loss": 0.1761, "step": 2202 }, { "epoch": 0.5012514220705347, "grad_norm": 2.2115998556881116, "learning_rate": 1.2192859498347189e-06, "loss": 0.1736, "step": 2203 }, { "epoch": 0.5014789533560865, "grad_norm": 2.3947307414083907, "learning_rate": 1.2192582780200293e-06, "loss": 0.1562, "step": 2204 }, { "epoch": 0.5017064846416383, "grad_norm": 1.137850645365097, "learning_rate": 1.219230594059789e-06, "loss": 0.1245, "step": 2205 }, { "epoch": 0.50193401592719, "grad_norm": 1.3050370676179703, "learning_rate": 1.2192028979545637e-06, "loss": 0.144, "step": 2206 }, { "epoch": 0.5021615472127418, "grad_norm": 1.5689422022860104, "learning_rate": 1.2191751897049192e-06, "loss": 0.1109, "step": 2207 }, { "epoch": 0.5023890784982935, "grad_norm": 1.1140260533548134, "learning_rate": 1.2191474693114223e-06, "loss": 0.0729, "step": 2208 }, { "epoch": 0.5026166097838453, "grad_norm": 3.1668244036808404, "learning_rate": 1.2191197367746389e-06, "loss": 0.1086, "step": 2209 }, { "epoch": 0.502844141069397, "grad_norm": 1.2112158809642222, "learning_rate": 1.2190919920951363e-06, "loss": 0.0925, "step": 2210 }, { "epoch": 0.5030716723549488, "grad_norm": 3.140103243583768, "learning_rate": 1.2190642352734814e-06, "loss": 0.1309, "step": 2211 }, { "epoch": 0.5032992036405005, "grad_norm": 2.20383956504207, "learning_rate": 1.2190364663102417e-06, "loss": 0.1261, "step": 2212 }, { "epoch": 0.5035267349260524, "grad_norm": 4.70556372527374, "learning_rate": 1.2190086852059844e-06, "loss": 0.1457, "step": 2213 }, { "epoch": 0.5037542662116041, "grad_norm": 1.5904720691715128, "learning_rate": 1.2189808919612773e-06, "loss": 0.117, "step": 2214 }, { "epoch": 0.5039817974971559, "grad_norm": 4.614372733382792, "learning_rate": 1.2189530865766888e-06, "loss": 0.2221, "step": 2215 }, { "epoch": 0.5042093287827076, "grad_norm": 2.1481536506375143, "learning_rate": 1.218925269052787e-06, "loss": 0.1181, "step": 2216 }, { "epoch": 0.5044368600682594, "grad_norm": 3.0881852694485157, "learning_rate": 1.2188974393901404e-06, "loss": 0.1824, "step": 2217 }, { "epoch": 0.5046643913538111, "grad_norm": 2.38294561984735, "learning_rate": 1.2188695975893177e-06, "loss": 0.1518, "step": 2218 }, { "epoch": 0.5048919226393629, "grad_norm": 2.1925852844845783, "learning_rate": 1.218841743650888e-06, "loss": 0.172, "step": 2219 }, { "epoch": 0.5051194539249146, "grad_norm": 1.3374825429070463, "learning_rate": 1.2188138775754207e-06, "loss": 0.0952, "step": 2220 }, { "epoch": 0.5053469852104664, "grad_norm": 1.7224866619431942, "learning_rate": 1.2187859993634854e-06, "loss": 0.1261, "step": 2221 }, { "epoch": 0.5055745164960183, "grad_norm": 1.4650603873711627, "learning_rate": 1.2187581090156518e-06, "loss": 0.0962, "step": 2222 }, { "epoch": 0.50580204778157, "grad_norm": 2.2148606700249736, "learning_rate": 1.2187302065324896e-06, "loss": 0.2173, "step": 2223 }, { "epoch": 0.5060295790671218, "grad_norm": 2.3828933955331864, "learning_rate": 1.2187022919145695e-06, "loss": 0.133, "step": 2224 }, { "epoch": 0.5062571103526735, "grad_norm": 2.529903147299925, "learning_rate": 1.2186743651624617e-06, "loss": 0.1873, "step": 2225 }, { "epoch": 0.5064846416382253, "grad_norm": 1.0288567052421398, "learning_rate": 1.2186464262767372e-06, "loss": 0.1036, "step": 2226 }, { "epoch": 0.506712172923777, "grad_norm": 2.7669101294284904, "learning_rate": 1.2186184752579671e-06, "loss": 0.1161, "step": 2227 }, { "epoch": 0.5069397042093288, "grad_norm": 2.613686872517249, "learning_rate": 1.2185905121067223e-06, "loss": 0.1654, "step": 2228 }, { "epoch": 0.5071672354948805, "grad_norm": 1.5815125330195172, "learning_rate": 1.218562536823575e-06, "loss": 0.161, "step": 2229 }, { "epoch": 0.5073947667804323, "grad_norm": 2.5849262983185803, "learning_rate": 1.2185345494090959e-06, "loss": 0.1458, "step": 2230 }, { "epoch": 0.507622298065984, "grad_norm": 1.7848106830254082, "learning_rate": 1.218506549863858e-06, "loss": 0.0774, "step": 2231 }, { "epoch": 0.5078498293515359, "grad_norm": 2.0041648260692617, "learning_rate": 1.2184785381884332e-06, "loss": 0.1407, "step": 2232 }, { "epoch": 0.5080773606370876, "grad_norm": 2.6691932199960813, "learning_rate": 1.218450514383394e-06, "loss": 0.2101, "step": 2233 }, { "epoch": 0.5083048919226394, "grad_norm": 0.8618265034013133, "learning_rate": 1.2184224784493127e-06, "loss": 0.1081, "step": 2234 }, { "epoch": 0.5085324232081911, "grad_norm": 2.7387414496667972, "learning_rate": 1.218394430386763e-06, "loss": 0.2138, "step": 2235 }, { "epoch": 0.5087599544937429, "grad_norm": 3.077864568174803, "learning_rate": 1.2183663701963181e-06, "loss": 0.1218, "step": 2236 }, { "epoch": 0.5089874857792946, "grad_norm": 2.689017818647376, "learning_rate": 1.218338297878551e-06, "loss": 0.1825, "step": 2237 }, { "epoch": 0.5092150170648464, "grad_norm": 3.296215386410284, "learning_rate": 1.2183102134340361e-06, "loss": 0.1954, "step": 2238 }, { "epoch": 0.5094425483503981, "grad_norm": 3.577016938326506, "learning_rate": 1.2182821168633466e-06, "loss": 0.1965, "step": 2239 }, { "epoch": 0.5096700796359499, "grad_norm": 1.4427654273235515, "learning_rate": 1.2182540081670574e-06, "loss": 0.1356, "step": 2240 }, { "epoch": 0.5098976109215017, "grad_norm": 2.246195129100446, "learning_rate": 1.2182258873457428e-06, "loss": 0.2082, "step": 2241 }, { "epoch": 0.5101251422070535, "grad_norm": 1.8974029135655925, "learning_rate": 1.2181977543999776e-06, "loss": 0.1416, "step": 2242 }, { "epoch": 0.5103526734926053, "grad_norm": 1.1656107173765236, "learning_rate": 1.2181696093303363e-06, "loss": 0.0992, "step": 2243 }, { "epoch": 0.510580204778157, "grad_norm": 1.7060747898678588, "learning_rate": 1.218141452137395e-06, "loss": 0.1836, "step": 2244 }, { "epoch": 0.5108077360637088, "grad_norm": 2.7425212256939204, "learning_rate": 1.2181132828217284e-06, "loss": 0.1172, "step": 2245 }, { "epoch": 0.5110352673492605, "grad_norm": 3.4984114514747735, "learning_rate": 1.2180851013839127e-06, "loss": 0.1448, "step": 2246 }, { "epoch": 0.5112627986348123, "grad_norm": 1.0541301873031321, "learning_rate": 1.2180569078245236e-06, "loss": 0.1137, "step": 2247 }, { "epoch": 0.511490329920364, "grad_norm": 1.9754045421213982, "learning_rate": 1.2180287021441372e-06, "loss": 0.1268, "step": 2248 }, { "epoch": 0.5117178612059158, "grad_norm": 2.2348913132386135, "learning_rate": 1.2180004843433305e-06, "loss": 0.151, "step": 2249 }, { "epoch": 0.5119453924914675, "grad_norm": 2.69280556200464, "learning_rate": 1.21797225442268e-06, "loss": 0.1318, "step": 2250 }, { "epoch": 0.5121729237770194, "grad_norm": 2.8889424540900004, "learning_rate": 1.2179440123827625e-06, "loss": 0.2037, "step": 2251 }, { "epoch": 0.5124004550625711, "grad_norm": 1.7014653966715638, "learning_rate": 1.2179157582241554e-06, "loss": 0.1011, "step": 2252 }, { "epoch": 0.5126279863481229, "grad_norm": 1.2440635472766928, "learning_rate": 1.2178874919474359e-06, "loss": 0.0976, "step": 2253 }, { "epoch": 0.5128555176336747, "grad_norm": 1.9265304355947193, "learning_rate": 1.217859213553182e-06, "loss": 0.1251, "step": 2254 }, { "epoch": 0.5130830489192264, "grad_norm": 2.692342674859063, "learning_rate": 1.2178309230419714e-06, "loss": 0.1062, "step": 2255 }, { "epoch": 0.5133105802047782, "grad_norm": 2.851487861587013, "learning_rate": 1.2178026204143827e-06, "loss": 0.1802, "step": 2256 }, { "epoch": 0.5135381114903299, "grad_norm": 2.593347894957166, "learning_rate": 1.217774305670994e-06, "loss": 0.1785, "step": 2257 }, { "epoch": 0.5137656427758817, "grad_norm": 3.0650056170459696, "learning_rate": 1.2177459788123841e-06, "loss": 0.134, "step": 2258 }, { "epoch": 0.5139931740614334, "grad_norm": 1.4532346128286195, "learning_rate": 1.217717639839132e-06, "loss": 0.1381, "step": 2259 }, { "epoch": 0.5142207053469852, "grad_norm": 1.7176834033996482, "learning_rate": 1.2176892887518166e-06, "loss": 0.1169, "step": 2260 }, { "epoch": 0.514448236632537, "grad_norm": 1.7341224196467013, "learning_rate": 1.2176609255510176e-06, "loss": 0.1231, "step": 2261 }, { "epoch": 0.5146757679180888, "grad_norm": 1.629200246930976, "learning_rate": 1.217632550237315e-06, "loss": 0.0887, "step": 2262 }, { "epoch": 0.5149032992036405, "grad_norm": 1.30225190150621, "learning_rate": 1.2176041628112884e-06, "loss": 0.1014, "step": 2263 }, { "epoch": 0.5151308304891923, "grad_norm": 2.250042339940742, "learning_rate": 1.217575763273518e-06, "loss": 0.1716, "step": 2264 }, { "epoch": 0.515358361774744, "grad_norm": 4.277366750571248, "learning_rate": 1.217547351624584e-06, "loss": 0.1202, "step": 2265 }, { "epoch": 0.5155858930602958, "grad_norm": 2.1130660104324037, "learning_rate": 1.2175189278650677e-06, "loss": 0.1429, "step": 2266 }, { "epoch": 0.5158134243458475, "grad_norm": 1.897460974800647, "learning_rate": 1.2174904919955493e-06, "loss": 0.1326, "step": 2267 }, { "epoch": 0.5160409556313993, "grad_norm": 1.55299514699498, "learning_rate": 1.2174620440166105e-06, "loss": 0.1039, "step": 2268 }, { "epoch": 0.516268486916951, "grad_norm": 1.4624259135108133, "learning_rate": 1.2174335839288326e-06, "loss": 0.1051, "step": 2269 }, { "epoch": 0.5164960182025028, "grad_norm": 2.4152486054409628, "learning_rate": 1.2174051117327972e-06, "loss": 0.1334, "step": 2270 }, { "epoch": 0.5167235494880547, "grad_norm": 2.7224350669895383, "learning_rate": 1.2173766274290864e-06, "loss": 0.1522, "step": 2271 }, { "epoch": 0.5169510807736064, "grad_norm": 2.6898213271514364, "learning_rate": 1.217348131018282e-06, "loss": 0.1178, "step": 2272 }, { "epoch": 0.5171786120591582, "grad_norm": 2.17692457415729, "learning_rate": 1.2173196225009666e-06, "loss": 0.1005, "step": 2273 }, { "epoch": 0.5174061433447099, "grad_norm": 2.183356804193101, "learning_rate": 1.217291101877723e-06, "loss": 0.1492, "step": 2274 }, { "epoch": 0.5176336746302617, "grad_norm": 1.8408045884703919, "learning_rate": 1.2172625691491342e-06, "loss": 0.1157, "step": 2275 }, { "epoch": 0.5178612059158134, "grad_norm": 1.182594251283049, "learning_rate": 1.217234024315783e-06, "loss": 0.0909, "step": 2276 }, { "epoch": 0.5180887372013652, "grad_norm": 2.3264197436803267, "learning_rate": 1.2172054673782527e-06, "loss": 0.1377, "step": 2277 }, { "epoch": 0.5183162684869169, "grad_norm": 1.8746855825776054, "learning_rate": 1.2171768983371276e-06, "loss": 0.1614, "step": 2278 }, { "epoch": 0.5185437997724687, "grad_norm": 2.681521889311292, "learning_rate": 1.217148317192991e-06, "loss": 0.1334, "step": 2279 }, { "epoch": 0.5187713310580204, "grad_norm": 1.4510905197833242, "learning_rate": 1.2171197239464274e-06, "loss": 0.0984, "step": 2280 }, { "epoch": 0.5189988623435723, "grad_norm": 2.7722645937153088, "learning_rate": 1.217091118598021e-06, "loss": 0.1025, "step": 2281 }, { "epoch": 0.519226393629124, "grad_norm": 2.018163494797303, "learning_rate": 1.2170625011483565e-06, "loss": 0.115, "step": 2282 }, { "epoch": 0.5194539249146758, "grad_norm": 3.7374281896722232, "learning_rate": 1.2170338715980187e-06, "loss": 0.1673, "step": 2283 }, { "epoch": 0.5196814562002275, "grad_norm": 2.8429408206378035, "learning_rate": 1.217005229947593e-06, "loss": 0.1083, "step": 2284 }, { "epoch": 0.5199089874857793, "grad_norm": 1.7024003746323926, "learning_rate": 1.2169765761976646e-06, "loss": 0.1382, "step": 2285 }, { "epoch": 0.520136518771331, "grad_norm": 2.1767008349124892, "learning_rate": 1.216947910348819e-06, "loss": 0.1101, "step": 2286 }, { "epoch": 0.5203640500568828, "grad_norm": 2.9882776274660254, "learning_rate": 1.2169192324016423e-06, "loss": 0.1401, "step": 2287 }, { "epoch": 0.5205915813424346, "grad_norm": 1.8462104806775126, "learning_rate": 1.2168905423567205e-06, "loss": 0.2272, "step": 2288 }, { "epoch": 0.5208191126279863, "grad_norm": 3.1696493697631962, "learning_rate": 1.21686184021464e-06, "loss": 0.296, "step": 2289 }, { "epoch": 0.5210466439135382, "grad_norm": 2.0242733158944315, "learning_rate": 1.2168331259759875e-06, "loss": 0.1394, "step": 2290 }, { "epoch": 0.5212741751990899, "grad_norm": 3.175915917997839, "learning_rate": 1.2168043996413497e-06, "loss": 0.1532, "step": 2291 }, { "epoch": 0.5215017064846417, "grad_norm": 1.8835065784179783, "learning_rate": 1.216775661211314e-06, "loss": 0.1239, "step": 2292 }, { "epoch": 0.5217292377701934, "grad_norm": 1.9703941833763796, "learning_rate": 1.2167469106864673e-06, "loss": 0.218, "step": 2293 }, { "epoch": 0.5219567690557452, "grad_norm": 1.7379669142562126, "learning_rate": 1.2167181480673977e-06, "loss": 0.138, "step": 2294 }, { "epoch": 0.5221843003412969, "grad_norm": 1.0392802621173225, "learning_rate": 1.2166893733546927e-06, "loss": 0.1262, "step": 2295 }, { "epoch": 0.5224118316268487, "grad_norm": 1.8936366842951171, "learning_rate": 1.2166605865489406e-06, "loss": 0.2228, "step": 2296 }, { "epoch": 0.5226393629124004, "grad_norm": 1.6981025004152621, "learning_rate": 1.2166317876507296e-06, "loss": 0.1228, "step": 2297 }, { "epoch": 0.5228668941979522, "grad_norm": 2.1909663135538695, "learning_rate": 1.2166029766606486e-06, "loss": 0.1409, "step": 2298 }, { "epoch": 0.5230944254835039, "grad_norm": 2.1158950054230634, "learning_rate": 1.2165741535792861e-06, "loss": 0.1606, "step": 2299 }, { "epoch": 0.5233219567690558, "grad_norm": 1.4135780574595918, "learning_rate": 1.2165453184072312e-06, "loss": 0.1169, "step": 2300 }, { "epoch": 0.5235494880546075, "grad_norm": 2.1057935187163737, "learning_rate": 1.2165164711450735e-06, "loss": 0.2336, "step": 2301 }, { "epoch": 0.5237770193401593, "grad_norm": 2.248987100684698, "learning_rate": 1.2164876117934024e-06, "loss": 0.1074, "step": 2302 }, { "epoch": 0.524004550625711, "grad_norm": 3.3409227566362385, "learning_rate": 1.2164587403528078e-06, "loss": 0.1466, "step": 2303 }, { "epoch": 0.5242320819112628, "grad_norm": 2.662759790566142, "learning_rate": 1.2164298568238797e-06, "loss": 0.2104, "step": 2304 }, { "epoch": 0.5244596131968146, "grad_norm": 1.830035008905779, "learning_rate": 1.2164009612072085e-06, "loss": 0.1283, "step": 2305 }, { "epoch": 0.5246871444823663, "grad_norm": 1.8990428054871098, "learning_rate": 1.2163720535033845e-06, "loss": 0.1127, "step": 2306 }, { "epoch": 0.5249146757679181, "grad_norm": 2.9544210428909, "learning_rate": 1.2163431337129993e-06, "loss": 0.131, "step": 2307 }, { "epoch": 0.5251422070534698, "grad_norm": 1.5069043409936649, "learning_rate": 1.216314201836643e-06, "loss": 0.1278, "step": 2308 }, { "epoch": 0.5253697383390216, "grad_norm": 3.795761311329223, "learning_rate": 1.2162852578749076e-06, "loss": 0.1419, "step": 2309 }, { "epoch": 0.5255972696245734, "grad_norm": 2.967399207541332, "learning_rate": 1.2162563018283843e-06, "loss": 0.1838, "step": 2310 }, { "epoch": 0.5258248009101252, "grad_norm": 1.1243093423898234, "learning_rate": 1.216227333697665e-06, "loss": 0.0939, "step": 2311 }, { "epoch": 0.5260523321956769, "grad_norm": 1.6963647749955448, "learning_rate": 1.216198353483342e-06, "loss": 0.0794, "step": 2312 }, { "epoch": 0.5262798634812287, "grad_norm": 2.0805603023570023, "learning_rate": 1.2161693611860072e-06, "loss": 0.1097, "step": 2313 }, { "epoch": 0.5265073947667804, "grad_norm": 1.7107342546351574, "learning_rate": 1.2161403568062533e-06, "loss": 0.0994, "step": 2314 }, { "epoch": 0.5267349260523322, "grad_norm": 1.9862157408748995, "learning_rate": 1.2161113403446733e-06, "loss": 0.1259, "step": 2315 }, { "epoch": 0.5269624573378839, "grad_norm": 2.482118779617471, "learning_rate": 1.21608231180186e-06, "loss": 0.1643, "step": 2316 }, { "epoch": 0.5271899886234357, "grad_norm": 2.0726950491499423, "learning_rate": 1.2160532711784066e-06, "loss": 0.1502, "step": 2317 }, { "epoch": 0.5274175199089874, "grad_norm": 1.4883744563296912, "learning_rate": 1.2160242184749069e-06, "loss": 0.1251, "step": 2318 }, { "epoch": 0.5276450511945392, "grad_norm": 2.304698494036364, "learning_rate": 1.2159951536919547e-06, "loss": 0.1392, "step": 2319 }, { "epoch": 0.5278725824800911, "grad_norm": 3.8449144926935035, "learning_rate": 1.2159660768301438e-06, "loss": 0.1403, "step": 2320 }, { "epoch": 0.5281001137656428, "grad_norm": 2.9094583253548225, "learning_rate": 1.2159369878900687e-06, "loss": 0.129, "step": 2321 }, { "epoch": 0.5283276450511946, "grad_norm": 4.3378705361683, "learning_rate": 1.2159078868723238e-06, "loss": 0.2085, "step": 2322 }, { "epoch": 0.5285551763367463, "grad_norm": 1.7024973102380594, "learning_rate": 1.2158787737775037e-06, "loss": 0.1317, "step": 2323 }, { "epoch": 0.5287827076222981, "grad_norm": 1.3655076255928897, "learning_rate": 1.2158496486062039e-06, "loss": 0.1327, "step": 2324 }, { "epoch": 0.5290102389078498, "grad_norm": 2.8976980348396433, "learning_rate": 1.215820511359019e-06, "loss": 0.1634, "step": 2325 }, { "epoch": 0.5292377701934016, "grad_norm": 3.3802109886132867, "learning_rate": 1.215791362036545e-06, "loss": 0.1762, "step": 2326 }, { "epoch": 0.5294653014789533, "grad_norm": 1.8540359020863921, "learning_rate": 1.2157622006393777e-06, "loss": 0.1649, "step": 2327 }, { "epoch": 0.5296928327645051, "grad_norm": 2.841457854773911, "learning_rate": 1.2157330271681129e-06, "loss": 0.1474, "step": 2328 }, { "epoch": 0.5299203640500569, "grad_norm": 4.1952377999594, "learning_rate": 1.215703841623347e-06, "loss": 0.1585, "step": 2329 }, { "epoch": 0.5301478953356087, "grad_norm": 1.4873684025860323, "learning_rate": 1.2156746440056762e-06, "loss": 0.1165, "step": 2330 }, { "epoch": 0.5303754266211604, "grad_norm": 2.5751493361989657, "learning_rate": 1.2156454343156976e-06, "loss": 0.1522, "step": 2331 }, { "epoch": 0.5306029579067122, "grad_norm": 1.3229989088243255, "learning_rate": 1.2156162125540078e-06, "loss": 0.061, "step": 2332 }, { "epoch": 0.5308304891922639, "grad_norm": 1.0015280999927394, "learning_rate": 1.2155869787212046e-06, "loss": 0.0875, "step": 2333 }, { "epoch": 0.5310580204778157, "grad_norm": 1.7236628455878646, "learning_rate": 1.215557732817885e-06, "loss": 0.1357, "step": 2334 }, { "epoch": 0.5312855517633674, "grad_norm": 2.143419078889929, "learning_rate": 1.2155284748446469e-06, "loss": 0.1297, "step": 2335 }, { "epoch": 0.5315130830489192, "grad_norm": 1.6815351249254196, "learning_rate": 1.2154992048020882e-06, "loss": 0.1414, "step": 2336 }, { "epoch": 0.531740614334471, "grad_norm": 2.133575319364267, "learning_rate": 1.2154699226908072e-06, "loss": 0.1321, "step": 2337 }, { "epoch": 0.5319681456200227, "grad_norm": 1.139669924038715, "learning_rate": 1.2154406285114025e-06, "loss": 0.0903, "step": 2338 }, { "epoch": 0.5321956769055746, "grad_norm": 1.7837908668506595, "learning_rate": 1.2154113222644727e-06, "loss": 0.1191, "step": 2339 }, { "epoch": 0.5324232081911263, "grad_norm": 4.103642831661849, "learning_rate": 1.2153820039506167e-06, "loss": 0.1628, "step": 2340 }, { "epoch": 0.5326507394766781, "grad_norm": 1.6530276808213893, "learning_rate": 1.2153526735704337e-06, "loss": 0.1123, "step": 2341 }, { "epoch": 0.5328782707622298, "grad_norm": 1.6519117615379917, "learning_rate": 1.2153233311245234e-06, "loss": 0.1547, "step": 2342 }, { "epoch": 0.5331058020477816, "grad_norm": 1.8534786765494804, "learning_rate": 1.2152939766134852e-06, "loss": 0.1322, "step": 2343 }, { "epoch": 0.5333333333333333, "grad_norm": 2.0466229249542747, "learning_rate": 1.2152646100379193e-06, "loss": 0.1735, "step": 2344 }, { "epoch": 0.5335608646188851, "grad_norm": 1.7038078681513562, "learning_rate": 1.2152352313984257e-06, "loss": 0.0974, "step": 2345 }, { "epoch": 0.5337883959044368, "grad_norm": 2.924087638349447, "learning_rate": 1.2152058406956049e-06, "loss": 0.1973, "step": 2346 }, { "epoch": 0.5340159271899886, "grad_norm": 2.7275068217190723, "learning_rate": 1.2151764379300578e-06, "loss": 0.1237, "step": 2347 }, { "epoch": 0.5342434584755403, "grad_norm": 1.424984124254766, "learning_rate": 1.2151470231023851e-06, "loss": 0.1238, "step": 2348 }, { "epoch": 0.5344709897610922, "grad_norm": 1.5436354239233119, "learning_rate": 1.2151175962131881e-06, "loss": 0.1482, "step": 2349 }, { "epoch": 0.534698521046644, "grad_norm": 1.3973484809650978, "learning_rate": 1.215088157263068e-06, "loss": 0.08, "step": 2350 }, { "epoch": 0.5349260523321957, "grad_norm": 1.0011406880829632, "learning_rate": 1.2150587062526267e-06, "loss": 0.0743, "step": 2351 }, { "epoch": 0.5351535836177475, "grad_norm": 1.681422253336755, "learning_rate": 1.2150292431824663e-06, "loss": 0.2121, "step": 2352 }, { "epoch": 0.5353811149032992, "grad_norm": 2.071137301181157, "learning_rate": 1.2149997680531886e-06, "loss": 0.1329, "step": 2353 }, { "epoch": 0.535608646188851, "grad_norm": 2.7731157561747932, "learning_rate": 1.214970280865396e-06, "loss": 0.1483, "step": 2354 }, { "epoch": 0.5358361774744027, "grad_norm": 1.825127983213737, "learning_rate": 1.2149407816196917e-06, "loss": 0.2009, "step": 2355 }, { "epoch": 0.5360637087599545, "grad_norm": 1.5271969230637406, "learning_rate": 1.214911270316678e-06, "loss": 0.109, "step": 2356 }, { "epoch": 0.5362912400455062, "grad_norm": 1.1546891312653111, "learning_rate": 1.2148817469569584e-06, "loss": 0.152, "step": 2357 }, { "epoch": 0.5365187713310581, "grad_norm": 1.201446827839472, "learning_rate": 1.214852211541136e-06, "loss": 0.0874, "step": 2358 }, { "epoch": 0.5367463026166098, "grad_norm": 1.5029157511372113, "learning_rate": 1.2148226640698148e-06, "loss": 0.0655, "step": 2359 }, { "epoch": 0.5369738339021616, "grad_norm": 1.3717547829576855, "learning_rate": 1.2147931045435988e-06, "loss": 0.1699, "step": 2360 }, { "epoch": 0.5372013651877133, "grad_norm": 1.3834832471577652, "learning_rate": 1.2147635329630916e-06, "loss": 0.1904, "step": 2361 }, { "epoch": 0.5374288964732651, "grad_norm": 2.35690597522568, "learning_rate": 1.214733949328898e-06, "loss": 0.1102, "step": 2362 }, { "epoch": 0.5376564277588168, "grad_norm": 2.0463590798211033, "learning_rate": 1.2147043536416226e-06, "loss": 0.1773, "step": 2363 }, { "epoch": 0.5378839590443686, "grad_norm": 2.5222590318625384, "learning_rate": 1.21467474590187e-06, "loss": 0.2526, "step": 2364 }, { "epoch": 0.5381114903299203, "grad_norm": 3.296081575537043, "learning_rate": 1.2146451261102458e-06, "loss": 0.1592, "step": 2365 }, { "epoch": 0.5383390216154721, "grad_norm": 2.6706214296804336, "learning_rate": 1.2146154942673548e-06, "loss": 0.1196, "step": 2366 }, { "epoch": 0.5385665529010238, "grad_norm": 2.9249686559098795, "learning_rate": 1.2145858503738032e-06, "loss": 0.1877, "step": 2367 }, { "epoch": 0.5387940841865757, "grad_norm": 2.2953281999149593, "learning_rate": 1.2145561944301963e-06, "loss": 0.1972, "step": 2368 }, { "epoch": 0.5390216154721275, "grad_norm": 1.4636027746143558, "learning_rate": 1.2145265264371406e-06, "loss": 0.162, "step": 2369 }, { "epoch": 0.5392491467576792, "grad_norm": 2.161526587594214, "learning_rate": 1.2144968463952425e-06, "loss": 0.0907, "step": 2370 }, { "epoch": 0.539476678043231, "grad_norm": 2.5292055598654875, "learning_rate": 1.2144671543051085e-06, "loss": 0.1697, "step": 2371 }, { "epoch": 0.5397042093287827, "grad_norm": 1.129436740546767, "learning_rate": 1.2144374501673454e-06, "loss": 0.0512, "step": 2372 }, { "epoch": 0.5399317406143345, "grad_norm": 3.143413485445767, "learning_rate": 1.2144077339825603e-06, "loss": 0.2778, "step": 2373 }, { "epoch": 0.5401592718998862, "grad_norm": 1.9332739586254322, "learning_rate": 1.2143780057513605e-06, "loss": 0.1697, "step": 2374 }, { "epoch": 0.540386803185438, "grad_norm": 2.328675689936947, "learning_rate": 1.2143482654743535e-06, "loss": 0.1321, "step": 2375 }, { "epoch": 0.5406143344709897, "grad_norm": 1.2157576056753703, "learning_rate": 1.2143185131521475e-06, "loss": 0.1111, "step": 2376 }, { "epoch": 0.5408418657565415, "grad_norm": 2.407893236147383, "learning_rate": 1.2142887487853503e-06, "loss": 0.1563, "step": 2377 }, { "epoch": 0.5410693970420933, "grad_norm": 2.9189820741426376, "learning_rate": 1.2142589723745705e-06, "loss": 0.1164, "step": 2378 }, { "epoch": 0.5412969283276451, "grad_norm": 2.396152562102295, "learning_rate": 1.2142291839204163e-06, "loss": 0.1382, "step": 2379 }, { "epoch": 0.5415244596131968, "grad_norm": 1.2309543853665252, "learning_rate": 1.2141993834234967e-06, "loss": 0.1599, "step": 2380 }, { "epoch": 0.5417519908987486, "grad_norm": 1.4815415846583502, "learning_rate": 1.2141695708844209e-06, "loss": 0.0808, "step": 2381 }, { "epoch": 0.5419795221843003, "grad_norm": 1.7440274898378374, "learning_rate": 1.214139746303798e-06, "loss": 0.1357, "step": 2382 }, { "epoch": 0.5422070534698521, "grad_norm": 2.501198944799206, "learning_rate": 1.2141099096822376e-06, "loss": 0.1619, "step": 2383 }, { "epoch": 0.5424345847554038, "grad_norm": 1.830304404422833, "learning_rate": 1.2140800610203497e-06, "loss": 0.225, "step": 2384 }, { "epoch": 0.5426621160409556, "grad_norm": 1.997345142772124, "learning_rate": 1.214050200318744e-06, "loss": 0.1129, "step": 2385 }, { "epoch": 0.5428896473265074, "grad_norm": 2.6541224516196724, "learning_rate": 1.214020327578031e-06, "loss": 0.1106, "step": 2386 }, { "epoch": 0.5431171786120591, "grad_norm": 1.3841215073352757, "learning_rate": 1.2139904427988213e-06, "loss": 0.0714, "step": 2387 }, { "epoch": 0.543344709897611, "grad_norm": 1.3495168470574121, "learning_rate": 1.2139605459817259e-06, "loss": 0.1468, "step": 2388 }, { "epoch": 0.5435722411831627, "grad_norm": 1.2198752931302614, "learning_rate": 1.2139306371273552e-06, "loss": 0.1431, "step": 2389 }, { "epoch": 0.5437997724687145, "grad_norm": 1.685309275084401, "learning_rate": 1.213900716236321e-06, "loss": 0.0955, "step": 2390 }, { "epoch": 0.5440273037542662, "grad_norm": 1.9310383949097503, "learning_rate": 1.2138707833092345e-06, "loss": 0.1583, "step": 2391 }, { "epoch": 0.544254835039818, "grad_norm": 1.3819179331884706, "learning_rate": 1.213840838346708e-06, "loss": 0.145, "step": 2392 }, { "epoch": 0.5444823663253697, "grad_norm": 1.9093745108266076, "learning_rate": 1.213810881349353e-06, "loss": 0.1082, "step": 2393 }, { "epoch": 0.5447098976109215, "grad_norm": 4.561592933421953, "learning_rate": 1.213780912317782e-06, "loss": 0.1692, "step": 2394 }, { "epoch": 0.5449374288964732, "grad_norm": 2.0605709034236868, "learning_rate": 1.2137509312526074e-06, "loss": 0.1049, "step": 2395 }, { "epoch": 0.545164960182025, "grad_norm": 1.4601179535219537, "learning_rate": 1.213720938154442e-06, "loss": 0.1286, "step": 2396 }, { "epoch": 0.5453924914675768, "grad_norm": 2.3642573322951894, "learning_rate": 1.2136909330238988e-06, "loss": 0.1955, "step": 2397 }, { "epoch": 0.5456200227531286, "grad_norm": 2.2512568505689403, "learning_rate": 1.213660915861591e-06, "loss": 0.2434, "step": 2398 }, { "epoch": 0.5458475540386803, "grad_norm": 1.992478458605465, "learning_rate": 1.2136308866681323e-06, "loss": 0.1372, "step": 2399 }, { "epoch": 0.5460750853242321, "grad_norm": 1.8111222939771605, "learning_rate": 1.2136008454441363e-06, "loss": 0.197, "step": 2400 }, { "epoch": 0.5463026166097839, "grad_norm": 1.3777489796507147, "learning_rate": 1.213570792190217e-06, "loss": 0.0908, "step": 2401 }, { "epoch": 0.5465301478953356, "grad_norm": 2.6063902777425105, "learning_rate": 1.2135407269069885e-06, "loss": 0.1692, "step": 2402 }, { "epoch": 0.5467576791808874, "grad_norm": 1.9011415061442813, "learning_rate": 1.2135106495950655e-06, "loss": 0.0622, "step": 2403 }, { "epoch": 0.5469852104664391, "grad_norm": 2.3440439149150287, "learning_rate": 1.2134805602550625e-06, "loss": 0.1539, "step": 2404 }, { "epoch": 0.5472127417519909, "grad_norm": 2.50409667066478, "learning_rate": 1.2134504588875948e-06, "loss": 0.1633, "step": 2405 }, { "epoch": 0.5474402730375426, "grad_norm": 3.3789456844433285, "learning_rate": 1.2134203454932772e-06, "loss": 0.2344, "step": 2406 }, { "epoch": 0.5476678043230945, "grad_norm": 2.1006436694907333, "learning_rate": 1.2133902200727256e-06, "loss": 0.1003, "step": 2407 }, { "epoch": 0.5478953356086462, "grad_norm": 2.1646662277721638, "learning_rate": 1.2133600826265555e-06, "loss": 0.1144, "step": 2408 }, { "epoch": 0.548122866894198, "grad_norm": 2.48456294214119, "learning_rate": 1.2133299331553826e-06, "loss": 0.0843, "step": 2409 }, { "epoch": 0.5483503981797497, "grad_norm": 2.445609950491129, "learning_rate": 1.2132997716598236e-06, "loss": 0.1332, "step": 2410 }, { "epoch": 0.5485779294653015, "grad_norm": 1.607891388562304, "learning_rate": 1.2132695981404943e-06, "loss": 0.0809, "step": 2411 }, { "epoch": 0.5488054607508532, "grad_norm": 3.0954255254232175, "learning_rate": 1.2132394125980122e-06, "loss": 0.1231, "step": 2412 }, { "epoch": 0.549032992036405, "grad_norm": 1.6018022170636927, "learning_rate": 1.2132092150329936e-06, "loss": 0.1073, "step": 2413 }, { "epoch": 0.5492605233219567, "grad_norm": 3.405787206112886, "learning_rate": 1.213179005446056e-06, "loss": 0.1619, "step": 2414 }, { "epoch": 0.5494880546075085, "grad_norm": 1.6685361812270334, "learning_rate": 1.2131487838378167e-06, "loss": 0.0928, "step": 2415 }, { "epoch": 0.5497155858930602, "grad_norm": 2.2102317570381405, "learning_rate": 1.2131185502088932e-06, "loss": 0.0894, "step": 2416 }, { "epoch": 0.5499431171786121, "grad_norm": 2.796381970818353, "learning_rate": 1.213088304559904e-06, "loss": 0.1034, "step": 2417 }, { "epoch": 0.5501706484641639, "grad_norm": 2.7540797302904996, "learning_rate": 1.2130580468914665e-06, "loss": 0.1926, "step": 2418 }, { "epoch": 0.5503981797497156, "grad_norm": 2.1184433609657303, "learning_rate": 1.2130277772041999e-06, "loss": 0.1385, "step": 2419 }, { "epoch": 0.5506257110352674, "grad_norm": 1.5929408368756812, "learning_rate": 1.2129974954987222e-06, "loss": 0.0825, "step": 2420 }, { "epoch": 0.5508532423208191, "grad_norm": 1.7635977427083063, "learning_rate": 1.2129672017756524e-06, "loss": 0.1236, "step": 2421 }, { "epoch": 0.5510807736063709, "grad_norm": 1.71615894112007, "learning_rate": 1.2129368960356102e-06, "loss": 0.1382, "step": 2422 }, { "epoch": 0.5513083048919226, "grad_norm": 1.937244032383735, "learning_rate": 1.2129065782792142e-06, "loss": 0.1291, "step": 2423 }, { "epoch": 0.5515358361774744, "grad_norm": 3.1352304902387385, "learning_rate": 1.2128762485070848e-06, "loss": 0.1544, "step": 2424 }, { "epoch": 0.5517633674630261, "grad_norm": 2.1008217911515046, "learning_rate": 1.2128459067198414e-06, "loss": 0.1314, "step": 2425 }, { "epoch": 0.5519908987485779, "grad_norm": 4.964728468745166, "learning_rate": 1.2128155529181042e-06, "loss": 0.1358, "step": 2426 }, { "epoch": 0.5522184300341297, "grad_norm": 2.8349612253194088, "learning_rate": 1.2127851871024937e-06, "loss": 0.2205, "step": 2427 }, { "epoch": 0.5524459613196815, "grad_norm": 2.389556505433841, "learning_rate": 1.2127548092736305e-06, "loss": 0.1562, "step": 2428 }, { "epoch": 0.5526734926052332, "grad_norm": 1.9336117163609754, "learning_rate": 1.2127244194321353e-06, "loss": 0.1665, "step": 2429 }, { "epoch": 0.552901023890785, "grad_norm": 2.3714953779733223, "learning_rate": 1.2126940175786294e-06, "loss": 0.1747, "step": 2430 }, { "epoch": 0.5531285551763367, "grad_norm": 1.6953148543099945, "learning_rate": 1.212663603713734e-06, "loss": 0.0945, "step": 2431 }, { "epoch": 0.5533560864618885, "grad_norm": 1.737943346359041, "learning_rate": 1.212633177838071e-06, "loss": 0.1451, "step": 2432 }, { "epoch": 0.5535836177474402, "grad_norm": 2.211570479238446, "learning_rate": 1.2126027399522617e-06, "loss": 0.1064, "step": 2433 }, { "epoch": 0.553811149032992, "grad_norm": 1.7857339852544778, "learning_rate": 1.2125722900569288e-06, "loss": 0.1806, "step": 2434 }, { "epoch": 0.5540386803185438, "grad_norm": 3.0559649821603245, "learning_rate": 1.2125418281526944e-06, "loss": 0.2878, "step": 2435 }, { "epoch": 0.5542662116040956, "grad_norm": 3.234604579214076, "learning_rate": 1.212511354240181e-06, "loss": 0.1531, "step": 2436 }, { "epoch": 0.5544937428896474, "grad_norm": 2.0773489813309967, "learning_rate": 1.2124808683200113e-06, "loss": 0.1326, "step": 2437 }, { "epoch": 0.5547212741751991, "grad_norm": 1.2340445485090055, "learning_rate": 1.2124503703928088e-06, "loss": 0.1116, "step": 2438 }, { "epoch": 0.5549488054607509, "grad_norm": 3.5232591527626824, "learning_rate": 1.2124198604591965e-06, "loss": 0.1686, "step": 2439 }, { "epoch": 0.5551763367463026, "grad_norm": 2.8726622730449503, "learning_rate": 1.2123893385197982e-06, "loss": 0.1791, "step": 2440 }, { "epoch": 0.5554038680318544, "grad_norm": 2.091853211246227, "learning_rate": 1.2123588045752373e-06, "loss": 0.1481, "step": 2441 }, { "epoch": 0.5556313993174061, "grad_norm": 2.3690335770551294, "learning_rate": 1.2123282586261384e-06, "loss": 0.1708, "step": 2442 }, { "epoch": 0.5558589306029579, "grad_norm": 2.233061824072559, "learning_rate": 1.2122977006731256e-06, "loss": 0.1571, "step": 2443 }, { "epoch": 0.5560864618885096, "grad_norm": 1.9900055676960977, "learning_rate": 1.2122671307168232e-06, "loss": 0.0985, "step": 2444 }, { "epoch": 0.5563139931740614, "grad_norm": 1.669304123397253, "learning_rate": 1.212236548757856e-06, "loss": 0.2418, "step": 2445 }, { "epoch": 0.5565415244596132, "grad_norm": 2.0839688820404247, "learning_rate": 1.2122059547968496e-06, "loss": 0.11, "step": 2446 }, { "epoch": 0.556769055745165, "grad_norm": 2.3322979495915543, "learning_rate": 1.2121753488344286e-06, "loss": 0.1758, "step": 2447 }, { "epoch": 0.5569965870307167, "grad_norm": 3.079512271260833, "learning_rate": 1.212144730871219e-06, "loss": 0.1408, "step": 2448 }, { "epoch": 0.5572241183162685, "grad_norm": 3.057705857554608, "learning_rate": 1.2121141009078462e-06, "loss": 0.1547, "step": 2449 }, { "epoch": 0.5574516496018203, "grad_norm": 2.754720342311703, "learning_rate": 1.2120834589449368e-06, "loss": 0.1227, "step": 2450 }, { "epoch": 0.557679180887372, "grad_norm": 2.0039807911619696, "learning_rate": 1.2120528049831165e-06, "loss": 0.1627, "step": 2451 }, { "epoch": 0.5579067121729238, "grad_norm": 2.152939979711796, "learning_rate": 1.212022139023012e-06, "loss": 0.224, "step": 2452 }, { "epoch": 0.5581342434584755, "grad_norm": 3.2411118966017964, "learning_rate": 1.21199146106525e-06, "loss": 0.1193, "step": 2453 }, { "epoch": 0.5583617747440273, "grad_norm": 1.5945262516296992, "learning_rate": 1.2119607711104574e-06, "loss": 0.1496, "step": 2454 }, { "epoch": 0.558589306029579, "grad_norm": 1.5388366931624589, "learning_rate": 1.211930069159262e-06, "loss": 0.1171, "step": 2455 }, { "epoch": 0.5588168373151309, "grad_norm": 1.4040745534326453, "learning_rate": 1.2118993552122907e-06, "loss": 0.13, "step": 2456 }, { "epoch": 0.5590443686006826, "grad_norm": 1.4683917259528867, "learning_rate": 1.2118686292701715e-06, "loss": 0.0933, "step": 2457 }, { "epoch": 0.5592718998862344, "grad_norm": 2.3779110713148492, "learning_rate": 1.211837891333532e-06, "loss": 0.131, "step": 2458 }, { "epoch": 0.5594994311717861, "grad_norm": 1.9444812735417465, "learning_rate": 1.211807141403001e-06, "loss": 0.197, "step": 2459 }, { "epoch": 0.5597269624573379, "grad_norm": 1.7785495979173072, "learning_rate": 1.211776379479207e-06, "loss": 0.0819, "step": 2460 }, { "epoch": 0.5599544937428896, "grad_norm": 2.8186861224308446, "learning_rate": 1.211745605562778e-06, "loss": 0.1185, "step": 2461 }, { "epoch": 0.5601820250284414, "grad_norm": 1.36323819201261, "learning_rate": 1.2117148196543436e-06, "loss": 0.1724, "step": 2462 }, { "epoch": 0.5604095563139931, "grad_norm": 4.03855027641924, "learning_rate": 1.2116840217545329e-06, "loss": 0.2283, "step": 2463 }, { "epoch": 0.5606370875995449, "grad_norm": 2.123596815339297, "learning_rate": 1.211653211863975e-06, "loss": 0.163, "step": 2464 }, { "epoch": 0.5608646188850968, "grad_norm": 2.591456710871039, "learning_rate": 1.2116223899833e-06, "loss": 0.1665, "step": 2465 }, { "epoch": 0.5610921501706485, "grad_norm": 2.277844579413468, "learning_rate": 1.2115915561131376e-06, "loss": 0.1107, "step": 2466 }, { "epoch": 0.5613196814562003, "grad_norm": 2.6316966240447526, "learning_rate": 1.211560710254118e-06, "loss": 0.1288, "step": 2467 }, { "epoch": 0.561547212741752, "grad_norm": 2.2537204656115493, "learning_rate": 1.211529852406872e-06, "loss": 0.1563, "step": 2468 }, { "epoch": 0.5617747440273038, "grad_norm": 2.1713954845528676, "learning_rate": 1.2114989825720298e-06, "loss": 0.1442, "step": 2469 }, { "epoch": 0.5620022753128555, "grad_norm": 2.693618014034486, "learning_rate": 1.2114681007502227e-06, "loss": 0.132, "step": 2470 }, { "epoch": 0.5622298065984073, "grad_norm": 1.342955875247325, "learning_rate": 1.2114372069420815e-06, "loss": 0.1661, "step": 2471 }, { "epoch": 0.562457337883959, "grad_norm": 2.295134838913565, "learning_rate": 1.2114063011482378e-06, "loss": 0.1068, "step": 2472 }, { "epoch": 0.5626848691695108, "grad_norm": 2.2269684435425594, "learning_rate": 1.2113753833693234e-06, "loss": 0.0884, "step": 2473 }, { "epoch": 0.5629124004550625, "grad_norm": 1.8251560760537473, "learning_rate": 1.2113444536059699e-06, "loss": 0.1569, "step": 2474 }, { "epoch": 0.5631399317406144, "grad_norm": 1.2373817468358461, "learning_rate": 1.2113135118588096e-06, "loss": 0.1253, "step": 2475 }, { "epoch": 0.5633674630261661, "grad_norm": 1.5518735019134444, "learning_rate": 1.2112825581284752e-06, "loss": 0.1655, "step": 2476 }, { "epoch": 0.5635949943117179, "grad_norm": 2.821927704907384, "learning_rate": 1.2112515924155987e-06, "loss": 0.1968, "step": 2477 }, { "epoch": 0.5638225255972696, "grad_norm": 2.6034604032683424, "learning_rate": 1.2112206147208134e-06, "loss": 0.1579, "step": 2478 }, { "epoch": 0.5640500568828214, "grad_norm": 3.005350448867513, "learning_rate": 1.2111896250447525e-06, "loss": 0.1824, "step": 2479 }, { "epoch": 0.5642775881683731, "grad_norm": 2.0319627346292846, "learning_rate": 1.211158623388049e-06, "loss": 0.1139, "step": 2480 }, { "epoch": 0.5645051194539249, "grad_norm": 1.6717615590204162, "learning_rate": 1.2111276097513369e-06, "loss": 0.1229, "step": 2481 }, { "epoch": 0.5647326507394766, "grad_norm": 1.0856506812146736, "learning_rate": 1.2110965841352498e-06, "loss": 0.1326, "step": 2482 }, { "epoch": 0.5649601820250284, "grad_norm": 2.1610866648004623, "learning_rate": 1.211065546540422e-06, "loss": 0.1317, "step": 2483 }, { "epoch": 0.5651877133105802, "grad_norm": 1.6835926280266125, "learning_rate": 1.2110344969674877e-06, "loss": 0.0876, "step": 2484 }, { "epoch": 0.565415244596132, "grad_norm": 1.8868431281124944, "learning_rate": 1.2110034354170816e-06, "loss": 0.2136, "step": 2485 }, { "epoch": 0.5656427758816838, "grad_norm": 1.2849513459232036, "learning_rate": 1.2109723618898383e-06, "loss": 0.1176, "step": 2486 }, { "epoch": 0.5658703071672355, "grad_norm": 2.225412733583598, "learning_rate": 1.2109412763863933e-06, "loss": 0.2349, "step": 2487 }, { "epoch": 0.5660978384527873, "grad_norm": 1.7394007182133215, "learning_rate": 1.2109101789073815e-06, "loss": 0.1889, "step": 2488 }, { "epoch": 0.566325369738339, "grad_norm": 1.0417756307418755, "learning_rate": 1.2108790694534389e-06, "loss": 0.1295, "step": 2489 }, { "epoch": 0.5665529010238908, "grad_norm": 2.8490023502098913, "learning_rate": 1.2108479480252011e-06, "loss": 0.2094, "step": 2490 }, { "epoch": 0.5667804323094425, "grad_norm": 1.4969528469131224, "learning_rate": 1.210816814623304e-06, "loss": 0.189, "step": 2491 }, { "epoch": 0.5670079635949943, "grad_norm": 2.272829112716015, "learning_rate": 1.2107856692483843e-06, "loss": 0.1269, "step": 2492 }, { "epoch": 0.567235494880546, "grad_norm": 2.108513421489345, "learning_rate": 1.2107545119010783e-06, "loss": 0.1345, "step": 2493 }, { "epoch": 0.5674630261660978, "grad_norm": 2.201970892175689, "learning_rate": 1.2107233425820229e-06, "loss": 0.225, "step": 2494 }, { "epoch": 0.5676905574516496, "grad_norm": 3.4046924059583548, "learning_rate": 1.2106921612918549e-06, "loss": 0.0863, "step": 2495 }, { "epoch": 0.5679180887372014, "grad_norm": 2.18740259779597, "learning_rate": 1.2106609680312117e-06, "loss": 0.1988, "step": 2496 }, { "epoch": 0.5681456200227532, "grad_norm": 1.5283955361497255, "learning_rate": 1.210629762800731e-06, "loss": 0.1219, "step": 2497 }, { "epoch": 0.5683731513083049, "grad_norm": 1.3678120343081321, "learning_rate": 1.2105985456010506e-06, "loss": 0.0836, "step": 2498 }, { "epoch": 0.5686006825938567, "grad_norm": 1.8601115221660813, "learning_rate": 1.2105673164328081e-06, "loss": 0.1723, "step": 2499 }, { "epoch": 0.5688282138794084, "grad_norm": 1.7842260253011806, "learning_rate": 1.2105360752966424e-06, "loss": 0.118, "step": 2500 }, { "epoch": 0.5690557451649602, "grad_norm": 2.221443756494453, "learning_rate": 1.2105048221931915e-06, "loss": 0.109, "step": 2501 }, { "epoch": 0.5692832764505119, "grad_norm": 1.961751419767975, "learning_rate": 1.2104735571230944e-06, "loss": 0.0913, "step": 2502 }, { "epoch": 0.5695108077360637, "grad_norm": 1.8181307632990853, "learning_rate": 1.21044228008699e-06, "loss": 0.1361, "step": 2503 }, { "epoch": 0.5697383390216155, "grad_norm": 1.9118720943978658, "learning_rate": 1.2104109910855176e-06, "loss": 0.1627, "step": 2504 }, { "epoch": 0.5699658703071673, "grad_norm": 1.5414220442454112, "learning_rate": 1.2103796901193166e-06, "loss": 0.1813, "step": 2505 }, { "epoch": 0.570193401592719, "grad_norm": 2.8373335682103336, "learning_rate": 1.210348377189027e-06, "loss": 0.1289, "step": 2506 }, { "epoch": 0.5704209328782708, "grad_norm": 3.083061389684008, "learning_rate": 1.2103170522952885e-06, "loss": 0.2029, "step": 2507 }, { "epoch": 0.5706484641638225, "grad_norm": 1.8935818147405985, "learning_rate": 1.2102857154387413e-06, "loss": 0.1268, "step": 2508 }, { "epoch": 0.5708759954493743, "grad_norm": 2.1706672948764685, "learning_rate": 1.210254366620026e-06, "loss": 0.146, "step": 2509 }, { "epoch": 0.571103526734926, "grad_norm": 1.779554233102803, "learning_rate": 1.2102230058397832e-06, "loss": 0.1588, "step": 2510 }, { "epoch": 0.5713310580204778, "grad_norm": 1.8784546833055895, "learning_rate": 1.210191633098654e-06, "loss": 0.1458, "step": 2511 }, { "epoch": 0.5715585893060295, "grad_norm": 1.8629674419414763, "learning_rate": 1.2101602483972797e-06, "loss": 0.1051, "step": 2512 }, { "epoch": 0.5717861205915813, "grad_norm": 2.262837973386215, "learning_rate": 1.2101288517363016e-06, "loss": 0.0856, "step": 2513 }, { "epoch": 0.5720136518771332, "grad_norm": 1.123397475538139, "learning_rate": 1.2100974431163614e-06, "loss": 0.0578, "step": 2514 }, { "epoch": 0.5722411831626849, "grad_norm": 2.735408428715252, "learning_rate": 1.2100660225381008e-06, "loss": 0.112, "step": 2515 }, { "epoch": 0.5724687144482367, "grad_norm": 1.5475871665351315, "learning_rate": 1.2100345900021624e-06, "loss": 0.1233, "step": 2516 }, { "epoch": 0.5726962457337884, "grad_norm": 2.598039263582099, "learning_rate": 1.2100031455091883e-06, "loss": 0.2105, "step": 2517 }, { "epoch": 0.5729237770193402, "grad_norm": 1.5283635000190452, "learning_rate": 1.2099716890598212e-06, "loss": 0.1105, "step": 2518 }, { "epoch": 0.5731513083048919, "grad_norm": 2.3919998558423092, "learning_rate": 1.2099402206547042e-06, "loss": 0.1701, "step": 2519 }, { "epoch": 0.5733788395904437, "grad_norm": 1.704386974569275, "learning_rate": 1.2099087402944805e-06, "loss": 0.1535, "step": 2520 }, { "epoch": 0.5736063708759954, "grad_norm": 2.1761169232651105, "learning_rate": 1.2098772479797933e-06, "loss": 0.0741, "step": 2521 }, { "epoch": 0.5738339021615472, "grad_norm": 2.2691369150088305, "learning_rate": 1.2098457437112862e-06, "loss": 0.1228, "step": 2522 }, { "epoch": 0.5740614334470989, "grad_norm": 3.3253296009015556, "learning_rate": 1.2098142274896033e-06, "loss": 0.257, "step": 2523 }, { "epoch": 0.5742889647326508, "grad_norm": 2.266880563348357, "learning_rate": 1.2097826993153886e-06, "loss": 0.1444, "step": 2524 }, { "epoch": 0.5745164960182025, "grad_norm": 3.1698627124068297, "learning_rate": 1.2097511591892863e-06, "loss": 0.1051, "step": 2525 }, { "epoch": 0.5747440273037543, "grad_norm": 2.8868768890738834, "learning_rate": 1.2097196071119415e-06, "loss": 0.0974, "step": 2526 }, { "epoch": 0.574971558589306, "grad_norm": 1.9086447497414667, "learning_rate": 1.2096880430839985e-06, "loss": 0.127, "step": 2527 }, { "epoch": 0.5751990898748578, "grad_norm": 1.9202653666719622, "learning_rate": 1.2096564671061031e-06, "loss": 0.1134, "step": 2528 }, { "epoch": 0.5754266211604095, "grad_norm": 1.8265247037292291, "learning_rate": 1.2096248791789e-06, "loss": 0.1667, "step": 2529 }, { "epoch": 0.5756541524459613, "grad_norm": 1.5150790673231787, "learning_rate": 1.2095932793030353e-06, "loss": 0.0952, "step": 2530 }, { "epoch": 0.575881683731513, "grad_norm": 1.960225707664033, "learning_rate": 1.2095616674791543e-06, "loss": 0.1164, "step": 2531 }, { "epoch": 0.5761092150170648, "grad_norm": 1.6828800349037405, "learning_rate": 1.2095300437079034e-06, "loss": 0.1587, "step": 2532 }, { "epoch": 0.5763367463026167, "grad_norm": 1.4207974667109202, "learning_rate": 1.2094984079899292e-06, "loss": 0.0687, "step": 2533 }, { "epoch": 0.5765642775881684, "grad_norm": 2.334850140119816, "learning_rate": 1.2094667603258779e-06, "loss": 0.1068, "step": 2534 }, { "epoch": 0.5767918088737202, "grad_norm": 4.35410919897498, "learning_rate": 1.2094351007163962e-06, "loss": 0.1886, "step": 2535 }, { "epoch": 0.5770193401592719, "grad_norm": 1.6200824503597024, "learning_rate": 1.2094034291621315e-06, "loss": 0.1223, "step": 2536 }, { "epoch": 0.5772468714448237, "grad_norm": 2.362865286588217, "learning_rate": 1.2093717456637311e-06, "loss": 0.1375, "step": 2537 }, { "epoch": 0.5774744027303754, "grad_norm": 2.308641909521365, "learning_rate": 1.2093400502218422e-06, "loss": 0.2275, "step": 2538 }, { "epoch": 0.5777019340159272, "grad_norm": 2.463338738431784, "learning_rate": 1.209308342837113e-06, "loss": 0.1802, "step": 2539 }, { "epoch": 0.5779294653014789, "grad_norm": 1.8971271189178798, "learning_rate": 1.2092766235101917e-06, "loss": 0.1305, "step": 2540 }, { "epoch": 0.5781569965870307, "grad_norm": 2.1589012912488044, "learning_rate": 1.2092448922417258e-06, "loss": 0.1804, "step": 2541 }, { "epoch": 0.5783845278725824, "grad_norm": 1.551935572792547, "learning_rate": 1.2092131490323644e-06, "loss": 0.1, "step": 2542 }, { "epoch": 0.5786120591581343, "grad_norm": 1.868154527807695, "learning_rate": 1.2091813938827563e-06, "loss": 0.1133, "step": 2543 }, { "epoch": 0.578839590443686, "grad_norm": 1.989931342962644, "learning_rate": 1.2091496267935502e-06, "loss": 0.163, "step": 2544 }, { "epoch": 0.5790671217292378, "grad_norm": 0.9701995130312155, "learning_rate": 1.2091178477653957e-06, "loss": 0.1338, "step": 2545 }, { "epoch": 0.5792946530147896, "grad_norm": 1.45039507553725, "learning_rate": 1.209086056798942e-06, "loss": 0.097, "step": 2546 }, { "epoch": 0.5795221843003413, "grad_norm": 1.955389923714404, "learning_rate": 1.2090542538948392e-06, "loss": 0.1777, "step": 2547 }, { "epoch": 0.579749715585893, "grad_norm": 1.4334415366285986, "learning_rate": 1.209022439053737e-06, "loss": 0.0969, "step": 2548 }, { "epoch": 0.5799772468714448, "grad_norm": 2.3655923077610312, "learning_rate": 1.2089906122762859e-06, "loss": 0.1219, "step": 2549 }, { "epoch": 0.5802047781569966, "grad_norm": 2.243979043940131, "learning_rate": 1.208958773563136e-06, "loss": 0.1543, "step": 2550 }, { "epoch": 0.5804323094425483, "grad_norm": 2.87195064677509, "learning_rate": 1.2089269229149383e-06, "loss": 0.2538, "step": 2551 }, { "epoch": 0.5806598407281001, "grad_norm": 1.6061469454663684, "learning_rate": 1.2088950603323438e-06, "loss": 0.1271, "step": 2552 }, { "epoch": 0.5808873720136519, "grad_norm": 2.7705649289893666, "learning_rate": 1.2088631858160033e-06, "loss": 0.1284, "step": 2553 }, { "epoch": 0.5811149032992037, "grad_norm": 2.2423139977954203, "learning_rate": 1.2088312993665689e-06, "loss": 0.1697, "step": 2554 }, { "epoch": 0.5813424345847554, "grad_norm": 1.5090404390691508, "learning_rate": 1.208799400984692e-06, "loss": 0.1707, "step": 2555 }, { "epoch": 0.5815699658703072, "grad_norm": 1.8137511001931925, "learning_rate": 1.2087674906710242e-06, "loss": 0.0793, "step": 2556 }, { "epoch": 0.5817974971558589, "grad_norm": 1.2084657511133172, "learning_rate": 1.2087355684262183e-06, "loss": 0.1209, "step": 2557 }, { "epoch": 0.5820250284414107, "grad_norm": 1.6868878164350085, "learning_rate": 1.2087036342509265e-06, "loss": 0.0798, "step": 2558 }, { "epoch": 0.5822525597269624, "grad_norm": 2.4820318013238345, "learning_rate": 1.208671688145801e-06, "loss": 0.1487, "step": 2559 }, { "epoch": 0.5824800910125142, "grad_norm": 1.956438161841264, "learning_rate": 1.2086397301114955e-06, "loss": 0.2435, "step": 2560 }, { "epoch": 0.5827076222980659, "grad_norm": 1.8160672855837017, "learning_rate": 1.2086077601486627e-06, "loss": 0.1238, "step": 2561 }, { "epoch": 0.5829351535836177, "grad_norm": 1.893090448329992, "learning_rate": 1.2085757782579562e-06, "loss": 0.2005, "step": 2562 }, { "epoch": 0.5831626848691696, "grad_norm": 2.6162511578342778, "learning_rate": 1.2085437844400293e-06, "loss": 0.1268, "step": 2563 }, { "epoch": 0.5833902161547213, "grad_norm": 1.4268922855878878, "learning_rate": 1.2085117786955366e-06, "loss": 0.1082, "step": 2564 }, { "epoch": 0.5836177474402731, "grad_norm": 1.7555654121905828, "learning_rate": 1.2084797610251314e-06, "loss": 0.1628, "step": 2565 }, { "epoch": 0.5838452787258248, "grad_norm": 2.7712208071139197, "learning_rate": 1.2084477314294688e-06, "loss": 0.1259, "step": 2566 }, { "epoch": 0.5840728100113766, "grad_norm": 2.2477692009821886, "learning_rate": 1.2084156899092028e-06, "loss": 0.1151, "step": 2567 }, { "epoch": 0.5843003412969283, "grad_norm": 1.4787985647224473, "learning_rate": 1.2083836364649888e-06, "loss": 0.1413, "step": 2568 }, { "epoch": 0.5845278725824801, "grad_norm": 2.6543995947788397, "learning_rate": 1.2083515710974813e-06, "loss": 0.1298, "step": 2569 }, { "epoch": 0.5847554038680318, "grad_norm": 2.211923597091862, "learning_rate": 1.2083194938073363e-06, "loss": 0.1284, "step": 2570 }, { "epoch": 0.5849829351535836, "grad_norm": 3.9781424289807363, "learning_rate": 1.2082874045952094e-06, "loss": 0.1472, "step": 2571 }, { "epoch": 0.5852104664391354, "grad_norm": 5.160608841853167, "learning_rate": 1.2082553034617559e-06, "loss": 0.1942, "step": 2572 }, { "epoch": 0.5854379977246872, "grad_norm": 2.160091359131514, "learning_rate": 1.2082231904076323e-06, "loss": 0.1089, "step": 2573 }, { "epoch": 0.5856655290102389, "grad_norm": 4.892526132201958, "learning_rate": 1.2081910654334948e-06, "loss": 0.2815, "step": 2574 }, { "epoch": 0.5858930602957907, "grad_norm": 1.5921622052362032, "learning_rate": 1.2081589285399998e-06, "loss": 0.0935, "step": 2575 }, { "epoch": 0.5861205915813424, "grad_norm": 1.9760107843414554, "learning_rate": 1.2081267797278043e-06, "loss": 0.1158, "step": 2576 }, { "epoch": 0.5863481228668942, "grad_norm": 1.402210261553332, "learning_rate": 1.2080946189975656e-06, "loss": 0.1351, "step": 2577 }, { "epoch": 0.586575654152446, "grad_norm": 2.437290370611529, "learning_rate": 1.2080624463499407e-06, "loss": 0.1348, "step": 2578 }, { "epoch": 0.5868031854379977, "grad_norm": 1.569823831308918, "learning_rate": 1.2080302617855874e-06, "loss": 0.2161, "step": 2579 }, { "epoch": 0.5870307167235495, "grad_norm": 2.0597197185846032, "learning_rate": 1.2079980653051629e-06, "loss": 0.0922, "step": 2580 }, { "epoch": 0.5872582480091012, "grad_norm": 1.7138455088041311, "learning_rate": 1.207965856909326e-06, "loss": 0.1029, "step": 2581 }, { "epoch": 0.5874857792946531, "grad_norm": 1.2897243188730496, "learning_rate": 1.2079336365987345e-06, "loss": 0.1481, "step": 2582 }, { "epoch": 0.5877133105802048, "grad_norm": 1.8906043234612928, "learning_rate": 1.2079014043740471e-06, "loss": 0.1316, "step": 2583 }, { "epoch": 0.5879408418657566, "grad_norm": 1.7419270898501555, "learning_rate": 1.2078691602359224e-06, "loss": 0.1193, "step": 2584 }, { "epoch": 0.5881683731513083, "grad_norm": 1.819163624808354, "learning_rate": 1.2078369041850197e-06, "loss": 0.1174, "step": 2585 }, { "epoch": 0.5883959044368601, "grad_norm": 1.877871761831476, "learning_rate": 1.207804636221998e-06, "loss": 0.1372, "step": 2586 }, { "epoch": 0.5886234357224118, "grad_norm": 2.0626590360093133, "learning_rate": 1.207772356347517e-06, "loss": 0.095, "step": 2587 }, { "epoch": 0.5888509670079636, "grad_norm": 2.456994619660604, "learning_rate": 1.2077400645622363e-06, "loss": 0.1224, "step": 2588 }, { "epoch": 0.5890784982935153, "grad_norm": 1.5356214520040972, "learning_rate": 1.207707760866816e-06, "loss": 0.1166, "step": 2589 }, { "epoch": 0.5893060295790671, "grad_norm": 5.018978969106035, "learning_rate": 1.207675445261916e-06, "loss": 0.1457, "step": 2590 }, { "epoch": 0.5895335608646188, "grad_norm": 2.359188809516013, "learning_rate": 1.207643117748197e-06, "loss": 0.2648, "step": 2591 }, { "epoch": 0.5897610921501707, "grad_norm": 2.4510393228560647, "learning_rate": 1.2076107783263199e-06, "loss": 0.1325, "step": 2592 }, { "epoch": 0.5899886234357224, "grad_norm": 1.934483134443095, "learning_rate": 1.2075784269969457e-06, "loss": 0.1164, "step": 2593 }, { "epoch": 0.5902161547212742, "grad_norm": 1.6195788093768093, "learning_rate": 1.2075460637607351e-06, "loss": 0.0678, "step": 2594 }, { "epoch": 0.590443686006826, "grad_norm": 1.7633862699028207, "learning_rate": 1.20751368861835e-06, "loss": 0.1427, "step": 2595 }, { "epoch": 0.5906712172923777, "grad_norm": 2.8589827615450503, "learning_rate": 1.2074813015704518e-06, "loss": 0.1232, "step": 2596 }, { "epoch": 0.5908987485779295, "grad_norm": 2.237662265223862, "learning_rate": 1.2074489026177024e-06, "loss": 0.1414, "step": 2597 }, { "epoch": 0.5911262798634812, "grad_norm": 1.6741078305495773, "learning_rate": 1.2074164917607644e-06, "loss": 0.1525, "step": 2598 }, { "epoch": 0.591353811149033, "grad_norm": 1.7511984201836153, "learning_rate": 1.2073840690003e-06, "loss": 0.1283, "step": 2599 }, { "epoch": 0.5915813424345847, "grad_norm": 0.9823739657729703, "learning_rate": 1.2073516343369717e-06, "loss": 0.1253, "step": 2600 }, { "epoch": 0.5918088737201365, "grad_norm": 1.426041876858941, "learning_rate": 1.2073191877714424e-06, "loss": 0.1472, "step": 2601 }, { "epoch": 0.5920364050056883, "grad_norm": 1.5467081035937693, "learning_rate": 1.2072867293043755e-06, "loss": 0.1129, "step": 2602 }, { "epoch": 0.5922639362912401, "grad_norm": 2.4176519282492372, "learning_rate": 1.2072542589364343e-06, "loss": 0.1302, "step": 2603 }, { "epoch": 0.5924914675767918, "grad_norm": 1.5431029441975226, "learning_rate": 1.2072217766682822e-06, "loss": 0.1259, "step": 2604 }, { "epoch": 0.5927189988623436, "grad_norm": 2.1679653478901075, "learning_rate": 1.2071892825005835e-06, "loss": 0.0746, "step": 2605 }, { "epoch": 0.5929465301478953, "grad_norm": 1.581497780473167, "learning_rate": 1.207156776434002e-06, "loss": 0.1668, "step": 2606 }, { "epoch": 0.5931740614334471, "grad_norm": 1.9548539073094118, "learning_rate": 1.2071242584692022e-06, "loss": 0.1719, "step": 2607 }, { "epoch": 0.5934015927189988, "grad_norm": 1.2461220647331583, "learning_rate": 1.2070917286068486e-06, "loss": 0.1041, "step": 2608 }, { "epoch": 0.5936291240045506, "grad_norm": 1.3095375732805556, "learning_rate": 1.2070591868476062e-06, "loss": 0.0823, "step": 2609 }, { "epoch": 0.5938566552901023, "grad_norm": 2.1017791753609347, "learning_rate": 1.20702663319214e-06, "loss": 0.098, "step": 2610 }, { "epoch": 0.5940841865756542, "grad_norm": 1.3808889393646218, "learning_rate": 1.2069940676411154e-06, "loss": 0.1266, "step": 2611 }, { "epoch": 0.594311717861206, "grad_norm": 1.7153990981225686, "learning_rate": 1.2069614901951978e-06, "loss": 0.0988, "step": 2612 }, { "epoch": 0.5945392491467577, "grad_norm": 2.130551381955189, "learning_rate": 1.2069289008550533e-06, "loss": 0.1193, "step": 2613 }, { "epoch": 0.5947667804323095, "grad_norm": 2.178000241862761, "learning_rate": 1.2068962996213476e-06, "loss": 0.125, "step": 2614 }, { "epoch": 0.5949943117178612, "grad_norm": 3.035579264083657, "learning_rate": 1.2068636864947475e-06, "loss": 0.1585, "step": 2615 }, { "epoch": 0.595221843003413, "grad_norm": 2.5260264361065476, "learning_rate": 1.206831061475919e-06, "loss": 0.1217, "step": 2616 }, { "epoch": 0.5954493742889647, "grad_norm": 1.949452015122984, "learning_rate": 1.2067984245655292e-06, "loss": 0.068, "step": 2617 }, { "epoch": 0.5956769055745165, "grad_norm": 1.911993378147549, "learning_rate": 1.2067657757642453e-06, "loss": 0.134, "step": 2618 }, { "epoch": 0.5959044368600682, "grad_norm": 1.6307557736669274, "learning_rate": 1.2067331150727343e-06, "loss": 0.1071, "step": 2619 }, { "epoch": 0.59613196814562, "grad_norm": 2.3641591554636165, "learning_rate": 1.206700442491664e-06, "loss": 0.1015, "step": 2620 }, { "epoch": 0.5963594994311718, "grad_norm": 3.0546256940872776, "learning_rate": 1.2066677580217018e-06, "loss": 0.1353, "step": 2621 }, { "epoch": 0.5965870307167236, "grad_norm": 2.0389636631107706, "learning_rate": 1.2066350616635159e-06, "loss": 0.0953, "step": 2622 }, { "epoch": 0.5968145620022753, "grad_norm": 3.626384851780054, "learning_rate": 1.2066023534177746e-06, "loss": 0.2982, "step": 2623 }, { "epoch": 0.5970420932878271, "grad_norm": 3.692303324562613, "learning_rate": 1.2065696332851463e-06, "loss": 0.1916, "step": 2624 }, { "epoch": 0.5972696245733788, "grad_norm": 1.9357626525567908, "learning_rate": 1.2065369012662999e-06, "loss": 0.1351, "step": 2625 }, { "epoch": 0.5974971558589306, "grad_norm": 1.690947384729753, "learning_rate": 1.206504157361904e-06, "loss": 0.1787, "step": 2626 }, { "epoch": 0.5977246871444823, "grad_norm": 1.5180922357697746, "learning_rate": 1.2064714015726283e-06, "loss": 0.1399, "step": 2627 }, { "epoch": 0.5979522184300341, "grad_norm": 1.9498622882163583, "learning_rate": 1.2064386338991423e-06, "loss": 0.1524, "step": 2628 }, { "epoch": 0.5981797497155859, "grad_norm": 1.5948413120183693, "learning_rate": 1.2064058543421152e-06, "loss": 0.127, "step": 2629 }, { "epoch": 0.5984072810011376, "grad_norm": 2.564122919945674, "learning_rate": 1.2063730629022173e-06, "loss": 0.1033, "step": 2630 }, { "epoch": 0.5986348122866895, "grad_norm": 3.1247451680105227, "learning_rate": 1.2063402595801187e-06, "loss": 0.1595, "step": 2631 }, { "epoch": 0.5988623435722412, "grad_norm": 1.1117218292823476, "learning_rate": 1.2063074443764897e-06, "loss": 0.1578, "step": 2632 }, { "epoch": 0.599089874857793, "grad_norm": 3.166944891339004, "learning_rate": 1.2062746172920014e-06, "loss": 0.2975, "step": 2633 }, { "epoch": 0.5993174061433447, "grad_norm": 3.3367189368333787, "learning_rate": 1.2062417783273246e-06, "loss": 0.155, "step": 2634 }, { "epoch": 0.5995449374288965, "grad_norm": 2.581770735788427, "learning_rate": 1.2062089274831299e-06, "loss": 0.14, "step": 2635 }, { "epoch": 0.5997724687144482, "grad_norm": 1.6918956017307545, "learning_rate": 1.2061760647600894e-06, "loss": 0.0787, "step": 2636 }, { "epoch": 0.6, "grad_norm": 2.4426051263794752, "learning_rate": 1.2061431901588747e-06, "loss": 0.1761, "step": 2637 }, { "epoch": 0.6002275312855517, "grad_norm": 1.5852858580417466, "learning_rate": 1.2061103036801573e-06, "loss": 0.0991, "step": 2638 }, { "epoch": 0.6004550625711035, "grad_norm": 2.9361978747865205, "learning_rate": 1.2060774053246096e-06, "loss": 0.1966, "step": 2639 }, { "epoch": 0.6006825938566553, "grad_norm": 1.1851482113806004, "learning_rate": 1.2060444950929038e-06, "loss": 0.1298, "step": 2640 }, { "epoch": 0.6009101251422071, "grad_norm": 1.5583103094664725, "learning_rate": 1.2060115729857128e-06, "loss": 0.1305, "step": 2641 }, { "epoch": 0.6011376564277588, "grad_norm": 2.0530153143627103, "learning_rate": 1.2059786390037093e-06, "loss": 0.1194, "step": 2642 }, { "epoch": 0.6013651877133106, "grad_norm": 1.3816354544320564, "learning_rate": 1.2059456931475663e-06, "loss": 0.0506, "step": 2643 }, { "epoch": 0.6015927189988624, "grad_norm": 1.8067957499098637, "learning_rate": 1.2059127354179573e-06, "loss": 0.1829, "step": 2644 }, { "epoch": 0.6018202502844141, "grad_norm": 2.2404814486972278, "learning_rate": 1.205879765815556e-06, "loss": 0.135, "step": 2645 }, { "epoch": 0.6020477815699659, "grad_norm": 2.0019027614965728, "learning_rate": 1.205846784341036e-06, "loss": 0.1412, "step": 2646 }, { "epoch": 0.6022753128555176, "grad_norm": 2.3427613212776186, "learning_rate": 1.2058137909950719e-06, "loss": 0.1094, "step": 2647 }, { "epoch": 0.6025028441410694, "grad_norm": 2.551361820189807, "learning_rate": 1.2057807857783371e-06, "loss": 0.1278, "step": 2648 }, { "epoch": 0.6027303754266211, "grad_norm": 2.4318258413096108, "learning_rate": 1.205747768691507e-06, "loss": 0.1063, "step": 2649 }, { "epoch": 0.602957906712173, "grad_norm": 1.5810203336032662, "learning_rate": 1.2057147397352559e-06, "loss": 0.1529, "step": 2650 }, { "epoch": 0.6031854379977247, "grad_norm": 1.5838470813817764, "learning_rate": 1.2056816989102591e-06, "loss": 0.1376, "step": 2651 }, { "epoch": 0.6034129692832765, "grad_norm": 3.9369996093181014, "learning_rate": 1.2056486462171918e-06, "loss": 0.2464, "step": 2652 }, { "epoch": 0.6036405005688282, "grad_norm": 1.7557943491318282, "learning_rate": 1.2056155816567297e-06, "loss": 0.1506, "step": 2653 }, { "epoch": 0.60386803185438, "grad_norm": 1.4902613853193347, "learning_rate": 1.2055825052295486e-06, "loss": 0.1356, "step": 2654 }, { "epoch": 0.6040955631399317, "grad_norm": 1.6515131979163473, "learning_rate": 1.205549416936324e-06, "loss": 0.1648, "step": 2655 }, { "epoch": 0.6043230944254835, "grad_norm": 1.4640203024864866, "learning_rate": 1.2055163167777328e-06, "loss": 0.1356, "step": 2656 }, { "epoch": 0.6045506257110352, "grad_norm": 3.266508891752542, "learning_rate": 1.2054832047544512e-06, "loss": 0.173, "step": 2657 }, { "epoch": 0.604778156996587, "grad_norm": 1.3631002432598898, "learning_rate": 1.205450080867156e-06, "loss": 0.1096, "step": 2658 }, { "epoch": 0.6050056882821387, "grad_norm": 2.242984662905767, "learning_rate": 1.205416945116524e-06, "loss": 0.1335, "step": 2659 }, { "epoch": 0.6052332195676906, "grad_norm": 2.3400648593117745, "learning_rate": 1.2053837975032328e-06, "loss": 0.0818, "step": 2660 }, { "epoch": 0.6054607508532424, "grad_norm": 2.283977245108534, "learning_rate": 1.2053506380279597e-06, "loss": 0.1264, "step": 2661 }, { "epoch": 0.6056882821387941, "grad_norm": 1.5198638190414868, "learning_rate": 1.2053174666913826e-06, "loss": 0.1303, "step": 2662 }, { "epoch": 0.6059158134243459, "grad_norm": 1.3638719142798548, "learning_rate": 1.2052842834941791e-06, "loss": 0.0996, "step": 2663 }, { "epoch": 0.6061433447098976, "grad_norm": 2.0164471068005305, "learning_rate": 1.2052510884370274e-06, "loss": 0.1338, "step": 2664 }, { "epoch": 0.6063708759954494, "grad_norm": 1.7349041320402774, "learning_rate": 1.2052178815206064e-06, "loss": 0.1023, "step": 2665 }, { "epoch": 0.6065984072810011, "grad_norm": 1.4876292213814133, "learning_rate": 1.2051846627455946e-06, "loss": 0.1226, "step": 2666 }, { "epoch": 0.6068259385665529, "grad_norm": 1.6427948767677636, "learning_rate": 1.2051514321126705e-06, "loss": 0.141, "step": 2667 }, { "epoch": 0.6070534698521046, "grad_norm": 2.0737972304672083, "learning_rate": 1.2051181896225139e-06, "loss": 0.1221, "step": 2668 }, { "epoch": 0.6072810011376564, "grad_norm": 1.9014861477421603, "learning_rate": 1.205084935275804e-06, "loss": 0.1461, "step": 2669 }, { "epoch": 0.6075085324232082, "grad_norm": 2.80889588671251, "learning_rate": 1.20505166907322e-06, "loss": 0.1772, "step": 2670 }, { "epoch": 0.60773606370876, "grad_norm": 1.5524251793071988, "learning_rate": 1.2050183910154425e-06, "loss": 0.1063, "step": 2671 }, { "epoch": 0.6079635949943117, "grad_norm": 2.282729570815144, "learning_rate": 1.2049851011031514e-06, "loss": 0.1548, "step": 2672 }, { "epoch": 0.6081911262798635, "grad_norm": 2.1806480444286747, "learning_rate": 1.2049517993370269e-06, "loss": 0.1622, "step": 2673 }, { "epoch": 0.6084186575654152, "grad_norm": 2.3188525896490906, "learning_rate": 1.2049184857177498e-06, "loss": 0.2115, "step": 2674 }, { "epoch": 0.608646188850967, "grad_norm": 1.700801852435792, "learning_rate": 1.204885160246001e-06, "loss": 0.2375, "step": 2675 }, { "epoch": 0.6088737201365187, "grad_norm": 3.07988581510915, "learning_rate": 1.2048518229224613e-06, "loss": 0.1452, "step": 2676 }, { "epoch": 0.6091012514220705, "grad_norm": 1.579825688333633, "learning_rate": 1.2048184737478124e-06, "loss": 0.1165, "step": 2677 }, { "epoch": 0.6093287827076223, "grad_norm": 1.7635258179947948, "learning_rate": 1.2047851127227358e-06, "loss": 0.1015, "step": 2678 }, { "epoch": 0.6095563139931741, "grad_norm": 1.9171716270170973, "learning_rate": 1.2047517398479135e-06, "loss": 0.1093, "step": 2679 }, { "epoch": 0.6097838452787259, "grad_norm": 1.6246200869035246, "learning_rate": 1.204718355124027e-06, "loss": 0.1477, "step": 2680 }, { "epoch": 0.6100113765642776, "grad_norm": 2.033819677297613, "learning_rate": 1.2046849585517595e-06, "loss": 0.1112, "step": 2681 }, { "epoch": 0.6102389078498294, "grad_norm": 1.5057733369483088, "learning_rate": 1.2046515501317927e-06, "loss": 0.1144, "step": 2682 }, { "epoch": 0.6104664391353811, "grad_norm": 1.9207619842965977, "learning_rate": 1.2046181298648101e-06, "loss": 0.165, "step": 2683 }, { "epoch": 0.6106939704209329, "grad_norm": 2.1068795511508536, "learning_rate": 1.2045846977514943e-06, "loss": 0.1119, "step": 2684 }, { "epoch": 0.6109215017064846, "grad_norm": 1.5765181731137923, "learning_rate": 1.2045512537925287e-06, "loss": 0.0938, "step": 2685 }, { "epoch": 0.6111490329920364, "grad_norm": 1.652132408051153, "learning_rate": 1.2045177979885969e-06, "loss": 0.1119, "step": 2686 }, { "epoch": 0.6113765642775881, "grad_norm": 1.9933401024918733, "learning_rate": 1.2044843303403827e-06, "loss": 0.1127, "step": 2687 }, { "epoch": 0.6116040955631399, "grad_norm": 1.2553964688013253, "learning_rate": 1.20445085084857e-06, "loss": 0.117, "step": 2688 }, { "epoch": 0.6118316268486917, "grad_norm": 1.9328161675026851, "learning_rate": 1.204417359513843e-06, "loss": 0.1055, "step": 2689 }, { "epoch": 0.6120591581342435, "grad_norm": 1.6135864750332363, "learning_rate": 1.2043838563368865e-06, "loss": 0.1311, "step": 2690 }, { "epoch": 0.6122866894197952, "grad_norm": 2.8366202101928386, "learning_rate": 1.204350341318385e-06, "loss": 0.1576, "step": 2691 }, { "epoch": 0.612514220705347, "grad_norm": 1.9115878659986092, "learning_rate": 1.2043168144590237e-06, "loss": 0.098, "step": 2692 }, { "epoch": 0.6127417519908988, "grad_norm": 1.6553419799458462, "learning_rate": 1.2042832757594875e-06, "loss": 0.1482, "step": 2693 }, { "epoch": 0.6129692832764505, "grad_norm": 1.3298128063922734, "learning_rate": 1.204249725220462e-06, "loss": 0.0801, "step": 2694 }, { "epoch": 0.6131968145620023, "grad_norm": 2.112063522190015, "learning_rate": 1.204216162842633e-06, "loss": 0.1534, "step": 2695 }, { "epoch": 0.613424345847554, "grad_norm": 1.9789341169381445, "learning_rate": 1.2041825886266866e-06, "loss": 0.0911, "step": 2696 }, { "epoch": 0.6136518771331058, "grad_norm": 2.121057060579873, "learning_rate": 1.2041490025733089e-06, "loss": 0.1465, "step": 2697 }, { "epoch": 0.6138794084186575, "grad_norm": 1.8588673717447004, "learning_rate": 1.2041154046831859e-06, "loss": 0.1035, "step": 2698 }, { "epoch": 0.6141069397042094, "grad_norm": 1.734446448883455, "learning_rate": 1.2040817949570046e-06, "loss": 0.1563, "step": 2699 }, { "epoch": 0.6143344709897611, "grad_norm": 1.9933118037524564, "learning_rate": 1.2040481733954523e-06, "loss": 0.1024, "step": 2700 }, { "epoch": 0.6145620022753129, "grad_norm": 3.17488856013429, "learning_rate": 1.2040145399992157e-06, "loss": 0.1226, "step": 2701 }, { "epoch": 0.6147895335608646, "grad_norm": 1.591059020243372, "learning_rate": 1.203980894768982e-06, "loss": 0.1195, "step": 2702 }, { "epoch": 0.6150170648464164, "grad_norm": 1.601828253256089, "learning_rate": 1.2039472377054395e-06, "loss": 0.1385, "step": 2703 }, { "epoch": 0.6152445961319681, "grad_norm": 3.476222784517199, "learning_rate": 1.2039135688092757e-06, "loss": 0.2026, "step": 2704 }, { "epoch": 0.6154721274175199, "grad_norm": 2.5679221489922814, "learning_rate": 1.203879888081179e-06, "loss": 0.1373, "step": 2705 }, { "epoch": 0.6156996587030716, "grad_norm": 2.1049982546040584, "learning_rate": 1.203846195521837e-06, "loss": 0.1353, "step": 2706 }, { "epoch": 0.6159271899886234, "grad_norm": 2.449412628808893, "learning_rate": 1.2038124911319393e-06, "loss": 0.136, "step": 2707 }, { "epoch": 0.6161547212741753, "grad_norm": 2.8316029320784857, "learning_rate": 1.2037787749121741e-06, "loss": 0.1887, "step": 2708 }, { "epoch": 0.616382252559727, "grad_norm": 2.0505324298226517, "learning_rate": 1.203745046863231e-06, "loss": 0.1824, "step": 2709 }, { "epoch": 0.6166097838452788, "grad_norm": 2.354091965640521, "learning_rate": 1.203711306985799e-06, "loss": 0.1024, "step": 2710 }, { "epoch": 0.6168373151308305, "grad_norm": 1.497422096890729, "learning_rate": 1.2036775552805674e-06, "loss": 0.1001, "step": 2711 }, { "epoch": 0.6170648464163823, "grad_norm": 1.6367535767277195, "learning_rate": 1.2036437917482267e-06, "loss": 0.1951, "step": 2712 }, { "epoch": 0.617292377701934, "grad_norm": 1.311702192960621, "learning_rate": 1.2036100163894665e-06, "loss": 0.0991, "step": 2713 }, { "epoch": 0.6175199089874858, "grad_norm": 1.1374509331150917, "learning_rate": 1.2035762292049772e-06, "loss": 0.1279, "step": 2714 }, { "epoch": 0.6177474402730375, "grad_norm": 2.0690667114321437, "learning_rate": 1.2035424301954496e-06, "loss": 0.1213, "step": 2715 }, { "epoch": 0.6179749715585893, "grad_norm": 1.6164688794833326, "learning_rate": 1.2035086193615743e-06, "loss": 0.0896, "step": 2716 }, { "epoch": 0.618202502844141, "grad_norm": 1.4130291059742248, "learning_rate": 1.203474796704042e-06, "loss": 0.1107, "step": 2717 }, { "epoch": 0.6184300341296929, "grad_norm": 2.317711426962595, "learning_rate": 1.2034409622235444e-06, "loss": 0.1119, "step": 2718 }, { "epoch": 0.6186575654152446, "grad_norm": 1.70184913449098, "learning_rate": 1.203407115920773e-06, "loss": 0.0904, "step": 2719 }, { "epoch": 0.6188850967007964, "grad_norm": 1.6503172233844747, "learning_rate": 1.2033732577964194e-06, "loss": 0.1266, "step": 2720 }, { "epoch": 0.6191126279863481, "grad_norm": 2.4366209863100523, "learning_rate": 1.2033393878511756e-06, "loss": 0.1257, "step": 2721 }, { "epoch": 0.6193401592718999, "grad_norm": 1.582050028709684, "learning_rate": 1.203305506085734e-06, "loss": 0.0962, "step": 2722 }, { "epoch": 0.6195676905574516, "grad_norm": 1.2773158456960036, "learning_rate": 1.2032716125007868e-06, "loss": 0.1288, "step": 2723 }, { "epoch": 0.6197952218430034, "grad_norm": 2.7972062325966607, "learning_rate": 1.2032377070970268e-06, "loss": 0.1169, "step": 2724 }, { "epoch": 0.6200227531285551, "grad_norm": 1.7121884786477586, "learning_rate": 1.2032037898751475e-06, "loss": 0.0864, "step": 2725 }, { "epoch": 0.6202502844141069, "grad_norm": 1.8870447056565103, "learning_rate": 1.2031698608358414e-06, "loss": 0.0722, "step": 2726 }, { "epoch": 0.6204778156996587, "grad_norm": 1.8796137296419013, "learning_rate": 1.2031359199798021e-06, "loss": 0.1111, "step": 2727 }, { "epoch": 0.6207053469852105, "grad_norm": 1.6206526861878807, "learning_rate": 1.2031019673077237e-06, "loss": 0.1443, "step": 2728 }, { "epoch": 0.6209328782707623, "grad_norm": 1.359835269149287, "learning_rate": 1.2030680028202995e-06, "loss": 0.1357, "step": 2729 }, { "epoch": 0.621160409556314, "grad_norm": 2.7444994804468434, "learning_rate": 1.2030340265182242e-06, "loss": 0.123, "step": 2730 }, { "epoch": 0.6213879408418658, "grad_norm": 1.601609743110609, "learning_rate": 1.2030000384021919e-06, "loss": 0.2195, "step": 2731 }, { "epoch": 0.6216154721274175, "grad_norm": 3.331047575038035, "learning_rate": 1.2029660384728974e-06, "loss": 0.1182, "step": 2732 }, { "epoch": 0.6218430034129693, "grad_norm": 2.934494529592981, "learning_rate": 1.2029320267310359e-06, "loss": 0.1967, "step": 2733 }, { "epoch": 0.622070534698521, "grad_norm": 3.0682381227751043, "learning_rate": 1.2028980031773018e-06, "loss": 0.1732, "step": 2734 }, { "epoch": 0.6222980659840728, "grad_norm": 1.8442015868919528, "learning_rate": 1.202863967812391e-06, "loss": 0.1799, "step": 2735 }, { "epoch": 0.6225255972696245, "grad_norm": 1.8593176813243222, "learning_rate": 1.2028299206369991e-06, "loss": 0.1908, "step": 2736 }, { "epoch": 0.6227531285551763, "grad_norm": 2.728968371383416, "learning_rate": 1.2027958616518218e-06, "loss": 0.152, "step": 2737 }, { "epoch": 0.6229806598407281, "grad_norm": 2.6619054180942214, "learning_rate": 1.2027617908575553e-06, "loss": 0.1727, "step": 2738 }, { "epoch": 0.6232081911262799, "grad_norm": 2.0414592133205303, "learning_rate": 1.2027277082548958e-06, "loss": 0.1424, "step": 2739 }, { "epoch": 0.6234357224118316, "grad_norm": 3.1585074562667583, "learning_rate": 1.20269361384454e-06, "loss": 0.1412, "step": 2740 }, { "epoch": 0.6236632536973834, "grad_norm": 2.520373823420793, "learning_rate": 1.2026595076271848e-06, "loss": 0.1181, "step": 2741 }, { "epoch": 0.6238907849829352, "grad_norm": 1.8119986570748203, "learning_rate": 1.2026253896035273e-06, "loss": 0.1567, "step": 2742 }, { "epoch": 0.6241183162684869, "grad_norm": 1.5487296455776507, "learning_rate": 1.2025912597742646e-06, "loss": 0.1557, "step": 2743 }, { "epoch": 0.6243458475540387, "grad_norm": 2.5231360246572656, "learning_rate": 1.2025571181400944e-06, "loss": 0.1992, "step": 2744 }, { "epoch": 0.6245733788395904, "grad_norm": 3.0625635018387585, "learning_rate": 1.2025229647017145e-06, "loss": 0.1716, "step": 2745 }, { "epoch": 0.6248009101251422, "grad_norm": 3.1177129975506657, "learning_rate": 1.2024887994598227e-06, "loss": 0.1792, "step": 2746 }, { "epoch": 0.625028441410694, "grad_norm": 2.1464563875652187, "learning_rate": 1.2024546224151176e-06, "loss": 0.0871, "step": 2747 }, { "epoch": 0.6252559726962458, "grad_norm": 2.0265120439546136, "learning_rate": 1.2024204335682977e-06, "loss": 0.1361, "step": 2748 }, { "epoch": 0.6254835039817975, "grad_norm": 3.7277830645236354, "learning_rate": 1.2023862329200613e-06, "loss": 0.1569, "step": 2749 }, { "epoch": 0.6257110352673493, "grad_norm": 1.4107955390021432, "learning_rate": 1.2023520204711078e-06, "loss": 0.0982, "step": 2750 }, { "epoch": 0.625938566552901, "grad_norm": 1.802956258585525, "learning_rate": 1.2023177962221366e-06, "loss": 0.202, "step": 2751 }, { "epoch": 0.6261660978384528, "grad_norm": 2.570407091125862, "learning_rate": 1.2022835601738467e-06, "loss": 0.1389, "step": 2752 }, { "epoch": 0.6263936291240045, "grad_norm": 1.9988049268762371, "learning_rate": 1.2022493123269383e-06, "loss": 0.1377, "step": 2753 }, { "epoch": 0.6266211604095563, "grad_norm": 2.191923581547263, "learning_rate": 1.202215052682111e-06, "loss": 0.2517, "step": 2754 }, { "epoch": 0.626848691695108, "grad_norm": 1.851526943619937, "learning_rate": 1.2021807812400652e-06, "loss": 0.1705, "step": 2755 }, { "epoch": 0.6270762229806598, "grad_norm": 1.6140773681120666, "learning_rate": 1.2021464980015014e-06, "loss": 0.1198, "step": 2756 }, { "epoch": 0.6273037542662117, "grad_norm": 1.5559727173563562, "learning_rate": 1.20211220296712e-06, "loss": 0.145, "step": 2757 }, { "epoch": 0.6275312855517634, "grad_norm": 2.8683941887361746, "learning_rate": 1.2020778961376223e-06, "loss": 0.0834, "step": 2758 }, { "epoch": 0.6277588168373152, "grad_norm": 2.155047837786128, "learning_rate": 1.202043577513709e-06, "loss": 0.1452, "step": 2759 }, { "epoch": 0.6279863481228669, "grad_norm": 2.433600771864086, "learning_rate": 1.202009247096082e-06, "loss": 0.1903, "step": 2760 }, { "epoch": 0.6282138794084187, "grad_norm": 1.9186109054985514, "learning_rate": 1.2019749048854426e-06, "loss": 0.1177, "step": 2761 }, { "epoch": 0.6284414106939704, "grad_norm": 1.109450269371729, "learning_rate": 1.2019405508824927e-06, "loss": 0.0703, "step": 2762 }, { "epoch": 0.6286689419795222, "grad_norm": 1.5591658788371794, "learning_rate": 1.201906185087935e-06, "loss": 0.101, "step": 2763 }, { "epoch": 0.6288964732650739, "grad_norm": 2.141050899035466, "learning_rate": 1.201871807502471e-06, "loss": 0.1387, "step": 2764 }, { "epoch": 0.6291240045506257, "grad_norm": 2.7514196386904746, "learning_rate": 1.2018374181268039e-06, "loss": 0.1553, "step": 2765 }, { "epoch": 0.6293515358361774, "grad_norm": 2.535759371521438, "learning_rate": 1.2018030169616363e-06, "loss": 0.1287, "step": 2766 }, { "epoch": 0.6295790671217293, "grad_norm": 2.770098292987776, "learning_rate": 1.2017686040076715e-06, "loss": 0.1689, "step": 2767 }, { "epoch": 0.629806598407281, "grad_norm": 1.5676839925835293, "learning_rate": 1.2017341792656129e-06, "loss": 0.1506, "step": 2768 }, { "epoch": 0.6300341296928328, "grad_norm": 2.568669185611603, "learning_rate": 1.2016997427361635e-06, "loss": 0.1183, "step": 2769 }, { "epoch": 0.6302616609783845, "grad_norm": 1.4579610759738018, "learning_rate": 1.201665294420028e-06, "loss": 0.1407, "step": 2770 }, { "epoch": 0.6304891922639363, "grad_norm": 2.951412169453364, "learning_rate": 1.2016308343179098e-06, "loss": 0.1178, "step": 2771 }, { "epoch": 0.630716723549488, "grad_norm": 2.0276568694084953, "learning_rate": 1.2015963624305132e-06, "loss": 0.1649, "step": 2772 }, { "epoch": 0.6309442548350398, "grad_norm": 2.280981687544379, "learning_rate": 1.201561878758543e-06, "loss": 0.1413, "step": 2773 }, { "epoch": 0.6311717861205915, "grad_norm": 2.1266711894469736, "learning_rate": 1.2015273833027041e-06, "loss": 0.2162, "step": 2774 }, { "epoch": 0.6313993174061433, "grad_norm": 3.8642578300165384, "learning_rate": 1.2014928760637014e-06, "loss": 0.2027, "step": 2775 }, { "epoch": 0.631626848691695, "grad_norm": 3.216392739870665, "learning_rate": 1.20145835704224e-06, "loss": 0.1329, "step": 2776 }, { "epoch": 0.6318543799772469, "grad_norm": 2.0044712366152053, "learning_rate": 1.2014238262390254e-06, "loss": 0.1391, "step": 2777 }, { "epoch": 0.6320819112627987, "grad_norm": 3.2153887606485982, "learning_rate": 1.2013892836547635e-06, "loss": 0.2441, "step": 2778 }, { "epoch": 0.6323094425483504, "grad_norm": 1.3533799512884244, "learning_rate": 1.2013547292901605e-06, "loss": 0.1688, "step": 2779 }, { "epoch": 0.6325369738339022, "grad_norm": 1.751487029561619, "learning_rate": 1.2013201631459222e-06, "loss": 0.207, "step": 2780 }, { "epoch": 0.6327645051194539, "grad_norm": 3.9713087691369635, "learning_rate": 1.201285585222755e-06, "loss": 0.2355, "step": 2781 }, { "epoch": 0.6329920364050057, "grad_norm": 2.953556617774688, "learning_rate": 1.2012509955213664e-06, "loss": 0.1799, "step": 2782 }, { "epoch": 0.6332195676905574, "grad_norm": 1.430149829222375, "learning_rate": 1.2012163940424624e-06, "loss": 0.1034, "step": 2783 }, { "epoch": 0.6334470989761092, "grad_norm": 2.0923129272368164, "learning_rate": 1.2011817807867507e-06, "loss": 0.1246, "step": 2784 }, { "epoch": 0.6336746302616609, "grad_norm": 3.407094572241824, "learning_rate": 1.2011471557549387e-06, "loss": 0.1728, "step": 2785 }, { "epoch": 0.6339021615472128, "grad_norm": 2.0611288538144197, "learning_rate": 1.2011125189477339e-06, "loss": 0.1469, "step": 2786 }, { "epoch": 0.6341296928327645, "grad_norm": 2.2966058332401587, "learning_rate": 1.2010778703658441e-06, "loss": 0.1559, "step": 2787 }, { "epoch": 0.6343572241183163, "grad_norm": 1.545873831830917, "learning_rate": 1.2010432100099781e-06, "loss": 0.1571, "step": 2788 }, { "epoch": 0.634584755403868, "grad_norm": 3.1456139874110156, "learning_rate": 1.2010085378808437e-06, "loss": 0.1906, "step": 2789 }, { "epoch": 0.6348122866894198, "grad_norm": 2.2942374687881317, "learning_rate": 1.2009738539791497e-06, "loss": 0.2298, "step": 2790 }, { "epoch": 0.6350398179749716, "grad_norm": 1.8483321215374322, "learning_rate": 1.2009391583056048e-06, "loss": 0.1769, "step": 2791 }, { "epoch": 0.6352673492605233, "grad_norm": 2.097634425130467, "learning_rate": 1.2009044508609182e-06, "loss": 0.1369, "step": 2792 }, { "epoch": 0.6354948805460751, "grad_norm": 2.181756009813163, "learning_rate": 1.2008697316457997e-06, "loss": 0.1671, "step": 2793 }, { "epoch": 0.6357224118316268, "grad_norm": 2.4427121188314853, "learning_rate": 1.2008350006609584e-06, "loss": 0.1311, "step": 2794 }, { "epoch": 0.6359499431171786, "grad_norm": 1.9343877225744588, "learning_rate": 1.2008002579071043e-06, "loss": 0.1908, "step": 2795 }, { "epoch": 0.6361774744027304, "grad_norm": 1.8080071533035844, "learning_rate": 1.2007655033849474e-06, "loss": 0.1446, "step": 2796 }, { "epoch": 0.6364050056882822, "grad_norm": 2.0360388556721034, "learning_rate": 1.2007307370951983e-06, "loss": 0.1262, "step": 2797 }, { "epoch": 0.6366325369738339, "grad_norm": 1.6254628406869118, "learning_rate": 1.200695959038567e-06, "loss": 0.1256, "step": 2798 }, { "epoch": 0.6368600682593857, "grad_norm": 1.9561465852797444, "learning_rate": 1.2006611692157648e-06, "loss": 0.1436, "step": 2799 }, { "epoch": 0.6370875995449374, "grad_norm": 1.8226458153348386, "learning_rate": 1.2006263676275026e-06, "loss": 0.1756, "step": 2800 }, { "epoch": 0.6373151308304892, "grad_norm": 1.9581606556638822, "learning_rate": 1.2005915542744915e-06, "loss": 0.1508, "step": 2801 }, { "epoch": 0.6375426621160409, "grad_norm": 2.042581734647661, "learning_rate": 1.2005567291574434e-06, "loss": 0.1042, "step": 2802 }, { "epoch": 0.6377701934015927, "grad_norm": 2.1726322807786067, "learning_rate": 1.2005218922770695e-06, "loss": 0.1318, "step": 2803 }, { "epoch": 0.6379977246871444, "grad_norm": 1.9412456138719576, "learning_rate": 1.2004870436340824e-06, "loss": 0.1893, "step": 2804 }, { "epoch": 0.6382252559726962, "grad_norm": 2.0304542382347113, "learning_rate": 1.2004521832291943e-06, "loss": 0.0981, "step": 2805 }, { "epoch": 0.6384527872582481, "grad_norm": 1.6325066329724485, "learning_rate": 1.200417311063117e-06, "loss": 0.1384, "step": 2806 }, { "epoch": 0.6386803185437998, "grad_norm": 1.8108206373995843, "learning_rate": 1.200382427136564e-06, "loss": 0.1196, "step": 2807 }, { "epoch": 0.6389078498293516, "grad_norm": 2.5409921749520086, "learning_rate": 1.2003475314502477e-06, "loss": 0.1537, "step": 2808 }, { "epoch": 0.6391353811149033, "grad_norm": 1.2748621426670415, "learning_rate": 1.200312624004882e-06, "loss": 0.0795, "step": 2809 }, { "epoch": 0.6393629124004551, "grad_norm": 1.5734235185818852, "learning_rate": 1.2002777048011794e-06, "loss": 0.1221, "step": 2810 }, { "epoch": 0.6395904436860068, "grad_norm": 2.8473432803680416, "learning_rate": 1.2002427738398543e-06, "loss": 0.1685, "step": 2811 }, { "epoch": 0.6398179749715586, "grad_norm": 2.5860893642979956, "learning_rate": 1.2002078311216205e-06, "loss": 0.1225, "step": 2812 }, { "epoch": 0.6400455062571103, "grad_norm": 2.173062793872002, "learning_rate": 1.2001728766471919e-06, "loss": 0.1418, "step": 2813 }, { "epoch": 0.6402730375426621, "grad_norm": 2.341929374014376, "learning_rate": 1.2001379104172832e-06, "loss": 0.1501, "step": 2814 }, { "epoch": 0.6405005688282139, "grad_norm": 1.9717123924624265, "learning_rate": 1.2001029324326087e-06, "loss": 0.1034, "step": 2815 }, { "epoch": 0.6407281001137657, "grad_norm": 1.5832378059806158, "learning_rate": 1.2000679426938838e-06, "loss": 0.0655, "step": 2816 }, { "epoch": 0.6409556313993174, "grad_norm": 2.5340372908959856, "learning_rate": 1.2000329412018233e-06, "loss": 0.1382, "step": 2817 }, { "epoch": 0.6411831626848692, "grad_norm": 1.221154684672452, "learning_rate": 1.1999979279571425e-06, "loss": 0.1122, "step": 2818 }, { "epoch": 0.6414106939704209, "grad_norm": 2.0522471249218666, "learning_rate": 1.1999629029605572e-06, "loss": 0.0947, "step": 2819 }, { "epoch": 0.6416382252559727, "grad_norm": 1.2769878053312456, "learning_rate": 1.1999278662127832e-06, "loss": 0.0864, "step": 2820 }, { "epoch": 0.6418657565415244, "grad_norm": 2.2645164597272216, "learning_rate": 1.1998928177145363e-06, "loss": 0.1158, "step": 2821 }, { "epoch": 0.6420932878270762, "grad_norm": 2.1530974045268034, "learning_rate": 1.1998577574665334e-06, "loss": 0.1692, "step": 2822 }, { "epoch": 0.642320819112628, "grad_norm": 1.962935725540055, "learning_rate": 1.1998226854694906e-06, "loss": 0.1575, "step": 2823 }, { "epoch": 0.6425483503981797, "grad_norm": 2.392603352353799, "learning_rate": 1.1997876017241248e-06, "loss": 0.1013, "step": 2824 }, { "epoch": 0.6427758816837316, "grad_norm": 1.7118896384760522, "learning_rate": 1.199752506231153e-06, "loss": 0.1192, "step": 2825 }, { "epoch": 0.6430034129692833, "grad_norm": 2.563449128448653, "learning_rate": 1.1997173989912928e-06, "loss": 0.1531, "step": 2826 }, { "epoch": 0.6432309442548351, "grad_norm": 2.3688066675487196, "learning_rate": 1.1996822800052614e-06, "loss": 0.1159, "step": 2827 }, { "epoch": 0.6434584755403868, "grad_norm": 1.8278311039923627, "learning_rate": 1.1996471492737767e-06, "loss": 0.141, "step": 2828 }, { "epoch": 0.6436860068259386, "grad_norm": 1.4624967632207888, "learning_rate": 1.1996120067975568e-06, "loss": 0.1154, "step": 2829 }, { "epoch": 0.6439135381114903, "grad_norm": 2.2364175356014333, "learning_rate": 1.1995768525773195e-06, "loss": 0.1283, "step": 2830 }, { "epoch": 0.6441410693970421, "grad_norm": 3.2767392401427062, "learning_rate": 1.1995416866137837e-06, "loss": 0.1808, "step": 2831 }, { "epoch": 0.6443686006825938, "grad_norm": 2.586327741034491, "learning_rate": 1.1995065089076682e-06, "loss": 0.154, "step": 2832 }, { "epoch": 0.6445961319681456, "grad_norm": 1.2535014504137996, "learning_rate": 1.199471319459692e-06, "loss": 0.1147, "step": 2833 }, { "epoch": 0.6448236632536973, "grad_norm": 1.7122556783292748, "learning_rate": 1.199436118270574e-06, "loss": 0.1146, "step": 2834 }, { "epoch": 0.6450511945392492, "grad_norm": 1.8732917825303554, "learning_rate": 1.1994009053410336e-06, "loss": 0.0699, "step": 2835 }, { "epoch": 0.645278725824801, "grad_norm": 1.6917793123575706, "learning_rate": 1.1993656806717906e-06, "loss": 0.1035, "step": 2836 }, { "epoch": 0.6455062571103527, "grad_norm": 1.7398376655862977, "learning_rate": 1.199330444263565e-06, "loss": 0.1377, "step": 2837 }, { "epoch": 0.6457337883959045, "grad_norm": 3.6823862200780524, "learning_rate": 1.1992951961170774e-06, "loss": 0.2285, "step": 2838 }, { "epoch": 0.6459613196814562, "grad_norm": 2.7082289619854807, "learning_rate": 1.1992599362330474e-06, "loss": 0.1617, "step": 2839 }, { "epoch": 0.646188850967008, "grad_norm": 1.911147339712701, "learning_rate": 1.199224664612196e-06, "loss": 0.1181, "step": 2840 }, { "epoch": 0.6464163822525597, "grad_norm": 2.6044902420279006, "learning_rate": 1.199189381255244e-06, "loss": 0.1986, "step": 2841 }, { "epoch": 0.6466439135381115, "grad_norm": 3.1518629789469284, "learning_rate": 1.199154086162913e-06, "loss": 0.2413, "step": 2842 }, { "epoch": 0.6468714448236632, "grad_norm": 1.64619969788916, "learning_rate": 1.1991187793359239e-06, "loss": 0.1469, "step": 2843 }, { "epoch": 0.647098976109215, "grad_norm": 3.4988801724543994, "learning_rate": 1.1990834607749981e-06, "loss": 0.0906, "step": 2844 }, { "epoch": 0.6473265073947668, "grad_norm": 1.4716329070831264, "learning_rate": 1.199048130480858e-06, "loss": 0.1628, "step": 2845 }, { "epoch": 0.6475540386803186, "grad_norm": 1.7378731147169513, "learning_rate": 1.1990127884542251e-06, "loss": 0.1317, "step": 2846 }, { "epoch": 0.6477815699658703, "grad_norm": 2.249048740116717, "learning_rate": 1.1989774346958225e-06, "loss": 0.0967, "step": 2847 }, { "epoch": 0.6480091012514221, "grad_norm": 1.1255899263681175, "learning_rate": 1.1989420692063723e-06, "loss": 0.1335, "step": 2848 }, { "epoch": 0.6482366325369738, "grad_norm": 1.8914124945151989, "learning_rate": 1.198906691986597e-06, "loss": 0.1352, "step": 2849 }, { "epoch": 0.6484641638225256, "grad_norm": 1.8849598926256956, "learning_rate": 1.1988713030372202e-06, "loss": 0.1649, "step": 2850 }, { "epoch": 0.6486916951080773, "grad_norm": 2.417708669547903, "learning_rate": 1.198835902358965e-06, "loss": 0.111, "step": 2851 }, { "epoch": 0.6489192263936291, "grad_norm": 0.8906821438798999, "learning_rate": 1.1988004899525547e-06, "loss": 0.0551, "step": 2852 }, { "epoch": 0.6491467576791808, "grad_norm": 2.3567391474624837, "learning_rate": 1.1987650658187133e-06, "loss": 0.1235, "step": 2853 }, { "epoch": 0.6493742889647327, "grad_norm": 1.0836164298505087, "learning_rate": 1.1987296299581648e-06, "loss": 0.1158, "step": 2854 }, { "epoch": 0.6496018202502845, "grad_norm": 2.4559644918599246, "learning_rate": 1.1986941823716333e-06, "loss": 0.1147, "step": 2855 }, { "epoch": 0.6498293515358362, "grad_norm": 2.2390322585917604, "learning_rate": 1.1986587230598437e-06, "loss": 0.09, "step": 2856 }, { "epoch": 0.650056882821388, "grad_norm": 1.344402365377071, "learning_rate": 1.19862325202352e-06, "loss": 0.1436, "step": 2857 }, { "epoch": 0.6502844141069397, "grad_norm": 1.426926920816382, "learning_rate": 1.198587769263388e-06, "loss": 0.087, "step": 2858 }, { "epoch": 0.6505119453924915, "grad_norm": 2.0719268939322713, "learning_rate": 1.198552274780172e-06, "loss": 0.1156, "step": 2859 }, { "epoch": 0.6507394766780432, "grad_norm": 1.4286233376888462, "learning_rate": 1.1985167685745982e-06, "loss": 0.107, "step": 2860 }, { "epoch": 0.650967007963595, "grad_norm": 1.5231976360847568, "learning_rate": 1.198481250647392e-06, "loss": 0.0846, "step": 2861 }, { "epoch": 0.6511945392491467, "grad_norm": 1.595972966209539, "learning_rate": 1.1984457209992792e-06, "loss": 0.1834, "step": 2862 }, { "epoch": 0.6514220705346985, "grad_norm": 1.7030298326486626, "learning_rate": 1.1984101796309862e-06, "loss": 0.1557, "step": 2863 }, { "epoch": 0.6516496018202503, "grad_norm": 2.6294697295392324, "learning_rate": 1.1983746265432392e-06, "loss": 0.206, "step": 2864 }, { "epoch": 0.6518771331058021, "grad_norm": 1.9056557077654461, "learning_rate": 1.1983390617367649e-06, "loss": 0.1442, "step": 2865 }, { "epoch": 0.6521046643913538, "grad_norm": 3.364370208484872, "learning_rate": 1.1983034852122902e-06, "loss": 0.2494, "step": 2866 }, { "epoch": 0.6523321956769056, "grad_norm": 1.5505126489167045, "learning_rate": 1.1982678969705425e-06, "loss": 0.0815, "step": 2867 }, { "epoch": 0.6525597269624573, "grad_norm": 2.3551775420223597, "learning_rate": 1.1982322970122485e-06, "loss": 0.1385, "step": 2868 }, { "epoch": 0.6527872582480091, "grad_norm": 2.411065323169576, "learning_rate": 1.1981966853381364e-06, "loss": 0.1974, "step": 2869 }, { "epoch": 0.6530147895335608, "grad_norm": 2.242797006771464, "learning_rate": 1.1981610619489337e-06, "loss": 0.1136, "step": 2870 }, { "epoch": 0.6532423208191126, "grad_norm": 1.510260522760697, "learning_rate": 1.1981254268453684e-06, "loss": 0.0959, "step": 2871 }, { "epoch": 0.6534698521046644, "grad_norm": 2.904454351577721, "learning_rate": 1.1980897800281694e-06, "loss": 0.2106, "step": 2872 }, { "epoch": 0.6536973833902161, "grad_norm": 2.6314570278686444, "learning_rate": 1.1980541214980646e-06, "loss": 0.1762, "step": 2873 }, { "epoch": 0.653924914675768, "grad_norm": 2.5532198759232023, "learning_rate": 1.1980184512557833e-06, "loss": 0.1413, "step": 2874 }, { "epoch": 0.6541524459613197, "grad_norm": 2.3410192204822318, "learning_rate": 1.1979827693020541e-06, "loss": 0.1382, "step": 2875 }, { "epoch": 0.6543799772468715, "grad_norm": 1.7142557174328752, "learning_rate": 1.1979470756376064e-06, "loss": 0.1048, "step": 2876 }, { "epoch": 0.6546075085324232, "grad_norm": 1.0889093964458614, "learning_rate": 1.1979113702631697e-06, "loss": 0.1071, "step": 2877 }, { "epoch": 0.654835039817975, "grad_norm": 1.8563209707215407, "learning_rate": 1.197875653179474e-06, "loss": 0.088, "step": 2878 }, { "epoch": 0.6550625711035267, "grad_norm": 2.7001720187434572, "learning_rate": 1.1978399243872492e-06, "loss": 0.1437, "step": 2879 }, { "epoch": 0.6552901023890785, "grad_norm": 2.258002197022518, "learning_rate": 1.1978041838872253e-06, "loss": 0.1762, "step": 2880 }, { "epoch": 0.6555176336746302, "grad_norm": 1.6515838649974548, "learning_rate": 1.197768431680133e-06, "loss": 0.1046, "step": 2881 }, { "epoch": 0.655745164960182, "grad_norm": 2.4946899822007507, "learning_rate": 1.197732667766703e-06, "loss": 0.1262, "step": 2882 }, { "epoch": 0.6559726962457337, "grad_norm": 4.602680866014901, "learning_rate": 1.1976968921476662e-06, "loss": 0.1744, "step": 2883 }, { "epoch": 0.6562002275312856, "grad_norm": 2.360092535792725, "learning_rate": 1.1976611048237534e-06, "loss": 0.0931, "step": 2884 }, { "epoch": 0.6564277588168373, "grad_norm": 1.483983811262198, "learning_rate": 1.1976253057956968e-06, "loss": 0.0807, "step": 2885 }, { "epoch": 0.6566552901023891, "grad_norm": 2.842807886661957, "learning_rate": 1.1975894950642276e-06, "loss": 0.1619, "step": 2886 }, { "epoch": 0.6568828213879409, "grad_norm": 2.486726037530193, "learning_rate": 1.1975536726300776e-06, "loss": 0.1901, "step": 2887 }, { "epoch": 0.6571103526734926, "grad_norm": 2.049571239290376, "learning_rate": 1.1975178384939793e-06, "loss": 0.1047, "step": 2888 }, { "epoch": 0.6573378839590444, "grad_norm": 1.7524729733051039, "learning_rate": 1.197481992656665e-06, "loss": 0.1674, "step": 2889 }, { "epoch": 0.6575654152445961, "grad_norm": 2.080752113172955, "learning_rate": 1.1974461351188668e-06, "loss": 0.11, "step": 2890 }, { "epoch": 0.6577929465301479, "grad_norm": 1.8490641173720084, "learning_rate": 1.1974102658813183e-06, "loss": 0.0879, "step": 2891 }, { "epoch": 0.6580204778156996, "grad_norm": 1.5276572210559733, "learning_rate": 1.1973743849447522e-06, "loss": 0.1732, "step": 2892 }, { "epoch": 0.6582480091012515, "grad_norm": 2.25951432823213, "learning_rate": 1.197338492309902e-06, "loss": 0.144, "step": 2893 }, { "epoch": 0.6584755403868032, "grad_norm": 1.6131229701226704, "learning_rate": 1.1973025879775011e-06, "loss": 0.1721, "step": 2894 }, { "epoch": 0.658703071672355, "grad_norm": 1.9025169260113142, "learning_rate": 1.1972666719482833e-06, "loss": 0.1233, "step": 2895 }, { "epoch": 0.6589306029579067, "grad_norm": 1.9930955290074996, "learning_rate": 1.197230744222983e-06, "loss": 0.0945, "step": 2896 }, { "epoch": 0.6591581342434585, "grad_norm": 1.6272211241544459, "learning_rate": 1.1971948048023343e-06, "loss": 0.1287, "step": 2897 }, { "epoch": 0.6593856655290102, "grad_norm": 1.7105829399675778, "learning_rate": 1.1971588536870717e-06, "loss": 0.1636, "step": 2898 }, { "epoch": 0.659613196814562, "grad_norm": 1.6038784432273976, "learning_rate": 1.19712289087793e-06, "loss": 0.1555, "step": 2899 }, { "epoch": 0.6598407281001137, "grad_norm": 2.189018947914395, "learning_rate": 1.197086916375644e-06, "loss": 0.1999, "step": 2900 }, { "epoch": 0.6600682593856655, "grad_norm": 2.1933170276472493, "learning_rate": 1.1970509301809493e-06, "loss": 0.0573, "step": 2901 }, { "epoch": 0.6602957906712172, "grad_norm": 1.3496992062217439, "learning_rate": 1.1970149322945812e-06, "loss": 0.1642, "step": 2902 }, { "epoch": 0.6605233219567691, "grad_norm": 1.2111324014689482, "learning_rate": 1.1969789227172755e-06, "loss": 0.0847, "step": 2903 }, { "epoch": 0.6607508532423209, "grad_norm": 1.5516745653341248, "learning_rate": 1.1969429014497684e-06, "loss": 0.1287, "step": 2904 }, { "epoch": 0.6609783845278726, "grad_norm": 4.0240616277853904, "learning_rate": 1.1969068684927954e-06, "loss": 0.1983, "step": 2905 }, { "epoch": 0.6612059158134244, "grad_norm": 2.3056372453856953, "learning_rate": 1.1968708238470936e-06, "loss": 0.1888, "step": 2906 }, { "epoch": 0.6614334470989761, "grad_norm": 2.8003705602565643, "learning_rate": 1.1968347675133995e-06, "loss": 0.1145, "step": 2907 }, { "epoch": 0.6616609783845279, "grad_norm": 1.8246199835217014, "learning_rate": 1.19679869949245e-06, "loss": 0.212, "step": 2908 }, { "epoch": 0.6618885096700796, "grad_norm": 3.15585157724945, "learning_rate": 1.1967626197849824e-06, "loss": 0.1361, "step": 2909 }, { "epoch": 0.6621160409556314, "grad_norm": 2.387258120107012, "learning_rate": 1.1967265283917339e-06, "loss": 0.1856, "step": 2910 }, { "epoch": 0.6623435722411831, "grad_norm": 2.1362133210531575, "learning_rate": 1.1966904253134422e-06, "loss": 0.1408, "step": 2911 }, { "epoch": 0.6625711035267349, "grad_norm": 3.166323287444489, "learning_rate": 1.1966543105508454e-06, "loss": 0.1418, "step": 2912 }, { "epoch": 0.6627986348122867, "grad_norm": 3.551198157075063, "learning_rate": 1.1966181841046812e-06, "loss": 0.1901, "step": 2913 }, { "epoch": 0.6630261660978385, "grad_norm": 1.4633198936003367, "learning_rate": 1.1965820459756882e-06, "loss": 0.0924, "step": 2914 }, { "epoch": 0.6632536973833902, "grad_norm": 2.2531543446585616, "learning_rate": 1.1965458961646051e-06, "loss": 0.1933, "step": 2915 }, { "epoch": 0.663481228668942, "grad_norm": 2.0085037030014132, "learning_rate": 1.1965097346721707e-06, "loss": 0.1298, "step": 2916 }, { "epoch": 0.6637087599544937, "grad_norm": 1.25673364297024, "learning_rate": 1.1964735614991237e-06, "loss": 0.1956, "step": 2917 }, { "epoch": 0.6639362912400455, "grad_norm": 2.8836800293112352, "learning_rate": 1.196437376646204e-06, "loss": 0.1327, "step": 2918 }, { "epoch": 0.6641638225255972, "grad_norm": 3.1266651348730172, "learning_rate": 1.1964011801141505e-06, "loss": 0.2037, "step": 2919 }, { "epoch": 0.664391353811149, "grad_norm": 2.619788668251123, "learning_rate": 1.1963649719037037e-06, "loss": 0.1558, "step": 2920 }, { "epoch": 0.6646188850967008, "grad_norm": 1.7667541052371545, "learning_rate": 1.196328752015603e-06, "loss": 0.1748, "step": 2921 }, { "epoch": 0.6648464163822526, "grad_norm": 1.3147225620045102, "learning_rate": 1.1962925204505894e-06, "loss": 0.0753, "step": 2922 }, { "epoch": 0.6650739476678044, "grad_norm": 2.131649606321363, "learning_rate": 1.1962562772094024e-06, "loss": 0.2134, "step": 2923 }, { "epoch": 0.6653014789533561, "grad_norm": 1.6622774393728745, "learning_rate": 1.1962200222927836e-06, "loss": 0.0936, "step": 2924 }, { "epoch": 0.6655290102389079, "grad_norm": 1.585708645419218, "learning_rate": 1.1961837557014736e-06, "loss": 0.1026, "step": 2925 }, { "epoch": 0.6657565415244596, "grad_norm": 1.146941765850051, "learning_rate": 1.196147477436214e-06, "loss": 0.1825, "step": 2926 }, { "epoch": 0.6659840728100114, "grad_norm": 2.0034538246710287, "learning_rate": 1.1961111874977455e-06, "loss": 0.1406, "step": 2927 }, { "epoch": 0.6662116040955631, "grad_norm": 2.0528707029612137, "learning_rate": 1.1960748858868104e-06, "loss": 0.1129, "step": 2928 }, { "epoch": 0.6664391353811149, "grad_norm": 2.2619718630209387, "learning_rate": 1.1960385726041507e-06, "loss": 0.2007, "step": 2929 }, { "epoch": 0.6666666666666666, "grad_norm": 1.5600095714292874, "learning_rate": 1.1960022476505082e-06, "loss": 0.0834, "step": 2930 }, { "epoch": 0.6668941979522184, "grad_norm": 1.80261641322924, "learning_rate": 1.1959659110266256e-06, "loss": 0.105, "step": 2931 }, { "epoch": 0.6671217292377702, "grad_norm": 1.7004492925954158, "learning_rate": 1.1959295627332454e-06, "loss": 0.1787, "step": 2932 }, { "epoch": 0.667349260523322, "grad_norm": 5.737730720018478, "learning_rate": 1.1958932027711106e-06, "loss": 0.2202, "step": 2933 }, { "epoch": 0.6675767918088737, "grad_norm": 3.2248265845939916, "learning_rate": 1.1958568311409643e-06, "loss": 0.1432, "step": 2934 }, { "epoch": 0.6678043230944255, "grad_norm": 1.611673867911579, "learning_rate": 1.1958204478435497e-06, "loss": 0.1198, "step": 2935 }, { "epoch": 0.6680318543799773, "grad_norm": 1.8159369259356373, "learning_rate": 1.195784052879611e-06, "loss": 0.0952, "step": 2936 }, { "epoch": 0.668259385665529, "grad_norm": 3.609361552599488, "learning_rate": 1.195747646249891e-06, "loss": 0.0806, "step": 2937 }, { "epoch": 0.6684869169510808, "grad_norm": 1.9817411361850743, "learning_rate": 1.1957112279551347e-06, "loss": 0.1723, "step": 2938 }, { "epoch": 0.6687144482366325, "grad_norm": 3.475296677730135, "learning_rate": 1.195674797996086e-06, "loss": 0.1626, "step": 2939 }, { "epoch": 0.6689419795221843, "grad_norm": 1.0280004164275525, "learning_rate": 1.1956383563734897e-06, "loss": 0.0776, "step": 2940 }, { "epoch": 0.669169510807736, "grad_norm": 2.0465094431015025, "learning_rate": 1.1956019030880902e-06, "loss": 0.2099, "step": 2941 }, { "epoch": 0.6693970420932879, "grad_norm": 1.520908408928458, "learning_rate": 1.1955654381406331e-06, "loss": 0.1446, "step": 2942 }, { "epoch": 0.6696245733788396, "grad_norm": 1.8121961010303413, "learning_rate": 1.1955289615318632e-06, "loss": 0.1224, "step": 2943 }, { "epoch": 0.6698521046643914, "grad_norm": 2.0001610941499375, "learning_rate": 1.1954924732625264e-06, "loss": 0.1547, "step": 2944 }, { "epoch": 0.6700796359499431, "grad_norm": 1.6700743442908508, "learning_rate": 1.1954559733333681e-06, "loss": 0.0778, "step": 2945 }, { "epoch": 0.6703071672354949, "grad_norm": 2.3975672802355428, "learning_rate": 1.1954194617451345e-06, "loss": 0.139, "step": 2946 }, { "epoch": 0.6705346985210466, "grad_norm": 1.8024278725077454, "learning_rate": 1.1953829384985716e-06, "loss": 0.113, "step": 2947 }, { "epoch": 0.6707622298065984, "grad_norm": 3.145969371692759, "learning_rate": 1.1953464035944262e-06, "loss": 0.1883, "step": 2948 }, { "epoch": 0.6709897610921501, "grad_norm": 14.466120052896613, "learning_rate": 1.195309857033445e-06, "loss": 0.1307, "step": 2949 }, { "epoch": 0.6712172923777019, "grad_norm": 1.5503689850377973, "learning_rate": 1.1952732988163745e-06, "loss": 0.1009, "step": 2950 }, { "epoch": 0.6714448236632536, "grad_norm": 1.9518492837230381, "learning_rate": 1.1952367289439624e-06, "loss": 0.1653, "step": 2951 }, { "epoch": 0.6716723549488055, "grad_norm": 2.3329213171041734, "learning_rate": 1.1952001474169558e-06, "loss": 0.138, "step": 2952 }, { "epoch": 0.6718998862343573, "grad_norm": 1.7820092144519726, "learning_rate": 1.1951635542361025e-06, "loss": 0.14, "step": 2953 }, { "epoch": 0.672127417519909, "grad_norm": 3.0377950533141385, "learning_rate": 1.1951269494021503e-06, "loss": 0.1433, "step": 2954 }, { "epoch": 0.6723549488054608, "grad_norm": 1.3355396724116602, "learning_rate": 1.1950903329158475e-06, "loss": 0.0866, "step": 2955 }, { "epoch": 0.6725824800910125, "grad_norm": 1.593734083800843, "learning_rate": 1.1950537047779424e-06, "loss": 0.1, "step": 2956 }, { "epoch": 0.6728100113765643, "grad_norm": 1.5275734610546747, "learning_rate": 1.1950170649891836e-06, "loss": 0.079, "step": 2957 }, { "epoch": 0.673037542662116, "grad_norm": 1.8841850472581299, "learning_rate": 1.1949804135503196e-06, "loss": 0.1615, "step": 2958 }, { "epoch": 0.6732650739476678, "grad_norm": 1.1481325273828376, "learning_rate": 1.1949437504621e-06, "loss": 0.0845, "step": 2959 }, { "epoch": 0.6734926052332195, "grad_norm": 1.6878277522438039, "learning_rate": 1.194907075725274e-06, "loss": 0.1545, "step": 2960 }, { "epoch": 0.6737201365187714, "grad_norm": 2.1451059139863413, "learning_rate": 1.1948703893405911e-06, "loss": 0.1137, "step": 2961 }, { "epoch": 0.6739476678043231, "grad_norm": 2.215251205467777, "learning_rate": 1.194833691308801e-06, "loss": 0.1348, "step": 2962 }, { "epoch": 0.6741751990898749, "grad_norm": 2.906612033660968, "learning_rate": 1.194796981630654e-06, "loss": 0.1405, "step": 2963 }, { "epoch": 0.6744027303754266, "grad_norm": 1.6397552216723599, "learning_rate": 1.1947602603069002e-06, "loss": 0.1068, "step": 2964 }, { "epoch": 0.6746302616609784, "grad_norm": 3.1336175876135868, "learning_rate": 1.19472352733829e-06, "loss": 0.1591, "step": 2965 }, { "epoch": 0.6748577929465301, "grad_norm": 1.5562237586152252, "learning_rate": 1.1946867827255744e-06, "loss": 0.11, "step": 2966 }, { "epoch": 0.6750853242320819, "grad_norm": 2.282197724231165, "learning_rate": 1.1946500264695044e-06, "loss": 0.2207, "step": 2967 }, { "epoch": 0.6753128555176336, "grad_norm": 1.9433531473768753, "learning_rate": 1.194613258570831e-06, "loss": 0.1495, "step": 2968 }, { "epoch": 0.6755403868031854, "grad_norm": 1.6130949175423062, "learning_rate": 1.1945764790303059e-06, "loss": 0.1114, "step": 2969 }, { "epoch": 0.6757679180887372, "grad_norm": 2.2200546809210087, "learning_rate": 1.1945396878486805e-06, "loss": 0.1359, "step": 2970 }, { "epoch": 0.675995449374289, "grad_norm": 2.3411085380950403, "learning_rate": 1.194502885026707e-06, "loss": 0.174, "step": 2971 }, { "epoch": 0.6762229806598408, "grad_norm": 1.0390908830744956, "learning_rate": 1.1944660705651375e-06, "loss": 0.1238, "step": 2972 }, { "epoch": 0.6764505119453925, "grad_norm": 1.7477836354627838, "learning_rate": 1.1944292444647248e-06, "loss": 0.1505, "step": 2973 }, { "epoch": 0.6766780432309443, "grad_norm": 1.5463097681692215, "learning_rate": 1.1943924067262208e-06, "loss": 0.0973, "step": 2974 }, { "epoch": 0.676905574516496, "grad_norm": 2.0006077731515184, "learning_rate": 1.194355557350379e-06, "loss": 0.1278, "step": 2975 }, { "epoch": 0.6771331058020478, "grad_norm": 2.428149037379935, "learning_rate": 1.1943186963379522e-06, "loss": 0.2203, "step": 2976 }, { "epoch": 0.6773606370875995, "grad_norm": 1.691046625500558, "learning_rate": 1.194281823689694e-06, "loss": 0.127, "step": 2977 }, { "epoch": 0.6775881683731513, "grad_norm": 3.951131260976323, "learning_rate": 1.194244939406358e-06, "loss": 0.1884, "step": 2978 }, { "epoch": 0.677815699658703, "grad_norm": 2.579071481165006, "learning_rate": 1.1942080434886978e-06, "loss": 0.1256, "step": 2979 }, { "epoch": 0.6780432309442548, "grad_norm": 1.4931981146612179, "learning_rate": 1.1941711359374678e-06, "loss": 0.1169, "step": 2980 }, { "epoch": 0.6782707622298066, "grad_norm": 1.2787114819440706, "learning_rate": 1.194134216753422e-06, "loss": 0.089, "step": 2981 }, { "epoch": 0.6784982935153584, "grad_norm": 2.7311666892685196, "learning_rate": 1.1940972859373151e-06, "loss": 0.1461, "step": 2982 }, { "epoch": 0.6787258248009101, "grad_norm": 1.8785492720960564, "learning_rate": 1.194060343489902e-06, "loss": 0.1885, "step": 2983 }, { "epoch": 0.6789533560864619, "grad_norm": 2.724028446550519, "learning_rate": 1.1940233894119377e-06, "loss": 0.1386, "step": 2984 }, { "epoch": 0.6791808873720137, "grad_norm": 1.6619664728697439, "learning_rate": 1.1939864237041774e-06, "loss": 0.1606, "step": 2985 }, { "epoch": 0.6794084186575654, "grad_norm": 1.8345758168918145, "learning_rate": 1.1939494463673767e-06, "loss": 0.1092, "step": 2986 }, { "epoch": 0.6796359499431172, "grad_norm": 2.981218053944757, "learning_rate": 1.1939124574022914e-06, "loss": 0.1748, "step": 2987 }, { "epoch": 0.6798634812286689, "grad_norm": 2.7677489635883123, "learning_rate": 1.1938754568096771e-06, "loss": 0.2211, "step": 2988 }, { "epoch": 0.6800910125142207, "grad_norm": 0.9903369979817118, "learning_rate": 1.1938384445902905e-06, "loss": 0.085, "step": 2989 }, { "epoch": 0.6803185437997725, "grad_norm": 1.219724518175553, "learning_rate": 1.1938014207448877e-06, "loss": 0.0765, "step": 2990 }, { "epoch": 0.6805460750853243, "grad_norm": 1.4640170722241062, "learning_rate": 1.1937643852742258e-06, "loss": 0.0597, "step": 2991 }, { "epoch": 0.680773606370876, "grad_norm": 3.2799154475490773, "learning_rate": 1.1937273381790615e-06, "loss": 0.1073, "step": 2992 }, { "epoch": 0.6810011376564278, "grad_norm": 1.545857022318275, "learning_rate": 1.1936902794601518e-06, "loss": 0.09, "step": 2993 }, { "epoch": 0.6812286689419795, "grad_norm": 2.248965391138622, "learning_rate": 1.1936532091182544e-06, "loss": 0.1727, "step": 2994 }, { "epoch": 0.6814562002275313, "grad_norm": 1.298847302563234, "learning_rate": 1.1936161271541268e-06, "loss": 0.1177, "step": 2995 }, { "epoch": 0.681683731513083, "grad_norm": 1.7590140590212042, "learning_rate": 1.1935790335685272e-06, "loss": 0.1594, "step": 2996 }, { "epoch": 0.6819112627986348, "grad_norm": 2.558222426527528, "learning_rate": 1.193541928362213e-06, "loss": 0.1225, "step": 2997 }, { "epoch": 0.6821387940841865, "grad_norm": 2.283950297920294, "learning_rate": 1.1935048115359432e-06, "loss": 0.1604, "step": 2998 }, { "epoch": 0.6823663253697383, "grad_norm": 1.7491142515816718, "learning_rate": 1.1934676830904763e-06, "loss": 0.155, "step": 2999 }, { "epoch": 0.6825938566552902, "grad_norm": 2.984227416216763, "learning_rate": 1.193430543026571e-06, "loss": 0.169, "step": 3000 }, { "epoch": 0.6828213879408419, "grad_norm": 2.1040763156780544, "learning_rate": 1.1933933913449867e-06, "loss": 0.1525, "step": 3001 }, { "epoch": 0.6830489192263937, "grad_norm": 2.1389125690606186, "learning_rate": 1.193356228046482e-06, "loss": 0.1844, "step": 3002 }, { "epoch": 0.6832764505119454, "grad_norm": 3.0465279486690147, "learning_rate": 1.1933190531318172e-06, "loss": 0.1603, "step": 3003 }, { "epoch": 0.6835039817974972, "grad_norm": 1.9423392499102479, "learning_rate": 1.1932818666017516e-06, "loss": 0.1198, "step": 3004 }, { "epoch": 0.6837315130830489, "grad_norm": 2.7426424574978308, "learning_rate": 1.1932446684570455e-06, "loss": 0.1811, "step": 3005 }, { "epoch": 0.6839590443686007, "grad_norm": 2.3988027673451424, "learning_rate": 1.1932074586984592e-06, "loss": 0.1173, "step": 3006 }, { "epoch": 0.6841865756541524, "grad_norm": 1.757923475127038, "learning_rate": 1.1931702373267527e-06, "loss": 0.0871, "step": 3007 }, { "epoch": 0.6844141069397042, "grad_norm": 1.9363103132107797, "learning_rate": 1.1931330043426872e-06, "loss": 0.212, "step": 3008 }, { "epoch": 0.6846416382252559, "grad_norm": 1.9686025748503317, "learning_rate": 1.1930957597470238e-06, "loss": 0.1664, "step": 3009 }, { "epoch": 0.6848691695108078, "grad_norm": 1.726511800870621, "learning_rate": 1.1930585035405235e-06, "loss": 0.1566, "step": 3010 }, { "epoch": 0.6850967007963595, "grad_norm": 2.6585618793727046, "learning_rate": 1.1930212357239475e-06, "loss": 0.1212, "step": 3011 }, { "epoch": 0.6853242320819113, "grad_norm": 1.910724842176571, "learning_rate": 1.1929839562980579e-06, "loss": 0.1618, "step": 3012 }, { "epoch": 0.685551763367463, "grad_norm": 1.903571460689554, "learning_rate": 1.1929466652636164e-06, "loss": 0.1138, "step": 3013 }, { "epoch": 0.6857792946530148, "grad_norm": 2.3424987856505846, "learning_rate": 1.1929093626213852e-06, "loss": 0.208, "step": 3014 }, { "epoch": 0.6860068259385665, "grad_norm": 1.7442259260132478, "learning_rate": 1.1928720483721269e-06, "loss": 0.1205, "step": 3015 }, { "epoch": 0.6862343572241183, "grad_norm": 2.3431955332980343, "learning_rate": 1.1928347225166035e-06, "loss": 0.0955, "step": 3016 }, { "epoch": 0.68646188850967, "grad_norm": 2.3675636221872574, "learning_rate": 1.1927973850555785e-06, "loss": 0.1031, "step": 3017 }, { "epoch": 0.6866894197952218, "grad_norm": 2.3230512432118755, "learning_rate": 1.192760035989815e-06, "loss": 0.1114, "step": 3018 }, { "epoch": 0.6869169510807736, "grad_norm": 2.2869670912635818, "learning_rate": 1.192722675320076e-06, "loss": 0.1193, "step": 3019 }, { "epoch": 0.6871444823663254, "grad_norm": 2.077036790853124, "learning_rate": 1.1926853030471253e-06, "loss": 0.1072, "step": 3020 }, { "epoch": 0.6873720136518772, "grad_norm": 1.745378445366266, "learning_rate": 1.1926479191717267e-06, "loss": 0.1406, "step": 3021 }, { "epoch": 0.6875995449374289, "grad_norm": 1.0136707384794403, "learning_rate": 1.1926105236946443e-06, "loss": 0.0706, "step": 3022 }, { "epoch": 0.6878270762229807, "grad_norm": 2.3015181855491256, "learning_rate": 1.192573116616642e-06, "loss": 0.1252, "step": 3023 }, { "epoch": 0.6880546075085324, "grad_norm": 2.1547079811494805, "learning_rate": 1.192535697938485e-06, "loss": 0.1187, "step": 3024 }, { "epoch": 0.6882821387940842, "grad_norm": 1.822837821217239, "learning_rate": 1.1924982676609377e-06, "loss": 0.1658, "step": 3025 }, { "epoch": 0.6885096700796359, "grad_norm": 2.124153777097937, "learning_rate": 1.1924608257847651e-06, "loss": 0.1041, "step": 3026 }, { "epoch": 0.6887372013651877, "grad_norm": 1.0872503933411688, "learning_rate": 1.1924233723107322e-06, "loss": 0.1241, "step": 3027 }, { "epoch": 0.6889647326507394, "grad_norm": 1.29773367128732, "learning_rate": 1.1923859072396051e-06, "loss": 0.1299, "step": 3028 }, { "epoch": 0.6891922639362913, "grad_norm": 1.0511026164299604, "learning_rate": 1.1923484305721489e-06, "loss": 0.0724, "step": 3029 }, { "epoch": 0.689419795221843, "grad_norm": 1.990350741929456, "learning_rate": 1.19231094230913e-06, "loss": 0.1034, "step": 3030 }, { "epoch": 0.6896473265073948, "grad_norm": 1.3035401289261104, "learning_rate": 1.1922734424513144e-06, "loss": 0.1207, "step": 3031 }, { "epoch": 0.6898748577929465, "grad_norm": 1.9244132444178317, "learning_rate": 1.1922359309994685e-06, "loss": 0.0979, "step": 3032 }, { "epoch": 0.6901023890784983, "grad_norm": 1.1055660037716095, "learning_rate": 1.192198407954359e-06, "loss": 0.0855, "step": 3033 }, { "epoch": 0.69032992036405, "grad_norm": 2.862275522322784, "learning_rate": 1.192160873316753e-06, "loss": 0.1768, "step": 3034 }, { "epoch": 0.6905574516496018, "grad_norm": 2.1121426133206027, "learning_rate": 1.1921233270874174e-06, "loss": 0.1465, "step": 3035 }, { "epoch": 0.6907849829351536, "grad_norm": 1.4975621183670067, "learning_rate": 1.1920857692671196e-06, "loss": 0.1224, "step": 3036 }, { "epoch": 0.6910125142207053, "grad_norm": 1.4957707166752725, "learning_rate": 1.192048199856627e-06, "loss": 0.1212, "step": 3037 }, { "epoch": 0.6912400455062571, "grad_norm": 1.9181007196013113, "learning_rate": 1.192010618856708e-06, "loss": 0.1744, "step": 3038 }, { "epoch": 0.6914675767918089, "grad_norm": 1.689194988103543, "learning_rate": 1.1919730262681304e-06, "loss": 0.1563, "step": 3039 }, { "epoch": 0.6916951080773607, "grad_norm": 1.989563122278693, "learning_rate": 1.1919354220916624e-06, "loss": 0.1061, "step": 3040 }, { "epoch": 0.6919226393629124, "grad_norm": 3.094413273343531, "learning_rate": 1.1918978063280726e-06, "loss": 0.2185, "step": 3041 }, { "epoch": 0.6921501706484642, "grad_norm": 1.9118711208773385, "learning_rate": 1.1918601789781299e-06, "loss": 0.1, "step": 3042 }, { "epoch": 0.6923777019340159, "grad_norm": 2.826520192849369, "learning_rate": 1.1918225400426032e-06, "loss": 0.1389, "step": 3043 }, { "epoch": 0.6926052332195677, "grad_norm": 2.32332931486, "learning_rate": 1.191784889522262e-06, "loss": 0.124, "step": 3044 }, { "epoch": 0.6928327645051194, "grad_norm": 3.3162758751367267, "learning_rate": 1.1917472274178757e-06, "loss": 0.1755, "step": 3045 }, { "epoch": 0.6930602957906712, "grad_norm": 1.7652192594849543, "learning_rate": 1.191709553730214e-06, "loss": 0.136, "step": 3046 }, { "epoch": 0.6932878270762229, "grad_norm": 1.9194876138624921, "learning_rate": 1.1916718684600469e-06, "loss": 0.1409, "step": 3047 }, { "epoch": 0.6935153583617747, "grad_norm": 1.5068265619508907, "learning_rate": 1.1916341716081446e-06, "loss": 0.1221, "step": 3048 }, { "epoch": 0.6937428896473266, "grad_norm": 2.5710593050636157, "learning_rate": 1.1915964631752775e-06, "loss": 0.0891, "step": 3049 }, { "epoch": 0.6939704209328783, "grad_norm": 1.8315542955459863, "learning_rate": 1.1915587431622164e-06, "loss": 0.1503, "step": 3050 }, { "epoch": 0.6941979522184301, "grad_norm": 1.98668392468283, "learning_rate": 1.1915210115697324e-06, "loss": 0.1622, "step": 3051 }, { "epoch": 0.6944254835039818, "grad_norm": 1.5107124398660574, "learning_rate": 1.1914832683985962e-06, "loss": 0.0928, "step": 3052 }, { "epoch": 0.6946530147895336, "grad_norm": 3.73846147303143, "learning_rate": 1.1914455136495796e-06, "loss": 0.2613, "step": 3053 }, { "epoch": 0.6948805460750853, "grad_norm": 2.8035148694959453, "learning_rate": 1.191407747323454e-06, "loss": 0.1517, "step": 3054 }, { "epoch": 0.6951080773606371, "grad_norm": 1.895552274543866, "learning_rate": 1.1913699694209914e-06, "loss": 0.0942, "step": 3055 }, { "epoch": 0.6953356086461888, "grad_norm": 1.7459829471709587, "learning_rate": 1.191332179942964e-06, "loss": 0.1396, "step": 3056 }, { "epoch": 0.6955631399317406, "grad_norm": 3.815224090825608, "learning_rate": 1.1912943788901438e-06, "loss": 0.2051, "step": 3057 }, { "epoch": 0.6957906712172923, "grad_norm": 1.6661183563844684, "learning_rate": 1.191256566263304e-06, "loss": 0.1971, "step": 3058 }, { "epoch": 0.6960182025028442, "grad_norm": 1.8000907111995663, "learning_rate": 1.1912187420632165e-06, "loss": 0.1623, "step": 3059 }, { "epoch": 0.6962457337883959, "grad_norm": 1.6926787312264402, "learning_rate": 1.1911809062906552e-06, "loss": 0.1535, "step": 3060 }, { "epoch": 0.6964732650739477, "grad_norm": 2.146640045867537, "learning_rate": 1.1911430589463931e-06, "loss": 0.1584, "step": 3061 }, { "epoch": 0.6967007963594994, "grad_norm": 2.573487768453337, "learning_rate": 1.1911052000312038e-06, "loss": 0.1839, "step": 3062 }, { "epoch": 0.6969283276450512, "grad_norm": 1.2062041272330437, "learning_rate": 1.1910673295458607e-06, "loss": 0.1174, "step": 3063 }, { "epoch": 0.697155858930603, "grad_norm": 2.2446368133366015, "learning_rate": 1.1910294474911382e-06, "loss": 0.1186, "step": 3064 }, { "epoch": 0.6973833902161547, "grad_norm": 1.832728201805095, "learning_rate": 1.1909915538678105e-06, "loss": 0.201, "step": 3065 }, { "epoch": 0.6976109215017064, "grad_norm": 0.8873694701683021, "learning_rate": 1.1909536486766522e-06, "loss": 0.0817, "step": 3066 }, { "epoch": 0.6978384527872582, "grad_norm": 1.6140383266242742, "learning_rate": 1.1909157319184373e-06, "loss": 0.1116, "step": 3067 }, { "epoch": 0.6980659840728101, "grad_norm": 1.571905366138055, "learning_rate": 1.1908778035939416e-06, "loss": 0.1295, "step": 3068 }, { "epoch": 0.6982935153583618, "grad_norm": 3.001382201945936, "learning_rate": 1.1908398637039398e-06, "loss": 0.0721, "step": 3069 }, { "epoch": 0.6985210466439136, "grad_norm": 2.750456856631494, "learning_rate": 1.1908019122492077e-06, "loss": 0.1422, "step": 3070 }, { "epoch": 0.6987485779294653, "grad_norm": 2.7469566413459425, "learning_rate": 1.1907639492305205e-06, "loss": 0.1605, "step": 3071 }, { "epoch": 0.6989761092150171, "grad_norm": 1.573510557079222, "learning_rate": 1.1907259746486547e-06, "loss": 0.0679, "step": 3072 }, { "epoch": 0.6992036405005688, "grad_norm": 1.535165170922043, "learning_rate": 1.1906879885043856e-06, "loss": 0.1303, "step": 3073 }, { "epoch": 0.6994311717861206, "grad_norm": 1.51597365006242, "learning_rate": 1.1906499907984903e-06, "loss": 0.1091, "step": 3074 }, { "epoch": 0.6996587030716723, "grad_norm": 1.9433457342834564, "learning_rate": 1.190611981531745e-06, "loss": 0.1875, "step": 3075 }, { "epoch": 0.6998862343572241, "grad_norm": 1.3852493508640962, "learning_rate": 1.1905739607049267e-06, "loss": 0.11, "step": 3076 }, { "epoch": 0.7001137656427758, "grad_norm": 1.6634488720193932, "learning_rate": 1.1905359283188126e-06, "loss": 0.0649, "step": 3077 }, { "epoch": 0.7003412969283277, "grad_norm": 1.5075356462841907, "learning_rate": 1.1904978843741796e-06, "loss": 0.0771, "step": 3078 }, { "epoch": 0.7005688282138794, "grad_norm": 2.8860526665616706, "learning_rate": 1.1904598288718055e-06, "loss": 0.2098, "step": 3079 }, { "epoch": 0.7007963594994312, "grad_norm": 1.3469428884610732, "learning_rate": 1.1904217618124684e-06, "loss": 0.1217, "step": 3080 }, { "epoch": 0.701023890784983, "grad_norm": 1.534126034264905, "learning_rate": 1.1903836831969458e-06, "loss": 0.1307, "step": 3081 }, { "epoch": 0.7012514220705347, "grad_norm": 1.036473399987068, "learning_rate": 1.190345593026016e-06, "loss": 0.0627, "step": 3082 }, { "epoch": 0.7014789533560865, "grad_norm": 2.8331577249200866, "learning_rate": 1.1903074913004579e-06, "loss": 0.2588, "step": 3083 }, { "epoch": 0.7017064846416382, "grad_norm": 2.6619142781192973, "learning_rate": 1.19026937802105e-06, "loss": 0.2616, "step": 3084 }, { "epoch": 0.70193401592719, "grad_norm": 1.5193886249818858, "learning_rate": 1.1902312531885712e-06, "loss": 0.0836, "step": 3085 }, { "epoch": 0.7021615472127417, "grad_norm": 1.9617101099614265, "learning_rate": 1.1901931168038007e-06, "loss": 0.1756, "step": 3086 }, { "epoch": 0.7023890784982935, "grad_norm": 1.5141276225062847, "learning_rate": 1.190154968867518e-06, "loss": 0.0929, "step": 3087 }, { "epoch": 0.7026166097838453, "grad_norm": 1.4372083252193162, "learning_rate": 1.190116809380503e-06, "loss": 0.0679, "step": 3088 }, { "epoch": 0.7028441410693971, "grad_norm": 1.5581282033103077, "learning_rate": 1.190078638343535e-06, "loss": 0.1009, "step": 3089 }, { "epoch": 0.7030716723549488, "grad_norm": 1.6889378087890292, "learning_rate": 1.1900404557573948e-06, "loss": 0.0775, "step": 3090 }, { "epoch": 0.7032992036405006, "grad_norm": 2.245465693257155, "learning_rate": 1.1900022616228624e-06, "loss": 0.205, "step": 3091 }, { "epoch": 0.7035267349260523, "grad_norm": 1.8215824095731956, "learning_rate": 1.1899640559407186e-06, "loss": 0.2364, "step": 3092 }, { "epoch": 0.7037542662116041, "grad_norm": 1.6915516378179793, "learning_rate": 1.189925838711744e-06, "loss": 0.1389, "step": 3093 }, { "epoch": 0.7039817974971558, "grad_norm": 1.6928362600822655, "learning_rate": 1.18988760993672e-06, "loss": 0.1829, "step": 3094 }, { "epoch": 0.7042093287827076, "grad_norm": 2.931008813048908, "learning_rate": 1.1898493696164279e-06, "loss": 0.0923, "step": 3095 }, { "epoch": 0.7044368600682593, "grad_norm": 1.1154029557797203, "learning_rate": 1.1898111177516488e-06, "loss": 0.0757, "step": 3096 }, { "epoch": 0.7046643913538112, "grad_norm": 2.711712559556853, "learning_rate": 1.1897728543431653e-06, "loss": 0.2867, "step": 3097 }, { "epoch": 0.704891922639363, "grad_norm": 2.705535470633257, "learning_rate": 1.1897345793917589e-06, "loss": 0.1299, "step": 3098 }, { "epoch": 0.7051194539249147, "grad_norm": 1.9634050602340667, "learning_rate": 1.1896962928982116e-06, "loss": 0.0929, "step": 3099 }, { "epoch": 0.7053469852104665, "grad_norm": 1.925806274179465, "learning_rate": 1.1896579948633067e-06, "loss": 0.1962, "step": 3100 }, { "epoch": 0.7055745164960182, "grad_norm": 1.4954397243767268, "learning_rate": 1.1896196852878262e-06, "loss": 0.1621, "step": 3101 }, { "epoch": 0.70580204778157, "grad_norm": 2.3069003202986362, "learning_rate": 1.1895813641725535e-06, "loss": 0.106, "step": 3102 }, { "epoch": 0.7060295790671217, "grad_norm": 1.7569589217138821, "learning_rate": 1.1895430315182719e-06, "loss": 0.0941, "step": 3103 }, { "epoch": 0.7062571103526735, "grad_norm": 1.6341215878240687, "learning_rate": 1.1895046873257644e-06, "loss": 0.0992, "step": 3104 }, { "epoch": 0.7064846416382252, "grad_norm": 2.0620355437898654, "learning_rate": 1.189466331595815e-06, "loss": 0.1256, "step": 3105 }, { "epoch": 0.706712172923777, "grad_norm": 2.6829703904365045, "learning_rate": 1.1894279643292074e-06, "loss": 0.1548, "step": 3106 }, { "epoch": 0.7069397042093288, "grad_norm": 2.0525899216404313, "learning_rate": 1.1893895855267262e-06, "loss": 0.1323, "step": 3107 }, { "epoch": 0.7071672354948806, "grad_norm": 2.3112715545673983, "learning_rate": 1.1893511951891553e-06, "loss": 0.1359, "step": 3108 }, { "epoch": 0.7073947667804323, "grad_norm": 2.473665918442311, "learning_rate": 1.1893127933172794e-06, "loss": 0.1766, "step": 3109 }, { "epoch": 0.7076222980659841, "grad_norm": 2.6324961135431195, "learning_rate": 1.1892743799118838e-06, "loss": 0.2513, "step": 3110 }, { "epoch": 0.7078498293515358, "grad_norm": 2.0428028154854663, "learning_rate": 1.189235954973753e-06, "loss": 0.1409, "step": 3111 }, { "epoch": 0.7080773606370876, "grad_norm": 1.4954878901352442, "learning_rate": 1.189197518503673e-06, "loss": 0.1985, "step": 3112 }, { "epoch": 0.7083048919226393, "grad_norm": 1.7220766086751687, "learning_rate": 1.1891590705024288e-06, "loss": 0.1868, "step": 3113 }, { "epoch": 0.7085324232081911, "grad_norm": 1.5383065815934964, "learning_rate": 1.1891206109708065e-06, "loss": 0.1796, "step": 3114 }, { "epoch": 0.7087599544937428, "grad_norm": 1.3010401003361285, "learning_rate": 1.1890821399095917e-06, "loss": 0.1327, "step": 3115 }, { "epoch": 0.7089874857792946, "grad_norm": 2.550340002997386, "learning_rate": 1.1890436573195714e-06, "loss": 0.2581, "step": 3116 }, { "epoch": 0.7092150170648465, "grad_norm": 5.680850722665896, "learning_rate": 1.1890051632015315e-06, "loss": 0.1181, "step": 3117 }, { "epoch": 0.7094425483503982, "grad_norm": 2.055782957788213, "learning_rate": 1.1889666575562593e-06, "loss": 0.1056, "step": 3118 }, { "epoch": 0.70967007963595, "grad_norm": 1.8000182980206194, "learning_rate": 1.1889281403845413e-06, "loss": 0.1555, "step": 3119 }, { "epoch": 0.7098976109215017, "grad_norm": 1.4086799175857945, "learning_rate": 1.1888896116871649e-06, "loss": 0.1702, "step": 3120 }, { "epoch": 0.7101251422070535, "grad_norm": 2.825675781255187, "learning_rate": 1.1888510714649176e-06, "loss": 0.1559, "step": 3121 }, { "epoch": 0.7103526734926052, "grad_norm": 1.516178187565781, "learning_rate": 1.1888125197185867e-06, "loss": 0.1457, "step": 3122 }, { "epoch": 0.710580204778157, "grad_norm": 2.146464562262945, "learning_rate": 1.1887739564489608e-06, "loss": 0.1336, "step": 3123 }, { "epoch": 0.7108077360637087, "grad_norm": 2.326773332109297, "learning_rate": 1.1887353816568277e-06, "loss": 0.1892, "step": 3124 }, { "epoch": 0.7110352673492605, "grad_norm": 2.7537460461611363, "learning_rate": 1.188696795342976e-06, "loss": 0.1751, "step": 3125 }, { "epoch": 0.7112627986348122, "grad_norm": 1.6542807081780677, "learning_rate": 1.188658197508194e-06, "loss": 0.1314, "step": 3126 }, { "epoch": 0.7114903299203641, "grad_norm": 2.275977763130455, "learning_rate": 1.1886195881532705e-06, "loss": 0.1304, "step": 3127 }, { "epoch": 0.7117178612059158, "grad_norm": 2.741548691257323, "learning_rate": 1.1885809672789953e-06, "loss": 0.1057, "step": 3128 }, { "epoch": 0.7119453924914676, "grad_norm": 2.576016774238181, "learning_rate": 1.188542334886157e-06, "loss": 0.144, "step": 3129 }, { "epoch": 0.7121729237770194, "grad_norm": 2.17965083103891, "learning_rate": 1.1885036909755454e-06, "loss": 0.1345, "step": 3130 }, { "epoch": 0.7124004550625711, "grad_norm": 3.896449546672936, "learning_rate": 1.1884650355479505e-06, "loss": 0.1726, "step": 3131 }, { "epoch": 0.7126279863481229, "grad_norm": 1.7651769113033686, "learning_rate": 1.1884263686041622e-06, "loss": 0.1649, "step": 3132 }, { "epoch": 0.7128555176336746, "grad_norm": 3.270644942804106, "learning_rate": 1.1883876901449707e-06, "loss": 0.1106, "step": 3133 }, { "epoch": 0.7130830489192264, "grad_norm": 0.9540720474270146, "learning_rate": 1.1883490001711667e-06, "loss": 0.0479, "step": 3134 }, { "epoch": 0.7133105802047781, "grad_norm": 1.5281389835207844, "learning_rate": 1.1883102986835408e-06, "loss": 0.1265, "step": 3135 }, { "epoch": 0.71353811149033, "grad_norm": 1.7275445515308603, "learning_rate": 1.1882715856828842e-06, "loss": 0.143, "step": 3136 }, { "epoch": 0.7137656427758817, "grad_norm": 1.4760935486211386, "learning_rate": 1.1882328611699879e-06, "loss": 0.1286, "step": 3137 }, { "epoch": 0.7139931740614335, "grad_norm": 2.061027793910494, "learning_rate": 1.1881941251456434e-06, "loss": 0.1199, "step": 3138 }, { "epoch": 0.7142207053469852, "grad_norm": 1.6504084382846145, "learning_rate": 1.1881553776106423e-06, "loss": 0.1572, "step": 3139 }, { "epoch": 0.714448236632537, "grad_norm": 2.871038430361009, "learning_rate": 1.1881166185657765e-06, "loss": 0.1957, "step": 3140 }, { "epoch": 0.7146757679180887, "grad_norm": 1.7942243510846807, "learning_rate": 1.1880778480118388e-06, "loss": 0.0828, "step": 3141 }, { "epoch": 0.7149032992036405, "grad_norm": 1.4934742711037803, "learning_rate": 1.1880390659496207e-06, "loss": 0.1775, "step": 3142 }, { "epoch": 0.7151308304891922, "grad_norm": 1.5497263135986263, "learning_rate": 1.1880002723799155e-06, "loss": 0.1242, "step": 3143 }, { "epoch": 0.715358361774744, "grad_norm": 1.9759475029193152, "learning_rate": 1.1879614673035158e-06, "loss": 0.1099, "step": 3144 }, { "epoch": 0.7155858930602957, "grad_norm": 3.273086972853465, "learning_rate": 1.1879226507212146e-06, "loss": 0.1725, "step": 3145 }, { "epoch": 0.7158134243458476, "grad_norm": 2.692056389406711, "learning_rate": 1.1878838226338054e-06, "loss": 0.1295, "step": 3146 }, { "epoch": 0.7160409556313994, "grad_norm": 3.0726031872665662, "learning_rate": 1.187844983042082e-06, "loss": 0.2144, "step": 3147 }, { "epoch": 0.7162684869169511, "grad_norm": 2.296547635428459, "learning_rate": 1.1878061319468376e-06, "loss": 0.1033, "step": 3148 }, { "epoch": 0.7164960182025029, "grad_norm": 1.109110237947707, "learning_rate": 1.1877672693488669e-06, "loss": 0.1335, "step": 3149 }, { "epoch": 0.7167235494880546, "grad_norm": 2.1949665597550783, "learning_rate": 1.1877283952489636e-06, "loss": 0.1547, "step": 3150 }, { "epoch": 0.7169510807736064, "grad_norm": 1.7458101584938244, "learning_rate": 1.1876895096479226e-06, "loss": 0.1027, "step": 3151 }, { "epoch": 0.7171786120591581, "grad_norm": 2.5253782285983917, "learning_rate": 1.1876506125465386e-06, "loss": 0.1677, "step": 3152 }, { "epoch": 0.7174061433447099, "grad_norm": 1.7720829535802851, "learning_rate": 1.1876117039456065e-06, "loss": 0.0918, "step": 3153 }, { "epoch": 0.7176336746302616, "grad_norm": 3.444761369547945, "learning_rate": 1.1875727838459213e-06, "loss": 0.1905, "step": 3154 }, { "epoch": 0.7178612059158134, "grad_norm": 2.7071394507983033, "learning_rate": 1.187533852248279e-06, "loss": 0.1088, "step": 3155 }, { "epoch": 0.7180887372013652, "grad_norm": 1.389997018526983, "learning_rate": 1.1874949091534749e-06, "loss": 0.1409, "step": 3156 }, { "epoch": 0.718316268486917, "grad_norm": 2.4375044265295345, "learning_rate": 1.1874559545623049e-06, "loss": 0.1502, "step": 3157 }, { "epoch": 0.7185437997724687, "grad_norm": 1.6111336105377863, "learning_rate": 1.1874169884755654e-06, "loss": 0.0958, "step": 3158 }, { "epoch": 0.7187713310580205, "grad_norm": 2.4785884630082182, "learning_rate": 1.1873780108940527e-06, "loss": 0.0979, "step": 3159 }, { "epoch": 0.7189988623435722, "grad_norm": 2.309500047954281, "learning_rate": 1.1873390218185636e-06, "loss": 0.0857, "step": 3160 }, { "epoch": 0.719226393629124, "grad_norm": 1.9974160071993916, "learning_rate": 1.1873000212498942e-06, "loss": 0.1217, "step": 3161 }, { "epoch": 0.7194539249146757, "grad_norm": 1.2977760267071896, "learning_rate": 1.1872610091888426e-06, "loss": 0.0946, "step": 3162 }, { "epoch": 0.7196814562002275, "grad_norm": 3.3747565831591646, "learning_rate": 1.1872219856362057e-06, "loss": 0.1854, "step": 3163 }, { "epoch": 0.7199089874857793, "grad_norm": 1.8430938679177409, "learning_rate": 1.187182950592781e-06, "loss": 0.1771, "step": 3164 }, { "epoch": 0.7201365187713311, "grad_norm": 1.3979587929565391, "learning_rate": 1.1871439040593663e-06, "loss": 0.178, "step": 3165 }, { "epoch": 0.7203640500568829, "grad_norm": 1.9035217199652052, "learning_rate": 1.1871048460367598e-06, "loss": 0.1594, "step": 3166 }, { "epoch": 0.7205915813424346, "grad_norm": 3.1235740653373747, "learning_rate": 1.1870657765257595e-06, "loss": 0.1065, "step": 3167 }, { "epoch": 0.7208191126279864, "grad_norm": 2.0971860071500523, "learning_rate": 1.1870266955271645e-06, "loss": 0.1457, "step": 3168 }, { "epoch": 0.7210466439135381, "grad_norm": 2.773067534867043, "learning_rate": 1.186987603041773e-06, "loss": 0.2245, "step": 3169 }, { "epoch": 0.7212741751990899, "grad_norm": 1.6672498177883812, "learning_rate": 1.1869484990703839e-06, "loss": 0.1552, "step": 3170 }, { "epoch": 0.7215017064846416, "grad_norm": 1.037913927891429, "learning_rate": 1.1869093836137968e-06, "loss": 0.0904, "step": 3171 }, { "epoch": 0.7217292377701934, "grad_norm": 2.496116324041325, "learning_rate": 1.186870256672811e-06, "loss": 0.1904, "step": 3172 }, { "epoch": 0.7219567690557451, "grad_norm": 2.7658500953788, "learning_rate": 1.1868311182482262e-06, "loss": 0.1968, "step": 3173 }, { "epoch": 0.7221843003412969, "grad_norm": 2.2669129231199143, "learning_rate": 1.1867919683408421e-06, "loss": 0.0807, "step": 3174 }, { "epoch": 0.7224118316268487, "grad_norm": 2.139798819306396, "learning_rate": 1.1867528069514591e-06, "loss": 0.2389, "step": 3175 }, { "epoch": 0.7226393629124005, "grad_norm": 1.8865928408275647, "learning_rate": 1.1867136340808778e-06, "loss": 0.0814, "step": 3176 }, { "epoch": 0.7228668941979522, "grad_norm": 2.0538790027428595, "learning_rate": 1.1866744497298982e-06, "loss": 0.1128, "step": 3177 }, { "epoch": 0.723094425483504, "grad_norm": 2.3275182667078638, "learning_rate": 1.1866352538993216e-06, "loss": 0.1125, "step": 3178 }, { "epoch": 0.7233219567690558, "grad_norm": 2.5368864511406133, "learning_rate": 1.1865960465899492e-06, "loss": 0.1257, "step": 3179 }, { "epoch": 0.7235494880546075, "grad_norm": 2.208159889999602, "learning_rate": 1.186556827802582e-06, "loss": 0.1483, "step": 3180 }, { "epoch": 0.7237770193401593, "grad_norm": 1.3791871408723215, "learning_rate": 1.1865175975380218e-06, "loss": 0.1221, "step": 3181 }, { "epoch": 0.724004550625711, "grad_norm": 1.977376136232679, "learning_rate": 1.18647835579707e-06, "loss": 0.1394, "step": 3182 }, { "epoch": 0.7242320819112628, "grad_norm": 2.4697637699058537, "learning_rate": 1.186439102580529e-06, "loss": 0.1847, "step": 3183 }, { "epoch": 0.7244596131968145, "grad_norm": 1.8882424053371538, "learning_rate": 1.1863998378892011e-06, "loss": 0.0924, "step": 3184 }, { "epoch": 0.7246871444823664, "grad_norm": 3.5850536595775604, "learning_rate": 1.1863605617238885e-06, "loss": 0.2122, "step": 3185 }, { "epoch": 0.7249146757679181, "grad_norm": 1.511139760494177, "learning_rate": 1.1863212740853941e-06, "loss": 0.1896, "step": 3186 }, { "epoch": 0.7251422070534699, "grad_norm": 2.5563678633022837, "learning_rate": 1.1862819749745212e-06, "loss": 0.1247, "step": 3187 }, { "epoch": 0.7253697383390216, "grad_norm": 2.3301556693527123, "learning_rate": 1.1862426643920722e-06, "loss": 0.1177, "step": 3188 }, { "epoch": 0.7255972696245734, "grad_norm": 2.33880572168744, "learning_rate": 1.1862033423388513e-06, "loss": 0.177, "step": 3189 }, { "epoch": 0.7258248009101251, "grad_norm": 1.3976182704723772, "learning_rate": 1.1861640088156617e-06, "loss": 0.0935, "step": 3190 }, { "epoch": 0.7260523321956769, "grad_norm": 1.6758681316019886, "learning_rate": 1.1861246638233077e-06, "loss": 0.1094, "step": 3191 }, { "epoch": 0.7262798634812286, "grad_norm": 1.600929618729876, "learning_rate": 1.1860853073625931e-06, "loss": 0.1332, "step": 3192 }, { "epoch": 0.7265073947667804, "grad_norm": 1.8021701788400442, "learning_rate": 1.1860459394343223e-06, "loss": 0.1689, "step": 3193 }, { "epoch": 0.7267349260523321, "grad_norm": 2.1221863719292786, "learning_rate": 1.1860065600393002e-06, "loss": 0.1398, "step": 3194 }, { "epoch": 0.726962457337884, "grad_norm": 3.5848223977448335, "learning_rate": 1.1859671691783315e-06, "loss": 0.1911, "step": 3195 }, { "epoch": 0.7271899886234358, "grad_norm": 1.4735878220175993, "learning_rate": 1.1859277668522209e-06, "loss": 0.0927, "step": 3196 }, { "epoch": 0.7274175199089875, "grad_norm": 2.6836187261265048, "learning_rate": 1.1858883530617743e-06, "loss": 0.3431, "step": 3197 }, { "epoch": 0.7276450511945393, "grad_norm": 2.8853311096517023, "learning_rate": 1.185848927807797e-06, "loss": 0.16, "step": 3198 }, { "epoch": 0.727872582480091, "grad_norm": 1.9070608581564725, "learning_rate": 1.1858094910910945e-06, "loss": 0.0933, "step": 3199 }, { "epoch": 0.7281001137656428, "grad_norm": 1.636161711581521, "learning_rate": 1.1857700429124733e-06, "loss": 0.0921, "step": 3200 }, { "epoch": 0.7283276450511945, "grad_norm": 1.163604378500458, "learning_rate": 1.1857305832727395e-06, "loss": 0.0819, "step": 3201 }, { "epoch": 0.7285551763367463, "grad_norm": 1.9828883754527078, "learning_rate": 1.1856911121726993e-06, "loss": 0.1576, "step": 3202 }, { "epoch": 0.728782707622298, "grad_norm": 3.2149916841430355, "learning_rate": 1.1856516296131596e-06, "loss": 0.1322, "step": 3203 }, { "epoch": 0.7290102389078499, "grad_norm": 2.0052514996312136, "learning_rate": 1.1856121355949276e-06, "loss": 0.214, "step": 3204 }, { "epoch": 0.7292377701934016, "grad_norm": 2.5233532450432206, "learning_rate": 1.18557263011881e-06, "loss": 0.126, "step": 3205 }, { "epoch": 0.7294653014789534, "grad_norm": 1.6359218526238526, "learning_rate": 1.1855331131856146e-06, "loss": 0.1913, "step": 3206 }, { "epoch": 0.7296928327645051, "grad_norm": 1.6181503958338594, "learning_rate": 1.185493584796149e-06, "loss": 0.0989, "step": 3207 }, { "epoch": 0.7299203640500569, "grad_norm": 1.9166380253206605, "learning_rate": 1.185454044951221e-06, "loss": 0.0894, "step": 3208 }, { "epoch": 0.7301478953356086, "grad_norm": 1.3700990896238152, "learning_rate": 1.1854144936516388e-06, "loss": 0.0844, "step": 3209 }, { "epoch": 0.7303754266211604, "grad_norm": 2.6450641956403476, "learning_rate": 1.1853749308982107e-06, "loss": 0.1549, "step": 3210 }, { "epoch": 0.7306029579067121, "grad_norm": 2.1756090269876314, "learning_rate": 1.1853353566917452e-06, "loss": 0.1004, "step": 3211 }, { "epoch": 0.7308304891922639, "grad_norm": 1.2519258630474208, "learning_rate": 1.1852957710330511e-06, "loss": 0.0763, "step": 3212 }, { "epoch": 0.7310580204778157, "grad_norm": 18.651298184865464, "learning_rate": 1.1852561739229377e-06, "loss": 0.1599, "step": 3213 }, { "epoch": 0.7312855517633675, "grad_norm": 3.12678058720072, "learning_rate": 1.1852165653622141e-06, "loss": 0.3179, "step": 3214 }, { "epoch": 0.7315130830489193, "grad_norm": 1.2066479080845354, "learning_rate": 1.18517694535169e-06, "loss": 0.0688, "step": 3215 }, { "epoch": 0.731740614334471, "grad_norm": 1.80310804012313, "learning_rate": 1.1851373138921752e-06, "loss": 0.1103, "step": 3216 }, { "epoch": 0.7319681456200228, "grad_norm": 1.366328303092527, "learning_rate": 1.1850976709844792e-06, "loss": 0.1247, "step": 3217 }, { "epoch": 0.7321956769055745, "grad_norm": 1.9493249225988984, "learning_rate": 1.1850580166294127e-06, "loss": 0.0962, "step": 3218 }, { "epoch": 0.7324232081911263, "grad_norm": 1.1976897071979897, "learning_rate": 1.1850183508277862e-06, "loss": 0.1775, "step": 3219 }, { "epoch": 0.732650739476678, "grad_norm": 3.0289421262567084, "learning_rate": 1.18497867358041e-06, "loss": 0.2875, "step": 3220 }, { "epoch": 0.7328782707622298, "grad_norm": 1.6390476846704543, "learning_rate": 1.1849389848880955e-06, "loss": 0.099, "step": 3221 }, { "epoch": 0.7331058020477815, "grad_norm": 1.8644621568477064, "learning_rate": 1.1848992847516535e-06, "loss": 0.0925, "step": 3222 }, { "epoch": 0.7333333333333333, "grad_norm": 2.158548498055859, "learning_rate": 1.1848595731718955e-06, "loss": 0.0937, "step": 3223 }, { "epoch": 0.7335608646188851, "grad_norm": 1.6735365679375431, "learning_rate": 1.1848198501496331e-06, "loss": 0.1359, "step": 3224 }, { "epoch": 0.7337883959044369, "grad_norm": 1.3225066929770561, "learning_rate": 1.1847801156856783e-06, "loss": 0.1176, "step": 3225 }, { "epoch": 0.7340159271899886, "grad_norm": 2.1054486369243106, "learning_rate": 1.1847403697808433e-06, "loss": 0.1962, "step": 3226 }, { "epoch": 0.7342434584755404, "grad_norm": 1.7746017108416965, "learning_rate": 1.18470061243594e-06, "loss": 0.1681, "step": 3227 }, { "epoch": 0.7344709897610922, "grad_norm": 2.9480584375351646, "learning_rate": 1.1846608436517813e-06, "loss": 0.1256, "step": 3228 }, { "epoch": 0.7346985210466439, "grad_norm": 2.623755146702941, "learning_rate": 1.1846210634291799e-06, "loss": 0.1623, "step": 3229 }, { "epoch": 0.7349260523321957, "grad_norm": 2.1291378433809114, "learning_rate": 1.184581271768949e-06, "loss": 0.1612, "step": 3230 }, { "epoch": 0.7351535836177474, "grad_norm": 2.1608869767979457, "learning_rate": 1.1845414686719014e-06, "loss": 0.2046, "step": 3231 }, { "epoch": 0.7353811149032992, "grad_norm": 1.3738772434928717, "learning_rate": 1.1845016541388513e-06, "loss": 0.1205, "step": 3232 }, { "epoch": 0.7356086461888509, "grad_norm": 2.046143944672607, "learning_rate": 1.184461828170612e-06, "loss": 0.165, "step": 3233 }, { "epoch": 0.7358361774744028, "grad_norm": 1.732382614564042, "learning_rate": 1.1844219907679973e-06, "loss": 0.1074, "step": 3234 }, { "epoch": 0.7360637087599545, "grad_norm": 2.1175601609684698, "learning_rate": 1.184382141931822e-06, "loss": 0.1152, "step": 3235 }, { "epoch": 0.7362912400455063, "grad_norm": 2.735577181836857, "learning_rate": 1.1843422816628998e-06, "loss": 0.1559, "step": 3236 }, { "epoch": 0.736518771331058, "grad_norm": 1.551253205529087, "learning_rate": 1.184302409962046e-06, "loss": 0.0872, "step": 3237 }, { "epoch": 0.7367463026166098, "grad_norm": 1.6777152959131918, "learning_rate": 1.1842625268300754e-06, "loss": 0.098, "step": 3238 }, { "epoch": 0.7369738339021615, "grad_norm": 1.9068533548096838, "learning_rate": 1.1842226322678028e-06, "loss": 0.076, "step": 3239 }, { "epoch": 0.7372013651877133, "grad_norm": 3.027756278397639, "learning_rate": 1.1841827262760436e-06, "loss": 0.1511, "step": 3240 }, { "epoch": 0.737428896473265, "grad_norm": 3.2145156468158245, "learning_rate": 1.1841428088556137e-06, "loss": 0.1693, "step": 3241 }, { "epoch": 0.7376564277588168, "grad_norm": 1.640064862711897, "learning_rate": 1.184102880007329e-06, "loss": 0.1045, "step": 3242 }, { "epoch": 0.7378839590443687, "grad_norm": 2.2190011378264556, "learning_rate": 1.1840629397320052e-06, "loss": 0.1467, "step": 3243 }, { "epoch": 0.7381114903299204, "grad_norm": 2.0452904656567608, "learning_rate": 1.1840229880304589e-06, "loss": 0.1435, "step": 3244 }, { "epoch": 0.7383390216154722, "grad_norm": 2.8918271598629346, "learning_rate": 1.1839830249035062e-06, "loss": 0.1346, "step": 3245 }, { "epoch": 0.7385665529010239, "grad_norm": 2.342345834795013, "learning_rate": 1.1839430503519645e-06, "loss": 0.1392, "step": 3246 }, { "epoch": 0.7387940841865757, "grad_norm": 3.264140845800353, "learning_rate": 1.1839030643766505e-06, "loss": 0.2778, "step": 3247 }, { "epoch": 0.7390216154721274, "grad_norm": 2.4607222935859494, "learning_rate": 1.1838630669783814e-06, "loss": 0.1624, "step": 3248 }, { "epoch": 0.7392491467576792, "grad_norm": 1.9445345822890676, "learning_rate": 1.1838230581579746e-06, "loss": 0.0794, "step": 3249 }, { "epoch": 0.7394766780432309, "grad_norm": 2.1685275554098613, "learning_rate": 1.183783037916248e-06, "loss": 0.1796, "step": 3250 }, { "epoch": 0.7397042093287827, "grad_norm": 3.2158244936505236, "learning_rate": 1.1837430062540196e-06, "loss": 0.1964, "step": 3251 }, { "epoch": 0.7399317406143344, "grad_norm": 1.7184491733113405, "learning_rate": 1.1837029631721072e-06, "loss": 0.1352, "step": 3252 }, { "epoch": 0.7401592718998863, "grad_norm": 1.7778593056510787, "learning_rate": 1.1836629086713296e-06, "loss": 0.0873, "step": 3253 }, { "epoch": 0.740386803185438, "grad_norm": 1.7203195667874172, "learning_rate": 1.1836228427525054e-06, "loss": 0.0676, "step": 3254 }, { "epoch": 0.7406143344709898, "grad_norm": 2.922732576924441, "learning_rate": 1.183582765416453e-06, "loss": 0.172, "step": 3255 }, { "epoch": 0.7408418657565415, "grad_norm": 2.039786020178238, "learning_rate": 1.1835426766639923e-06, "loss": 0.135, "step": 3256 }, { "epoch": 0.7410693970420933, "grad_norm": 2.2399538962699017, "learning_rate": 1.183502576495942e-06, "loss": 0.116, "step": 3257 }, { "epoch": 0.741296928327645, "grad_norm": 1.3632994430066812, "learning_rate": 1.1834624649131218e-06, "loss": 0.1134, "step": 3258 }, { "epoch": 0.7415244596131968, "grad_norm": 1.2918804407041258, "learning_rate": 1.1834223419163518e-06, "loss": 0.112, "step": 3259 }, { "epoch": 0.7417519908987485, "grad_norm": 2.6887841828247234, "learning_rate": 1.1833822075064517e-06, "loss": 0.1191, "step": 3260 }, { "epoch": 0.7419795221843003, "grad_norm": 2.197636783508214, "learning_rate": 1.183342061684242e-06, "loss": 0.1279, "step": 3261 }, { "epoch": 0.742207053469852, "grad_norm": 1.6494974684467505, "learning_rate": 1.183301904450543e-06, "loss": 0.1166, "step": 3262 }, { "epoch": 0.7424345847554039, "grad_norm": 2.828124132396838, "learning_rate": 1.1832617358061756e-06, "loss": 0.1858, "step": 3263 }, { "epoch": 0.7426621160409557, "grad_norm": 2.6045427793496727, "learning_rate": 1.1832215557519608e-06, "loss": 0.1324, "step": 3264 }, { "epoch": 0.7428896473265074, "grad_norm": 2.127063173518277, "learning_rate": 1.1831813642887196e-06, "loss": 0.1959, "step": 3265 }, { "epoch": 0.7431171786120592, "grad_norm": 2.7387306057680267, "learning_rate": 1.1831411614172735e-06, "loss": 0.1821, "step": 3266 }, { "epoch": 0.7433447098976109, "grad_norm": 2.940295950006921, "learning_rate": 1.1831009471384445e-06, "loss": 0.1243, "step": 3267 }, { "epoch": 0.7435722411831627, "grad_norm": 1.4605085364781514, "learning_rate": 1.1830607214530543e-06, "loss": 0.1861, "step": 3268 }, { "epoch": 0.7437997724687144, "grad_norm": 3.3366956312883143, "learning_rate": 1.1830204843619248e-06, "loss": 0.1764, "step": 3269 }, { "epoch": 0.7440273037542662, "grad_norm": 2.2222642240615094, "learning_rate": 1.1829802358658785e-06, "loss": 0.215, "step": 3270 }, { "epoch": 0.7442548350398179, "grad_norm": 1.5340999546765406, "learning_rate": 1.1829399759657383e-06, "loss": 0.1379, "step": 3271 }, { "epoch": 0.7444823663253698, "grad_norm": 1.553840846268558, "learning_rate": 1.1828997046623267e-06, "loss": 0.1392, "step": 3272 }, { "epoch": 0.7447098976109215, "grad_norm": 2.597096922534138, "learning_rate": 1.1828594219564669e-06, "loss": 0.1466, "step": 3273 }, { "epoch": 0.7449374288964733, "grad_norm": 1.3052925878692923, "learning_rate": 1.182819127848982e-06, "loss": 0.0779, "step": 3274 }, { "epoch": 0.745164960182025, "grad_norm": 3.485548486880467, "learning_rate": 1.1827788223406959e-06, "loss": 0.2671, "step": 3275 }, { "epoch": 0.7453924914675768, "grad_norm": 2.3473098076936907, "learning_rate": 1.1827385054324323e-06, "loss": 0.1415, "step": 3276 }, { "epoch": 0.7456200227531286, "grad_norm": 4.3291248044860815, "learning_rate": 1.1826981771250148e-06, "loss": 0.2232, "step": 3277 }, { "epoch": 0.7458475540386803, "grad_norm": 3.7337389140136463, "learning_rate": 1.1826578374192681e-06, "loss": 0.2382, "step": 3278 }, { "epoch": 0.7460750853242321, "grad_norm": 3.3043996653981846, "learning_rate": 1.1826174863160168e-06, "loss": 0.1457, "step": 3279 }, { "epoch": 0.7463026166097838, "grad_norm": 1.984573811971679, "learning_rate": 1.182577123816085e-06, "loss": 0.1282, "step": 3280 }, { "epoch": 0.7465301478953356, "grad_norm": 2.5055449072250084, "learning_rate": 1.1825367499202978e-06, "loss": 0.152, "step": 3281 }, { "epoch": 0.7467576791808874, "grad_norm": 2.566420978783162, "learning_rate": 1.1824963646294806e-06, "loss": 0.1344, "step": 3282 }, { "epoch": 0.7469852104664392, "grad_norm": 3.030207356215811, "learning_rate": 1.1824559679444588e-06, "loss": 0.1618, "step": 3283 }, { "epoch": 0.7472127417519909, "grad_norm": 1.1348070316131165, "learning_rate": 1.182415559866058e-06, "loss": 0.118, "step": 3284 }, { "epoch": 0.7474402730375427, "grad_norm": 1.0639490098963196, "learning_rate": 1.182375140395104e-06, "loss": 0.0784, "step": 3285 }, { "epoch": 0.7476678043230944, "grad_norm": 1.6785139016792132, "learning_rate": 1.1823347095324228e-06, "loss": 0.1616, "step": 3286 }, { "epoch": 0.7478953356086462, "grad_norm": 1.4785752433396468, "learning_rate": 1.1822942672788409e-06, "loss": 0.065, "step": 3287 }, { "epoch": 0.7481228668941979, "grad_norm": 2.3836521148460688, "learning_rate": 1.1822538136351849e-06, "loss": 0.1318, "step": 3288 }, { "epoch": 0.7483503981797497, "grad_norm": 1.9833187363432578, "learning_rate": 1.1822133486022815e-06, "loss": 0.1365, "step": 3289 }, { "epoch": 0.7485779294653014, "grad_norm": 2.2065405901117074, "learning_rate": 1.1821728721809577e-06, "loss": 0.1553, "step": 3290 }, { "epoch": 0.7488054607508532, "grad_norm": 3.0349788681713017, "learning_rate": 1.1821323843720408e-06, "loss": 0.1817, "step": 3291 }, { "epoch": 0.749032992036405, "grad_norm": 1.9694888646938773, "learning_rate": 1.1820918851763582e-06, "loss": 0.125, "step": 3292 }, { "epoch": 0.7492605233219568, "grad_norm": 1.5124900798168812, "learning_rate": 1.182051374594738e-06, "loss": 0.1254, "step": 3293 }, { "epoch": 0.7494880546075086, "grad_norm": 3.67837925062901, "learning_rate": 1.1820108526280076e-06, "loss": 0.1798, "step": 3294 }, { "epoch": 0.7497155858930603, "grad_norm": 3.3239018085434786, "learning_rate": 1.1819703192769955e-06, "loss": 0.1614, "step": 3295 }, { "epoch": 0.7499431171786121, "grad_norm": 2.8547697014823945, "learning_rate": 1.1819297745425304e-06, "loss": 0.1254, "step": 3296 }, { "epoch": 0.7501706484641638, "grad_norm": 1.615743914055625, "learning_rate": 1.1818892184254404e-06, "loss": 0.1423, "step": 3297 }, { "epoch": 0.7503981797497156, "grad_norm": 2.3926722908231683, "learning_rate": 1.1818486509265547e-06, "loss": 0.1005, "step": 3298 }, { "epoch": 0.7506257110352673, "grad_norm": 1.659490567501026, "learning_rate": 1.1818080720467026e-06, "loss": 0.093, "step": 3299 }, { "epoch": 0.7508532423208191, "grad_norm": 1.4636043042566025, "learning_rate": 1.1817674817867131e-06, "loss": 0.1194, "step": 3300 }, { "epoch": 0.7510807736063708, "grad_norm": 1.859124523017859, "learning_rate": 1.181726880147416e-06, "loss": 0.1264, "step": 3301 }, { "epoch": 0.7513083048919227, "grad_norm": 0.7197511547852126, "learning_rate": 1.181686267129641e-06, "loss": 0.0518, "step": 3302 }, { "epoch": 0.7515358361774744, "grad_norm": 2.2033546472464542, "learning_rate": 1.1816456427342181e-06, "loss": 0.1269, "step": 3303 }, { "epoch": 0.7517633674630262, "grad_norm": 1.6378674348907998, "learning_rate": 1.181605006961978e-06, "loss": 0.0921, "step": 3304 }, { "epoch": 0.7519908987485779, "grad_norm": 1.7124569436254014, "learning_rate": 1.1815643598137507e-06, "loss": 0.1369, "step": 3305 }, { "epoch": 0.7522184300341297, "grad_norm": 1.5770049353688607, "learning_rate": 1.1815237012903675e-06, "loss": 0.1099, "step": 3306 }, { "epoch": 0.7524459613196814, "grad_norm": 2.571994096255603, "learning_rate": 1.1814830313926589e-06, "loss": 0.175, "step": 3307 }, { "epoch": 0.7526734926052332, "grad_norm": 1.823553996843364, "learning_rate": 1.1814423501214562e-06, "loss": 0.1043, "step": 3308 }, { "epoch": 0.752901023890785, "grad_norm": 2.167497133328913, "learning_rate": 1.1814016574775909e-06, "loss": 0.1003, "step": 3309 }, { "epoch": 0.7531285551763367, "grad_norm": 1.3244656811190443, "learning_rate": 1.1813609534618948e-06, "loss": 0.089, "step": 3310 }, { "epoch": 0.7533560864618886, "grad_norm": 1.9888469659716759, "learning_rate": 1.1813202380751998e-06, "loss": 0.0601, "step": 3311 }, { "epoch": 0.7535836177474403, "grad_norm": 1.765491538331343, "learning_rate": 1.1812795113183378e-06, "loss": 0.1248, "step": 3312 }, { "epoch": 0.7538111490329921, "grad_norm": 2.0478968271407383, "learning_rate": 1.1812387731921415e-06, "loss": 0.1382, "step": 3313 }, { "epoch": 0.7540386803185438, "grad_norm": 3.2784426559764985, "learning_rate": 1.1811980236974435e-06, "loss": 0.146, "step": 3314 }, { "epoch": 0.7542662116040956, "grad_norm": 1.4734056038714625, "learning_rate": 1.1811572628350764e-06, "loss": 0.127, "step": 3315 }, { "epoch": 0.7544937428896473, "grad_norm": 2.0370268043177115, "learning_rate": 1.1811164906058735e-06, "loss": 0.1445, "step": 3316 }, { "epoch": 0.7547212741751991, "grad_norm": 2.96415685198712, "learning_rate": 1.1810757070106678e-06, "loss": 0.2705, "step": 3317 }, { "epoch": 0.7549488054607508, "grad_norm": 2.2971763536780943, "learning_rate": 1.1810349120502932e-06, "loss": 0.1491, "step": 3318 }, { "epoch": 0.7551763367463026, "grad_norm": 2.681036370900885, "learning_rate": 1.1809941057255834e-06, "loss": 0.1695, "step": 3319 }, { "epoch": 0.7554038680318543, "grad_norm": 2.724601949111482, "learning_rate": 1.1809532880373721e-06, "loss": 0.2026, "step": 3320 }, { "epoch": 0.7556313993174062, "grad_norm": 1.8617956455441396, "learning_rate": 1.180912458986494e-06, "loss": 0.0897, "step": 3321 }, { "epoch": 0.755858930602958, "grad_norm": 2.53226962131568, "learning_rate": 1.180871618573783e-06, "loss": 0.1572, "step": 3322 }, { "epoch": 0.7560864618885097, "grad_norm": 2.21274255033511, "learning_rate": 1.1808307668000745e-06, "loss": 0.1583, "step": 3323 }, { "epoch": 0.7563139931740614, "grad_norm": 2.125706306020661, "learning_rate": 1.180789903666203e-06, "loss": 0.1883, "step": 3324 }, { "epoch": 0.7565415244596132, "grad_norm": 1.6246651739254347, "learning_rate": 1.1807490291730036e-06, "loss": 0.1668, "step": 3325 }, { "epoch": 0.756769055745165, "grad_norm": 1.8005307284189929, "learning_rate": 1.1807081433213122e-06, "loss": 0.1145, "step": 3326 }, { "epoch": 0.7569965870307167, "grad_norm": 1.405317196271188, "learning_rate": 1.1806672461119637e-06, "loss": 0.1173, "step": 3327 }, { "epoch": 0.7572241183162685, "grad_norm": 1.8080670242907921, "learning_rate": 1.1806263375457947e-06, "loss": 0.0874, "step": 3328 }, { "epoch": 0.7574516496018202, "grad_norm": 2.4167979568785842, "learning_rate": 1.1805854176236406e-06, "loss": 0.1695, "step": 3329 }, { "epoch": 0.757679180887372, "grad_norm": 2.3063952560271015, "learning_rate": 1.1805444863463384e-06, "loss": 0.1849, "step": 3330 }, { "epoch": 0.7579067121729238, "grad_norm": 2.079072885402329, "learning_rate": 1.180503543714724e-06, "loss": 0.0683, "step": 3331 }, { "epoch": 0.7581342434584756, "grad_norm": 1.5746175237173137, "learning_rate": 1.1804625897296345e-06, "loss": 0.0965, "step": 3332 }, { "epoch": 0.7583617747440273, "grad_norm": 3.530110679680489, "learning_rate": 1.1804216243919074e-06, "loss": 0.1614, "step": 3333 }, { "epoch": 0.7585893060295791, "grad_norm": 2.0332642196040753, "learning_rate": 1.1803806477023792e-06, "loss": 0.0879, "step": 3334 }, { "epoch": 0.7588168373151308, "grad_norm": 1.3656933323492333, "learning_rate": 1.1803396596618878e-06, "loss": 0.0995, "step": 3335 }, { "epoch": 0.7590443686006826, "grad_norm": 2.0902834793608087, "learning_rate": 1.1802986602712705e-06, "loss": 0.1337, "step": 3336 }, { "epoch": 0.7592718998862343, "grad_norm": 2.5965116123759007, "learning_rate": 1.1802576495313657e-06, "loss": 0.0907, "step": 3337 }, { "epoch": 0.7594994311717861, "grad_norm": 4.142087858458215, "learning_rate": 1.1802166274430116e-06, "loss": 0.1953, "step": 3338 }, { "epoch": 0.7597269624573378, "grad_norm": 1.7222237359967432, "learning_rate": 1.1801755940070464e-06, "loss": 0.0783, "step": 3339 }, { "epoch": 0.7599544937428896, "grad_norm": 3.05894156690617, "learning_rate": 1.1801345492243087e-06, "loss": 0.1581, "step": 3340 }, { "epoch": 0.7601820250284415, "grad_norm": 2.5343054135059213, "learning_rate": 1.1800934930956378e-06, "loss": 0.1767, "step": 3341 }, { "epoch": 0.7604095563139932, "grad_norm": 1.3193612113140212, "learning_rate": 1.1800524256218724e-06, "loss": 0.1118, "step": 3342 }, { "epoch": 0.760637087599545, "grad_norm": 2.2549990933213406, "learning_rate": 1.1800113468038518e-06, "loss": 0.1354, "step": 3343 }, { "epoch": 0.7608646188850967, "grad_norm": 2.3490442108286347, "learning_rate": 1.1799702566424159e-06, "loss": 0.2573, "step": 3344 }, { "epoch": 0.7610921501706485, "grad_norm": 1.535894674316497, "learning_rate": 1.1799291551384042e-06, "loss": 0.1606, "step": 3345 }, { "epoch": 0.7613196814562002, "grad_norm": 2.14792184820641, "learning_rate": 1.1798880422926568e-06, "loss": 0.1214, "step": 3346 }, { "epoch": 0.761547212741752, "grad_norm": 1.5845109192049205, "learning_rate": 1.1798469181060143e-06, "loss": 0.1501, "step": 3347 }, { "epoch": 0.7617747440273037, "grad_norm": 2.711691703624, "learning_rate": 1.1798057825793167e-06, "loss": 0.1688, "step": 3348 }, { "epoch": 0.7620022753128555, "grad_norm": 2.4863555470792056, "learning_rate": 1.179764635713405e-06, "loss": 0.156, "step": 3349 }, { "epoch": 0.7622298065984073, "grad_norm": 1.7061185359263562, "learning_rate": 1.1797234775091204e-06, "loss": 0.1129, "step": 3350 }, { "epoch": 0.7624573378839591, "grad_norm": 2.706753314731583, "learning_rate": 1.1796823079673036e-06, "loss": 0.1696, "step": 3351 }, { "epoch": 0.7626848691695108, "grad_norm": 1.9907259016451502, "learning_rate": 1.1796411270887965e-06, "loss": 0.1619, "step": 3352 }, { "epoch": 0.7629124004550626, "grad_norm": 2.4653009277692552, "learning_rate": 1.1795999348744403e-06, "loss": 0.1482, "step": 3353 }, { "epoch": 0.7631399317406143, "grad_norm": 2.15969706994595, "learning_rate": 1.1795587313250773e-06, "loss": 0.0883, "step": 3354 }, { "epoch": 0.7633674630261661, "grad_norm": 1.7222779007665892, "learning_rate": 1.1795175164415493e-06, "loss": 0.1562, "step": 3355 }, { "epoch": 0.7635949943117178, "grad_norm": 1.9770931086408174, "learning_rate": 1.1794762902246988e-06, "loss": 0.1936, "step": 3356 }, { "epoch": 0.7638225255972696, "grad_norm": 2.787868720119073, "learning_rate": 1.1794350526753688e-06, "loss": 0.2025, "step": 3357 }, { "epoch": 0.7640500568828213, "grad_norm": 2.238724793825616, "learning_rate": 1.1793938037944014e-06, "loss": 0.1931, "step": 3358 }, { "epoch": 0.7642775881683731, "grad_norm": 2.0044557011415143, "learning_rate": 1.1793525435826399e-06, "loss": 0.1247, "step": 3359 }, { "epoch": 0.764505119453925, "grad_norm": 2.692377197275603, "learning_rate": 1.1793112720409277e-06, "loss": 0.2121, "step": 3360 }, { "epoch": 0.7647326507394767, "grad_norm": 3.1721291331762855, "learning_rate": 1.1792699891701085e-06, "loss": 0.1208, "step": 3361 }, { "epoch": 0.7649601820250285, "grad_norm": 1.814950863168666, "learning_rate": 1.1792286949710254e-06, "loss": 0.1618, "step": 3362 }, { "epoch": 0.7651877133105802, "grad_norm": 2.468945582085837, "learning_rate": 1.1791873894445233e-06, "loss": 0.127, "step": 3363 }, { "epoch": 0.765415244596132, "grad_norm": 3.033669581494895, "learning_rate": 1.1791460725914455e-06, "loss": 0.1724, "step": 3364 }, { "epoch": 0.7656427758816837, "grad_norm": 1.3064478725478224, "learning_rate": 1.179104744412637e-06, "loss": 0.0949, "step": 3365 }, { "epoch": 0.7658703071672355, "grad_norm": 2.499744061538605, "learning_rate": 1.1790634049089425e-06, "loss": 0.168, "step": 3366 }, { "epoch": 0.7660978384527872, "grad_norm": 2.2054261208451416, "learning_rate": 1.1790220540812063e-06, "loss": 0.1099, "step": 3367 }, { "epoch": 0.766325369738339, "grad_norm": 1.9138510381699176, "learning_rate": 1.1789806919302743e-06, "loss": 0.1629, "step": 3368 }, { "epoch": 0.7665529010238907, "grad_norm": 1.5530188387515713, "learning_rate": 1.1789393184569914e-06, "loss": 0.0733, "step": 3369 }, { "epoch": 0.7667804323094426, "grad_norm": 2.4731819444524525, "learning_rate": 1.1788979336622034e-06, "loss": 0.0997, "step": 3370 }, { "epoch": 0.7670079635949943, "grad_norm": 2.039023451665474, "learning_rate": 1.178856537546756e-06, "loss": 0.1742, "step": 3371 }, { "epoch": 0.7672354948805461, "grad_norm": 2.295655111835599, "learning_rate": 1.1788151301114952e-06, "loss": 0.1817, "step": 3372 }, { "epoch": 0.7674630261660979, "grad_norm": 3.0168578047526435, "learning_rate": 1.1787737113572678e-06, "loss": 0.1219, "step": 3373 }, { "epoch": 0.7676905574516496, "grad_norm": 1.9623836795766205, "learning_rate": 1.1787322812849196e-06, "loss": 0.1471, "step": 3374 }, { "epoch": 0.7679180887372014, "grad_norm": 3.1623467825524125, "learning_rate": 1.1786908398952977e-06, "loss": 0.2429, "step": 3375 }, { "epoch": 0.7681456200227531, "grad_norm": 2.41967387734086, "learning_rate": 1.1786493871892491e-06, "loss": 0.0984, "step": 3376 }, { "epoch": 0.7683731513083049, "grad_norm": 1.47807577415724, "learning_rate": 1.178607923167621e-06, "loss": 0.1533, "step": 3377 }, { "epoch": 0.7686006825938566, "grad_norm": 2.0022276093033873, "learning_rate": 1.1785664478312607e-06, "loss": 0.1213, "step": 3378 }, { "epoch": 0.7688282138794085, "grad_norm": 1.3538482719717833, "learning_rate": 1.1785249611810163e-06, "loss": 0.1202, "step": 3379 }, { "epoch": 0.7690557451649602, "grad_norm": 2.09633182774713, "learning_rate": 1.1784834632177352e-06, "loss": 0.1063, "step": 3380 }, { "epoch": 0.769283276450512, "grad_norm": 2.0767741041017485, "learning_rate": 1.178441953942266e-06, "loss": 0.1268, "step": 3381 }, { "epoch": 0.7695108077360637, "grad_norm": 2.1500043301721465, "learning_rate": 1.1784004333554565e-06, "loss": 0.0931, "step": 3382 }, { "epoch": 0.7697383390216155, "grad_norm": 2.3874439937858396, "learning_rate": 1.178358901458156e-06, "loss": 0.2008, "step": 3383 }, { "epoch": 0.7699658703071672, "grad_norm": 1.8338484321709367, "learning_rate": 1.1783173582512127e-06, "loss": 0.128, "step": 3384 }, { "epoch": 0.770193401592719, "grad_norm": 1.5910093680360542, "learning_rate": 1.178275803735476e-06, "loss": 0.1111, "step": 3385 }, { "epoch": 0.7704209328782707, "grad_norm": 2.064832606530375, "learning_rate": 1.1782342379117954e-06, "loss": 0.1496, "step": 3386 }, { "epoch": 0.7706484641638225, "grad_norm": 2.599028903859917, "learning_rate": 1.17819266078102e-06, "loss": 0.1673, "step": 3387 }, { "epoch": 0.7708759954493742, "grad_norm": 1.6264002152784236, "learning_rate": 1.1781510723439995e-06, "loss": 0.1288, "step": 3388 }, { "epoch": 0.7711035267349261, "grad_norm": 2.0707608895341196, "learning_rate": 1.1781094726015842e-06, "loss": 0.1472, "step": 3389 }, { "epoch": 0.7713310580204779, "grad_norm": 2.4106800417919096, "learning_rate": 1.1780678615546245e-06, "loss": 0.1807, "step": 3390 }, { "epoch": 0.7715585893060296, "grad_norm": 2.819472416476872, "learning_rate": 1.1780262392039706e-06, "loss": 0.1614, "step": 3391 }, { "epoch": 0.7717861205915814, "grad_norm": 2.2332176948627787, "learning_rate": 1.177984605550473e-06, "loss": 0.1236, "step": 3392 }, { "epoch": 0.7720136518771331, "grad_norm": 1.711407204599878, "learning_rate": 1.177942960594983e-06, "loss": 0.1284, "step": 3393 }, { "epoch": 0.7722411831626849, "grad_norm": 1.6565829770786178, "learning_rate": 1.1779013043383516e-06, "loss": 0.0791, "step": 3394 }, { "epoch": 0.7724687144482366, "grad_norm": 1.6450273322349465, "learning_rate": 1.17785963678143e-06, "loss": 0.1579, "step": 3395 }, { "epoch": 0.7726962457337884, "grad_norm": 2.731296051334521, "learning_rate": 1.1778179579250699e-06, "loss": 0.1343, "step": 3396 }, { "epoch": 0.7729237770193401, "grad_norm": 2.374318965488984, "learning_rate": 1.1777762677701232e-06, "loss": 0.1052, "step": 3397 }, { "epoch": 0.7731513083048919, "grad_norm": 1.450879572931303, "learning_rate": 1.1777345663174419e-06, "loss": 0.1993, "step": 3398 }, { "epoch": 0.7733788395904437, "grad_norm": 2.0577588079276925, "learning_rate": 1.1776928535678784e-06, "loss": 0.1257, "step": 3399 }, { "epoch": 0.7736063708759955, "grad_norm": 1.7629557940982252, "learning_rate": 1.1776511295222852e-06, "loss": 0.1317, "step": 3400 }, { "epoch": 0.7738339021615472, "grad_norm": 2.0710954801105452, "learning_rate": 1.177609394181515e-06, "loss": 0.0998, "step": 3401 }, { "epoch": 0.774061433447099, "grad_norm": 2.777274274681113, "learning_rate": 1.177567647546421e-06, "loss": 0.2848, "step": 3402 }, { "epoch": 0.7742889647326507, "grad_norm": 2.0921178488829977, "learning_rate": 1.177525889617856e-06, "loss": 0.1265, "step": 3403 }, { "epoch": 0.7745164960182025, "grad_norm": 1.8373289479738266, "learning_rate": 1.177484120396674e-06, "loss": 0.1216, "step": 3404 }, { "epoch": 0.7747440273037542, "grad_norm": 1.6538813916238742, "learning_rate": 1.1774423398837282e-06, "loss": 0.0639, "step": 3405 }, { "epoch": 0.774971558589306, "grad_norm": 1.2496723907667286, "learning_rate": 1.177400548079873e-06, "loss": 0.1251, "step": 3406 }, { "epoch": 0.7751990898748577, "grad_norm": 1.7383502930197228, "learning_rate": 1.177358744985962e-06, "loss": 0.1146, "step": 3407 }, { "epoch": 0.7754266211604095, "grad_norm": 2.110624002292149, "learning_rate": 1.1773169306028498e-06, "loss": 0.087, "step": 3408 }, { "epoch": 0.7756541524459614, "grad_norm": 2.0104258894056675, "learning_rate": 1.1772751049313911e-06, "loss": 0.1155, "step": 3409 }, { "epoch": 0.7758816837315131, "grad_norm": 1.6597706808667767, "learning_rate": 1.1772332679724408e-06, "loss": 0.1155, "step": 3410 }, { "epoch": 0.7761092150170649, "grad_norm": 1.5975357267288846, "learning_rate": 1.1771914197268538e-06, "loss": 0.0954, "step": 3411 }, { "epoch": 0.7763367463026166, "grad_norm": 1.5910280646678994, "learning_rate": 1.1771495601954856e-06, "loss": 0.1215, "step": 3412 }, { "epoch": 0.7765642775881684, "grad_norm": 2.619906793593659, "learning_rate": 1.1771076893791914e-06, "loss": 0.1167, "step": 3413 }, { "epoch": 0.7767918088737201, "grad_norm": 3.1996885670881867, "learning_rate": 1.1770658072788272e-06, "loss": 0.1344, "step": 3414 }, { "epoch": 0.7770193401592719, "grad_norm": 2.8478691570571137, "learning_rate": 1.1770239138952492e-06, "loss": 0.1774, "step": 3415 }, { "epoch": 0.7772468714448236, "grad_norm": 3.2605307459396853, "learning_rate": 1.1769820092293132e-06, "loss": 0.2085, "step": 3416 }, { "epoch": 0.7774744027303754, "grad_norm": 1.634390217637281, "learning_rate": 1.1769400932818758e-06, "loss": 0.1419, "step": 3417 }, { "epoch": 0.7777019340159272, "grad_norm": 1.1507493581000052, "learning_rate": 1.1768981660537938e-06, "loss": 0.1707, "step": 3418 }, { "epoch": 0.777929465301479, "grad_norm": 1.09109698933865, "learning_rate": 1.1768562275459242e-06, "loss": 0.0952, "step": 3419 }, { "epoch": 0.7781569965870307, "grad_norm": 1.9193041776951303, "learning_rate": 1.1768142777591235e-06, "loss": 0.163, "step": 3420 }, { "epoch": 0.7783845278725825, "grad_norm": 2.660447855208393, "learning_rate": 1.17677231669425e-06, "loss": 0.1946, "step": 3421 }, { "epoch": 0.7786120591581343, "grad_norm": 2.1193355335950743, "learning_rate": 1.1767303443521608e-06, "loss": 0.1059, "step": 3422 }, { "epoch": 0.778839590443686, "grad_norm": 1.5289759590491452, "learning_rate": 1.1766883607337137e-06, "loss": 0.1464, "step": 3423 }, { "epoch": 0.7790671217292378, "grad_norm": 1.3577624261424397, "learning_rate": 1.176646365839767e-06, "loss": 0.0896, "step": 3424 }, { "epoch": 0.7792946530147895, "grad_norm": 1.3596876247439247, "learning_rate": 1.1766043596711787e-06, "loss": 0.11, "step": 3425 }, { "epoch": 0.7795221843003413, "grad_norm": 1.678223461795012, "learning_rate": 1.1765623422288078e-06, "loss": 0.1154, "step": 3426 }, { "epoch": 0.779749715585893, "grad_norm": 1.728616847327067, "learning_rate": 1.1765203135135126e-06, "loss": 0.1138, "step": 3427 }, { "epoch": 0.7799772468714449, "grad_norm": 1.4891809627493324, "learning_rate": 1.176478273526152e-06, "loss": 0.0999, "step": 3428 }, { "epoch": 0.7802047781569966, "grad_norm": 1.778994205604812, "learning_rate": 1.1764362222675857e-06, "loss": 0.1816, "step": 3429 }, { "epoch": 0.7804323094425484, "grad_norm": 2.0271370931761963, "learning_rate": 1.176394159738673e-06, "loss": 0.1178, "step": 3430 }, { "epoch": 0.7806598407281001, "grad_norm": 2.237166839400963, "learning_rate": 1.1763520859402735e-06, "loss": 0.1421, "step": 3431 }, { "epoch": 0.7808873720136519, "grad_norm": 1.50673747208918, "learning_rate": 1.176310000873247e-06, "loss": 0.0839, "step": 3432 }, { "epoch": 0.7811149032992036, "grad_norm": 1.8131829377270645, "learning_rate": 1.1762679045384537e-06, "loss": 0.1395, "step": 3433 }, { "epoch": 0.7813424345847554, "grad_norm": 2.4892969634569333, "learning_rate": 1.1762257969367543e-06, "loss": 0.1181, "step": 3434 }, { "epoch": 0.7815699658703071, "grad_norm": 2.411363769583913, "learning_rate": 1.176183678069009e-06, "loss": 0.1729, "step": 3435 }, { "epoch": 0.7817974971558589, "grad_norm": 1.8263989621523369, "learning_rate": 1.1761415479360784e-06, "loss": 0.1787, "step": 3436 }, { "epoch": 0.7820250284414106, "grad_norm": 2.173624893203865, "learning_rate": 1.1760994065388246e-06, "loss": 0.1014, "step": 3437 }, { "epoch": 0.7822525597269625, "grad_norm": 2.5061481612728587, "learning_rate": 1.1760572538781077e-06, "loss": 0.1105, "step": 3438 }, { "epoch": 0.7824800910125143, "grad_norm": 1.5904833725277863, "learning_rate": 1.17601508995479e-06, "loss": 0.075, "step": 3439 }, { "epoch": 0.782707622298066, "grad_norm": 2.3323087090668846, "learning_rate": 1.175972914769733e-06, "loss": 0.1044, "step": 3440 }, { "epoch": 0.7829351535836178, "grad_norm": 2.103149597484196, "learning_rate": 1.1759307283237986e-06, "loss": 0.1347, "step": 3441 }, { "epoch": 0.7831626848691695, "grad_norm": 1.6169147832378625, "learning_rate": 1.175888530617849e-06, "loss": 0.0846, "step": 3442 }, { "epoch": 0.7833902161547213, "grad_norm": 2.899804826982188, "learning_rate": 1.175846321652747e-06, "loss": 0.1551, "step": 3443 }, { "epoch": 0.783617747440273, "grad_norm": 1.3458740703284953, "learning_rate": 1.1758041014293548e-06, "loss": 0.0923, "step": 3444 }, { "epoch": 0.7838452787258248, "grad_norm": 1.493122071904463, "learning_rate": 1.1757618699485353e-06, "loss": 0.141, "step": 3445 }, { "epoch": 0.7840728100113765, "grad_norm": 2.1402488719534074, "learning_rate": 1.1757196272111524e-06, "loss": 0.1485, "step": 3446 }, { "epoch": 0.7843003412969284, "grad_norm": 2.152687176574438, "learning_rate": 1.1756773732180684e-06, "loss": 0.0962, "step": 3447 }, { "epoch": 0.7845278725824801, "grad_norm": 1.974274187209729, "learning_rate": 1.1756351079701477e-06, "loss": 0.107, "step": 3448 }, { "epoch": 0.7847554038680319, "grad_norm": 2.0072732316781283, "learning_rate": 1.1755928314682537e-06, "loss": 0.0983, "step": 3449 }, { "epoch": 0.7849829351535836, "grad_norm": 1.880475722204832, "learning_rate": 1.1755505437132507e-06, "loss": 0.1217, "step": 3450 }, { "epoch": 0.7852104664391354, "grad_norm": 1.863619823885818, "learning_rate": 1.1755082447060029e-06, "loss": 0.1714, "step": 3451 }, { "epoch": 0.7854379977246871, "grad_norm": 2.2078473549037123, "learning_rate": 1.1754659344473747e-06, "loss": 0.1717, "step": 3452 }, { "epoch": 0.7856655290102389, "grad_norm": 0.9604209487774992, "learning_rate": 1.175423612938231e-06, "loss": 0.1206, "step": 3453 }, { "epoch": 0.7858930602957906, "grad_norm": 2.8777141127877566, "learning_rate": 1.1753812801794368e-06, "loss": 0.1956, "step": 3454 }, { "epoch": 0.7861205915813424, "grad_norm": 2.2683260816233624, "learning_rate": 1.175338936171857e-06, "loss": 0.0813, "step": 3455 }, { "epoch": 0.7863481228668942, "grad_norm": 2.2494833127836555, "learning_rate": 1.1752965809163574e-06, "loss": 0.1326, "step": 3456 }, { "epoch": 0.786575654152446, "grad_norm": 1.0250772681759766, "learning_rate": 1.1752542144138033e-06, "loss": 0.1171, "step": 3457 }, { "epoch": 0.7868031854379978, "grad_norm": 2.0754576696406155, "learning_rate": 1.1752118366650608e-06, "loss": 0.1096, "step": 3458 }, { "epoch": 0.7870307167235495, "grad_norm": 1.4736818380845116, "learning_rate": 1.1751694476709962e-06, "loss": 0.0629, "step": 3459 }, { "epoch": 0.7872582480091013, "grad_norm": 1.7459278393261928, "learning_rate": 1.1751270474324757e-06, "loss": 0.1304, "step": 3460 }, { "epoch": 0.787485779294653, "grad_norm": 1.8293046683083407, "learning_rate": 1.1750846359503657e-06, "loss": 0.1377, "step": 3461 }, { "epoch": 0.7877133105802048, "grad_norm": 1.4740555170674776, "learning_rate": 1.1750422132255334e-06, "loss": 0.101, "step": 3462 }, { "epoch": 0.7879408418657565, "grad_norm": 2.506831345458617, "learning_rate": 1.1749997792588453e-06, "loss": 0.15, "step": 3463 }, { "epoch": 0.7881683731513083, "grad_norm": 1.3945678529440881, "learning_rate": 1.1749573340511693e-06, "loss": 0.1358, "step": 3464 }, { "epoch": 0.78839590443686, "grad_norm": 2.1814793309718485, "learning_rate": 1.1749148776033723e-06, "loss": 0.1633, "step": 3465 }, { "epoch": 0.7886234357224118, "grad_norm": 2.641721026309912, "learning_rate": 1.1748724099163225e-06, "loss": 0.1939, "step": 3466 }, { "epoch": 0.7888509670079636, "grad_norm": 1.2748767276799335, "learning_rate": 1.1748299309908878e-06, "loss": 0.0653, "step": 3467 }, { "epoch": 0.7890784982935154, "grad_norm": 2.211160099467529, "learning_rate": 1.174787440827936e-06, "loss": 0.1336, "step": 3468 }, { "epoch": 0.7893060295790671, "grad_norm": 2.3953771038471743, "learning_rate": 1.1747449394283361e-06, "loss": 0.13, "step": 3469 }, { "epoch": 0.7895335608646189, "grad_norm": 2.515025121426822, "learning_rate": 1.174702426792956e-06, "loss": 0.188, "step": 3470 }, { "epoch": 0.7897610921501707, "grad_norm": 1.8273081736073256, "learning_rate": 1.1746599029226656e-06, "loss": 0.1129, "step": 3471 }, { "epoch": 0.7899886234357224, "grad_norm": 1.749015243103347, "learning_rate": 1.174617367818333e-06, "loss": 0.1096, "step": 3472 }, { "epoch": 0.7902161547212742, "grad_norm": 1.202000950355586, "learning_rate": 1.174574821480828e-06, "loss": 0.153, "step": 3473 }, { "epoch": 0.7904436860068259, "grad_norm": 1.7678029264916482, "learning_rate": 1.1745322639110203e-06, "loss": 0.1284, "step": 3474 }, { "epoch": 0.7906712172923777, "grad_norm": 1.0898438896362213, "learning_rate": 1.1744896951097798e-06, "loss": 0.1178, "step": 3475 }, { "epoch": 0.7908987485779294, "grad_norm": 1.6287384221021175, "learning_rate": 1.1744471150779758e-06, "loss": 0.0908, "step": 3476 }, { "epoch": 0.7911262798634813, "grad_norm": 2.020655633683389, "learning_rate": 1.1744045238164793e-06, "loss": 0.1178, "step": 3477 }, { "epoch": 0.791353811149033, "grad_norm": 3.13062068048053, "learning_rate": 1.1743619213261604e-06, "loss": 0.1702, "step": 3478 }, { "epoch": 0.7915813424345848, "grad_norm": 1.9211729488264053, "learning_rate": 1.1743193076078901e-06, "loss": 0.1438, "step": 3479 }, { "epoch": 0.7918088737201365, "grad_norm": 2.4542917746271145, "learning_rate": 1.174276682662539e-06, "loss": 0.1783, "step": 3480 }, { "epoch": 0.7920364050056883, "grad_norm": 2.000737339074988, "learning_rate": 1.1742340464909786e-06, "loss": 0.1256, "step": 3481 }, { "epoch": 0.79226393629124, "grad_norm": 2.354494981818762, "learning_rate": 1.1741913990940801e-06, "loss": 0.1332, "step": 3482 }, { "epoch": 0.7924914675767918, "grad_norm": 1.6008608978921464, "learning_rate": 1.174148740472715e-06, "loss": 0.1235, "step": 3483 }, { "epoch": 0.7927189988623435, "grad_norm": 3.3922095426630468, "learning_rate": 1.1741060706277557e-06, "loss": 0.1673, "step": 3484 }, { "epoch": 0.7929465301478953, "grad_norm": 1.493369989814107, "learning_rate": 1.1740633895600738e-06, "loss": 0.1465, "step": 3485 }, { "epoch": 0.7931740614334472, "grad_norm": 1.8432157550864956, "learning_rate": 1.1740206972705418e-06, "loss": 0.1209, "step": 3486 }, { "epoch": 0.7934015927189989, "grad_norm": 1.7889600196785538, "learning_rate": 1.1739779937600323e-06, "loss": 0.0642, "step": 3487 }, { "epoch": 0.7936291240045507, "grad_norm": 1.4593061124512618, "learning_rate": 1.1739352790294177e-06, "loss": 0.0828, "step": 3488 }, { "epoch": 0.7938566552901024, "grad_norm": 2.134125735004945, "learning_rate": 1.1738925530795716e-06, "loss": 0.1497, "step": 3489 }, { "epoch": 0.7940841865756542, "grad_norm": 3.0082402968183244, "learning_rate": 1.173849815911367e-06, "loss": 0.1692, "step": 3490 }, { "epoch": 0.7943117178612059, "grad_norm": 2.384918860515077, "learning_rate": 1.1738070675256773e-06, "loss": 0.1934, "step": 3491 }, { "epoch": 0.7945392491467577, "grad_norm": 1.8153820624252357, "learning_rate": 1.1737643079233763e-06, "loss": 0.0786, "step": 3492 }, { "epoch": 0.7947667804323094, "grad_norm": 1.7671049533475054, "learning_rate": 1.1737215371053376e-06, "loss": 0.0866, "step": 3493 }, { "epoch": 0.7949943117178612, "grad_norm": 1.4867713437127692, "learning_rate": 1.1736787550724357e-06, "loss": 0.1448, "step": 3494 }, { "epoch": 0.7952218430034129, "grad_norm": 2.749928140643179, "learning_rate": 1.1736359618255452e-06, "loss": 0.16, "step": 3495 }, { "epoch": 0.7954493742889648, "grad_norm": 3.118689887471958, "learning_rate": 1.1735931573655402e-06, "loss": 0.2245, "step": 3496 }, { "epoch": 0.7956769055745165, "grad_norm": 4.92304830305448, "learning_rate": 1.1735503416932957e-06, "loss": 0.1682, "step": 3497 }, { "epoch": 0.7959044368600683, "grad_norm": 0.7816490382403714, "learning_rate": 1.1735075148096869e-06, "loss": 0.0881, "step": 3498 }, { "epoch": 0.79613196814562, "grad_norm": 2.83082513127168, "learning_rate": 1.173464676715589e-06, "loss": 0.1574, "step": 3499 }, { "epoch": 0.7963594994311718, "grad_norm": 1.8736840297636916, "learning_rate": 1.1734218274118775e-06, "loss": 0.1357, "step": 3500 }, { "epoch": 0.7965870307167235, "grad_norm": 1.5238621219584085, "learning_rate": 1.1733789668994285e-06, "loss": 0.188, "step": 3501 }, { "epoch": 0.7968145620022753, "grad_norm": 1.990641449188006, "learning_rate": 1.1733360951791176e-06, "loss": 0.1838, "step": 3502 }, { "epoch": 0.797042093287827, "grad_norm": 1.4319575898148043, "learning_rate": 1.173293212251821e-06, "loss": 0.1096, "step": 3503 }, { "epoch": 0.7972696245733788, "grad_norm": 2.2086542380222944, "learning_rate": 1.1732503181184155e-06, "loss": 0.1326, "step": 3504 }, { "epoch": 0.7974971558589306, "grad_norm": 3.1095746415107235, "learning_rate": 1.1732074127797773e-06, "loss": 0.1399, "step": 3505 }, { "epoch": 0.7977246871444824, "grad_norm": 1.5737796542808713, "learning_rate": 1.1731644962367838e-06, "loss": 0.1047, "step": 3506 }, { "epoch": 0.7979522184300342, "grad_norm": 1.912201296923179, "learning_rate": 1.173121568490312e-06, "loss": 0.1626, "step": 3507 }, { "epoch": 0.7981797497155859, "grad_norm": 1.6232060275987237, "learning_rate": 1.173078629541239e-06, "loss": 0.2191, "step": 3508 }, { "epoch": 0.7984072810011377, "grad_norm": 2.7876686082536333, "learning_rate": 1.1730356793904426e-06, "loss": 0.1472, "step": 3509 }, { "epoch": 0.7986348122866894, "grad_norm": 3.0524433393481125, "learning_rate": 1.1729927180388008e-06, "loss": 0.1744, "step": 3510 }, { "epoch": 0.7988623435722412, "grad_norm": 2.533079498985637, "learning_rate": 1.172949745487191e-06, "loss": 0.1341, "step": 3511 }, { "epoch": 0.7990898748577929, "grad_norm": 2.8158615912420037, "learning_rate": 1.1729067617364923e-06, "loss": 0.1398, "step": 3512 }, { "epoch": 0.7993174061433447, "grad_norm": 3.810931501836283, "learning_rate": 1.1728637667875825e-06, "loss": 0.1603, "step": 3513 }, { "epoch": 0.7995449374288964, "grad_norm": 1.3831236821227444, "learning_rate": 1.172820760641341e-06, "loss": 0.1667, "step": 3514 }, { "epoch": 0.7997724687144482, "grad_norm": 1.5087369009672673, "learning_rate": 1.1727777432986465e-06, "loss": 0.0937, "step": 3515 }, { "epoch": 0.8, "grad_norm": 1.8545109020075539, "learning_rate": 1.1727347147603778e-06, "loss": 0.1271, "step": 3516 }, { "epoch": 0.8002275312855518, "grad_norm": 1.5875557587618756, "learning_rate": 1.1726916750274148e-06, "loss": 0.1111, "step": 3517 }, { "epoch": 0.8004550625711035, "grad_norm": 1.7651224057238886, "learning_rate": 1.172648624100637e-06, "loss": 0.1151, "step": 3518 }, { "epoch": 0.8006825938566553, "grad_norm": 2.522491282937047, "learning_rate": 1.1726055619809245e-06, "loss": 0.1259, "step": 3519 }, { "epoch": 0.800910125142207, "grad_norm": 2.9242004594958853, "learning_rate": 1.172562488669157e-06, "loss": 0.1981, "step": 3520 }, { "epoch": 0.8011376564277588, "grad_norm": 2.4349195989881958, "learning_rate": 1.172519404166215e-06, "loss": 0.1747, "step": 3521 }, { "epoch": 0.8013651877133106, "grad_norm": 2.262185722438123, "learning_rate": 1.1724763084729792e-06, "loss": 0.0748, "step": 3522 }, { "epoch": 0.8015927189988623, "grad_norm": 1.7293354987916565, "learning_rate": 1.1724332015903303e-06, "loss": 0.0951, "step": 3523 }, { "epoch": 0.8018202502844141, "grad_norm": 3.6362391316593756, "learning_rate": 1.1723900835191494e-06, "loss": 0.1691, "step": 3524 }, { "epoch": 0.8020477815699659, "grad_norm": 1.4941805820798755, "learning_rate": 1.1723469542603174e-06, "loss": 0.1029, "step": 3525 }, { "epoch": 0.8022753128555177, "grad_norm": 2.142785774100463, "learning_rate": 1.1723038138147165e-06, "loss": 0.1449, "step": 3526 }, { "epoch": 0.8025028441410694, "grad_norm": 5.041596350366937, "learning_rate": 1.1722606621832278e-06, "loss": 0.2721, "step": 3527 }, { "epoch": 0.8027303754266212, "grad_norm": 3.309987840845101, "learning_rate": 1.1722174993667335e-06, "loss": 0.1324, "step": 3528 }, { "epoch": 0.8029579067121729, "grad_norm": 2.4375248585456535, "learning_rate": 1.1721743253661155e-06, "loss": 0.1308, "step": 3529 }, { "epoch": 0.8031854379977247, "grad_norm": 1.7485109343470884, "learning_rate": 1.1721311401822566e-06, "loss": 0.0598, "step": 3530 }, { "epoch": 0.8034129692832764, "grad_norm": 2.8466501164494047, "learning_rate": 1.1720879438160391e-06, "loss": 0.1275, "step": 3531 }, { "epoch": 0.8036405005688282, "grad_norm": 2.1948137095546514, "learning_rate": 1.1720447362683458e-06, "loss": 0.1537, "step": 3532 }, { "epoch": 0.8038680318543799, "grad_norm": 2.5761273249687355, "learning_rate": 1.1720015175400602e-06, "loss": 0.2656, "step": 3533 }, { "epoch": 0.8040955631399317, "grad_norm": 1.35303524636575, "learning_rate": 1.1719582876320655e-06, "loss": 0.0725, "step": 3534 }, { "epoch": 0.8043230944254836, "grad_norm": 1.8582104202289196, "learning_rate": 1.1719150465452447e-06, "loss": 0.0731, "step": 3535 }, { "epoch": 0.8045506257110353, "grad_norm": 2.3070370218822993, "learning_rate": 1.1718717942804822e-06, "loss": 0.182, "step": 3536 }, { "epoch": 0.8047781569965871, "grad_norm": 2.9363741121384734, "learning_rate": 1.1718285308386618e-06, "loss": 0.1945, "step": 3537 }, { "epoch": 0.8050056882821388, "grad_norm": 2.1830272314645605, "learning_rate": 1.1717852562206678e-06, "loss": 0.1165, "step": 3538 }, { "epoch": 0.8052332195676906, "grad_norm": 3.245711083253953, "learning_rate": 1.1717419704273844e-06, "loss": 0.1591, "step": 3539 }, { "epoch": 0.8054607508532423, "grad_norm": 2.5490845759819, "learning_rate": 1.1716986734596963e-06, "loss": 0.1237, "step": 3540 }, { "epoch": 0.8056882821387941, "grad_norm": 2.9048546128584203, "learning_rate": 1.1716553653184887e-06, "loss": 0.149, "step": 3541 }, { "epoch": 0.8059158134243458, "grad_norm": 3.4610037458517735, "learning_rate": 1.1716120460046464e-06, "loss": 0.116, "step": 3542 }, { "epoch": 0.8061433447098976, "grad_norm": 1.2797618581011017, "learning_rate": 1.1715687155190553e-06, "loss": 0.15, "step": 3543 }, { "epoch": 0.8063708759954493, "grad_norm": 2.799222931592063, "learning_rate": 1.1715253738626005e-06, "loss": 0.2169, "step": 3544 }, { "epoch": 0.8065984072810012, "grad_norm": 2.3387405024192827, "learning_rate": 1.171482021036168e-06, "loss": 0.1816, "step": 3545 }, { "epoch": 0.8068259385665529, "grad_norm": 1.3614682853594804, "learning_rate": 1.1714386570406439e-06, "loss": 0.0795, "step": 3546 }, { "epoch": 0.8070534698521047, "grad_norm": 2.35431862689593, "learning_rate": 1.1713952818769142e-06, "loss": 0.1802, "step": 3547 }, { "epoch": 0.8072810011376564, "grad_norm": 4.073744850230153, "learning_rate": 1.1713518955458657e-06, "loss": 0.1367, "step": 3548 }, { "epoch": 0.8075085324232082, "grad_norm": 2.2417307320503315, "learning_rate": 1.1713084980483849e-06, "loss": 0.1117, "step": 3549 }, { "epoch": 0.8077360637087599, "grad_norm": 1.8171121725967154, "learning_rate": 1.1712650893853591e-06, "loss": 0.1116, "step": 3550 }, { "epoch": 0.8079635949943117, "grad_norm": 1.1273822389879684, "learning_rate": 1.1712216695576753e-06, "loss": 0.1209, "step": 3551 }, { "epoch": 0.8081911262798634, "grad_norm": 3.0048491196415985, "learning_rate": 1.171178238566221e-06, "loss": 0.1695, "step": 3552 }, { "epoch": 0.8084186575654152, "grad_norm": 2.968285056264807, "learning_rate": 1.1711347964118837e-06, "loss": 0.1703, "step": 3553 }, { "epoch": 0.8086461888509671, "grad_norm": 1.6533852634693318, "learning_rate": 1.1710913430955514e-06, "loss": 0.1568, "step": 3554 }, { "epoch": 0.8088737201365188, "grad_norm": 1.966029753790106, "learning_rate": 1.171047878618112e-06, "loss": 0.132, "step": 3555 }, { "epoch": 0.8091012514220706, "grad_norm": 1.8837334268649408, "learning_rate": 1.1710044029804542e-06, "loss": 0.1739, "step": 3556 }, { "epoch": 0.8093287827076223, "grad_norm": 1.5944840560397648, "learning_rate": 1.1709609161834663e-06, "loss": 0.1447, "step": 3557 }, { "epoch": 0.8095563139931741, "grad_norm": 3.3750416113176547, "learning_rate": 1.1709174182280371e-06, "loss": 0.2264, "step": 3558 }, { "epoch": 0.8097838452787258, "grad_norm": 1.42052223693585, "learning_rate": 1.1708739091150557e-06, "loss": 0.0966, "step": 3559 }, { "epoch": 0.8100113765642776, "grad_norm": 1.6640096543568796, "learning_rate": 1.1708303888454113e-06, "loss": 0.0897, "step": 3560 }, { "epoch": 0.8102389078498293, "grad_norm": 2.489262801628844, "learning_rate": 1.1707868574199934e-06, "loss": 0.2036, "step": 3561 }, { "epoch": 0.8104664391353811, "grad_norm": 1.7044828952839948, "learning_rate": 1.1707433148396918e-06, "loss": 0.112, "step": 3562 }, { "epoch": 0.8106939704209328, "grad_norm": 3.36001160307574, "learning_rate": 1.1706997611053963e-06, "loss": 0.1259, "step": 3563 }, { "epoch": 0.8109215017064847, "grad_norm": 1.6392717376534625, "learning_rate": 1.1706561962179968e-06, "loss": 0.1526, "step": 3564 }, { "epoch": 0.8111490329920364, "grad_norm": 1.7155808753624902, "learning_rate": 1.1706126201783844e-06, "loss": 0.1241, "step": 3565 }, { "epoch": 0.8113765642775882, "grad_norm": 2.5616579159310144, "learning_rate": 1.170569032987449e-06, "loss": 0.1258, "step": 3566 }, { "epoch": 0.81160409556314, "grad_norm": 2.769894221468227, "learning_rate": 1.1705254346460818e-06, "loss": 0.0991, "step": 3567 }, { "epoch": 0.8118316268486917, "grad_norm": 1.477469197427597, "learning_rate": 1.170481825155174e-06, "loss": 0.1278, "step": 3568 }, { "epoch": 0.8120591581342435, "grad_norm": 1.9002053859746235, "learning_rate": 1.1704382045156162e-06, "loss": 0.1467, "step": 3569 }, { "epoch": 0.8122866894197952, "grad_norm": 2.5583118029634395, "learning_rate": 1.1703945727283008e-06, "loss": 0.1011, "step": 3570 }, { "epoch": 0.812514220705347, "grad_norm": 0.9971154304856114, "learning_rate": 1.1703509297941193e-06, "loss": 0.073, "step": 3571 }, { "epoch": 0.8127417519908987, "grad_norm": 2.2480762900084996, "learning_rate": 1.1703072757139633e-06, "loss": 0.1251, "step": 3572 }, { "epoch": 0.8129692832764505, "grad_norm": 3.0737759248208745, "learning_rate": 1.1702636104887253e-06, "loss": 0.1322, "step": 3573 }, { "epoch": 0.8131968145620023, "grad_norm": 1.8198486335000044, "learning_rate": 1.1702199341192977e-06, "loss": 0.1625, "step": 3574 }, { "epoch": 0.8134243458475541, "grad_norm": 1.0495962275251278, "learning_rate": 1.1701762466065733e-06, "loss": 0.0854, "step": 3575 }, { "epoch": 0.8136518771331058, "grad_norm": 1.8541583708777531, "learning_rate": 1.1701325479514446e-06, "loss": 0.1218, "step": 3576 }, { "epoch": 0.8138794084186576, "grad_norm": 1.6740410358627995, "learning_rate": 1.1700888381548052e-06, "loss": 0.1061, "step": 3577 }, { "epoch": 0.8141069397042093, "grad_norm": 2.030287883816433, "learning_rate": 1.1700451172175482e-06, "loss": 0.1824, "step": 3578 }, { "epoch": 0.8143344709897611, "grad_norm": 1.3733587978383417, "learning_rate": 1.1700013851405673e-06, "loss": 0.0824, "step": 3579 }, { "epoch": 0.8145620022753128, "grad_norm": 2.7300886954563777, "learning_rate": 1.169957641924756e-06, "loss": 0.1499, "step": 3580 }, { "epoch": 0.8147895335608646, "grad_norm": 2.702060988424549, "learning_rate": 1.1699138875710087e-06, "loss": 0.1538, "step": 3581 }, { "epoch": 0.8150170648464163, "grad_norm": 2.0159475716435207, "learning_rate": 1.1698701220802194e-06, "loss": 0.1436, "step": 3582 }, { "epoch": 0.8152445961319681, "grad_norm": 1.9482556961132234, "learning_rate": 1.1698263454532827e-06, "loss": 0.1107, "step": 3583 }, { "epoch": 0.81547212741752, "grad_norm": 3.3242671905337766, "learning_rate": 1.1697825576910933e-06, "loss": 0.1605, "step": 3584 }, { "epoch": 0.8156996587030717, "grad_norm": 4.406286080021148, "learning_rate": 1.1697387587945461e-06, "loss": 0.1412, "step": 3585 }, { "epoch": 0.8159271899886235, "grad_norm": 3.521875860215498, "learning_rate": 1.1696949487645365e-06, "loss": 0.1904, "step": 3586 }, { "epoch": 0.8161547212741752, "grad_norm": 2.09054171831565, "learning_rate": 1.1696511276019595e-06, "loss": 0.2462, "step": 3587 }, { "epoch": 0.816382252559727, "grad_norm": 2.27420287387407, "learning_rate": 1.169607295307711e-06, "loss": 0.1244, "step": 3588 }, { "epoch": 0.8166097838452787, "grad_norm": 2.2747464420359145, "learning_rate": 1.169563451882687e-06, "loss": 0.1764, "step": 3589 }, { "epoch": 0.8168373151308305, "grad_norm": 2.7182648228191653, "learning_rate": 1.1695195973277831e-06, "loss": 0.1134, "step": 3590 }, { "epoch": 0.8170648464163822, "grad_norm": 1.5728156279782024, "learning_rate": 1.169475731643896e-06, "loss": 0.0988, "step": 3591 }, { "epoch": 0.817292377701934, "grad_norm": 1.8757192624992662, "learning_rate": 1.169431854831922e-06, "loss": 0.1089, "step": 3592 }, { "epoch": 0.8175199089874858, "grad_norm": 1.1856883759314114, "learning_rate": 1.169387966892758e-06, "loss": 0.1476, "step": 3593 }, { "epoch": 0.8177474402730376, "grad_norm": 1.915281752164245, "learning_rate": 1.169344067827301e-06, "loss": 0.1859, "step": 3594 }, { "epoch": 0.8179749715585893, "grad_norm": 1.641433371476581, "learning_rate": 1.1693001576364483e-06, "loss": 0.147, "step": 3595 }, { "epoch": 0.8182025028441411, "grad_norm": 1.9995302113667346, "learning_rate": 1.1692562363210969e-06, "loss": 0.1197, "step": 3596 }, { "epoch": 0.8184300341296928, "grad_norm": 1.6487131000503967, "learning_rate": 1.169212303882145e-06, "loss": 0.0894, "step": 3597 }, { "epoch": 0.8186575654152446, "grad_norm": 1.0718237003787452, "learning_rate": 1.1691683603204901e-06, "loss": 0.1471, "step": 3598 }, { "epoch": 0.8188850967007963, "grad_norm": 1.9872022609356077, "learning_rate": 1.1691244056370307e-06, "loss": 0.0775, "step": 3599 }, { "epoch": 0.8191126279863481, "grad_norm": 1.675911881257451, "learning_rate": 1.169080439832665e-06, "loss": 0.0875, "step": 3600 }, { "epoch": 0.8193401592718998, "grad_norm": 2.8463715468246105, "learning_rate": 1.1690364629082915e-06, "loss": 0.1676, "step": 3601 }, { "epoch": 0.8195676905574516, "grad_norm": 1.860062069008861, "learning_rate": 1.168992474864809e-06, "loss": 0.0992, "step": 3602 }, { "epoch": 0.8197952218430035, "grad_norm": 2.4111719114216763, "learning_rate": 1.1689484757031167e-06, "loss": 0.1242, "step": 3603 }, { "epoch": 0.8200227531285552, "grad_norm": 1.804420569976691, "learning_rate": 1.1689044654241136e-06, "loss": 0.1382, "step": 3604 }, { "epoch": 0.820250284414107, "grad_norm": 1.5941220037037336, "learning_rate": 1.1688604440286994e-06, "loss": 0.1924, "step": 3605 }, { "epoch": 0.8204778156996587, "grad_norm": 1.5263579220058776, "learning_rate": 1.1688164115177737e-06, "loss": 0.1021, "step": 3606 }, { "epoch": 0.8207053469852105, "grad_norm": 2.8075035415219505, "learning_rate": 1.1687723678922367e-06, "loss": 0.1988, "step": 3607 }, { "epoch": 0.8209328782707622, "grad_norm": 1.7318635873604538, "learning_rate": 1.1687283131529882e-06, "loss": 0.1329, "step": 3608 }, { "epoch": 0.821160409556314, "grad_norm": 1.5626567981027735, "learning_rate": 1.1686842473009286e-06, "loss": 0.089, "step": 3609 }, { "epoch": 0.8213879408418657, "grad_norm": 1.7420355931507356, "learning_rate": 1.168640170336959e-06, "loss": 0.0914, "step": 3610 }, { "epoch": 0.8216154721274175, "grad_norm": 1.9844192517974875, "learning_rate": 1.16859608226198e-06, "loss": 0.1082, "step": 3611 }, { "epoch": 0.8218430034129692, "grad_norm": 1.9480179532884792, "learning_rate": 1.1685519830768923e-06, "loss": 0.1706, "step": 3612 }, { "epoch": 0.8220705346985211, "grad_norm": 2.335868396841245, "learning_rate": 1.1685078727825978e-06, "loss": 0.1348, "step": 3613 }, { "epoch": 0.8222980659840728, "grad_norm": 2.2509068386811113, "learning_rate": 1.1684637513799976e-06, "loss": 0.1105, "step": 3614 }, { "epoch": 0.8225255972696246, "grad_norm": 1.385199024522045, "learning_rate": 1.1684196188699936e-06, "loss": 0.1107, "step": 3615 }, { "epoch": 0.8227531285551763, "grad_norm": 2.721417628687412, "learning_rate": 1.168375475253488e-06, "loss": 0.2143, "step": 3616 }, { "epoch": 0.8229806598407281, "grad_norm": 2.1327255768939564, "learning_rate": 1.168331320531383e-06, "loss": 0.1029, "step": 3617 }, { "epoch": 0.8232081911262799, "grad_norm": 2.2052514638307312, "learning_rate": 1.1682871547045805e-06, "loss": 0.1301, "step": 3618 }, { "epoch": 0.8234357224118316, "grad_norm": 2.295794037048131, "learning_rate": 1.1682429777739836e-06, "loss": 0.1319, "step": 3619 }, { "epoch": 0.8236632536973834, "grad_norm": 1.7346497245119692, "learning_rate": 1.168198789740495e-06, "loss": 0.1474, "step": 3620 }, { "epoch": 0.8238907849829351, "grad_norm": 1.4867035615045876, "learning_rate": 1.1681545906050184e-06, "loss": 0.0918, "step": 3621 }, { "epoch": 0.824118316268487, "grad_norm": 1.5853265849774527, "learning_rate": 1.1681103803684564e-06, "loss": 0.1508, "step": 3622 }, { "epoch": 0.8243458475540387, "grad_norm": 2.6635300104778876, "learning_rate": 1.168066159031713e-06, "loss": 0.1551, "step": 3623 }, { "epoch": 0.8245733788395905, "grad_norm": 3.10455139314672, "learning_rate": 1.1680219265956918e-06, "loss": 0.186, "step": 3624 }, { "epoch": 0.8248009101251422, "grad_norm": 2.6599031485707387, "learning_rate": 1.167977683061297e-06, "loss": 0.0953, "step": 3625 }, { "epoch": 0.825028441410694, "grad_norm": 2.0712725542677957, "learning_rate": 1.1679334284294328e-06, "loss": 0.1206, "step": 3626 }, { "epoch": 0.8252559726962457, "grad_norm": 1.339098894932053, "learning_rate": 1.1678891627010036e-06, "loss": 0.1296, "step": 3627 }, { "epoch": 0.8254835039817975, "grad_norm": 1.3418346832410328, "learning_rate": 1.1678448858769139e-06, "loss": 0.1069, "step": 3628 }, { "epoch": 0.8257110352673492, "grad_norm": 2.300704895731321, "learning_rate": 1.1678005979580694e-06, "loss": 0.232, "step": 3629 }, { "epoch": 0.825938566552901, "grad_norm": 2.565692079837086, "learning_rate": 1.1677562989453745e-06, "loss": 0.1685, "step": 3630 }, { "epoch": 0.8261660978384527, "grad_norm": 2.7439979743790177, "learning_rate": 1.1677119888397348e-06, "loss": 0.2008, "step": 3631 }, { "epoch": 0.8263936291240046, "grad_norm": 2.3355350842028133, "learning_rate": 1.1676676676420562e-06, "loss": 0.1449, "step": 3632 }, { "epoch": 0.8266211604095564, "grad_norm": 2.6558322573348927, "learning_rate": 1.1676233353532442e-06, "loss": 0.1475, "step": 3633 }, { "epoch": 0.8268486916951081, "grad_norm": 2.694804298395181, "learning_rate": 1.167578991974205e-06, "loss": 0.1267, "step": 3634 }, { "epoch": 0.8270762229806599, "grad_norm": 1.3844578486757833, "learning_rate": 1.167534637505845e-06, "loss": 0.1051, "step": 3635 }, { "epoch": 0.8273037542662116, "grad_norm": 3.8021262153636513, "learning_rate": 1.1674902719490704e-06, "loss": 0.1409, "step": 3636 }, { "epoch": 0.8275312855517634, "grad_norm": 1.9081376942490276, "learning_rate": 1.1674458953047883e-06, "loss": 0.1318, "step": 3637 }, { "epoch": 0.8277588168373151, "grad_norm": 1.5941119751883612, "learning_rate": 1.1674015075739054e-06, "loss": 0.1643, "step": 3638 }, { "epoch": 0.8279863481228669, "grad_norm": 4.574871513487587, "learning_rate": 1.1673571087573293e-06, "loss": 0.166, "step": 3639 }, { "epoch": 0.8282138794084186, "grad_norm": 3.608726841283483, "learning_rate": 1.167312698855967e-06, "loss": 0.1647, "step": 3640 }, { "epoch": 0.8284414106939704, "grad_norm": 1.5402864670776968, "learning_rate": 1.1672682778707262e-06, "loss": 0.0943, "step": 3641 }, { "epoch": 0.8286689419795222, "grad_norm": 1.1120726912524128, "learning_rate": 1.167223845802515e-06, "loss": 0.1138, "step": 3642 }, { "epoch": 0.828896473265074, "grad_norm": 2.078866420634302, "learning_rate": 1.1671794026522417e-06, "loss": 0.1944, "step": 3643 }, { "epoch": 0.8291240045506257, "grad_norm": 2.041196689513367, "learning_rate": 1.1671349484208142e-06, "loss": 0.1759, "step": 3644 }, { "epoch": 0.8293515358361775, "grad_norm": 3.0074049329788144, "learning_rate": 1.1670904831091412e-06, "loss": 0.1606, "step": 3645 }, { "epoch": 0.8295790671217292, "grad_norm": 2.414856061249191, "learning_rate": 1.1670460067181313e-06, "loss": 0.1622, "step": 3646 }, { "epoch": 0.829806598407281, "grad_norm": 1.6822279974303718, "learning_rate": 1.167001519248694e-06, "loss": 0.2434, "step": 3647 }, { "epoch": 0.8300341296928327, "grad_norm": 2.6099405415593475, "learning_rate": 1.1669570207017384e-06, "loss": 0.1426, "step": 3648 }, { "epoch": 0.8302616609783845, "grad_norm": 2.396884053282652, "learning_rate": 1.1669125110781737e-06, "loss": 0.1275, "step": 3649 }, { "epoch": 0.8304891922639362, "grad_norm": 1.9075049230967023, "learning_rate": 1.1668679903789095e-06, "loss": 0.1403, "step": 3650 }, { "epoch": 0.830716723549488, "grad_norm": 2.2255645987859727, "learning_rate": 1.1668234586048562e-06, "loss": 0.11, "step": 3651 }, { "epoch": 0.8309442548350399, "grad_norm": 1.1833888824062964, "learning_rate": 1.1667789157569237e-06, "loss": 0.1392, "step": 3652 }, { "epoch": 0.8311717861205916, "grad_norm": 2.4325462275271423, "learning_rate": 1.1667343618360224e-06, "loss": 0.1786, "step": 3653 }, { "epoch": 0.8313993174061434, "grad_norm": 1.3710217277398862, "learning_rate": 1.166689796843063e-06, "loss": 0.1462, "step": 3654 }, { "epoch": 0.8316268486916951, "grad_norm": 1.5198212298672198, "learning_rate": 1.166645220778956e-06, "loss": 0.1459, "step": 3655 }, { "epoch": 0.8318543799772469, "grad_norm": 1.628569489693494, "learning_rate": 1.1666006336446127e-06, "loss": 0.1202, "step": 3656 }, { "epoch": 0.8320819112627986, "grad_norm": 3.30227173439614, "learning_rate": 1.1665560354409444e-06, "loss": 0.1609, "step": 3657 }, { "epoch": 0.8323094425483504, "grad_norm": 1.4038774964979965, "learning_rate": 1.1665114261688625e-06, "loss": 0.1544, "step": 3658 }, { "epoch": 0.8325369738339021, "grad_norm": 1.821635366527404, "learning_rate": 1.1664668058292789e-06, "loss": 0.0919, "step": 3659 }, { "epoch": 0.8327645051194539, "grad_norm": 2.0806661475154695, "learning_rate": 1.1664221744231052e-06, "loss": 0.1351, "step": 3660 }, { "epoch": 0.8329920364050057, "grad_norm": 2.629918294593467, "learning_rate": 1.1663775319512539e-06, "loss": 0.1379, "step": 3661 }, { "epoch": 0.8332195676905575, "grad_norm": 1.8266189295452462, "learning_rate": 1.1663328784146375e-06, "loss": 0.1339, "step": 3662 }, { "epoch": 0.8334470989761092, "grad_norm": 2.567173056748027, "learning_rate": 1.1662882138141684e-06, "loss": 0.1347, "step": 3663 }, { "epoch": 0.833674630261661, "grad_norm": 1.8059501719212914, "learning_rate": 1.1662435381507595e-06, "loss": 0.1445, "step": 3664 }, { "epoch": 0.8339021615472128, "grad_norm": 1.28119552202134, "learning_rate": 1.1661988514253237e-06, "loss": 0.1317, "step": 3665 }, { "epoch": 0.8341296928327645, "grad_norm": 0.7832646833508663, "learning_rate": 1.1661541536387748e-06, "loss": 0.0444, "step": 3666 }, { "epoch": 0.8343572241183163, "grad_norm": 2.4326662770770673, "learning_rate": 1.166109444792026e-06, "loss": 0.183, "step": 3667 }, { "epoch": 0.834584755403868, "grad_norm": 1.5964670632363234, "learning_rate": 1.1660647248859913e-06, "loss": 0.1303, "step": 3668 }, { "epoch": 0.8348122866894198, "grad_norm": 1.9117342096036827, "learning_rate": 1.1660199939215845e-06, "loss": 0.1007, "step": 3669 }, { "epoch": 0.8350398179749715, "grad_norm": 2.605848649739405, "learning_rate": 1.1659752518997197e-06, "loss": 0.1419, "step": 3670 }, { "epoch": 0.8352673492605234, "grad_norm": 1.7438484870911297, "learning_rate": 1.1659304988213115e-06, "loss": 0.2068, "step": 3671 }, { "epoch": 0.8354948805460751, "grad_norm": 1.0600642685383936, "learning_rate": 1.1658857346872745e-06, "loss": 0.1193, "step": 3672 }, { "epoch": 0.8357224118316269, "grad_norm": 1.9518189045588334, "learning_rate": 1.165840959498524e-06, "loss": 0.1262, "step": 3673 }, { "epoch": 0.8359499431171786, "grad_norm": 1.7335154761632459, "learning_rate": 1.1657961732559745e-06, "loss": 0.1009, "step": 3674 }, { "epoch": 0.8361774744027304, "grad_norm": 2.2309685306176017, "learning_rate": 1.1657513759605417e-06, "loss": 0.1784, "step": 3675 }, { "epoch": 0.8364050056882821, "grad_norm": 2.2907312733247194, "learning_rate": 1.1657065676131412e-06, "loss": 0.1291, "step": 3676 }, { "epoch": 0.8366325369738339, "grad_norm": 1.6294624150585804, "learning_rate": 1.1656617482146886e-06, "loss": 0.1442, "step": 3677 }, { "epoch": 0.8368600682593856, "grad_norm": 2.8212610764914707, "learning_rate": 1.1656169177661e-06, "loss": 0.1711, "step": 3678 }, { "epoch": 0.8370875995449374, "grad_norm": 2.076450457598849, "learning_rate": 1.165572076268292e-06, "loss": 0.2057, "step": 3679 }, { "epoch": 0.8373151308304891, "grad_norm": 3.366007033872947, "learning_rate": 1.1655272237221804e-06, "loss": 0.1565, "step": 3680 }, { "epoch": 0.837542662116041, "grad_norm": 2.2261279746795846, "learning_rate": 1.1654823601286826e-06, "loss": 0.1642, "step": 3681 }, { "epoch": 0.8377701934015928, "grad_norm": 1.81724632660024, "learning_rate": 1.165437485488715e-06, "loss": 0.0943, "step": 3682 }, { "epoch": 0.8379977246871445, "grad_norm": 3.1269822903807225, "learning_rate": 1.165392599803195e-06, "loss": 0.1449, "step": 3683 }, { "epoch": 0.8382252559726963, "grad_norm": 2.379708780504192, "learning_rate": 1.16534770307304e-06, "loss": 0.153, "step": 3684 }, { "epoch": 0.838452787258248, "grad_norm": 1.8603168211221506, "learning_rate": 1.1653027952991676e-06, "loss": 0.1143, "step": 3685 }, { "epoch": 0.8386803185437998, "grad_norm": 2.2831231162185417, "learning_rate": 1.1652578764824953e-06, "loss": 0.1459, "step": 3686 }, { "epoch": 0.8389078498293515, "grad_norm": 2.8451139022978085, "learning_rate": 1.1652129466239417e-06, "loss": 0.1634, "step": 3687 }, { "epoch": 0.8391353811149033, "grad_norm": 2.3009781892443573, "learning_rate": 1.1651680057244247e-06, "loss": 0.1378, "step": 3688 }, { "epoch": 0.839362912400455, "grad_norm": 4.0903919882686814, "learning_rate": 1.165123053784863e-06, "loss": 0.1721, "step": 3689 }, { "epoch": 0.8395904436860068, "grad_norm": 1.7433135774699924, "learning_rate": 1.1650780908061753e-06, "loss": 0.1282, "step": 3690 }, { "epoch": 0.8398179749715586, "grad_norm": 1.3400959169617475, "learning_rate": 1.1650331167892805e-06, "loss": 0.1631, "step": 3691 }, { "epoch": 0.8400455062571104, "grad_norm": 2.56860477659448, "learning_rate": 1.1649881317350979e-06, "loss": 0.1444, "step": 3692 }, { "epoch": 0.8402730375426621, "grad_norm": 1.7498028154005396, "learning_rate": 1.1649431356445467e-06, "loss": 0.1098, "step": 3693 }, { "epoch": 0.8405005688282139, "grad_norm": 1.4621069776019282, "learning_rate": 1.1648981285185464e-06, "loss": 0.1369, "step": 3694 }, { "epoch": 0.8407281001137656, "grad_norm": 2.254007653069776, "learning_rate": 1.1648531103580178e-06, "loss": 0.2232, "step": 3695 }, { "epoch": 0.8409556313993174, "grad_norm": 2.03443979756766, "learning_rate": 1.1648080811638797e-06, "loss": 0.1291, "step": 3696 }, { "epoch": 0.8411831626848691, "grad_norm": 1.3415702204257411, "learning_rate": 1.1647630409370533e-06, "loss": 0.1113, "step": 3697 }, { "epoch": 0.8414106939704209, "grad_norm": 2.0728006792206086, "learning_rate": 1.1647179896784588e-06, "loss": 0.1298, "step": 3698 }, { "epoch": 0.8416382252559726, "grad_norm": 2.7540588294232253, "learning_rate": 1.1646729273890172e-06, "loss": 0.1053, "step": 3699 }, { "epoch": 0.8418657565415245, "grad_norm": 2.407303374360263, "learning_rate": 1.1646278540696493e-06, "loss": 0.1237, "step": 3700 }, { "epoch": 0.8420932878270763, "grad_norm": 1.6348359597782052, "learning_rate": 1.1645827697212762e-06, "loss": 0.0736, "step": 3701 }, { "epoch": 0.842320819112628, "grad_norm": 2.1217116646699474, "learning_rate": 1.1645376743448194e-06, "loss": 0.1372, "step": 3702 }, { "epoch": 0.8425483503981798, "grad_norm": 1.4266722248368366, "learning_rate": 1.1644925679412008e-06, "loss": 0.124, "step": 3703 }, { "epoch": 0.8427758816837315, "grad_norm": 0.988882296235461, "learning_rate": 1.1644474505113423e-06, "loss": 0.1114, "step": 3704 }, { "epoch": 0.8430034129692833, "grad_norm": 1.4548572895040972, "learning_rate": 1.1644023220561657e-06, "loss": 0.1811, "step": 3705 }, { "epoch": 0.843230944254835, "grad_norm": 2.0960066842624427, "learning_rate": 1.1643571825765935e-06, "loss": 0.1543, "step": 3706 }, { "epoch": 0.8434584755403868, "grad_norm": 3.0082397132223155, "learning_rate": 1.1643120320735481e-06, "loss": 0.2195, "step": 3707 }, { "epoch": 0.8436860068259385, "grad_norm": 2.7601349795873378, "learning_rate": 1.164266870547953e-06, "loss": 0.1608, "step": 3708 }, { "epoch": 0.8439135381114903, "grad_norm": 3.232912460236744, "learning_rate": 1.16422169800073e-06, "loss": 0.1857, "step": 3709 }, { "epoch": 0.8441410693970421, "grad_norm": 3.7374128521838665, "learning_rate": 1.1641765144328035e-06, "loss": 0.2689, "step": 3710 }, { "epoch": 0.8443686006825939, "grad_norm": 1.5630753818611163, "learning_rate": 1.1641313198450966e-06, "loss": 0.1372, "step": 3711 }, { "epoch": 0.8445961319681456, "grad_norm": 1.8779453375789192, "learning_rate": 1.1640861142385326e-06, "loss": 0.0862, "step": 3712 }, { "epoch": 0.8448236632536974, "grad_norm": 3.2220024645366028, "learning_rate": 1.1640408976140358e-06, "loss": 0.1589, "step": 3713 }, { "epoch": 0.8450511945392492, "grad_norm": 2.5580803791843376, "learning_rate": 1.1639956699725303e-06, "loss": 0.1293, "step": 3714 }, { "epoch": 0.8452787258248009, "grad_norm": 2.096403600241952, "learning_rate": 1.1639504313149403e-06, "loss": 0.0954, "step": 3715 }, { "epoch": 0.8455062571103527, "grad_norm": 2.080786160223831, "learning_rate": 1.1639051816421906e-06, "loss": 0.13, "step": 3716 }, { "epoch": 0.8457337883959044, "grad_norm": 1.6759172842352954, "learning_rate": 1.163859920955206e-06, "loss": 0.1121, "step": 3717 }, { "epoch": 0.8459613196814562, "grad_norm": 2.0642665653257346, "learning_rate": 1.1638146492549112e-06, "loss": 0.1178, "step": 3718 }, { "epoch": 0.8461888509670079, "grad_norm": 1.5058802030094345, "learning_rate": 1.163769366542232e-06, "loss": 0.0927, "step": 3719 }, { "epoch": 0.8464163822525598, "grad_norm": 1.308746050377977, "learning_rate": 1.1637240728180937e-06, "loss": 0.0819, "step": 3720 }, { "epoch": 0.8466439135381115, "grad_norm": 2.6006877199397747, "learning_rate": 1.163678768083422e-06, "loss": 0.1516, "step": 3721 }, { "epoch": 0.8468714448236633, "grad_norm": 1.8224971272064627, "learning_rate": 1.1636334523391426e-06, "loss": 0.09, "step": 3722 }, { "epoch": 0.847098976109215, "grad_norm": 1.6032671914851704, "learning_rate": 1.1635881255861821e-06, "loss": 0.1375, "step": 3723 }, { "epoch": 0.8473265073947668, "grad_norm": 1.726720538161939, "learning_rate": 1.1635427878254667e-06, "loss": 0.1176, "step": 3724 }, { "epoch": 0.8475540386803185, "grad_norm": 3.7139895273320915, "learning_rate": 1.163497439057923e-06, "loss": 0.1631, "step": 3725 }, { "epoch": 0.8477815699658703, "grad_norm": 1.1481004566162971, "learning_rate": 1.1634520792844778e-06, "loss": 0.1331, "step": 3726 }, { "epoch": 0.848009101251422, "grad_norm": 1.947322963642618, "learning_rate": 1.1634067085060583e-06, "loss": 0.1086, "step": 3727 }, { "epoch": 0.8482366325369738, "grad_norm": 1.9494034977033496, "learning_rate": 1.1633613267235915e-06, "loss": 0.1835, "step": 3728 }, { "epoch": 0.8484641638225257, "grad_norm": 2.408589703999215, "learning_rate": 1.1633159339380054e-06, "loss": 0.1851, "step": 3729 }, { "epoch": 0.8486916951080774, "grad_norm": 2.4119649285649123, "learning_rate": 1.1632705301502272e-06, "loss": 0.1233, "step": 3730 }, { "epoch": 0.8489192263936292, "grad_norm": 3.340579250378617, "learning_rate": 1.1632251153611855e-06, "loss": 0.1362, "step": 3731 }, { "epoch": 0.8491467576791809, "grad_norm": 2.23461351462565, "learning_rate": 1.163179689571808e-06, "loss": 0.1592, "step": 3732 }, { "epoch": 0.8493742889647327, "grad_norm": 1.9169594273914996, "learning_rate": 1.1631342527830234e-06, "loss": 0.0894, "step": 3733 }, { "epoch": 0.8496018202502844, "grad_norm": 1.706614485101184, "learning_rate": 1.16308880499576e-06, "loss": 0.0964, "step": 3734 }, { "epoch": 0.8498293515358362, "grad_norm": 2.212965703656509, "learning_rate": 1.163043346210947e-06, "loss": 0.146, "step": 3735 }, { "epoch": 0.8500568828213879, "grad_norm": 1.6891386815491374, "learning_rate": 1.1629978764295133e-06, "loss": 0.1352, "step": 3736 }, { "epoch": 0.8502844141069397, "grad_norm": 2.1697592534447034, "learning_rate": 1.1629523956523883e-06, "loss": 0.0917, "step": 3737 }, { "epoch": 0.8505119453924914, "grad_norm": 3.1025926867971876, "learning_rate": 1.1629069038805018e-06, "loss": 0.1329, "step": 3738 }, { "epoch": 0.8507394766780433, "grad_norm": 1.942888178882637, "learning_rate": 1.162861401114783e-06, "loss": 0.1248, "step": 3739 }, { "epoch": 0.850967007963595, "grad_norm": 1.5004249323078944, "learning_rate": 1.1628158873561624e-06, "loss": 0.085, "step": 3740 }, { "epoch": 0.8511945392491468, "grad_norm": 2.0290099909974333, "learning_rate": 1.16277036260557e-06, "loss": 0.1727, "step": 3741 }, { "epoch": 0.8514220705346985, "grad_norm": 1.3697183727422024, "learning_rate": 1.1627248268639363e-06, "loss": 0.0708, "step": 3742 }, { "epoch": 0.8516496018202503, "grad_norm": 1.4963457644894143, "learning_rate": 1.1626792801321917e-06, "loss": 0.1481, "step": 3743 }, { "epoch": 0.851877133105802, "grad_norm": 2.250464954679021, "learning_rate": 1.1626337224112676e-06, "loss": 0.1561, "step": 3744 }, { "epoch": 0.8521046643913538, "grad_norm": 2.1869910077111823, "learning_rate": 1.1625881537020948e-06, "loss": 0.1157, "step": 3745 }, { "epoch": 0.8523321956769055, "grad_norm": 2.428910593262319, "learning_rate": 1.1625425740056046e-06, "loss": 0.1068, "step": 3746 }, { "epoch": 0.8525597269624573, "grad_norm": 1.5214154740818184, "learning_rate": 1.1624969833227287e-06, "loss": 0.1403, "step": 3747 }, { "epoch": 0.852787258248009, "grad_norm": 2.467402534163389, "learning_rate": 1.1624513816543988e-06, "loss": 0.138, "step": 3748 }, { "epoch": 0.8530147895335609, "grad_norm": 3.3573512145729842, "learning_rate": 1.162405769001547e-06, "loss": 0.1799, "step": 3749 }, { "epoch": 0.8532423208191127, "grad_norm": 1.5381915258615733, "learning_rate": 1.1623601453651053e-06, "loss": 0.106, "step": 3750 }, { "epoch": 0.8534698521046644, "grad_norm": 2.0574854880892697, "learning_rate": 1.1623145107460065e-06, "loss": 0.0945, "step": 3751 }, { "epoch": 0.8536973833902162, "grad_norm": 1.0351595320260978, "learning_rate": 1.1622688651451833e-06, "loss": 0.078, "step": 3752 }, { "epoch": 0.8539249146757679, "grad_norm": 2.6677954818832332, "learning_rate": 1.1622232085635683e-06, "loss": 0.2526, "step": 3753 }, { "epoch": 0.8541524459613197, "grad_norm": 2.6487501278609398, "learning_rate": 1.162177541002095e-06, "loss": 0.1601, "step": 3754 }, { "epoch": 0.8543799772468714, "grad_norm": 0.9301475208416561, "learning_rate": 1.162131862461696e-06, "loss": 0.0665, "step": 3755 }, { "epoch": 0.8546075085324232, "grad_norm": 1.763957033542155, "learning_rate": 1.1620861729433062e-06, "loss": 0.1202, "step": 3756 }, { "epoch": 0.8548350398179749, "grad_norm": 2.176479505741929, "learning_rate": 1.1620404724478582e-06, "loss": 0.1798, "step": 3757 }, { "epoch": 0.8550625711035267, "grad_norm": 2.945225816531013, "learning_rate": 1.1619947609762867e-06, "loss": 0.1396, "step": 3758 }, { "epoch": 0.8552901023890785, "grad_norm": 2.562908702733841, "learning_rate": 1.1619490385295255e-06, "loss": 0.1973, "step": 3759 }, { "epoch": 0.8555176336746303, "grad_norm": 0.9401287921099067, "learning_rate": 1.1619033051085094e-06, "loss": 0.1027, "step": 3760 }, { "epoch": 0.855745164960182, "grad_norm": 1.7243955273221043, "learning_rate": 1.161857560714173e-06, "loss": 0.1261, "step": 3761 }, { "epoch": 0.8559726962457338, "grad_norm": 2.2572939153056666, "learning_rate": 1.1618118053474514e-06, "loss": 0.1806, "step": 3762 }, { "epoch": 0.8562002275312856, "grad_norm": 1.6576632747588675, "learning_rate": 1.1617660390092794e-06, "loss": 0.1136, "step": 3763 }, { "epoch": 0.8564277588168373, "grad_norm": 2.8436603000222784, "learning_rate": 1.161720261700593e-06, "loss": 0.1964, "step": 3764 }, { "epoch": 0.856655290102389, "grad_norm": 3.1381523860209155, "learning_rate": 1.161674473422327e-06, "loss": 0.1301, "step": 3765 }, { "epoch": 0.8568828213879408, "grad_norm": 2.469303248081205, "learning_rate": 1.1616286741754178e-06, "loss": 0.1002, "step": 3766 }, { "epoch": 0.8571103526734926, "grad_norm": 2.8285714025814492, "learning_rate": 1.1615828639608013e-06, "loss": 0.1487, "step": 3767 }, { "epoch": 0.8573378839590444, "grad_norm": 1.796917081972481, "learning_rate": 1.1615370427794138e-06, "loss": 0.2538, "step": 3768 }, { "epoch": 0.8575654152445962, "grad_norm": 1.2104132513749966, "learning_rate": 1.1614912106321916e-06, "loss": 0.1111, "step": 3769 }, { "epoch": 0.8577929465301479, "grad_norm": 2.240630387028681, "learning_rate": 1.1614453675200718e-06, "loss": 0.1593, "step": 3770 }, { "epoch": 0.8580204778156997, "grad_norm": 2.4065649472522033, "learning_rate": 1.1613995134439912e-06, "loss": 0.202, "step": 3771 }, { "epoch": 0.8582480091012514, "grad_norm": 2.424583488709643, "learning_rate": 1.1613536484048866e-06, "loss": 0.1912, "step": 3772 }, { "epoch": 0.8584755403868032, "grad_norm": 2.3925333932617905, "learning_rate": 1.161307772403696e-06, "loss": 0.1897, "step": 3773 }, { "epoch": 0.8587030716723549, "grad_norm": 2.3199334542035475, "learning_rate": 1.1612618854413566e-06, "loss": 0.1913, "step": 3774 }, { "epoch": 0.8589306029579067, "grad_norm": 2.3317040406753895, "learning_rate": 1.1612159875188065e-06, "loss": 0.1689, "step": 3775 }, { "epoch": 0.8591581342434584, "grad_norm": 1.7725857904878792, "learning_rate": 1.1611700786369835e-06, "loss": 0.1673, "step": 3776 }, { "epoch": 0.8593856655290102, "grad_norm": 1.6084308537687173, "learning_rate": 1.161124158796826e-06, "loss": 0.141, "step": 3777 }, { "epoch": 0.859613196814562, "grad_norm": 1.2596148838555763, "learning_rate": 1.1610782279992728e-06, "loss": 0.0778, "step": 3778 }, { "epoch": 0.8598407281001138, "grad_norm": 2.02054838296593, "learning_rate": 1.161032286245262e-06, "loss": 0.1876, "step": 3779 }, { "epoch": 0.8600682593856656, "grad_norm": 2.687762249569876, "learning_rate": 1.1609863335357332e-06, "loss": 0.1482, "step": 3780 }, { "epoch": 0.8602957906712173, "grad_norm": 2.3935859487409408, "learning_rate": 1.1609403698716255e-06, "loss": 0.1692, "step": 3781 }, { "epoch": 0.8605233219567691, "grad_norm": 2.508017368514477, "learning_rate": 1.160894395253878e-06, "loss": 0.134, "step": 3782 }, { "epoch": 0.8607508532423208, "grad_norm": 1.8303186156957627, "learning_rate": 1.1608484096834306e-06, "loss": 0.2355, "step": 3783 }, { "epoch": 0.8609783845278726, "grad_norm": 1.8831463075858677, "learning_rate": 1.1608024131612231e-06, "loss": 0.1362, "step": 3784 }, { "epoch": 0.8612059158134243, "grad_norm": 2.11822052966638, "learning_rate": 1.1607564056881953e-06, "loss": 0.0832, "step": 3785 }, { "epoch": 0.8614334470989761, "grad_norm": 2.5355097758095386, "learning_rate": 1.160710387265288e-06, "loss": 0.1257, "step": 3786 }, { "epoch": 0.8616609783845278, "grad_norm": 2.315866021639714, "learning_rate": 1.1606643578934414e-06, "loss": 0.1048, "step": 3787 }, { "epoch": 0.8618885096700797, "grad_norm": 2.428912559884676, "learning_rate": 1.1606183175735963e-06, "loss": 0.1784, "step": 3788 }, { "epoch": 0.8621160409556314, "grad_norm": 1.6349014413744898, "learning_rate": 1.1605722663066938e-06, "loss": 0.1257, "step": 3789 }, { "epoch": 0.8623435722411832, "grad_norm": 1.4193212111174016, "learning_rate": 1.1605262040936752e-06, "loss": 0.1551, "step": 3790 }, { "epoch": 0.8625711035267349, "grad_norm": 1.8487470971360136, "learning_rate": 1.1604801309354815e-06, "loss": 0.1162, "step": 3791 }, { "epoch": 0.8627986348122867, "grad_norm": 1.346999735527141, "learning_rate": 1.1604340468330546e-06, "loss": 0.1075, "step": 3792 }, { "epoch": 0.8630261660978384, "grad_norm": 3.178320117753975, "learning_rate": 1.1603879517873366e-06, "loss": 0.1917, "step": 3793 }, { "epoch": 0.8632536973833902, "grad_norm": 2.007629059273734, "learning_rate": 1.160341845799269e-06, "loss": 0.1435, "step": 3794 }, { "epoch": 0.863481228668942, "grad_norm": 1.777320793531153, "learning_rate": 1.160295728869795e-06, "loss": 0.165, "step": 3795 }, { "epoch": 0.8637087599544937, "grad_norm": 1.0573583615388784, "learning_rate": 1.1602496009998562e-06, "loss": 0.1049, "step": 3796 }, { "epoch": 0.8639362912400455, "grad_norm": 2.667616509532965, "learning_rate": 1.160203462190396e-06, "loss": 0.1379, "step": 3797 }, { "epoch": 0.8641638225255973, "grad_norm": 1.6159958386357638, "learning_rate": 1.1601573124423573e-06, "loss": 0.1306, "step": 3798 }, { "epoch": 0.8643913538111491, "grad_norm": 1.5656967132317656, "learning_rate": 1.1601111517566831e-06, "loss": 0.1139, "step": 3799 }, { "epoch": 0.8646188850967008, "grad_norm": 1.3820258167187487, "learning_rate": 1.1600649801343173e-06, "loss": 0.1014, "step": 3800 }, { "epoch": 0.8648464163822526, "grad_norm": 1.39843503895015, "learning_rate": 1.1600187975762029e-06, "loss": 0.1087, "step": 3801 }, { "epoch": 0.8650739476678043, "grad_norm": 2.1408742589304737, "learning_rate": 1.159972604083284e-06, "loss": 0.0972, "step": 3802 }, { "epoch": 0.8653014789533561, "grad_norm": 2.26397998281963, "learning_rate": 1.159926399656505e-06, "loss": 0.1448, "step": 3803 }, { "epoch": 0.8655290102389078, "grad_norm": 2.490895204679328, "learning_rate": 1.1598801842968103e-06, "loss": 0.1195, "step": 3804 }, { "epoch": 0.8657565415244596, "grad_norm": 1.6194440157886663, "learning_rate": 1.1598339580051439e-06, "loss": 0.1018, "step": 3805 }, { "epoch": 0.8659840728100113, "grad_norm": 1.3490011154721018, "learning_rate": 1.159787720782451e-06, "loss": 0.0966, "step": 3806 }, { "epoch": 0.8662116040955632, "grad_norm": 2.507900361953006, "learning_rate": 1.1597414726296764e-06, "loss": 0.2012, "step": 3807 }, { "epoch": 0.8664391353811149, "grad_norm": 1.2833341524803188, "learning_rate": 1.1596952135477656e-06, "loss": 0.1121, "step": 3808 }, { "epoch": 0.8666666666666667, "grad_norm": 2.2151027996183927, "learning_rate": 1.1596489435376638e-06, "loss": 0.2105, "step": 3809 }, { "epoch": 0.8668941979522184, "grad_norm": 1.1996595882366885, "learning_rate": 1.1596026626003168e-06, "loss": 0.1457, "step": 3810 }, { "epoch": 0.8671217292377702, "grad_norm": 1.4820717336888578, "learning_rate": 1.1595563707366705e-06, "loss": 0.1781, "step": 3811 }, { "epoch": 0.867349260523322, "grad_norm": 2.635221308330241, "learning_rate": 1.1595100679476707e-06, "loss": 0.1454, "step": 3812 }, { "epoch": 0.8675767918088737, "grad_norm": 1.6108295995494901, "learning_rate": 1.1594637542342644e-06, "loss": 0.106, "step": 3813 }, { "epoch": 0.8678043230944255, "grad_norm": 1.8221220205121134, "learning_rate": 1.1594174295973976e-06, "loss": 0.1006, "step": 3814 }, { "epoch": 0.8680318543799772, "grad_norm": 3.0101316748683664, "learning_rate": 1.1593710940380172e-06, "loss": 0.121, "step": 3815 }, { "epoch": 0.868259385665529, "grad_norm": 2.735583899461839, "learning_rate": 1.1593247475570704e-06, "loss": 0.209, "step": 3816 }, { "epoch": 0.8684869169510808, "grad_norm": 0.9832960186884339, "learning_rate": 1.1592783901555043e-06, "loss": 0.0808, "step": 3817 }, { "epoch": 0.8687144482366326, "grad_norm": 1.4074790278159912, "learning_rate": 1.1592320218342665e-06, "loss": 0.0947, "step": 3818 }, { "epoch": 0.8689419795221843, "grad_norm": 1.4230263763515514, "learning_rate": 1.1591856425943044e-06, "loss": 0.1207, "step": 3819 }, { "epoch": 0.8691695108077361, "grad_norm": 1.269490471422441, "learning_rate": 1.159139252436566e-06, "loss": 0.1045, "step": 3820 }, { "epoch": 0.8693970420932878, "grad_norm": 2.550687247141249, "learning_rate": 1.1590928513619997e-06, "loss": 0.1525, "step": 3821 }, { "epoch": 0.8696245733788396, "grad_norm": 1.7947776642256512, "learning_rate": 1.1590464393715536e-06, "loss": 0.0921, "step": 3822 }, { "epoch": 0.8698521046643913, "grad_norm": 1.7311193251335109, "learning_rate": 1.1590000164661763e-06, "loss": 0.1531, "step": 3823 }, { "epoch": 0.8700796359499431, "grad_norm": 1.5146472578814127, "learning_rate": 1.1589535826468168e-06, "loss": 0.1648, "step": 3824 }, { "epoch": 0.8703071672354948, "grad_norm": 1.5015916222628989, "learning_rate": 1.158907137914424e-06, "loss": 0.1389, "step": 3825 }, { "epoch": 0.8705346985210466, "grad_norm": 1.8122124285787728, "learning_rate": 1.158860682269947e-06, "loss": 0.1037, "step": 3826 }, { "epoch": 0.8707622298065985, "grad_norm": 2.833296008494366, "learning_rate": 1.1588142157143353e-06, "loss": 0.1188, "step": 3827 }, { "epoch": 0.8709897610921502, "grad_norm": 2.216618266323227, "learning_rate": 1.1587677382485386e-06, "loss": 0.1465, "step": 3828 }, { "epoch": 0.871217292377702, "grad_norm": 1.2793900862334322, "learning_rate": 1.158721249873507e-06, "loss": 0.1095, "step": 3829 }, { "epoch": 0.8714448236632537, "grad_norm": 1.8941138487312816, "learning_rate": 1.1586747505901904e-06, "loss": 0.1681, "step": 3830 }, { "epoch": 0.8716723549488055, "grad_norm": 2.3145356477245405, "learning_rate": 1.1586282403995395e-06, "loss": 0.2006, "step": 3831 }, { "epoch": 0.8718998862343572, "grad_norm": 2.6600034233732677, "learning_rate": 1.1585817193025046e-06, "loss": 0.1121, "step": 3832 }, { "epoch": 0.872127417519909, "grad_norm": 1.725320598895434, "learning_rate": 1.1585351873000365e-06, "loss": 0.082, "step": 3833 }, { "epoch": 0.8723549488054607, "grad_norm": 1.8752752336053349, "learning_rate": 1.1584886443930863e-06, "loss": 0.0895, "step": 3834 }, { "epoch": 0.8725824800910125, "grad_norm": 2.751725576098527, "learning_rate": 1.1584420905826051e-06, "loss": 0.1928, "step": 3835 }, { "epoch": 0.8728100113765643, "grad_norm": 1.9424867364532703, "learning_rate": 1.1583955258695447e-06, "loss": 0.1673, "step": 3836 }, { "epoch": 0.8730375426621161, "grad_norm": 1.4261787932440506, "learning_rate": 1.1583489502548566e-06, "loss": 0.2017, "step": 3837 }, { "epoch": 0.8732650739476678, "grad_norm": 2.6521619288755085, "learning_rate": 1.1583023637394928e-06, "loss": 0.1423, "step": 3838 }, { "epoch": 0.8734926052332196, "grad_norm": 1.4138016233960111, "learning_rate": 1.1582557663244052e-06, "loss": 0.1816, "step": 3839 }, { "epoch": 0.8737201365187713, "grad_norm": 1.6328060502150679, "learning_rate": 1.1582091580105464e-06, "loss": 0.1092, "step": 3840 }, { "epoch": 0.8739476678043231, "grad_norm": 2.337312865938115, "learning_rate": 1.158162538798869e-06, "loss": 0.1281, "step": 3841 }, { "epoch": 0.8741751990898748, "grad_norm": 1.98329071301972, "learning_rate": 1.158115908690326e-06, "loss": 0.2402, "step": 3842 }, { "epoch": 0.8744027303754266, "grad_norm": 1.685782018846633, "learning_rate": 1.1580692676858699e-06, "loss": 0.1349, "step": 3843 }, { "epoch": 0.8746302616609783, "grad_norm": 2.185499848922571, "learning_rate": 1.1580226157864542e-06, "loss": 0.1586, "step": 3844 }, { "epoch": 0.8748577929465301, "grad_norm": 1.8272041921707005, "learning_rate": 1.1579759529930324e-06, "loss": 0.0838, "step": 3845 }, { "epoch": 0.875085324232082, "grad_norm": 2.854267276269833, "learning_rate": 1.1579292793065583e-06, "loss": 0.1751, "step": 3846 }, { "epoch": 0.8753128555176337, "grad_norm": 1.8839936940679116, "learning_rate": 1.157882594727986e-06, "loss": 0.118, "step": 3847 }, { "epoch": 0.8755403868031855, "grad_norm": 1.2659058041051248, "learning_rate": 1.1578358992582689e-06, "loss": 0.1039, "step": 3848 }, { "epoch": 0.8757679180887372, "grad_norm": 1.81301741951695, "learning_rate": 1.1577891928983622e-06, "loss": 0.2074, "step": 3849 }, { "epoch": 0.875995449374289, "grad_norm": 1.651298203571477, "learning_rate": 1.15774247564922e-06, "loss": 0.1129, "step": 3850 }, { "epoch": 0.8762229806598407, "grad_norm": 1.2180184462913173, "learning_rate": 1.1576957475117973e-06, "loss": 0.1528, "step": 3851 }, { "epoch": 0.8764505119453925, "grad_norm": 2.1631750813889763, "learning_rate": 1.1576490084870493e-06, "loss": 0.1133, "step": 3852 }, { "epoch": 0.8766780432309442, "grad_norm": 1.7995773049554316, "learning_rate": 1.1576022585759308e-06, "loss": 0.1789, "step": 3853 }, { "epoch": 0.876905574516496, "grad_norm": 1.8726002975048064, "learning_rate": 1.1575554977793975e-06, "loss": 0.1175, "step": 3854 }, { "epoch": 0.8771331058020477, "grad_norm": 1.4099458224848984, "learning_rate": 1.1575087260984056e-06, "loss": 0.0726, "step": 3855 }, { "epoch": 0.8773606370875996, "grad_norm": 2.647213314256987, "learning_rate": 1.1574619435339101e-06, "loss": 0.1726, "step": 3856 }, { "epoch": 0.8775881683731513, "grad_norm": 0.9740327969556655, "learning_rate": 1.157415150086868e-06, "loss": 0.0632, "step": 3857 }, { "epoch": 0.8778156996587031, "grad_norm": 1.4207473326991096, "learning_rate": 1.1573683457582349e-06, "loss": 0.1026, "step": 3858 }, { "epoch": 0.8780432309442548, "grad_norm": 2.003642185189255, "learning_rate": 1.157321530548968e-06, "loss": 0.1118, "step": 3859 }, { "epoch": 0.8782707622298066, "grad_norm": 1.706947541711292, "learning_rate": 1.157274704460024e-06, "loss": 0.0931, "step": 3860 }, { "epoch": 0.8784982935153584, "grad_norm": 2.5323455895431586, "learning_rate": 1.1572278674923598e-06, "loss": 0.1391, "step": 3861 }, { "epoch": 0.8787258248009101, "grad_norm": 1.8894906354825123, "learning_rate": 1.1571810196469326e-06, "loss": 0.1403, "step": 3862 }, { "epoch": 0.8789533560864619, "grad_norm": 7.156443582789994, "learning_rate": 1.1571341609247003e-06, "loss": 0.2029, "step": 3863 }, { "epoch": 0.8791808873720136, "grad_norm": 1.6220204212083171, "learning_rate": 1.1570872913266202e-06, "loss": 0.1553, "step": 3864 }, { "epoch": 0.8794084186575654, "grad_norm": 1.0193269833748249, "learning_rate": 1.1570404108536501e-06, "loss": 0.047, "step": 3865 }, { "epoch": 0.8796359499431172, "grad_norm": 2.675021704289578, "learning_rate": 1.1569935195067487e-06, "loss": 0.1267, "step": 3866 }, { "epoch": 0.879863481228669, "grad_norm": 1.730509204885054, "learning_rate": 1.1569466172868737e-06, "loss": 0.0736, "step": 3867 }, { "epoch": 0.8800910125142207, "grad_norm": 1.1047910151414873, "learning_rate": 1.1568997041949843e-06, "loss": 0.0584, "step": 3868 }, { "epoch": 0.8803185437997725, "grad_norm": 1.496404259499306, "learning_rate": 1.156852780232039e-06, "loss": 0.1104, "step": 3869 }, { "epoch": 0.8805460750853242, "grad_norm": 3.3560356704270617, "learning_rate": 1.156805845398997e-06, "loss": 0.1684, "step": 3870 }, { "epoch": 0.880773606370876, "grad_norm": 1.561882218368479, "learning_rate": 1.1567588996968173e-06, "loss": 0.0996, "step": 3871 }, { "epoch": 0.8810011376564277, "grad_norm": 1.9936186582009299, "learning_rate": 1.1567119431264598e-06, "loss": 0.1882, "step": 3872 }, { "epoch": 0.8812286689419795, "grad_norm": 1.8110134738488533, "learning_rate": 1.156664975688884e-06, "loss": 0.101, "step": 3873 }, { "epoch": 0.8814562002275312, "grad_norm": 2.229579875203127, "learning_rate": 1.1566179973850496e-06, "loss": 0.21, "step": 3874 }, { "epoch": 0.8816837315130831, "grad_norm": 1.2621397772431622, "learning_rate": 1.156571008215917e-06, "loss": 0.0986, "step": 3875 }, { "epoch": 0.8819112627986349, "grad_norm": 0.9768697935292275, "learning_rate": 1.1565240081824466e-06, "loss": 0.07, "step": 3876 }, { "epoch": 0.8821387940841866, "grad_norm": 3.6517858861658117, "learning_rate": 1.1564769972855987e-06, "loss": 0.1368, "step": 3877 }, { "epoch": 0.8823663253697384, "grad_norm": 2.247760853958852, "learning_rate": 1.1564299755263345e-06, "loss": 0.1956, "step": 3878 }, { "epoch": 0.8825938566552901, "grad_norm": 1.5076798581031285, "learning_rate": 1.1563829429056148e-06, "loss": 0.1544, "step": 3879 }, { "epoch": 0.8828213879408419, "grad_norm": 1.4938750757570904, "learning_rate": 1.156335899424401e-06, "loss": 0.0977, "step": 3880 }, { "epoch": 0.8830489192263936, "grad_norm": 1.4303887183395276, "learning_rate": 1.1562888450836544e-06, "loss": 0.1261, "step": 3881 }, { "epoch": 0.8832764505119454, "grad_norm": 2.561231231387681, "learning_rate": 1.156241779884337e-06, "loss": 0.1923, "step": 3882 }, { "epoch": 0.8835039817974971, "grad_norm": 1.7108276568530942, "learning_rate": 1.1561947038274104e-06, "loss": 0.0778, "step": 3883 }, { "epoch": 0.8837315130830489, "grad_norm": 2.065780133370591, "learning_rate": 1.156147616913837e-06, "loss": 0.1611, "step": 3884 }, { "epoch": 0.8839590443686007, "grad_norm": 1.470042341002447, "learning_rate": 1.156100519144579e-06, "loss": 0.1056, "step": 3885 }, { "epoch": 0.8841865756541525, "grad_norm": 1.2016930431615216, "learning_rate": 1.156053410520599e-06, "loss": 0.0606, "step": 3886 }, { "epoch": 0.8844141069397042, "grad_norm": 2.282484813653413, "learning_rate": 1.15600629104286e-06, "loss": 0.1176, "step": 3887 }, { "epoch": 0.884641638225256, "grad_norm": 2.299338905769147, "learning_rate": 1.1559591607123248e-06, "loss": 0.1981, "step": 3888 }, { "epoch": 0.8848691695108077, "grad_norm": 2.099693518455378, "learning_rate": 1.1559120195299566e-06, "loss": 0.202, "step": 3889 }, { "epoch": 0.8850967007963595, "grad_norm": 2.0753613579757944, "learning_rate": 1.1558648674967191e-06, "loss": 0.1623, "step": 3890 }, { "epoch": 0.8853242320819112, "grad_norm": 1.4830462940755922, "learning_rate": 1.1558177046135761e-06, "loss": 0.0907, "step": 3891 }, { "epoch": 0.885551763367463, "grad_norm": 2.9005721227848795, "learning_rate": 1.1557705308814914e-06, "loss": 0.1057, "step": 3892 }, { "epoch": 0.8857792946530147, "grad_norm": 1.8592154177143236, "learning_rate": 1.155723346301429e-06, "loss": 0.1055, "step": 3893 }, { "epoch": 0.8860068259385665, "grad_norm": 1.7039048379439774, "learning_rate": 1.1556761508743532e-06, "loss": 0.142, "step": 3894 }, { "epoch": 0.8862343572241184, "grad_norm": 2.956045848513432, "learning_rate": 1.1556289446012292e-06, "loss": 0.1218, "step": 3895 }, { "epoch": 0.8864618885096701, "grad_norm": 3.1858139191729675, "learning_rate": 1.155581727483021e-06, "loss": 0.1639, "step": 3896 }, { "epoch": 0.8866894197952219, "grad_norm": 1.5140961525493326, "learning_rate": 1.1555344995206941e-06, "loss": 0.1113, "step": 3897 }, { "epoch": 0.8869169510807736, "grad_norm": 3.442091854151577, "learning_rate": 1.1554872607152138e-06, "loss": 0.0723, "step": 3898 }, { "epoch": 0.8871444823663254, "grad_norm": 1.6965582449202878, "learning_rate": 1.1554400110675453e-06, "loss": 0.1209, "step": 3899 }, { "epoch": 0.8873720136518771, "grad_norm": 1.3831082327687239, "learning_rate": 1.1553927505786543e-06, "loss": 0.0651, "step": 3900 }, { "epoch": 0.8875995449374289, "grad_norm": 2.663260132328584, "learning_rate": 1.1553454792495072e-06, "loss": 0.1917, "step": 3901 }, { "epoch": 0.8878270762229806, "grad_norm": 2.0976954738551927, "learning_rate": 1.1552981970810694e-06, "loss": 0.1899, "step": 3902 }, { "epoch": 0.8880546075085324, "grad_norm": 1.5870071722314034, "learning_rate": 1.1552509040743078e-06, "loss": 0.1131, "step": 3903 }, { "epoch": 0.8882821387940842, "grad_norm": 2.128086406305553, "learning_rate": 1.1552036002301891e-06, "loss": 0.1998, "step": 3904 }, { "epoch": 0.888509670079636, "grad_norm": 1.883247951676848, "learning_rate": 1.1551562855496796e-06, "loss": 0.1259, "step": 3905 }, { "epoch": 0.8887372013651877, "grad_norm": 2.1360180179379187, "learning_rate": 1.1551089600337465e-06, "loss": 0.1888, "step": 3906 }, { "epoch": 0.8889647326507395, "grad_norm": 2.217339619643206, "learning_rate": 1.1550616236833574e-06, "loss": 0.1299, "step": 3907 }, { "epoch": 0.8891922639362912, "grad_norm": 2.282198282828942, "learning_rate": 1.155014276499479e-06, "loss": 0.1434, "step": 3908 }, { "epoch": 0.889419795221843, "grad_norm": 1.6341018967015024, "learning_rate": 1.1549669184830796e-06, "loss": 0.0931, "step": 3909 }, { "epoch": 0.8896473265073948, "grad_norm": 1.7540331216445133, "learning_rate": 1.1549195496351271e-06, "loss": 0.2306, "step": 3910 }, { "epoch": 0.8898748577929465, "grad_norm": 2.3260297313053115, "learning_rate": 1.1548721699565896e-06, "loss": 0.1459, "step": 3911 }, { "epoch": 0.8901023890784983, "grad_norm": 2.9134592795255685, "learning_rate": 1.1548247794484353e-06, "loss": 0.2359, "step": 3912 }, { "epoch": 0.89032992036405, "grad_norm": 2.014218612686331, "learning_rate": 1.1547773781116326e-06, "loss": 0.1021, "step": 3913 }, { "epoch": 0.8905574516496019, "grad_norm": 1.9756799447867912, "learning_rate": 1.1547299659471509e-06, "loss": 0.1223, "step": 3914 }, { "epoch": 0.8907849829351536, "grad_norm": 1.1758644708180022, "learning_rate": 1.1546825429559585e-06, "loss": 0.1228, "step": 3915 }, { "epoch": 0.8910125142207054, "grad_norm": 2.545752499921782, "learning_rate": 1.1546351091390253e-06, "loss": 0.1292, "step": 3916 }, { "epoch": 0.8912400455062571, "grad_norm": 1.13014018897145, "learning_rate": 1.1545876644973202e-06, "loss": 0.0827, "step": 3917 }, { "epoch": 0.8914675767918089, "grad_norm": 1.3952469100874085, "learning_rate": 1.1545402090318133e-06, "loss": 0.1256, "step": 3918 }, { "epoch": 0.8916951080773606, "grad_norm": 1.4005307819920794, "learning_rate": 1.1544927427434743e-06, "loss": 0.1093, "step": 3919 }, { "epoch": 0.8919226393629124, "grad_norm": 2.9964531845500453, "learning_rate": 1.1544452656332733e-06, "loss": 0.1892, "step": 3920 }, { "epoch": 0.8921501706484641, "grad_norm": 1.8000119548590816, "learning_rate": 1.1543977777021808e-06, "loss": 0.1624, "step": 3921 }, { "epoch": 0.8923777019340159, "grad_norm": 2.004857047650623, "learning_rate": 1.1543502789511671e-06, "loss": 0.1227, "step": 3922 }, { "epoch": 0.8926052332195676, "grad_norm": 1.5469848933341204, "learning_rate": 1.1543027693812033e-06, "loss": 0.0991, "step": 3923 }, { "epoch": 0.8928327645051195, "grad_norm": 1.7189673141936324, "learning_rate": 1.15425524899326e-06, "loss": 0.1411, "step": 3924 }, { "epoch": 0.8930602957906713, "grad_norm": 2.516228330966882, "learning_rate": 1.154207717788309e-06, "loss": 0.1137, "step": 3925 }, { "epoch": 0.893287827076223, "grad_norm": 2.8926955599289315, "learning_rate": 1.1541601757673216e-06, "loss": 0.1127, "step": 3926 }, { "epoch": 0.8935153583617748, "grad_norm": 2.429413658560354, "learning_rate": 1.154112622931269e-06, "loss": 0.1738, "step": 3927 }, { "epoch": 0.8937428896473265, "grad_norm": 1.9238214340170672, "learning_rate": 1.1540650592811233e-06, "loss": 0.1376, "step": 3928 }, { "epoch": 0.8939704209328783, "grad_norm": 2.4395643175891055, "learning_rate": 1.154017484817857e-06, "loss": 0.1679, "step": 3929 }, { "epoch": 0.89419795221843, "grad_norm": 1.9381655876966044, "learning_rate": 1.1539698995424423e-06, "loss": 0.2045, "step": 3930 }, { "epoch": 0.8944254835039818, "grad_norm": 1.436692153329724, "learning_rate": 1.1539223034558513e-06, "loss": 0.1339, "step": 3931 }, { "epoch": 0.8946530147895335, "grad_norm": 1.706342058684486, "learning_rate": 1.1538746965590572e-06, "loss": 0.1282, "step": 3932 }, { "epoch": 0.8948805460750853, "grad_norm": 1.6727216071468962, "learning_rate": 1.153827078853033e-06, "loss": 0.1372, "step": 3933 }, { "epoch": 0.8951080773606371, "grad_norm": 1.3595940949373464, "learning_rate": 1.1537794503387516e-06, "loss": 0.1, "step": 3934 }, { "epoch": 0.8953356086461889, "grad_norm": 2.5018232688405684, "learning_rate": 1.1537318110171867e-06, "loss": 0.1883, "step": 3935 }, { "epoch": 0.8955631399317406, "grad_norm": 1.843425187540069, "learning_rate": 1.153684160889312e-06, "loss": 0.1922, "step": 3936 }, { "epoch": 0.8957906712172924, "grad_norm": 2.4952621408315387, "learning_rate": 1.1536364999561011e-06, "loss": 0.1519, "step": 3937 }, { "epoch": 0.8960182025028441, "grad_norm": 2.32279003930844, "learning_rate": 1.1535888282185283e-06, "loss": 0.2344, "step": 3938 }, { "epoch": 0.8962457337883959, "grad_norm": 2.0547686374255054, "learning_rate": 1.1535411456775682e-06, "loss": 0.1401, "step": 3939 }, { "epoch": 0.8964732650739476, "grad_norm": 1.798639150183759, "learning_rate": 1.1534934523341952e-06, "loss": 0.0823, "step": 3940 }, { "epoch": 0.8967007963594994, "grad_norm": 2.1804994768684014, "learning_rate": 1.1534457481893834e-06, "loss": 0.1315, "step": 3941 }, { "epoch": 0.8969283276450511, "grad_norm": 1.2797290557497885, "learning_rate": 1.1533980332441085e-06, "loss": 0.0734, "step": 3942 }, { "epoch": 0.897155858930603, "grad_norm": 2.829413645850169, "learning_rate": 1.1533503074993455e-06, "loss": 0.1007, "step": 3943 }, { "epoch": 0.8973833902161548, "grad_norm": 2.0371130013204763, "learning_rate": 1.15330257095607e-06, "loss": 0.1432, "step": 3944 }, { "epoch": 0.8976109215017065, "grad_norm": 1.9028884616413433, "learning_rate": 1.1532548236152574e-06, "loss": 0.0673, "step": 3945 }, { "epoch": 0.8978384527872583, "grad_norm": 1.7454338393426052, "learning_rate": 1.1532070654778838e-06, "loss": 0.1829, "step": 3946 }, { "epoch": 0.89806598407281, "grad_norm": 2.3355341925065356, "learning_rate": 1.1531592965449249e-06, "loss": 0.1586, "step": 3947 }, { "epoch": 0.8982935153583618, "grad_norm": 2.763480949546953, "learning_rate": 1.1531115168173574e-06, "loss": 0.1884, "step": 3948 }, { "epoch": 0.8985210466439135, "grad_norm": 1.4510805507342073, "learning_rate": 1.1530637262961574e-06, "loss": 0.1425, "step": 3949 }, { "epoch": 0.8987485779294653, "grad_norm": 1.968506252851868, "learning_rate": 1.1530159249823022e-06, "loss": 0.1893, "step": 3950 }, { "epoch": 0.898976109215017, "grad_norm": 2.324903096410494, "learning_rate": 1.1529681128767686e-06, "loss": 0.1095, "step": 3951 }, { "epoch": 0.8992036405005688, "grad_norm": 2.545094901563708, "learning_rate": 1.1529202899805336e-06, "loss": 0.1262, "step": 3952 }, { "epoch": 0.8994311717861206, "grad_norm": 2.15492457068193, "learning_rate": 1.1528724562945748e-06, "loss": 0.185, "step": 3953 }, { "epoch": 0.8996587030716724, "grad_norm": 2.492930770654846, "learning_rate": 1.1528246118198697e-06, "loss": 0.104, "step": 3954 }, { "epoch": 0.8998862343572241, "grad_norm": 2.3443014851416697, "learning_rate": 1.1527767565573961e-06, "loss": 0.1056, "step": 3955 }, { "epoch": 0.9001137656427759, "grad_norm": 2.3122789121507803, "learning_rate": 1.152728890508132e-06, "loss": 0.1805, "step": 3956 }, { "epoch": 0.9003412969283277, "grad_norm": 1.655678977924301, "learning_rate": 1.1526810136730562e-06, "loss": 0.1392, "step": 3957 }, { "epoch": 0.9005688282138794, "grad_norm": 2.107224890787166, "learning_rate": 1.1526331260531467e-06, "loss": 0.1126, "step": 3958 }, { "epoch": 0.9007963594994312, "grad_norm": 1.8474653043517228, "learning_rate": 1.1525852276493825e-06, "loss": 0.1033, "step": 3959 }, { "epoch": 0.9010238907849829, "grad_norm": 2.1340619481623704, "learning_rate": 1.1525373184627426e-06, "loss": 0.1245, "step": 3960 }, { "epoch": 0.9012514220705347, "grad_norm": 1.1501325031888365, "learning_rate": 1.1524893984942059e-06, "loss": 0.0947, "step": 3961 }, { "epoch": 0.9014789533560864, "grad_norm": 1.997782703140848, "learning_rate": 1.152441467744752e-06, "loss": 0.1392, "step": 3962 }, { "epoch": 0.9017064846416383, "grad_norm": 1.8412903765538933, "learning_rate": 1.1523935262153604e-06, "loss": 0.0778, "step": 3963 }, { "epoch": 0.90193401592719, "grad_norm": 3.1212109332297477, "learning_rate": 1.152345573907011e-06, "loss": 0.1328, "step": 3964 }, { "epoch": 0.9021615472127418, "grad_norm": 1.7203762023569207, "learning_rate": 1.1522976108206838e-06, "loss": 0.114, "step": 3965 }, { "epoch": 0.9023890784982935, "grad_norm": 3.173805185587451, "learning_rate": 1.1522496369573592e-06, "loss": 0.1798, "step": 3966 }, { "epoch": 0.9026166097838453, "grad_norm": 2.473918490691335, "learning_rate": 1.1522016523180177e-06, "loss": 0.1495, "step": 3967 }, { "epoch": 0.902844141069397, "grad_norm": 2.038623249613075, "learning_rate": 1.15215365690364e-06, "loss": 0.1534, "step": 3968 }, { "epoch": 0.9030716723549488, "grad_norm": 2.24172548077322, "learning_rate": 1.1521056507152068e-06, "loss": 0.1472, "step": 3969 }, { "epoch": 0.9032992036405005, "grad_norm": 1.886243794797654, "learning_rate": 1.1520576337536995e-06, "loss": 0.1659, "step": 3970 }, { "epoch": 0.9035267349260523, "grad_norm": 1.64432508648709, "learning_rate": 1.1520096060200995e-06, "loss": 0.1822, "step": 3971 }, { "epoch": 0.903754266211604, "grad_norm": 1.1043959428327752, "learning_rate": 1.1519615675153884e-06, "loss": 0.1708, "step": 3972 }, { "epoch": 0.9039817974971559, "grad_norm": 3.077639264235174, "learning_rate": 1.1519135182405477e-06, "loss": 0.1232, "step": 3973 }, { "epoch": 0.9042093287827077, "grad_norm": 2.3583973301893395, "learning_rate": 1.1518654581965597e-06, "loss": 0.1644, "step": 3974 }, { "epoch": 0.9044368600682594, "grad_norm": 1.931055818227307, "learning_rate": 1.1518173873844068e-06, "loss": 0.0957, "step": 3975 }, { "epoch": 0.9046643913538112, "grad_norm": 2.302484759884308, "learning_rate": 1.1517693058050714e-06, "loss": 0.1308, "step": 3976 }, { "epoch": 0.9048919226393629, "grad_norm": 2.0541876018634118, "learning_rate": 1.151721213459536e-06, "loss": 0.1398, "step": 3977 }, { "epoch": 0.9051194539249147, "grad_norm": 1.8604819838615643, "learning_rate": 1.1516731103487836e-06, "loss": 0.0897, "step": 3978 }, { "epoch": 0.9053469852104664, "grad_norm": 2.3469141053218965, "learning_rate": 1.1516249964737974e-06, "loss": 0.1314, "step": 3979 }, { "epoch": 0.9055745164960182, "grad_norm": 1.3279642669148322, "learning_rate": 1.1515768718355607e-06, "loss": 0.1102, "step": 3980 }, { "epoch": 0.9058020477815699, "grad_norm": 1.8053577348674945, "learning_rate": 1.1515287364350573e-06, "loss": 0.1213, "step": 3981 }, { "epoch": 0.9060295790671218, "grad_norm": 2.871234378558095, "learning_rate": 1.1514805902732706e-06, "loss": 0.1212, "step": 3982 }, { "epoch": 0.9062571103526735, "grad_norm": 2.156112604782932, "learning_rate": 1.151432433351185e-06, "loss": 0.1114, "step": 3983 }, { "epoch": 0.9064846416382253, "grad_norm": 2.6297498949498053, "learning_rate": 1.1513842656697844e-06, "loss": 0.1919, "step": 3984 }, { "epoch": 0.906712172923777, "grad_norm": 1.8042623931688218, "learning_rate": 1.1513360872300535e-06, "loss": 0.1075, "step": 3985 }, { "epoch": 0.9069397042093288, "grad_norm": 3.2391253841492182, "learning_rate": 1.1512878980329771e-06, "loss": 0.1631, "step": 3986 }, { "epoch": 0.9071672354948805, "grad_norm": 1.1000651357067122, "learning_rate": 1.1512396980795399e-06, "loss": 0.116, "step": 3987 }, { "epoch": 0.9073947667804323, "grad_norm": 2.1033192967440746, "learning_rate": 1.1511914873707269e-06, "loss": 0.1053, "step": 3988 }, { "epoch": 0.907622298065984, "grad_norm": 2.2451810625182556, "learning_rate": 1.1511432659075234e-06, "loss": 0.1043, "step": 3989 }, { "epoch": 0.9078498293515358, "grad_norm": 1.4658968735063378, "learning_rate": 1.1510950336909154e-06, "loss": 0.1242, "step": 3990 }, { "epoch": 0.9080773606370875, "grad_norm": 2.05962070131195, "learning_rate": 1.1510467907218883e-06, "loss": 0.1355, "step": 3991 }, { "epoch": 0.9083048919226394, "grad_norm": 1.4653271062892934, "learning_rate": 1.1509985370014283e-06, "loss": 0.0894, "step": 3992 }, { "epoch": 0.9085324232081912, "grad_norm": 3.091085962402644, "learning_rate": 1.1509502725305214e-06, "loss": 0.136, "step": 3993 }, { "epoch": 0.9087599544937429, "grad_norm": 2.8986186181498774, "learning_rate": 1.1509019973101542e-06, "loss": 0.1416, "step": 3994 }, { "epoch": 0.9089874857792947, "grad_norm": 1.2981522628859017, "learning_rate": 1.1508537113413134e-06, "loss": 0.0676, "step": 3995 }, { "epoch": 0.9092150170648464, "grad_norm": 1.5433048194105081, "learning_rate": 1.1508054146249858e-06, "loss": 0.1052, "step": 3996 }, { "epoch": 0.9094425483503982, "grad_norm": 1.4606601789320433, "learning_rate": 1.1507571071621585e-06, "loss": 0.0704, "step": 3997 }, { "epoch": 0.9096700796359499, "grad_norm": 2.1790916444676864, "learning_rate": 1.1507087889538186e-06, "loss": 0.145, "step": 3998 }, { "epoch": 0.9098976109215017, "grad_norm": 1.80575976747422, "learning_rate": 1.1506604600009542e-06, "loss": 0.1376, "step": 3999 }, { "epoch": 0.9101251422070534, "grad_norm": 1.4156080512130724, "learning_rate": 1.1506121203045524e-06, "loss": 0.1384, "step": 4000 }, { "epoch": 0.9103526734926052, "grad_norm": 1.384363765889395, "learning_rate": 1.1505637698656014e-06, "loss": 0.1224, "step": 4001 }, { "epoch": 0.910580204778157, "grad_norm": 2.331610054158199, "learning_rate": 1.1505154086850898e-06, "loss": 0.1111, "step": 4002 }, { "epoch": 0.9108077360637088, "grad_norm": 1.6098503805284052, "learning_rate": 1.1504670367640054e-06, "loss": 0.087, "step": 4003 }, { "epoch": 0.9110352673492605, "grad_norm": 1.3999525978643317, "learning_rate": 1.1504186541033371e-06, "loss": 0.1418, "step": 4004 }, { "epoch": 0.9112627986348123, "grad_norm": 2.2558646882092823, "learning_rate": 1.150370260704074e-06, "loss": 0.1381, "step": 4005 }, { "epoch": 0.911490329920364, "grad_norm": 1.7065927040440496, "learning_rate": 1.1503218565672047e-06, "loss": 0.0844, "step": 4006 }, { "epoch": 0.9117178612059158, "grad_norm": 2.1447300443618253, "learning_rate": 1.1502734416937188e-06, "loss": 0.2181, "step": 4007 }, { "epoch": 0.9119453924914676, "grad_norm": 1.680142723606261, "learning_rate": 1.150225016084606e-06, "loss": 0.1837, "step": 4008 }, { "epoch": 0.9121729237770193, "grad_norm": 2.283553663622, "learning_rate": 1.1501765797408558e-06, "loss": 0.1612, "step": 4009 }, { "epoch": 0.9124004550625711, "grad_norm": 2.1243190108046868, "learning_rate": 1.1501281326634578e-06, "loss": 0.1769, "step": 4010 }, { "epoch": 0.9126279863481229, "grad_norm": 1.5630837662985877, "learning_rate": 1.1500796748534026e-06, "loss": 0.1129, "step": 4011 }, { "epoch": 0.9128555176336747, "grad_norm": 1.1110498761701864, "learning_rate": 1.1500312063116803e-06, "loss": 0.0506, "step": 4012 }, { "epoch": 0.9130830489192264, "grad_norm": 2.454314887166611, "learning_rate": 1.149982727039282e-06, "loss": 0.2378, "step": 4013 }, { "epoch": 0.9133105802047782, "grad_norm": 1.6145277367237205, "learning_rate": 1.149934237037198e-06, "loss": 0.1419, "step": 4014 }, { "epoch": 0.9135381114903299, "grad_norm": 2.4477607721641332, "learning_rate": 1.1498857363064198e-06, "loss": 0.1204, "step": 4015 }, { "epoch": 0.9137656427758817, "grad_norm": 1.5769253132902517, "learning_rate": 1.1498372248479383e-06, "loss": 0.212, "step": 4016 }, { "epoch": 0.9139931740614334, "grad_norm": 1.6710945747403774, "learning_rate": 1.1497887026627451e-06, "loss": 0.1535, "step": 4017 }, { "epoch": 0.9142207053469852, "grad_norm": 2.8361936305649467, "learning_rate": 1.1497401697518318e-06, "loss": 0.1207, "step": 4018 }, { "epoch": 0.9144482366325369, "grad_norm": 1.6904198391156955, "learning_rate": 1.1496916261161908e-06, "loss": 0.1918, "step": 4019 }, { "epoch": 0.9146757679180887, "grad_norm": 1.2927293111341114, "learning_rate": 1.1496430717568136e-06, "loss": 0.0644, "step": 4020 }, { "epoch": 0.9149032992036406, "grad_norm": 1.1501478000489853, "learning_rate": 1.149594506674693e-06, "loss": 0.087, "step": 4021 }, { "epoch": 0.9151308304891923, "grad_norm": 1.2191834115791784, "learning_rate": 1.1495459308708212e-06, "loss": 0.0694, "step": 4022 }, { "epoch": 0.9153583617747441, "grad_norm": 1.41962768386301, "learning_rate": 1.1494973443461915e-06, "loss": 0.0914, "step": 4023 }, { "epoch": 0.9155858930602958, "grad_norm": 0.8951074706326217, "learning_rate": 1.1494487471017965e-06, "loss": 0.0556, "step": 4024 }, { "epoch": 0.9158134243458476, "grad_norm": 1.9498245721877319, "learning_rate": 1.1494001391386298e-06, "loss": 0.1551, "step": 4025 }, { "epoch": 0.9160409556313993, "grad_norm": 2.530813661494047, "learning_rate": 1.1493515204576844e-06, "loss": 0.1453, "step": 4026 }, { "epoch": 0.9162684869169511, "grad_norm": 2.734946404831388, "learning_rate": 1.1493028910599544e-06, "loss": 0.2283, "step": 4027 }, { "epoch": 0.9164960182025028, "grad_norm": 2.0385212703852127, "learning_rate": 1.1492542509464333e-06, "loss": 0.1215, "step": 4028 }, { "epoch": 0.9167235494880546, "grad_norm": 2.0433495208554375, "learning_rate": 1.1492056001181157e-06, "loss": 0.1181, "step": 4029 }, { "epoch": 0.9169510807736063, "grad_norm": 1.4433586254013528, "learning_rate": 1.1491569385759953e-06, "loss": 0.0923, "step": 4030 }, { "epoch": 0.9171786120591582, "grad_norm": 1.9753580690191006, "learning_rate": 1.1491082663210675e-06, "loss": 0.1565, "step": 4031 }, { "epoch": 0.9174061433447099, "grad_norm": 2.667070125143639, "learning_rate": 1.1490595833543263e-06, "loss": 0.1626, "step": 4032 }, { "epoch": 0.9176336746302617, "grad_norm": 2.269844760737215, "learning_rate": 1.1490108896767672e-06, "loss": 0.1313, "step": 4033 }, { "epoch": 0.9178612059158134, "grad_norm": 2.1613808031038673, "learning_rate": 1.1489621852893849e-06, "loss": 0.1564, "step": 4034 }, { "epoch": 0.9180887372013652, "grad_norm": 1.9130661258416666, "learning_rate": 1.1489134701931753e-06, "loss": 0.1327, "step": 4035 }, { "epoch": 0.9183162684869169, "grad_norm": 1.669381111217745, "learning_rate": 1.1488647443891339e-06, "loss": 0.1389, "step": 4036 }, { "epoch": 0.9185437997724687, "grad_norm": 1.989793112795475, "learning_rate": 1.1488160078782565e-06, "loss": 0.1622, "step": 4037 }, { "epoch": 0.9187713310580204, "grad_norm": 2.076351484775596, "learning_rate": 1.148767260661539e-06, "loss": 0.098, "step": 4038 }, { "epoch": 0.9189988623435722, "grad_norm": 3.2089411401806487, "learning_rate": 1.1487185027399783e-06, "loss": 0.2277, "step": 4039 }, { "epoch": 0.919226393629124, "grad_norm": 2.6063043974899838, "learning_rate": 1.1486697341145703e-06, "loss": 0.1371, "step": 4040 }, { "epoch": 0.9194539249146758, "grad_norm": 1.7360554006678817, "learning_rate": 1.148620954786312e-06, "loss": 0.1237, "step": 4041 }, { "epoch": 0.9196814562002276, "grad_norm": 2.1507897047848146, "learning_rate": 1.1485721647562005e-06, "loss": 0.2027, "step": 4042 }, { "epoch": 0.9199089874857793, "grad_norm": 1.8157430648391701, "learning_rate": 1.1485233640252328e-06, "loss": 0.1071, "step": 4043 }, { "epoch": 0.9201365187713311, "grad_norm": 2.013896650858182, "learning_rate": 1.1484745525944063e-06, "loss": 0.1488, "step": 4044 }, { "epoch": 0.9203640500568828, "grad_norm": 2.005039763917703, "learning_rate": 1.1484257304647187e-06, "loss": 0.1545, "step": 4045 }, { "epoch": 0.9205915813424346, "grad_norm": 3.0226451865233464, "learning_rate": 1.1483768976371677e-06, "loss": 0.167, "step": 4046 }, { "epoch": 0.9208191126279863, "grad_norm": 2.3586368879355626, "learning_rate": 1.1483280541127513e-06, "loss": 0.167, "step": 4047 }, { "epoch": 0.9210466439135381, "grad_norm": 2.3505247603935584, "learning_rate": 1.1482791998924681e-06, "loss": 0.1048, "step": 4048 }, { "epoch": 0.9212741751990898, "grad_norm": 1.8400187101895282, "learning_rate": 1.1482303349773164e-06, "loss": 0.1606, "step": 4049 }, { "epoch": 0.9215017064846417, "grad_norm": 3.0228280199249102, "learning_rate": 1.1481814593682946e-06, "loss": 0.1459, "step": 4050 }, { "epoch": 0.9217292377701934, "grad_norm": 1.2188213501070726, "learning_rate": 1.1481325730664023e-06, "loss": 0.1301, "step": 4051 }, { "epoch": 0.9219567690557452, "grad_norm": 1.8672928736173242, "learning_rate": 1.148083676072638e-06, "loss": 0.1316, "step": 4052 }, { "epoch": 0.922184300341297, "grad_norm": 1.8403390593858633, "learning_rate": 1.1480347683880016e-06, "loss": 0.1136, "step": 4053 }, { "epoch": 0.9224118316268487, "grad_norm": 2.132830111753058, "learning_rate": 1.1479858500134924e-06, "loss": 0.0883, "step": 4054 }, { "epoch": 0.9226393629124005, "grad_norm": 1.2880475838091152, "learning_rate": 1.14793692095011e-06, "loss": 0.118, "step": 4055 }, { "epoch": 0.9228668941979522, "grad_norm": 1.9377333599879183, "learning_rate": 1.147887981198855e-06, "loss": 0.1256, "step": 4056 }, { "epoch": 0.923094425483504, "grad_norm": 2.56950028002691, "learning_rate": 1.147839030760727e-06, "loss": 0.1674, "step": 4057 }, { "epoch": 0.9233219567690557, "grad_norm": 3.3720583293840103, "learning_rate": 1.1477900696367269e-06, "loss": 0.1903, "step": 4058 }, { "epoch": 0.9235494880546075, "grad_norm": 1.4802557956584177, "learning_rate": 1.147741097827855e-06, "loss": 0.0574, "step": 4059 }, { "epoch": 0.9237770193401593, "grad_norm": 1.3196731818666811, "learning_rate": 1.1476921153351126e-06, "loss": 0.1187, "step": 4060 }, { "epoch": 0.9240045506257111, "grad_norm": 2.3841783165877453, "learning_rate": 1.1476431221595005e-06, "loss": 0.1433, "step": 4061 }, { "epoch": 0.9242320819112628, "grad_norm": 2.159370750846227, "learning_rate": 1.1475941183020203e-06, "loss": 0.1423, "step": 4062 }, { "epoch": 0.9244596131968146, "grad_norm": 1.9272353361429937, "learning_rate": 1.1475451037636733e-06, "loss": 0.111, "step": 4063 }, { "epoch": 0.9246871444823663, "grad_norm": 1.5766120669779804, "learning_rate": 1.1474960785454615e-06, "loss": 0.1207, "step": 4064 }, { "epoch": 0.9249146757679181, "grad_norm": 1.7214929778176364, "learning_rate": 1.1474470426483868e-06, "loss": 0.1049, "step": 4065 }, { "epoch": 0.9251422070534698, "grad_norm": 1.9077781728883594, "learning_rate": 1.1473979960734513e-06, "loss": 0.099, "step": 4066 }, { "epoch": 0.9253697383390216, "grad_norm": 1.4279277010662257, "learning_rate": 1.1473489388216574e-06, "loss": 0.1325, "step": 4067 }, { "epoch": 0.9255972696245733, "grad_norm": 1.2402250408372297, "learning_rate": 1.1472998708940079e-06, "loss": 0.1266, "step": 4068 }, { "epoch": 0.9258248009101251, "grad_norm": 1.7364107091551009, "learning_rate": 1.1472507922915056e-06, "loss": 0.0807, "step": 4069 }, { "epoch": 0.926052332195677, "grad_norm": 1.9921546594650124, "learning_rate": 1.1472017030151536e-06, "loss": 0.1734, "step": 4070 }, { "epoch": 0.9262798634812287, "grad_norm": 1.8248980083404165, "learning_rate": 1.147152603065955e-06, "loss": 0.1147, "step": 4071 }, { "epoch": 0.9265073947667805, "grad_norm": 2.0270199058560348, "learning_rate": 1.1471034924449133e-06, "loss": 0.1316, "step": 4072 }, { "epoch": 0.9267349260523322, "grad_norm": 2.317502793742025, "learning_rate": 1.1470543711530328e-06, "loss": 0.1377, "step": 4073 }, { "epoch": 0.926962457337884, "grad_norm": 2.198374934269782, "learning_rate": 1.147005239191317e-06, "loss": 0.2035, "step": 4074 }, { "epoch": 0.9271899886234357, "grad_norm": 1.1810521503732803, "learning_rate": 1.1469560965607699e-06, "loss": 0.0786, "step": 4075 }, { "epoch": 0.9274175199089875, "grad_norm": 1.4292953433454851, "learning_rate": 1.1469069432623965e-06, "loss": 0.0914, "step": 4076 }, { "epoch": 0.9276450511945392, "grad_norm": 2.7112408608897365, "learning_rate": 1.1468577792972004e-06, "loss": 0.1557, "step": 4077 }, { "epoch": 0.927872582480091, "grad_norm": 1.2029939421333486, "learning_rate": 1.1468086046661874e-06, "loss": 0.1556, "step": 4078 }, { "epoch": 0.9281001137656428, "grad_norm": 2.0892087550113234, "learning_rate": 1.146759419370362e-06, "loss": 0.2167, "step": 4079 }, { "epoch": 0.9283276450511946, "grad_norm": 1.7305291008510646, "learning_rate": 1.14671022341073e-06, "loss": 0.0964, "step": 4080 }, { "epoch": 0.9285551763367463, "grad_norm": 1.6688027122772393, "learning_rate": 1.1466610167882963e-06, "loss": 0.1447, "step": 4081 }, { "epoch": 0.9287827076222981, "grad_norm": 3.0784918422868373, "learning_rate": 1.1466117995040666e-06, "loss": 0.1506, "step": 4082 }, { "epoch": 0.9290102389078498, "grad_norm": 2.124545290642094, "learning_rate": 1.1465625715590473e-06, "loss": 0.1008, "step": 4083 }, { "epoch": 0.9292377701934016, "grad_norm": 1.2578652782716944, "learning_rate": 1.146513332954244e-06, "loss": 0.1283, "step": 4084 }, { "epoch": 0.9294653014789533, "grad_norm": 2.4153588158676182, "learning_rate": 1.1464640836906635e-06, "loss": 0.1172, "step": 4085 }, { "epoch": 0.9296928327645051, "grad_norm": 2.3544484292723804, "learning_rate": 1.146414823769312e-06, "loss": 0.1101, "step": 4086 }, { "epoch": 0.9299203640500568, "grad_norm": 2.134969325323832, "learning_rate": 1.1463655531911963e-06, "loss": 0.0966, "step": 4087 }, { "epoch": 0.9301478953356086, "grad_norm": 4.133663465987289, "learning_rate": 1.1463162719573236e-06, "loss": 0.1581, "step": 4088 }, { "epoch": 0.9303754266211605, "grad_norm": 1.8487089633836562, "learning_rate": 1.1462669800687012e-06, "loss": 0.1634, "step": 4089 }, { "epoch": 0.9306029579067122, "grad_norm": 1.7975377039405993, "learning_rate": 1.1462176775263365e-06, "loss": 0.0946, "step": 4090 }, { "epoch": 0.930830489192264, "grad_norm": 2.373976736841564, "learning_rate": 1.1461683643312366e-06, "loss": 0.1869, "step": 4091 }, { "epoch": 0.9310580204778157, "grad_norm": 1.5868652532544276, "learning_rate": 1.1461190404844103e-06, "loss": 0.1333, "step": 4092 }, { "epoch": 0.9312855517633675, "grad_norm": 1.6337419551444752, "learning_rate": 1.1460697059868648e-06, "loss": 0.1835, "step": 4093 }, { "epoch": 0.9315130830489192, "grad_norm": 2.125031365520933, "learning_rate": 1.146020360839609e-06, "loss": 0.1, "step": 4094 }, { "epoch": 0.931740614334471, "grad_norm": 1.3164350885665195, "learning_rate": 1.1459710050436513e-06, "loss": 0.1065, "step": 4095 }, { "epoch": 0.9319681456200227, "grad_norm": 1.9989494690793859, "learning_rate": 1.1459216385999999e-06, "loss": 0.1275, "step": 4096 }, { "epoch": 0.9321956769055745, "grad_norm": 2.185423641353373, "learning_rate": 1.1458722615096648e-06, "loss": 0.0737, "step": 4097 }, { "epoch": 0.9324232081911262, "grad_norm": 2.4202341523485007, "learning_rate": 1.1458228737736542e-06, "loss": 0.1073, "step": 4098 }, { "epoch": 0.9326507394766781, "grad_norm": 1.66652378551551, "learning_rate": 1.145773475392978e-06, "loss": 0.1211, "step": 4099 }, { "epoch": 0.9328782707622298, "grad_norm": 1.8441447680982124, "learning_rate": 1.145724066368646e-06, "loss": 0.1297, "step": 4100 }, { "epoch": 0.9331058020477816, "grad_norm": 2.2269635218641426, "learning_rate": 1.1456746467016675e-06, "loss": 0.0988, "step": 4101 }, { "epoch": 0.9333333333333333, "grad_norm": 2.121237671716207, "learning_rate": 1.1456252163930528e-06, "loss": 0.1355, "step": 4102 }, { "epoch": 0.9335608646188851, "grad_norm": 2.7856152827774143, "learning_rate": 1.1455757754438122e-06, "loss": 0.1413, "step": 4103 }, { "epoch": 0.9337883959044369, "grad_norm": 1.3498604957526619, "learning_rate": 1.1455263238549563e-06, "loss": 0.1072, "step": 4104 }, { "epoch": 0.9340159271899886, "grad_norm": 1.6746673810268238, "learning_rate": 1.1454768616274955e-06, "loss": 0.1455, "step": 4105 }, { "epoch": 0.9342434584755404, "grad_norm": 1.5273623252161623, "learning_rate": 1.1454273887624407e-06, "loss": 0.0891, "step": 4106 }, { "epoch": 0.9344709897610921, "grad_norm": 2.0414032969179776, "learning_rate": 1.1453779052608032e-06, "loss": 0.0906, "step": 4107 }, { "epoch": 0.9346985210466439, "grad_norm": 1.954781728427514, "learning_rate": 1.1453284111235947e-06, "loss": 0.2374, "step": 4108 }, { "epoch": 0.9349260523321957, "grad_norm": 2.2715359787453213, "learning_rate": 1.145278906351826e-06, "loss": 0.1218, "step": 4109 }, { "epoch": 0.9351535836177475, "grad_norm": 1.5696301603122902, "learning_rate": 1.1452293909465095e-06, "loss": 0.123, "step": 4110 }, { "epoch": 0.9353811149032992, "grad_norm": 2.19325395449243, "learning_rate": 1.145179864908657e-06, "loss": 0.1683, "step": 4111 }, { "epoch": 0.935608646188851, "grad_norm": 2.5751019735499963, "learning_rate": 1.1451303282392808e-06, "loss": 0.1256, "step": 4112 }, { "epoch": 0.9358361774744027, "grad_norm": 1.8716699205619842, "learning_rate": 1.145080780939393e-06, "loss": 0.0974, "step": 4113 }, { "epoch": 0.9360637087599545, "grad_norm": 1.2789688026695687, "learning_rate": 1.145031223010007e-06, "loss": 0.0736, "step": 4114 }, { "epoch": 0.9362912400455062, "grad_norm": 1.5985054428399412, "learning_rate": 1.1449816544521347e-06, "loss": 0.1484, "step": 4115 }, { "epoch": 0.936518771331058, "grad_norm": 1.6163529234144407, "learning_rate": 1.1449320752667898e-06, "loss": 0.1358, "step": 4116 }, { "epoch": 0.9367463026166097, "grad_norm": 1.0948746671297045, "learning_rate": 1.1448824854549856e-06, "loss": 0.0662, "step": 4117 }, { "epoch": 0.9369738339021616, "grad_norm": 2.0429510432349103, "learning_rate": 1.1448328850177356e-06, "loss": 0.1023, "step": 4118 }, { "epoch": 0.9372013651877134, "grad_norm": 2.909739539143929, "learning_rate": 1.1447832739560533e-06, "loss": 0.2704, "step": 4119 }, { "epoch": 0.9374288964732651, "grad_norm": 1.8669293896102523, "learning_rate": 1.1447336522709528e-06, "loss": 0.1643, "step": 4120 }, { "epoch": 0.9376564277588169, "grad_norm": 1.98051617462352, "learning_rate": 1.1446840199634483e-06, "loss": 0.1212, "step": 4121 }, { "epoch": 0.9378839590443686, "grad_norm": 2.575814064863609, "learning_rate": 1.1446343770345544e-06, "loss": 0.1351, "step": 4122 }, { "epoch": 0.9381114903299204, "grad_norm": 1.4793880550121636, "learning_rate": 1.1445847234852853e-06, "loss": 0.1332, "step": 4123 }, { "epoch": 0.9383390216154721, "grad_norm": 2.6986765190100876, "learning_rate": 1.1445350593166559e-06, "loss": 0.215, "step": 4124 }, { "epoch": 0.9385665529010239, "grad_norm": 13.432551035472212, "learning_rate": 1.1444853845296816e-06, "loss": 0.153, "step": 4125 }, { "epoch": 0.9387940841865756, "grad_norm": 2.7787794431563353, "learning_rate": 1.1444356991253774e-06, "loss": 0.1385, "step": 4126 }, { "epoch": 0.9390216154721274, "grad_norm": 2.844290881868777, "learning_rate": 1.1443860031047589e-06, "loss": 0.1564, "step": 4127 }, { "epoch": 0.9392491467576792, "grad_norm": 1.3610957169917577, "learning_rate": 1.1443362964688416e-06, "loss": 0.143, "step": 4128 }, { "epoch": 0.939476678043231, "grad_norm": 2.1082592980578174, "learning_rate": 1.1442865792186413e-06, "loss": 0.16, "step": 4129 }, { "epoch": 0.9397042093287827, "grad_norm": 1.9753673499965854, "learning_rate": 1.1442368513551746e-06, "loss": 0.0904, "step": 4130 }, { "epoch": 0.9399317406143345, "grad_norm": 1.479785764805608, "learning_rate": 1.1441871128794576e-06, "loss": 0.0822, "step": 4131 }, { "epoch": 0.9401592718998862, "grad_norm": 1.577368771739526, "learning_rate": 1.1441373637925068e-06, "loss": 0.168, "step": 4132 }, { "epoch": 0.940386803185438, "grad_norm": 1.1502270382236108, "learning_rate": 1.1440876040953392e-06, "loss": 0.1633, "step": 4133 }, { "epoch": 0.9406143344709897, "grad_norm": 2.8101793735585034, "learning_rate": 1.1440378337889713e-06, "loss": 0.156, "step": 4134 }, { "epoch": 0.9408418657565415, "grad_norm": 1.207307845030043, "learning_rate": 1.143988052874421e-06, "loss": 0.0777, "step": 4135 }, { "epoch": 0.9410693970420932, "grad_norm": 2.773572388788643, "learning_rate": 1.143938261352705e-06, "loss": 0.1961, "step": 4136 }, { "epoch": 0.941296928327645, "grad_norm": 2.619426570540865, "learning_rate": 1.1438884592248416e-06, "loss": 0.0814, "step": 4137 }, { "epoch": 0.9415244596131969, "grad_norm": 1.8476195592110707, "learning_rate": 1.1438386464918483e-06, "loss": 0.0998, "step": 4138 }, { "epoch": 0.9417519908987486, "grad_norm": 2.5320903121338376, "learning_rate": 1.1437888231547434e-06, "loss": 0.1897, "step": 4139 }, { "epoch": 0.9419795221843004, "grad_norm": 1.8341313571748092, "learning_rate": 1.143738989214545e-06, "loss": 0.111, "step": 4140 }, { "epoch": 0.9422070534698521, "grad_norm": 2.0680871615443657, "learning_rate": 1.1436891446722718e-06, "loss": 0.147, "step": 4141 }, { "epoch": 0.9424345847554039, "grad_norm": 2.938990966912184, "learning_rate": 1.1436392895289423e-06, "loss": 0.2542, "step": 4142 }, { "epoch": 0.9426621160409556, "grad_norm": 2.162441432206442, "learning_rate": 1.1435894237855754e-06, "loss": 0.1437, "step": 4143 }, { "epoch": 0.9428896473265074, "grad_norm": 2.1600113975804045, "learning_rate": 1.1435395474431906e-06, "loss": 0.1397, "step": 4144 }, { "epoch": 0.9431171786120591, "grad_norm": 2.136599526696913, "learning_rate": 1.143489660502807e-06, "loss": 0.1394, "step": 4145 }, { "epoch": 0.9433447098976109, "grad_norm": 1.8658722635830092, "learning_rate": 1.1434397629654445e-06, "loss": 0.2155, "step": 4146 }, { "epoch": 0.9435722411831626, "grad_norm": 1.4701919591216344, "learning_rate": 1.1433898548321226e-06, "loss": 0.0903, "step": 4147 }, { "epoch": 0.9437997724687145, "grad_norm": 2.2540515992681525, "learning_rate": 1.1433399361038614e-06, "loss": 0.1212, "step": 4148 }, { "epoch": 0.9440273037542662, "grad_norm": 1.4986748683203326, "learning_rate": 1.1432900067816813e-06, "loss": 0.1015, "step": 4149 }, { "epoch": 0.944254835039818, "grad_norm": 1.7538742820647613, "learning_rate": 1.1432400668666028e-06, "loss": 0.1078, "step": 4150 }, { "epoch": 0.9444823663253697, "grad_norm": 2.7575185503123407, "learning_rate": 1.1431901163596462e-06, "loss": 0.2745, "step": 4151 }, { "epoch": 0.9447098976109215, "grad_norm": 2.0879360274125998, "learning_rate": 1.1431401552618327e-06, "loss": 0.1636, "step": 4152 }, { "epoch": 0.9449374288964733, "grad_norm": 1.3069902569614682, "learning_rate": 1.1430901835741833e-06, "loss": 0.0736, "step": 4153 }, { "epoch": 0.945164960182025, "grad_norm": 2.257465630700085, "learning_rate": 1.1430402012977195e-06, "loss": 0.141, "step": 4154 }, { "epoch": 0.9453924914675768, "grad_norm": 1.1487328455761492, "learning_rate": 1.1429902084334627e-06, "loss": 0.0854, "step": 4155 }, { "epoch": 0.9456200227531285, "grad_norm": 1.7462901983970547, "learning_rate": 1.1429402049824348e-06, "loss": 0.1554, "step": 4156 }, { "epoch": 0.9458475540386804, "grad_norm": 2.1404630984856987, "learning_rate": 1.1428901909456575e-06, "loss": 0.1513, "step": 4157 }, { "epoch": 0.9460750853242321, "grad_norm": 2.160428099601572, "learning_rate": 1.1428401663241533e-06, "loss": 0.1132, "step": 4158 }, { "epoch": 0.9463026166097839, "grad_norm": 1.01649187642893, "learning_rate": 1.1427901311189444e-06, "loss": 0.0906, "step": 4159 }, { "epoch": 0.9465301478953356, "grad_norm": 1.5167730970382043, "learning_rate": 1.1427400853310536e-06, "loss": 0.0898, "step": 4160 }, { "epoch": 0.9467576791808874, "grad_norm": 1.05791105281377, "learning_rate": 1.1426900289615034e-06, "loss": 0.0774, "step": 4161 }, { "epoch": 0.9469852104664391, "grad_norm": 1.9510241509956734, "learning_rate": 1.1426399620113174e-06, "loss": 0.1024, "step": 4162 }, { "epoch": 0.9472127417519909, "grad_norm": 1.818663643287264, "learning_rate": 1.1425898844815183e-06, "loss": 0.1049, "step": 4163 }, { "epoch": 0.9474402730375426, "grad_norm": 2.0321761096429847, "learning_rate": 1.1425397963731303e-06, "loss": 0.1718, "step": 4164 }, { "epoch": 0.9476678043230944, "grad_norm": 2.810440066300498, "learning_rate": 1.1424896976871763e-06, "loss": 0.1967, "step": 4165 }, { "epoch": 0.9478953356086461, "grad_norm": 1.844076725117748, "learning_rate": 1.1424395884246808e-06, "loss": 0.1809, "step": 4166 }, { "epoch": 0.948122866894198, "grad_norm": 2.62312758907365, "learning_rate": 1.1423894685866677e-06, "loss": 0.1068, "step": 4167 }, { "epoch": 0.9483503981797498, "grad_norm": 3.383530309668352, "learning_rate": 1.1423393381741614e-06, "loss": 0.1795, "step": 4168 }, { "epoch": 0.9485779294653015, "grad_norm": 2.8737766321935343, "learning_rate": 1.1422891971881867e-06, "loss": 0.226, "step": 4169 }, { "epoch": 0.9488054607508533, "grad_norm": 1.492334368814069, "learning_rate": 1.142239045629768e-06, "loss": 0.1521, "step": 4170 }, { "epoch": 0.949032992036405, "grad_norm": 1.6985507812410747, "learning_rate": 1.1421888834999306e-06, "loss": 0.1272, "step": 4171 }, { "epoch": 0.9492605233219568, "grad_norm": 2.6594628914223204, "learning_rate": 1.1421387107996993e-06, "loss": 0.175, "step": 4172 }, { "epoch": 0.9494880546075085, "grad_norm": 1.6600808249427008, "learning_rate": 1.1420885275301001e-06, "loss": 0.1736, "step": 4173 }, { "epoch": 0.9497155858930603, "grad_norm": 1.5070094807377818, "learning_rate": 1.1420383336921583e-06, "loss": 0.1116, "step": 4174 }, { "epoch": 0.949943117178612, "grad_norm": 2.976695679711648, "learning_rate": 1.1419881292869e-06, "loss": 0.1242, "step": 4175 }, { "epoch": 0.9501706484641638, "grad_norm": 3.24134019331233, "learning_rate": 1.1419379143153511e-06, "loss": 0.1451, "step": 4176 }, { "epoch": 0.9503981797497156, "grad_norm": 2.5026189974890283, "learning_rate": 1.1418876887785379e-06, "loss": 0.1261, "step": 4177 }, { "epoch": 0.9506257110352674, "grad_norm": 3.516403926919639, "learning_rate": 1.1418374526774872e-06, "loss": 0.1354, "step": 4178 }, { "epoch": 0.9508532423208191, "grad_norm": 2.1467294760401123, "learning_rate": 1.1417872060132251e-06, "loss": 0.1613, "step": 4179 }, { "epoch": 0.9510807736063709, "grad_norm": 1.7910764537754473, "learning_rate": 1.1417369487867793e-06, "loss": 0.1552, "step": 4180 }, { "epoch": 0.9513083048919226, "grad_norm": 1.261796346597939, "learning_rate": 1.1416866809991763e-06, "loss": 0.1095, "step": 4181 }, { "epoch": 0.9515358361774744, "grad_norm": 1.5378712086295998, "learning_rate": 1.1416364026514443e-06, "loss": 0.0908, "step": 4182 }, { "epoch": 0.9517633674630261, "grad_norm": 2.3404103222924673, "learning_rate": 1.1415861137446099e-06, "loss": 0.1253, "step": 4183 }, { "epoch": 0.9519908987485779, "grad_norm": 1.927348026595231, "learning_rate": 1.1415358142797018e-06, "loss": 0.1227, "step": 4184 }, { "epoch": 0.9522184300341296, "grad_norm": 1.4109091755193108, "learning_rate": 1.1414855042577474e-06, "loss": 0.1501, "step": 4185 }, { "epoch": 0.9524459613196815, "grad_norm": 1.7942241015528582, "learning_rate": 1.1414351836797755e-06, "loss": 0.1365, "step": 4186 }, { "epoch": 0.9526734926052333, "grad_norm": 1.3105709011841327, "learning_rate": 1.1413848525468139e-06, "loss": 0.1223, "step": 4187 }, { "epoch": 0.952901023890785, "grad_norm": 1.750535419659169, "learning_rate": 1.1413345108598916e-06, "loss": 0.1227, "step": 4188 }, { "epoch": 0.9531285551763368, "grad_norm": 2.565908749432796, "learning_rate": 1.1412841586200378e-06, "loss": 0.1156, "step": 4189 }, { "epoch": 0.9533560864618885, "grad_norm": 3.0056891740775318, "learning_rate": 1.1412337958282812e-06, "loss": 0.1786, "step": 4190 }, { "epoch": 0.9535836177474403, "grad_norm": 1.5893153387746444, "learning_rate": 1.1411834224856514e-06, "loss": 0.1434, "step": 4191 }, { "epoch": 0.953811149032992, "grad_norm": 1.3984147057203036, "learning_rate": 1.1411330385931776e-06, "loss": 0.1293, "step": 4192 }, { "epoch": 0.9540386803185438, "grad_norm": 1.2592930853587179, "learning_rate": 1.1410826441518898e-06, "loss": 0.1555, "step": 4193 }, { "epoch": 0.9542662116040955, "grad_norm": 2.687102327685415, "learning_rate": 1.1410322391628179e-06, "loss": 0.1676, "step": 4194 }, { "epoch": 0.9544937428896473, "grad_norm": 2.0680105884093845, "learning_rate": 1.140981823626992e-06, "loss": 0.0851, "step": 4195 }, { "epoch": 0.9547212741751991, "grad_norm": 1.1971146998473337, "learning_rate": 1.1409313975454429e-06, "loss": 0.1242, "step": 4196 }, { "epoch": 0.9549488054607509, "grad_norm": 0.9679023425689602, "learning_rate": 1.1408809609192007e-06, "loss": 0.078, "step": 4197 }, { "epoch": 0.9551763367463026, "grad_norm": 2.949152525431351, "learning_rate": 1.1408305137492963e-06, "loss": 0.3056, "step": 4198 }, { "epoch": 0.9554038680318544, "grad_norm": 2.3564769577645817, "learning_rate": 1.1407800560367612e-06, "loss": 0.1402, "step": 4199 }, { "epoch": 0.9556313993174061, "grad_norm": 2.2384378241835376, "learning_rate": 1.140729587782626e-06, "loss": 0.143, "step": 4200 }, { "epoch": 0.9558589306029579, "grad_norm": 1.947160996821623, "learning_rate": 1.1406791089879229e-06, "loss": 0.1644, "step": 4201 }, { "epoch": 0.9560864618885097, "grad_norm": 3.4892873782238296, "learning_rate": 1.1406286196536832e-06, "loss": 0.1897, "step": 4202 }, { "epoch": 0.9563139931740614, "grad_norm": 1.9038405626772368, "learning_rate": 1.1405781197809388e-06, "loss": 0.115, "step": 4203 }, { "epoch": 0.9565415244596132, "grad_norm": 1.5068217662014, "learning_rate": 1.1405276093707218e-06, "loss": 0.0991, "step": 4204 }, { "epoch": 0.9567690557451649, "grad_norm": 2.796879901668464, "learning_rate": 1.1404770884240645e-06, "loss": 0.1362, "step": 4205 }, { "epoch": 0.9569965870307168, "grad_norm": 2.3007498141980585, "learning_rate": 1.1404265569419998e-06, "loss": 0.1599, "step": 4206 }, { "epoch": 0.9572241183162685, "grad_norm": 1.799583084349667, "learning_rate": 1.14037601492556e-06, "loss": 0.1147, "step": 4207 }, { "epoch": 0.9574516496018203, "grad_norm": 1.3651569176318479, "learning_rate": 1.1403254623757785e-06, "loss": 0.0413, "step": 4208 }, { "epoch": 0.957679180887372, "grad_norm": 3.035407426201199, "learning_rate": 1.1402748992936881e-06, "loss": 0.1924, "step": 4209 }, { "epoch": 0.9579067121729238, "grad_norm": 1.467754212052448, "learning_rate": 1.1402243256803228e-06, "loss": 0.0743, "step": 4210 }, { "epoch": 0.9581342434584755, "grad_norm": 1.9336209918045806, "learning_rate": 1.1401737415367157e-06, "loss": 0.1342, "step": 4211 }, { "epoch": 0.9583617747440273, "grad_norm": 1.680828937378028, "learning_rate": 1.1401231468639008e-06, "loss": 0.1099, "step": 4212 }, { "epoch": 0.958589306029579, "grad_norm": 1.5684810967016425, "learning_rate": 1.140072541662912e-06, "loss": 0.1596, "step": 4213 }, { "epoch": 0.9588168373151308, "grad_norm": 1.97644029701773, "learning_rate": 1.1400219259347842e-06, "loss": 0.1719, "step": 4214 }, { "epoch": 0.9590443686006825, "grad_norm": 1.6091477688636493, "learning_rate": 1.139971299680551e-06, "loss": 0.1682, "step": 4215 }, { "epoch": 0.9592718998862344, "grad_norm": 1.5649145183454007, "learning_rate": 1.1399206629012478e-06, "loss": 0.0748, "step": 4216 }, { "epoch": 0.9594994311717862, "grad_norm": 1.168564720112399, "learning_rate": 1.1398700155979092e-06, "loss": 0.1224, "step": 4217 }, { "epoch": 0.9597269624573379, "grad_norm": 1.5060255960765723, "learning_rate": 1.1398193577715705e-06, "loss": 0.1202, "step": 4218 }, { "epoch": 0.9599544937428897, "grad_norm": 2.200763885887003, "learning_rate": 1.1397686894232671e-06, "loss": 0.1255, "step": 4219 }, { "epoch": 0.9601820250284414, "grad_norm": 2.777826300232821, "learning_rate": 1.1397180105540343e-06, "loss": 0.1323, "step": 4220 }, { "epoch": 0.9604095563139932, "grad_norm": 1.9394526192366455, "learning_rate": 1.1396673211649078e-06, "loss": 0.1286, "step": 4221 }, { "epoch": 0.9606370875995449, "grad_norm": 2.4079011934215533, "learning_rate": 1.139616621256924e-06, "loss": 0.0972, "step": 4222 }, { "epoch": 0.9608646188850967, "grad_norm": 1.5717434706985411, "learning_rate": 1.1395659108311192e-06, "loss": 0.1546, "step": 4223 }, { "epoch": 0.9610921501706484, "grad_norm": 1.891681115479138, "learning_rate": 1.1395151898885293e-06, "loss": 0.1113, "step": 4224 }, { "epoch": 0.9613196814562003, "grad_norm": 1.569786603078423, "learning_rate": 1.1394644584301912e-06, "loss": 0.1161, "step": 4225 }, { "epoch": 0.961547212741752, "grad_norm": 3.0048493135744043, "learning_rate": 1.1394137164571418e-06, "loss": 0.167, "step": 4226 }, { "epoch": 0.9617747440273038, "grad_norm": 2.7038083428526716, "learning_rate": 1.1393629639704182e-06, "loss": 0.1764, "step": 4227 }, { "epoch": 0.9620022753128555, "grad_norm": 1.6628649624730723, "learning_rate": 1.1393122009710575e-06, "loss": 0.1079, "step": 4228 }, { "epoch": 0.9622298065984073, "grad_norm": 1.7102127604416588, "learning_rate": 1.1392614274600975e-06, "loss": 0.0968, "step": 4229 }, { "epoch": 0.962457337883959, "grad_norm": 1.3187473775319558, "learning_rate": 1.1392106434385754e-06, "loss": 0.1167, "step": 4230 }, { "epoch": 0.9626848691695108, "grad_norm": 1.8678205589578505, "learning_rate": 1.1391598489075298e-06, "loss": 0.2621, "step": 4231 }, { "epoch": 0.9629124004550625, "grad_norm": 1.7996446274602325, "learning_rate": 1.1391090438679986e-06, "loss": 0.1643, "step": 4232 }, { "epoch": 0.9631399317406143, "grad_norm": 2.1682833518232405, "learning_rate": 1.1390582283210199e-06, "loss": 0.2173, "step": 4233 }, { "epoch": 0.963367463026166, "grad_norm": 2.6867113923181387, "learning_rate": 1.1390074022676325e-06, "loss": 0.0996, "step": 4234 }, { "epoch": 0.9635949943117179, "grad_norm": 2.011700216724661, "learning_rate": 1.1389565657088752e-06, "loss": 0.1241, "step": 4235 }, { "epoch": 0.9638225255972697, "grad_norm": 1.5044579516502963, "learning_rate": 1.1389057186457868e-06, "loss": 0.2565, "step": 4236 }, { "epoch": 0.9640500568828214, "grad_norm": 1.9887001444404104, "learning_rate": 1.1388548610794069e-06, "loss": 0.1277, "step": 4237 }, { "epoch": 0.9642775881683732, "grad_norm": 2.285745992348972, "learning_rate": 1.1388039930107747e-06, "loss": 0.1067, "step": 4238 }, { "epoch": 0.9645051194539249, "grad_norm": 1.6891307072134218, "learning_rate": 1.1387531144409297e-06, "loss": 0.0772, "step": 4239 }, { "epoch": 0.9647326507394767, "grad_norm": 1.337769022595117, "learning_rate": 1.138702225370912e-06, "loss": 0.0804, "step": 4240 }, { "epoch": 0.9649601820250284, "grad_norm": 2.7570690521663934, "learning_rate": 1.1386513258017617e-06, "loss": 0.1543, "step": 4241 }, { "epoch": 0.9651877133105802, "grad_norm": 2.5030514235096657, "learning_rate": 1.138600415734519e-06, "loss": 0.0915, "step": 4242 }, { "epoch": 0.9654152445961319, "grad_norm": 1.1507439636656707, "learning_rate": 1.1385494951702245e-06, "loss": 0.1138, "step": 4243 }, { "epoch": 0.9656427758816837, "grad_norm": 1.4979011428477753, "learning_rate": 1.1384985641099187e-06, "loss": 0.1466, "step": 4244 }, { "epoch": 0.9658703071672355, "grad_norm": 1.9623811949903942, "learning_rate": 1.1384476225546426e-06, "loss": 0.1219, "step": 4245 }, { "epoch": 0.9660978384527873, "grad_norm": 1.84621487590758, "learning_rate": 1.1383966705054377e-06, "loss": 0.1323, "step": 4246 }, { "epoch": 0.966325369738339, "grad_norm": 2.8495103716366983, "learning_rate": 1.1383457079633448e-06, "loss": 0.1594, "step": 4247 }, { "epoch": 0.9665529010238908, "grad_norm": 1.655415187692044, "learning_rate": 1.138294734929406e-06, "loss": 0.141, "step": 4248 }, { "epoch": 0.9667804323094426, "grad_norm": 2.375785741891734, "learning_rate": 1.1382437514046627e-06, "loss": 0.1236, "step": 4249 }, { "epoch": 0.9670079635949943, "grad_norm": 2.001867889330283, "learning_rate": 1.1381927573901572e-06, "loss": 0.1337, "step": 4250 }, { "epoch": 0.967235494880546, "grad_norm": 0.9396950060200854, "learning_rate": 1.1381417528869316e-06, "loss": 0.0702, "step": 4251 }, { "epoch": 0.9674630261660978, "grad_norm": 1.462486126163728, "learning_rate": 1.1380907378960282e-06, "loss": 0.1029, "step": 4252 }, { "epoch": 0.9676905574516496, "grad_norm": 1.6071269347895911, "learning_rate": 1.13803971241849e-06, "loss": 0.1696, "step": 4253 }, { "epoch": 0.9679180887372013, "grad_norm": 1.628532405088259, "learning_rate": 1.1379886764553596e-06, "loss": 0.1051, "step": 4254 }, { "epoch": 0.9681456200227532, "grad_norm": 2.923835193493409, "learning_rate": 1.1379376300076803e-06, "loss": 0.1836, "step": 4255 }, { "epoch": 0.9683731513083049, "grad_norm": 2.5536251649436794, "learning_rate": 1.137886573076495e-06, "loss": 0.1119, "step": 4256 }, { "epoch": 0.9686006825938567, "grad_norm": 1.4150265596545728, "learning_rate": 1.1378355056628474e-06, "loss": 0.1061, "step": 4257 }, { "epoch": 0.9688282138794084, "grad_norm": 3.0955486444699916, "learning_rate": 1.1377844277677815e-06, "loss": 0.2038, "step": 4258 }, { "epoch": 0.9690557451649602, "grad_norm": 1.6087312990432219, "learning_rate": 1.1377333393923408e-06, "loss": 0.1059, "step": 4259 }, { "epoch": 0.9692832764505119, "grad_norm": 1.1513657828053114, "learning_rate": 1.1376822405375698e-06, "loss": 0.0679, "step": 4260 }, { "epoch": 0.9695108077360637, "grad_norm": 1.623397897479566, "learning_rate": 1.1376311312045128e-06, "loss": 0.0915, "step": 4261 }, { "epoch": 0.9697383390216154, "grad_norm": 2.3944554360529886, "learning_rate": 1.1375800113942144e-06, "loss": 0.115, "step": 4262 }, { "epoch": 0.9699658703071672, "grad_norm": 1.9802197111942301, "learning_rate": 1.137528881107719e-06, "loss": 0.1113, "step": 4263 }, { "epoch": 0.970193401592719, "grad_norm": 3.0310932910862145, "learning_rate": 1.137477740346072e-06, "loss": 0.105, "step": 4264 }, { "epoch": 0.9704209328782708, "grad_norm": 2.8664093923483946, "learning_rate": 1.1374265891103187e-06, "loss": 0.1464, "step": 4265 }, { "epoch": 0.9706484641638226, "grad_norm": 2.241954646362492, "learning_rate": 1.1373754274015044e-06, "loss": 0.1171, "step": 4266 }, { "epoch": 0.9708759954493743, "grad_norm": 2.0471078332382358, "learning_rate": 1.1373242552206744e-06, "loss": 0.1427, "step": 4267 }, { "epoch": 0.9711035267349261, "grad_norm": 2.246513733887583, "learning_rate": 1.1372730725688754e-06, "loss": 0.1524, "step": 4268 }, { "epoch": 0.9713310580204778, "grad_norm": 3.6522298247730585, "learning_rate": 1.1372218794471527e-06, "loss": 0.1175, "step": 4269 }, { "epoch": 0.9715585893060296, "grad_norm": 2.095106143688212, "learning_rate": 1.1371706758565529e-06, "loss": 0.1154, "step": 4270 }, { "epoch": 0.9717861205915813, "grad_norm": 1.3788867318393225, "learning_rate": 1.1371194617981224e-06, "loss": 0.0995, "step": 4271 }, { "epoch": 0.9720136518771331, "grad_norm": 1.8027774578469935, "learning_rate": 1.137068237272908e-06, "loss": 0.1189, "step": 4272 }, { "epoch": 0.9722411831626848, "grad_norm": 2.092762888855007, "learning_rate": 1.1370170022819569e-06, "loss": 0.0788, "step": 4273 }, { "epoch": 0.9724687144482367, "grad_norm": 2.4272221864712558, "learning_rate": 1.1369657568263157e-06, "loss": 0.1111, "step": 4274 }, { "epoch": 0.9726962457337884, "grad_norm": 1.629457983537679, "learning_rate": 1.1369145009070323e-06, "loss": 0.1251, "step": 4275 }, { "epoch": 0.9729237770193402, "grad_norm": 1.8676345743374245, "learning_rate": 1.1368632345251538e-06, "loss": 0.0962, "step": 4276 }, { "epoch": 0.9731513083048919, "grad_norm": 2.0581929804225907, "learning_rate": 1.1368119576817283e-06, "loss": 0.1381, "step": 4277 }, { "epoch": 0.9733788395904437, "grad_norm": 1.3640113222245722, "learning_rate": 1.1367606703778037e-06, "loss": 0.0782, "step": 4278 }, { "epoch": 0.9736063708759954, "grad_norm": 2.2722595301486783, "learning_rate": 1.1367093726144283e-06, "loss": 0.134, "step": 4279 }, { "epoch": 0.9738339021615472, "grad_norm": 1.4359397100942914, "learning_rate": 1.1366580643926506e-06, "loss": 0.09, "step": 4280 }, { "epoch": 0.974061433447099, "grad_norm": 1.6832451491075302, "learning_rate": 1.1366067457135188e-06, "loss": 0.1221, "step": 4281 }, { "epoch": 0.9742889647326507, "grad_norm": 2.130004538111451, "learning_rate": 1.1365554165780823e-06, "loss": 0.126, "step": 4282 }, { "epoch": 0.9745164960182024, "grad_norm": 3.1871453915863475, "learning_rate": 1.13650407698739e-06, "loss": 0.1402, "step": 4283 }, { "epoch": 0.9747440273037543, "grad_norm": 2.183678937207992, "learning_rate": 1.136452726942491e-06, "loss": 0.1173, "step": 4284 }, { "epoch": 0.9749715585893061, "grad_norm": 1.9710667666023438, "learning_rate": 1.1364013664444351e-06, "loss": 0.1596, "step": 4285 }, { "epoch": 0.9751990898748578, "grad_norm": 2.163665685442942, "learning_rate": 1.1363499954942717e-06, "loss": 0.1537, "step": 4286 }, { "epoch": 0.9754266211604096, "grad_norm": 2.534636527671189, "learning_rate": 1.1362986140930509e-06, "loss": 0.1157, "step": 4287 }, { "epoch": 0.9756541524459613, "grad_norm": 1.636399149395774, "learning_rate": 1.1362472222418228e-06, "loss": 0.1538, "step": 4288 }, { "epoch": 0.9758816837315131, "grad_norm": 1.826529942423519, "learning_rate": 1.1361958199416378e-06, "loss": 0.116, "step": 4289 }, { "epoch": 0.9761092150170648, "grad_norm": 3.576678848053755, "learning_rate": 1.1361444071935467e-06, "loss": 0.1821, "step": 4290 }, { "epoch": 0.9763367463026166, "grad_norm": 1.713382015938669, "learning_rate": 1.1360929839985998e-06, "loss": 0.1742, "step": 4291 }, { "epoch": 0.9765642775881683, "grad_norm": 1.609348815275912, "learning_rate": 1.1360415503578485e-06, "loss": 0.1446, "step": 4292 }, { "epoch": 0.9767918088737202, "grad_norm": 1.0924323312589486, "learning_rate": 1.1359901062723437e-06, "loss": 0.0912, "step": 4293 }, { "epoch": 0.9770193401592719, "grad_norm": 2.3332736667282146, "learning_rate": 1.1359386517431366e-06, "loss": 0.127, "step": 4294 }, { "epoch": 0.9772468714448237, "grad_norm": 2.2011673248048975, "learning_rate": 1.1358871867712797e-06, "loss": 0.2629, "step": 4295 }, { "epoch": 0.9774744027303754, "grad_norm": 1.9212910975034734, "learning_rate": 1.1358357113578242e-06, "loss": 0.1228, "step": 4296 }, { "epoch": 0.9777019340159272, "grad_norm": 2.3260282738895226, "learning_rate": 1.1357842255038222e-06, "loss": 0.1067, "step": 4297 }, { "epoch": 0.977929465301479, "grad_norm": 1.475524850603775, "learning_rate": 1.1357327292103266e-06, "loss": 0.1522, "step": 4298 }, { "epoch": 0.9781569965870307, "grad_norm": 1.8612552057655247, "learning_rate": 1.135681222478389e-06, "loss": 0.1375, "step": 4299 }, { "epoch": 0.9783845278725825, "grad_norm": 1.8283279905566892, "learning_rate": 1.1356297053090623e-06, "loss": 0.1235, "step": 4300 }, { "epoch": 0.9786120591581342, "grad_norm": 1.8357637664074558, "learning_rate": 1.1355781777033998e-06, "loss": 0.138, "step": 4301 }, { "epoch": 0.978839590443686, "grad_norm": 1.2631401818522685, "learning_rate": 1.1355266396624545e-06, "loss": 0.1161, "step": 4302 }, { "epoch": 0.9790671217292378, "grad_norm": 1.2730308216200366, "learning_rate": 1.1354750911872795e-06, "loss": 0.119, "step": 4303 }, { "epoch": 0.9792946530147896, "grad_norm": 1.9518448337670264, "learning_rate": 1.1354235322789286e-06, "loss": 0.1945, "step": 4304 }, { "epoch": 0.9795221843003413, "grad_norm": 1.2218564664195801, "learning_rate": 1.1353719629384554e-06, "loss": 0.1329, "step": 4305 }, { "epoch": 0.9797497155858931, "grad_norm": 1.3504543016276331, "learning_rate": 1.135320383166914e-06, "loss": 0.1504, "step": 4306 }, { "epoch": 0.9799772468714448, "grad_norm": 3.1228187183100946, "learning_rate": 1.1352687929653586e-06, "loss": 0.2716, "step": 4307 }, { "epoch": 0.9802047781569966, "grad_norm": 2.4100339134625264, "learning_rate": 1.1352171923348438e-06, "loss": 0.1911, "step": 4308 }, { "epoch": 0.9804323094425483, "grad_norm": 1.8961258255968092, "learning_rate": 1.1351655812764236e-06, "loss": 0.1552, "step": 4309 }, { "epoch": 0.9806598407281001, "grad_norm": 1.758739411124323, "learning_rate": 1.1351139597911536e-06, "loss": 0.1091, "step": 4310 }, { "epoch": 0.9808873720136518, "grad_norm": 2.257284438813299, "learning_rate": 1.1350623278800884e-06, "loss": 0.1427, "step": 4311 }, { "epoch": 0.9811149032992036, "grad_norm": 1.5193987741131023, "learning_rate": 1.1350106855442833e-06, "loss": 0.1043, "step": 4312 }, { "epoch": 0.9813424345847555, "grad_norm": 3.0839542265345194, "learning_rate": 1.134959032784794e-06, "loss": 0.1315, "step": 4313 }, { "epoch": 0.9815699658703072, "grad_norm": 2.6269195704445725, "learning_rate": 1.1349073696026759e-06, "loss": 0.132, "step": 4314 }, { "epoch": 0.981797497155859, "grad_norm": 2.410755923938861, "learning_rate": 1.1348556959989848e-06, "loss": 0.0962, "step": 4315 }, { "epoch": 0.9820250284414107, "grad_norm": 1.8676901583023484, "learning_rate": 1.1348040119747771e-06, "loss": 0.108, "step": 4316 }, { "epoch": 0.9822525597269625, "grad_norm": 1.7889496982318338, "learning_rate": 1.1347523175311092e-06, "loss": 0.1453, "step": 4317 }, { "epoch": 0.9824800910125142, "grad_norm": 1.4259289185918111, "learning_rate": 1.1347006126690377e-06, "loss": 0.1674, "step": 4318 }, { "epoch": 0.982707622298066, "grad_norm": 2.7410660853435838, "learning_rate": 1.1346488973896188e-06, "loss": 0.1252, "step": 4319 }, { "epoch": 0.9829351535836177, "grad_norm": 1.6223968424245374, "learning_rate": 1.13459717169391e-06, "loss": 0.2018, "step": 4320 }, { "epoch": 0.9831626848691695, "grad_norm": 1.3310882897622611, "learning_rate": 1.1345454355829682e-06, "loss": 0.1641, "step": 4321 }, { "epoch": 0.9833902161547212, "grad_norm": 1.781413030603326, "learning_rate": 1.1344936890578508e-06, "loss": 0.1044, "step": 4322 }, { "epoch": 0.9836177474402731, "grad_norm": 1.447371024861841, "learning_rate": 1.1344419321196156e-06, "loss": 0.1488, "step": 4323 }, { "epoch": 0.9838452787258248, "grad_norm": 1.4980956875745954, "learning_rate": 1.1343901647693204e-06, "loss": 0.1309, "step": 4324 }, { "epoch": 0.9840728100113766, "grad_norm": 1.2811922093105048, "learning_rate": 1.134338387008023e-06, "loss": 0.0593, "step": 4325 }, { "epoch": 0.9843003412969283, "grad_norm": 1.3642128367264363, "learning_rate": 1.134286598836782e-06, "loss": 0.1017, "step": 4326 }, { "epoch": 0.9845278725824801, "grad_norm": 1.7177876329128943, "learning_rate": 1.1342348002566553e-06, "loss": 0.0867, "step": 4327 }, { "epoch": 0.9847554038680318, "grad_norm": 1.3703182672414826, "learning_rate": 1.1341829912687023e-06, "loss": 0.1109, "step": 4328 }, { "epoch": 0.9849829351535836, "grad_norm": 1.5958337506813505, "learning_rate": 1.134131171873981e-06, "loss": 0.1292, "step": 4329 }, { "epoch": 0.9852104664391353, "grad_norm": 3.3711109781277444, "learning_rate": 1.1340793420735514e-06, "loss": 0.1689, "step": 4330 }, { "epoch": 0.9854379977246871, "grad_norm": 2.000675388129801, "learning_rate": 1.1340275018684722e-06, "loss": 0.1109, "step": 4331 }, { "epoch": 0.985665529010239, "grad_norm": 2.0533711499371288, "learning_rate": 1.133975651259803e-06, "loss": 0.1052, "step": 4332 }, { "epoch": 0.9858930602957907, "grad_norm": 1.7529387892804091, "learning_rate": 1.1339237902486037e-06, "loss": 0.11, "step": 4333 }, { "epoch": 0.9861205915813425, "grad_norm": 1.6889892694894149, "learning_rate": 1.1338719188359343e-06, "loss": 0.1184, "step": 4334 }, { "epoch": 0.9863481228668942, "grad_norm": 3.5878456222264887, "learning_rate": 1.1338200370228546e-06, "loss": 0.1742, "step": 4335 }, { "epoch": 0.986575654152446, "grad_norm": 1.6673173056055803, "learning_rate": 1.1337681448104254e-06, "loss": 0.1423, "step": 4336 }, { "epoch": 0.9868031854379977, "grad_norm": 1.7330563337658174, "learning_rate": 1.1337162421997072e-06, "loss": 0.1696, "step": 4337 }, { "epoch": 0.9870307167235495, "grad_norm": 2.0663054470234115, "learning_rate": 1.1336643291917604e-06, "loss": 0.1273, "step": 4338 }, { "epoch": 0.9872582480091012, "grad_norm": 2.11842399087793, "learning_rate": 1.1336124057876464e-06, "loss": 0.1032, "step": 4339 }, { "epoch": 0.987485779294653, "grad_norm": 2.4883273260756105, "learning_rate": 1.1335604719884264e-06, "loss": 0.135, "step": 4340 }, { "epoch": 0.9877133105802047, "grad_norm": 1.731780640656524, "learning_rate": 1.1335085277951616e-06, "loss": 0.1621, "step": 4341 }, { "epoch": 0.9879408418657566, "grad_norm": 1.203909326079841, "learning_rate": 1.1334565732089138e-06, "loss": 0.14, "step": 4342 }, { "epoch": 0.9881683731513083, "grad_norm": 1.601403569944671, "learning_rate": 1.133404608230745e-06, "loss": 0.2358, "step": 4343 }, { "epoch": 0.9883959044368601, "grad_norm": 1.964021565396351, "learning_rate": 1.1333526328617168e-06, "loss": 0.1895, "step": 4344 }, { "epoch": 0.9886234357224118, "grad_norm": 2.3437207735796366, "learning_rate": 1.133300647102892e-06, "loss": 0.1144, "step": 4345 }, { "epoch": 0.9888509670079636, "grad_norm": 3.5296942681172943, "learning_rate": 1.1332486509553328e-06, "loss": 0.1699, "step": 4346 }, { "epoch": 0.9890784982935154, "grad_norm": 2.4413901735081134, "learning_rate": 1.133196644420102e-06, "loss": 0.1381, "step": 4347 }, { "epoch": 0.9893060295790671, "grad_norm": 1.841207848166938, "learning_rate": 1.1331446274982625e-06, "loss": 0.132, "step": 4348 }, { "epoch": 0.9895335608646189, "grad_norm": 2.662793499305453, "learning_rate": 1.1330926001908777e-06, "loss": 0.1421, "step": 4349 }, { "epoch": 0.9897610921501706, "grad_norm": 1.809157807855444, "learning_rate": 1.1330405624990104e-06, "loss": 0.0863, "step": 4350 }, { "epoch": 0.9899886234357224, "grad_norm": 1.999436579625728, "learning_rate": 1.1329885144237243e-06, "loss": 0.1119, "step": 4351 }, { "epoch": 0.9902161547212742, "grad_norm": 2.0702270524123705, "learning_rate": 1.1329364559660836e-06, "loss": 0.1301, "step": 4352 }, { "epoch": 0.990443686006826, "grad_norm": 2.094037825552476, "learning_rate": 1.132884387127152e-06, "loss": 0.1169, "step": 4353 }, { "epoch": 0.9906712172923777, "grad_norm": 1.8657162382122898, "learning_rate": 1.1328323079079934e-06, "loss": 0.1845, "step": 4354 }, { "epoch": 0.9908987485779295, "grad_norm": 2.0171570859055494, "learning_rate": 1.1327802183096725e-06, "loss": 0.1207, "step": 4355 }, { "epoch": 0.9911262798634812, "grad_norm": 1.7790517405477932, "learning_rate": 1.1327281183332542e-06, "loss": 0.0978, "step": 4356 }, { "epoch": 0.991353811149033, "grad_norm": 3.3828371556295216, "learning_rate": 1.1326760079798027e-06, "loss": 0.1455, "step": 4357 }, { "epoch": 0.9915813424345847, "grad_norm": 1.9916518111295805, "learning_rate": 1.1326238872503837e-06, "loss": 0.1303, "step": 4358 }, { "epoch": 0.9918088737201365, "grad_norm": 2.3902240797390735, "learning_rate": 1.1325717561460617e-06, "loss": 0.113, "step": 4359 }, { "epoch": 0.9920364050056882, "grad_norm": 2.543784637415317, "learning_rate": 1.132519614667903e-06, "loss": 0.1722, "step": 4360 }, { "epoch": 0.9922639362912401, "grad_norm": 1.9595219175974092, "learning_rate": 1.1324674628169725e-06, "loss": 0.2108, "step": 4361 }, { "epoch": 0.9924914675767919, "grad_norm": 2.5982341940178837, "learning_rate": 1.1324153005943367e-06, "loss": 0.1161, "step": 4362 }, { "epoch": 0.9927189988623436, "grad_norm": 1.7123301218970444, "learning_rate": 1.1323631280010611e-06, "loss": 0.1137, "step": 4363 }, { "epoch": 0.9929465301478954, "grad_norm": 1.9745516784082002, "learning_rate": 1.1323109450382128e-06, "loss": 0.1954, "step": 4364 }, { "epoch": 0.9931740614334471, "grad_norm": 1.4852701391065133, "learning_rate": 1.1322587517068576e-06, "loss": 0.092, "step": 4365 }, { "epoch": 0.9934015927189989, "grad_norm": 1.2880529332410495, "learning_rate": 1.1322065480080625e-06, "loss": 0.1119, "step": 4366 }, { "epoch": 0.9936291240045506, "grad_norm": 3.5272975673478877, "learning_rate": 1.1321543339428946e-06, "loss": 0.2384, "step": 4367 }, { "epoch": 0.9938566552901024, "grad_norm": 1.3569450896326727, "learning_rate": 1.132102109512421e-06, "loss": 0.1211, "step": 4368 }, { "epoch": 0.9940841865756541, "grad_norm": 4.000179282059702, "learning_rate": 1.1320498747177088e-06, "loss": 0.1658, "step": 4369 }, { "epoch": 0.9943117178612059, "grad_norm": 1.282888080346011, "learning_rate": 1.1319976295598258e-06, "loss": 0.0889, "step": 4370 }, { "epoch": 0.9945392491467577, "grad_norm": 1.7469871671415789, "learning_rate": 1.1319453740398397e-06, "loss": 0.1074, "step": 4371 }, { "epoch": 0.9947667804323095, "grad_norm": 2.332358467010625, "learning_rate": 1.1318931081588188e-06, "loss": 0.1113, "step": 4372 }, { "epoch": 0.9949943117178612, "grad_norm": 2.4141042022127968, "learning_rate": 1.1318408319178308e-06, "loss": 0.2397, "step": 4373 }, { "epoch": 0.995221843003413, "grad_norm": 2.536120347644097, "learning_rate": 1.1317885453179448e-06, "loss": 0.0825, "step": 4374 }, { "epoch": 0.9954493742889647, "grad_norm": 2.758829896595987, "learning_rate": 1.131736248360229e-06, "loss": 0.1675, "step": 4375 }, { "epoch": 0.9956769055745165, "grad_norm": 1.9724252102552065, "learning_rate": 1.1316839410457523e-06, "loss": 0.1328, "step": 4376 }, { "epoch": 0.9959044368600682, "grad_norm": 2.2407432243866454, "learning_rate": 1.1316316233755837e-06, "loss": 0.1014, "step": 4377 }, { "epoch": 0.99613196814562, "grad_norm": 1.5373618862518739, "learning_rate": 1.1315792953507924e-06, "loss": 0.0816, "step": 4378 }, { "epoch": 0.9963594994311717, "grad_norm": 1.1171814219877398, "learning_rate": 1.1315269569724483e-06, "loss": 0.1068, "step": 4379 }, { "epoch": 0.9965870307167235, "grad_norm": 1.3551021899004305, "learning_rate": 1.131474608241621e-06, "loss": 0.1286, "step": 4380 }, { "epoch": 0.9968145620022754, "grad_norm": 1.6680324675811677, "learning_rate": 1.1314222491593798e-06, "loss": 0.0832, "step": 4381 }, { "epoch": 0.9970420932878271, "grad_norm": 2.6677504525045803, "learning_rate": 1.1313698797267958e-06, "loss": 0.1544, "step": 4382 }, { "epoch": 0.9972696245733789, "grad_norm": 2.224612434800048, "learning_rate": 1.1313174999449384e-06, "loss": 0.1407, "step": 4383 }, { "epoch": 0.9974971558589306, "grad_norm": 1.5495425486172278, "learning_rate": 1.1312651098148788e-06, "loss": 0.0819, "step": 4384 }, { "epoch": 0.9977246871444824, "grad_norm": 3.56356215123795, "learning_rate": 1.1312127093376876e-06, "loss": 0.1517, "step": 4385 }, { "epoch": 0.9979522184300341, "grad_norm": 2.1877397253991417, "learning_rate": 1.1311602985144358e-06, "loss": 0.1212, "step": 4386 }, { "epoch": 0.9981797497155859, "grad_norm": 1.3671801752420045, "learning_rate": 1.1311078773461942e-06, "loss": 0.1401, "step": 4387 }, { "epoch": 0.9984072810011376, "grad_norm": 1.7649784039691725, "learning_rate": 1.1310554458340345e-06, "loss": 0.1155, "step": 4388 }, { "epoch": 0.9986348122866894, "grad_norm": 2.4503650421883116, "learning_rate": 1.1310030039790285e-06, "loss": 0.2011, "step": 4389 }, { "epoch": 0.9988623435722411, "grad_norm": 1.0225634979283895, "learning_rate": 1.1309505517822476e-06, "loss": 0.0906, "step": 4390 }, { "epoch": 0.999089874857793, "grad_norm": 1.6113750565499732, "learning_rate": 1.1308980892447641e-06, "loss": 0.1153, "step": 4391 }, { "epoch": 0.9993174061433447, "grad_norm": 2.5556355560493413, "learning_rate": 1.1308456163676501e-06, "loss": 0.2161, "step": 4392 }, { "epoch": 0.9995449374288965, "grad_norm": 1.5875119927953458, "learning_rate": 1.1307931331519783e-06, "loss": 0.1184, "step": 4393 }, { "epoch": 0.9997724687144482, "grad_norm": 1.8164623960583735, "learning_rate": 1.1307406395988211e-06, "loss": 0.1784, "step": 4394 }, { "epoch": 1.0, "grad_norm": 1.4833104582392516, "learning_rate": 1.1306881357092513e-06, "loss": 0.0764, "step": 4395 }, { "epoch": 1.0002275312855518, "grad_norm": 1.4747819154618664, "learning_rate": 1.1306356214843423e-06, "loss": 0.0695, "step": 4396 }, { "epoch": 1.0004550625711035, "grad_norm": 1.996181577315539, "learning_rate": 1.1305830969251672e-06, "loss": 0.0754, "step": 4397 }, { "epoch": 1.0006825938566553, "grad_norm": 2.4403368221950883, "learning_rate": 1.1305305620327994e-06, "loss": 0.1301, "step": 4398 }, { "epoch": 1.000910125142207, "grad_norm": 2.5103859341625836, "learning_rate": 1.1304780168083128e-06, "loss": 0.1432, "step": 4399 }, { "epoch": 1.0011376564277588, "grad_norm": 2.2576121489705274, "learning_rate": 1.1304254612527815e-06, "loss": 0.0847, "step": 4400 }, { "epoch": 1.0013651877133105, "grad_norm": 0.6505949983538077, "learning_rate": 1.130372895367279e-06, "loss": 0.0411, "step": 4401 }, { "epoch": 1.0015927189988623, "grad_norm": 1.5672813299417419, "learning_rate": 1.1303203191528803e-06, "loss": 0.0546, "step": 4402 }, { "epoch": 1.001820250284414, "grad_norm": 1.8844107765714966, "learning_rate": 1.1302677326106598e-06, "loss": 0.0755, "step": 4403 }, { "epoch": 1.0020477815699658, "grad_norm": 2.3441263756887456, "learning_rate": 1.130215135741692e-06, "loss": 0.0932, "step": 4404 }, { "epoch": 1.0022753128555177, "grad_norm": 0.9320352106293828, "learning_rate": 1.1301625285470522e-06, "loss": 0.0492, "step": 4405 }, { "epoch": 1.0025028441410695, "grad_norm": 1.120280020876937, "learning_rate": 1.1301099110278156e-06, "loss": 0.069, "step": 4406 }, { "epoch": 1.0027303754266212, "grad_norm": 1.7643601297911453, "learning_rate": 1.1300572831850574e-06, "loss": 0.0617, "step": 4407 }, { "epoch": 1.002957906712173, "grad_norm": 1.7223702795031912, "learning_rate": 1.1300046450198532e-06, "loss": 0.0975, "step": 4408 }, { "epoch": 1.0031854379977247, "grad_norm": 1.4345188013436136, "learning_rate": 1.1299519965332791e-06, "loss": 0.093, "step": 4409 }, { "epoch": 1.0034129692832765, "grad_norm": 2.2514745321091003, "learning_rate": 1.1298993377264108e-06, "loss": 0.1432, "step": 4410 }, { "epoch": 1.0036405005688283, "grad_norm": 1.5123230129730694, "learning_rate": 1.129846668600325e-06, "loss": 0.0897, "step": 4411 }, { "epoch": 1.00386803185438, "grad_norm": 1.1854568683295474, "learning_rate": 1.1297939891560975e-06, "loss": 0.0795, "step": 4412 }, { "epoch": 1.0040955631399318, "grad_norm": 2.1331405358880065, "learning_rate": 1.1297412993948054e-06, "loss": 0.1339, "step": 4413 }, { "epoch": 1.0043230944254835, "grad_norm": 2.1403450032317006, "learning_rate": 1.1296885993175255e-06, "loss": 0.0914, "step": 4414 }, { "epoch": 1.0045506257110353, "grad_norm": 2.0722720568137825, "learning_rate": 1.1296358889253351e-06, "loss": 0.1073, "step": 4415 }, { "epoch": 1.004778156996587, "grad_norm": 1.6803936020610233, "learning_rate": 1.1295831682193115e-06, "loss": 0.0737, "step": 4416 }, { "epoch": 1.0050056882821388, "grad_norm": 1.6133414130965238, "learning_rate": 1.1295304372005316e-06, "loss": 0.0951, "step": 4417 }, { "epoch": 1.0052332195676905, "grad_norm": 1.011782483121823, "learning_rate": 1.129477695870074e-06, "loss": 0.0989, "step": 4418 }, { "epoch": 1.0054607508532423, "grad_norm": 1.697633317040223, "learning_rate": 1.129424944229016e-06, "loss": 0.0689, "step": 4419 }, { "epoch": 1.005688282138794, "grad_norm": 1.3459567859085924, "learning_rate": 1.1293721822784359e-06, "loss": 0.0781, "step": 4420 }, { "epoch": 1.0059158134243458, "grad_norm": 1.7935976771238544, "learning_rate": 1.1293194100194121e-06, "loss": 0.0679, "step": 4421 }, { "epoch": 1.0061433447098975, "grad_norm": 1.4206028858761883, "learning_rate": 1.1292666274530232e-06, "loss": 0.0604, "step": 4422 }, { "epoch": 1.0063708759954493, "grad_norm": 1.8737473801514988, "learning_rate": 1.129213834580348e-06, "loss": 0.0892, "step": 4423 }, { "epoch": 1.006598407281001, "grad_norm": 1.4562994747544142, "learning_rate": 1.1291610314024653e-06, "loss": 0.0621, "step": 4424 }, { "epoch": 1.006825938566553, "grad_norm": 1.6144715931341527, "learning_rate": 1.1291082179204548e-06, "loss": 0.0891, "step": 4425 }, { "epoch": 1.0070534698521048, "grad_norm": 1.417358558553153, "learning_rate": 1.1290553941353954e-06, "loss": 0.0469, "step": 4426 }, { "epoch": 1.0072810011376565, "grad_norm": 1.2388288333880253, "learning_rate": 1.1290025600483667e-06, "loss": 0.1067, "step": 4427 }, { "epoch": 1.0075085324232083, "grad_norm": 1.2758605963996141, "learning_rate": 1.1289497156604487e-06, "loss": 0.1237, "step": 4428 }, { "epoch": 1.00773606370876, "grad_norm": 1.4052553927682834, "learning_rate": 1.1288968609727216e-06, "loss": 0.0604, "step": 4429 }, { "epoch": 1.0079635949943118, "grad_norm": 1.119682904381322, "learning_rate": 1.1288439959862654e-06, "loss": 0.0902, "step": 4430 }, { "epoch": 1.0081911262798635, "grad_norm": 1.7203969017671572, "learning_rate": 1.128791120702161e-06, "loss": 0.1059, "step": 4431 }, { "epoch": 1.0084186575654153, "grad_norm": 2.6485605967013686, "learning_rate": 1.1287382351214884e-06, "loss": 0.1169, "step": 4432 }, { "epoch": 1.008646188850967, "grad_norm": 1.2027084176698186, "learning_rate": 1.128685339245329e-06, "loss": 0.0624, "step": 4433 }, { "epoch": 1.0088737201365188, "grad_norm": 1.933143757940641, "learning_rate": 1.1286324330747637e-06, "loss": 0.1785, "step": 4434 }, { "epoch": 1.0091012514220705, "grad_norm": 1.8514912698937183, "learning_rate": 1.1285795166108735e-06, "loss": 0.0624, "step": 4435 }, { "epoch": 1.0093287827076223, "grad_norm": 1.8965468725111816, "learning_rate": 1.1285265898547406e-06, "loss": 0.0993, "step": 4436 }, { "epoch": 1.009556313993174, "grad_norm": 2.2625515852101232, "learning_rate": 1.1284736528074464e-06, "loss": 0.1312, "step": 4437 }, { "epoch": 1.0097838452787258, "grad_norm": 0.9513962126598605, "learning_rate": 1.1284207054700727e-06, "loss": 0.042, "step": 4438 }, { "epoch": 1.0100113765642775, "grad_norm": 1.463114771314281, "learning_rate": 1.1283677478437016e-06, "loss": 0.0763, "step": 4439 }, { "epoch": 1.0102389078498293, "grad_norm": 2.354851541830737, "learning_rate": 1.1283147799294158e-06, "loss": 0.1046, "step": 4440 }, { "epoch": 1.010466439135381, "grad_norm": 1.1766333820955324, "learning_rate": 1.1282618017282977e-06, "loss": 0.1248, "step": 4441 }, { "epoch": 1.0106939704209328, "grad_norm": 1.5422549437499427, "learning_rate": 1.1282088132414297e-06, "loss": 0.114, "step": 4442 }, { "epoch": 1.0109215017064845, "grad_norm": 2.2298271761237483, "learning_rate": 1.1281558144698956e-06, "loss": 0.0924, "step": 4443 }, { "epoch": 1.0111490329920365, "grad_norm": 2.519798954114394, "learning_rate": 1.128102805414778e-06, "loss": 0.157, "step": 4444 }, { "epoch": 1.0113765642775883, "grad_norm": 1.4415500014186144, "learning_rate": 1.1280497860771603e-06, "loss": 0.1013, "step": 4445 }, { "epoch": 1.01160409556314, "grad_norm": 2.0125789101308205, "learning_rate": 1.1279967564581264e-06, "loss": 0.0848, "step": 4446 }, { "epoch": 1.0118316268486918, "grad_norm": 2.251550332027883, "learning_rate": 1.12794371655876e-06, "loss": 0.0646, "step": 4447 }, { "epoch": 1.0120591581342435, "grad_norm": 1.0526890868708527, "learning_rate": 1.127890666380145e-06, "loss": 0.0767, "step": 4448 }, { "epoch": 1.0122866894197953, "grad_norm": 1.0816163534208596, "learning_rate": 1.1278376059233658e-06, "loss": 0.0563, "step": 4449 }, { "epoch": 1.012514220705347, "grad_norm": 1.6202591756823337, "learning_rate": 1.127784535189507e-06, "loss": 0.082, "step": 4450 }, { "epoch": 1.0127417519908988, "grad_norm": 2.114784758301838, "learning_rate": 1.127731454179653e-06, "loss": 0.1543, "step": 4451 }, { "epoch": 1.0129692832764505, "grad_norm": 1.5200937072938314, "learning_rate": 1.1276783628948887e-06, "loss": 0.0926, "step": 4452 }, { "epoch": 1.0131968145620023, "grad_norm": 0.8832036316733904, "learning_rate": 1.1276252613362995e-06, "loss": 0.051, "step": 4453 }, { "epoch": 1.013424345847554, "grad_norm": 1.791095170570671, "learning_rate": 1.1275721495049702e-06, "loss": 0.126, "step": 4454 }, { "epoch": 1.0136518771331058, "grad_norm": 1.4567099222693494, "learning_rate": 1.1275190274019867e-06, "loss": 0.0937, "step": 4455 }, { "epoch": 1.0138794084186575, "grad_norm": 1.8661960896015608, "learning_rate": 1.1274658950284347e-06, "loss": 0.0746, "step": 4456 }, { "epoch": 1.0141069397042093, "grad_norm": 2.054547499620593, "learning_rate": 1.1274127523854e-06, "loss": 0.098, "step": 4457 }, { "epoch": 1.014334470989761, "grad_norm": 1.4935646424282278, "learning_rate": 1.1273595994739688e-06, "loss": 0.096, "step": 4458 }, { "epoch": 1.0145620022753128, "grad_norm": 1.8465602968348116, "learning_rate": 1.1273064362952272e-06, "loss": 0.0927, "step": 4459 }, { "epoch": 1.0147895335608645, "grad_norm": 1.921602489637173, "learning_rate": 1.1272532628502621e-06, "loss": 0.0645, "step": 4460 }, { "epoch": 1.0150170648464163, "grad_norm": 1.383709505060379, "learning_rate": 1.1272000791401602e-06, "loss": 0.1155, "step": 4461 }, { "epoch": 1.015244596131968, "grad_norm": 1.1642883714545766, "learning_rate": 1.1271468851660084e-06, "loss": 0.0924, "step": 4462 }, { "epoch": 1.01547212741752, "grad_norm": 1.2445933138998786, "learning_rate": 1.127093680928894e-06, "loss": 0.052, "step": 4463 }, { "epoch": 1.0156996587030718, "grad_norm": 3.4339049713427667, "learning_rate": 1.1270404664299042e-06, "loss": 0.1109, "step": 4464 }, { "epoch": 1.0159271899886235, "grad_norm": 1.6198738915791715, "learning_rate": 1.1269872416701267e-06, "loss": 0.1034, "step": 4465 }, { "epoch": 1.0161547212741753, "grad_norm": 1.8925845714324792, "learning_rate": 1.1269340066506493e-06, "loss": 0.0613, "step": 4466 }, { "epoch": 1.016382252559727, "grad_norm": 1.4139672561252017, "learning_rate": 1.12688076137256e-06, "loss": 0.0875, "step": 4467 }, { "epoch": 1.0166097838452788, "grad_norm": 1.6115204329476527, "learning_rate": 1.1268275058369472e-06, "loss": 0.089, "step": 4468 }, { "epoch": 1.0168373151308305, "grad_norm": 2.0290217444617853, "learning_rate": 1.1267742400448992e-06, "loss": 0.1111, "step": 4469 }, { "epoch": 1.0170648464163823, "grad_norm": 1.779159979414333, "learning_rate": 1.1267209639975046e-06, "loss": 0.0839, "step": 4470 }, { "epoch": 1.017292377701934, "grad_norm": 1.3519160870633005, "learning_rate": 1.1266676776958523e-06, "loss": 0.0885, "step": 4471 }, { "epoch": 1.0175199089874858, "grad_norm": 1.4586296010653232, "learning_rate": 1.1266143811410317e-06, "loss": 0.0668, "step": 4472 }, { "epoch": 1.0177474402730375, "grad_norm": 2.016744342105442, "learning_rate": 1.1265610743341316e-06, "loss": 0.1435, "step": 4473 }, { "epoch": 1.0179749715585893, "grad_norm": 1.1369067026133264, "learning_rate": 1.1265077572762418e-06, "loss": 0.108, "step": 4474 }, { "epoch": 1.018202502844141, "grad_norm": 8.903995088813302, "learning_rate": 1.1264544299684518e-06, "loss": 0.1271, "step": 4475 }, { "epoch": 1.0184300341296928, "grad_norm": 1.6336221990915103, "learning_rate": 1.1264010924118518e-06, "loss": 0.0878, "step": 4476 }, { "epoch": 1.0186575654152445, "grad_norm": 1.658678946132026, "learning_rate": 1.1263477446075315e-06, "loss": 0.1076, "step": 4477 }, { "epoch": 1.0188850967007963, "grad_norm": 1.707584314379802, "learning_rate": 1.1262943865565818e-06, "loss": 0.0964, "step": 4478 }, { "epoch": 1.019112627986348, "grad_norm": 1.9120240803965922, "learning_rate": 1.1262410182600927e-06, "loss": 0.1555, "step": 4479 }, { "epoch": 1.0193401592718998, "grad_norm": 0.9282735076245712, "learning_rate": 1.1261876397191554e-06, "loss": 0.0626, "step": 4480 }, { "epoch": 1.0195676905574516, "grad_norm": 1.6682216698884167, "learning_rate": 1.1261342509348604e-06, "loss": 0.1009, "step": 4481 }, { "epoch": 1.0197952218430033, "grad_norm": 1.2851007188914807, "learning_rate": 1.126080851908299e-06, "loss": 0.0596, "step": 4482 }, { "epoch": 1.0200227531285553, "grad_norm": 1.7254355793639742, "learning_rate": 1.1260274426405629e-06, "loss": 0.1048, "step": 4483 }, { "epoch": 1.020250284414107, "grad_norm": 1.4883296224579121, "learning_rate": 1.1259740231327434e-06, "loss": 0.1188, "step": 4484 }, { "epoch": 1.0204778156996588, "grad_norm": 1.4466839237766165, "learning_rate": 1.1259205933859325e-06, "loss": 0.0551, "step": 4485 }, { "epoch": 1.0207053469852105, "grad_norm": 1.6088130949320203, "learning_rate": 1.1258671534012216e-06, "loss": 0.1246, "step": 4486 }, { "epoch": 1.0209328782707623, "grad_norm": 1.9306073145993348, "learning_rate": 1.1258137031797037e-06, "loss": 0.0852, "step": 4487 }, { "epoch": 1.021160409556314, "grad_norm": 1.4681528767570764, "learning_rate": 1.125760242722471e-06, "loss": 0.073, "step": 4488 }, { "epoch": 1.0213879408418658, "grad_norm": 1.8686422771397933, "learning_rate": 1.1257067720306159e-06, "loss": 0.0845, "step": 4489 }, { "epoch": 1.0216154721274175, "grad_norm": 1.1681457319700357, "learning_rate": 1.1256532911052313e-06, "loss": 0.1197, "step": 4490 }, { "epoch": 1.0218430034129693, "grad_norm": 1.1190800102290346, "learning_rate": 1.1255997999474105e-06, "loss": 0.0398, "step": 4491 }, { "epoch": 1.022070534698521, "grad_norm": 1.42435477981386, "learning_rate": 1.1255462985582465e-06, "loss": 0.071, "step": 4492 }, { "epoch": 1.0222980659840728, "grad_norm": 1.8224520226771979, "learning_rate": 1.125492786938833e-06, "loss": 0.1101, "step": 4493 }, { "epoch": 1.0225255972696246, "grad_norm": 1.4069476965043848, "learning_rate": 1.1254392650902633e-06, "loss": 0.0753, "step": 4494 }, { "epoch": 1.0227531285551763, "grad_norm": 1.5861436800658038, "learning_rate": 1.1253857330136316e-06, "loss": 0.1617, "step": 4495 }, { "epoch": 1.022980659840728, "grad_norm": 1.2272660070201709, "learning_rate": 1.125332190710032e-06, "loss": 0.0804, "step": 4496 }, { "epoch": 1.0232081911262798, "grad_norm": 1.5725673439218046, "learning_rate": 1.125278638180559e-06, "loss": 0.0666, "step": 4497 }, { "epoch": 1.0234357224118316, "grad_norm": 2.002782257227273, "learning_rate": 1.1252250754263064e-06, "loss": 0.0899, "step": 4498 }, { "epoch": 1.0236632536973833, "grad_norm": 1.456920740957027, "learning_rate": 1.1251715024483695e-06, "loss": 0.0697, "step": 4499 }, { "epoch": 1.023890784982935, "grad_norm": 1.108094546855012, "learning_rate": 1.125117919247843e-06, "loss": 0.0665, "step": 4500 }, { "epoch": 1.0241183162684868, "grad_norm": 1.4026920191631007, "learning_rate": 1.1250643258258225e-06, "loss": 0.1499, "step": 4501 }, { "epoch": 1.0243458475540388, "grad_norm": 2.029837399777728, "learning_rate": 1.1250107221834027e-06, "loss": 0.1546, "step": 4502 }, { "epoch": 1.0245733788395905, "grad_norm": 0.887255926728237, "learning_rate": 1.12495710832168e-06, "loss": 0.0648, "step": 4503 }, { "epoch": 1.0248009101251423, "grad_norm": 1.615916482107172, "learning_rate": 1.1249034842417489e-06, "loss": 0.097, "step": 4504 }, { "epoch": 1.025028441410694, "grad_norm": 1.402342600220676, "learning_rate": 1.1248498499447065e-06, "loss": 0.0915, "step": 4505 }, { "epoch": 1.0252559726962458, "grad_norm": 1.592211294130619, "learning_rate": 1.1247962054316485e-06, "loss": 0.1552, "step": 4506 }, { "epoch": 1.0254835039817976, "grad_norm": 1.7740489402500772, "learning_rate": 1.1247425507036715e-06, "loss": 0.1013, "step": 4507 }, { "epoch": 1.0257110352673493, "grad_norm": 2.236483074679014, "learning_rate": 1.1246888857618719e-06, "loss": 0.1269, "step": 4508 }, { "epoch": 1.025938566552901, "grad_norm": 2.026475728815122, "learning_rate": 1.1246352106073466e-06, "loss": 0.1492, "step": 4509 }, { "epoch": 1.0261660978384528, "grad_norm": 1.4043085831160411, "learning_rate": 1.1245815252411928e-06, "loss": 0.1606, "step": 4510 }, { "epoch": 1.0263936291240046, "grad_norm": 1.3478102522875726, "learning_rate": 1.1245278296645073e-06, "loss": 0.138, "step": 4511 }, { "epoch": 1.0266211604095563, "grad_norm": 1.5130340363330543, "learning_rate": 1.124474123878388e-06, "loss": 0.0584, "step": 4512 }, { "epoch": 1.026848691695108, "grad_norm": 3.0044980775672867, "learning_rate": 1.1244204078839325e-06, "loss": 0.1119, "step": 4513 }, { "epoch": 1.0270762229806598, "grad_norm": 1.7841919834467284, "learning_rate": 1.1243666816822382e-06, "loss": 0.1234, "step": 4514 }, { "epoch": 1.0273037542662116, "grad_norm": 2.0156152758572996, "learning_rate": 1.1243129452744036e-06, "loss": 0.1077, "step": 4515 }, { "epoch": 1.0275312855517633, "grad_norm": 1.3091943644643798, "learning_rate": 1.1242591986615268e-06, "loss": 0.1099, "step": 4516 }, { "epoch": 1.027758816837315, "grad_norm": 1.9516196995491382, "learning_rate": 1.1242054418447063e-06, "loss": 0.0734, "step": 4517 }, { "epoch": 1.0279863481228668, "grad_norm": 1.8054250637811027, "learning_rate": 1.1241516748250408e-06, "loss": 0.1087, "step": 4518 }, { "epoch": 1.0282138794084186, "grad_norm": 2.874562035518301, "learning_rate": 1.1240978976036294e-06, "loss": 0.2136, "step": 4519 }, { "epoch": 1.0284414106939703, "grad_norm": 1.0770500977157564, "learning_rate": 1.124044110181571e-06, "loss": 0.0895, "step": 4520 }, { "epoch": 1.028668941979522, "grad_norm": 1.5287898441796675, "learning_rate": 1.1239903125599648e-06, "loss": 0.1935, "step": 4521 }, { "epoch": 1.028896473265074, "grad_norm": 1.2141770039320132, "learning_rate": 1.1239365047399106e-06, "loss": 0.1316, "step": 4522 }, { "epoch": 1.0291240045506258, "grad_norm": 1.7076130924026178, "learning_rate": 1.1238826867225077e-06, "loss": 0.1006, "step": 4523 }, { "epoch": 1.0293515358361776, "grad_norm": 2.172750884502955, "learning_rate": 1.1238288585088567e-06, "loss": 0.0846, "step": 4524 }, { "epoch": 1.0295790671217293, "grad_norm": 1.5673811074944972, "learning_rate": 1.1237750201000574e-06, "loss": 0.1357, "step": 4525 }, { "epoch": 1.029806598407281, "grad_norm": 2.361397974753223, "learning_rate": 1.1237211714972098e-06, "loss": 0.1862, "step": 4526 }, { "epoch": 1.0300341296928328, "grad_norm": 1.59606082155671, "learning_rate": 1.1236673127014152e-06, "loss": 0.1032, "step": 4527 }, { "epoch": 1.0302616609783846, "grad_norm": 1.4456707646110476, "learning_rate": 1.1236134437137738e-06, "loss": 0.0556, "step": 4528 }, { "epoch": 1.0304891922639363, "grad_norm": 1.9510921040673854, "learning_rate": 1.1235595645353869e-06, "loss": 0.128, "step": 4529 }, { "epoch": 1.030716723549488, "grad_norm": 1.1503668274652719, "learning_rate": 1.1235056751673554e-06, "loss": 0.0674, "step": 4530 }, { "epoch": 1.0309442548350398, "grad_norm": 1.9465124654240242, "learning_rate": 1.123451775610781e-06, "loss": 0.1208, "step": 4531 }, { "epoch": 1.0311717861205916, "grad_norm": 0.8598640038575219, "learning_rate": 1.1233978658667651e-06, "loss": 0.0863, "step": 4532 }, { "epoch": 1.0313993174061433, "grad_norm": 1.311506374530133, "learning_rate": 1.1233439459364097e-06, "loss": 0.0974, "step": 4533 }, { "epoch": 1.031626848691695, "grad_norm": 2.3989951849111546, "learning_rate": 1.1232900158208166e-06, "loss": 0.0865, "step": 4534 }, { "epoch": 1.0318543799772468, "grad_norm": 2.0112852237633376, "learning_rate": 1.1232360755210883e-06, "loss": 0.0935, "step": 4535 }, { "epoch": 1.0320819112627986, "grad_norm": 1.4723457241660713, "learning_rate": 1.123182125038327e-06, "loss": 0.1059, "step": 4536 }, { "epoch": 1.0323094425483503, "grad_norm": 2.086137912197496, "learning_rate": 1.1231281643736353e-06, "loss": 0.1119, "step": 4537 }, { "epoch": 1.032536973833902, "grad_norm": 1.2936224119616597, "learning_rate": 1.1230741935281163e-06, "loss": 0.1498, "step": 4538 }, { "epoch": 1.0327645051194538, "grad_norm": 1.9307986409707742, "learning_rate": 1.123020212502873e-06, "loss": 0.0861, "step": 4539 }, { "epoch": 1.0329920364050056, "grad_norm": 1.9937770717754, "learning_rate": 1.1229662212990088e-06, "loss": 0.2239, "step": 4540 }, { "epoch": 1.0332195676905576, "grad_norm": 1.5346898654618122, "learning_rate": 1.1229122199176268e-06, "loss": 0.0611, "step": 4541 }, { "epoch": 1.0334470989761093, "grad_norm": 1.1175713863887276, "learning_rate": 1.1228582083598311e-06, "loss": 0.0794, "step": 4542 }, { "epoch": 1.033674630261661, "grad_norm": 2.2328550567917484, "learning_rate": 1.122804186626725e-06, "loss": 0.0804, "step": 4543 }, { "epoch": 1.0339021615472128, "grad_norm": 1.1528552818810556, "learning_rate": 1.1227501547194133e-06, "loss": 0.0955, "step": 4544 }, { "epoch": 1.0341296928327646, "grad_norm": 2.9382553353367706, "learning_rate": 1.1226961126390001e-06, "loss": 0.1691, "step": 4545 }, { "epoch": 1.0343572241183163, "grad_norm": 1.9931887866365103, "learning_rate": 1.1226420603865898e-06, "loss": 0.1017, "step": 4546 }, { "epoch": 1.034584755403868, "grad_norm": 1.6486532012729544, "learning_rate": 1.122587997963287e-06, "loss": 0.121, "step": 4547 }, { "epoch": 1.0348122866894198, "grad_norm": 2.2996104242216595, "learning_rate": 1.122533925370197e-06, "loss": 0.08, "step": 4548 }, { "epoch": 1.0350398179749716, "grad_norm": 2.05667419327721, "learning_rate": 1.1224798426084246e-06, "loss": 0.0905, "step": 4549 }, { "epoch": 1.0352673492605233, "grad_norm": 1.6750893821513704, "learning_rate": 1.1224257496790756e-06, "loss": 0.1614, "step": 4550 }, { "epoch": 1.035494880546075, "grad_norm": 1.8124862022379826, "learning_rate": 1.122371646583255e-06, "loss": 0.1094, "step": 4551 }, { "epoch": 1.0357224118316268, "grad_norm": 1.1789521290469518, "learning_rate": 1.1223175333220688e-06, "loss": 0.0544, "step": 4552 }, { "epoch": 1.0359499431171786, "grad_norm": 1.4726290884229478, "learning_rate": 1.122263409896623e-06, "loss": 0.0415, "step": 4553 }, { "epoch": 1.0361774744027303, "grad_norm": 1.5983976452101323, "learning_rate": 1.1222092763080242e-06, "loss": 0.1389, "step": 4554 }, { "epoch": 1.036405005688282, "grad_norm": 1.7064510686199956, "learning_rate": 1.1221551325573779e-06, "loss": 0.1995, "step": 4555 }, { "epoch": 1.0366325369738338, "grad_norm": 1.8831480696901932, "learning_rate": 1.1221009786457914e-06, "loss": 0.1159, "step": 4556 }, { "epoch": 1.0368600682593856, "grad_norm": 1.8251977038998766, "learning_rate": 1.1220468145743713e-06, "loss": 0.0593, "step": 4557 }, { "epoch": 1.0370875995449373, "grad_norm": 2.250994931720387, "learning_rate": 1.1219926403442247e-06, "loss": 0.1241, "step": 4558 }, { "epoch": 1.037315130830489, "grad_norm": 2.149745732751104, "learning_rate": 1.1219384559564587e-06, "loss": 0.0641, "step": 4559 }, { "epoch": 1.0375426621160408, "grad_norm": 1.481812364607165, "learning_rate": 1.1218842614121806e-06, "loss": 0.1053, "step": 4560 }, { "epoch": 1.0377701934015928, "grad_norm": 3.1982872252107017, "learning_rate": 1.1218300567124983e-06, "loss": 0.0885, "step": 4561 }, { "epoch": 1.0379977246871446, "grad_norm": 1.5207099396961141, "learning_rate": 1.1217758418585195e-06, "loss": 0.1485, "step": 4562 }, { "epoch": 1.0382252559726963, "grad_norm": 1.357320820979069, "learning_rate": 1.1217216168513522e-06, "loss": 0.0863, "step": 4563 }, { "epoch": 1.038452787258248, "grad_norm": 1.7245587056307685, "learning_rate": 1.1216673816921048e-06, "loss": 0.1392, "step": 4564 }, { "epoch": 1.0386803185437998, "grad_norm": 1.3388736695631331, "learning_rate": 1.1216131363818859e-06, "loss": 0.0712, "step": 4565 }, { "epoch": 1.0389078498293516, "grad_norm": 1.6991747662454542, "learning_rate": 1.1215588809218038e-06, "loss": 0.1135, "step": 4566 }, { "epoch": 1.0391353811149033, "grad_norm": 1.7543912071841141, "learning_rate": 1.1215046153129678e-06, "loss": 0.0656, "step": 4567 }, { "epoch": 1.039362912400455, "grad_norm": 1.3584849508462316, "learning_rate": 1.1214503395564866e-06, "loss": 0.0536, "step": 4568 }, { "epoch": 1.0395904436860068, "grad_norm": 0.9240388411832862, "learning_rate": 1.1213960536534698e-06, "loss": 0.0332, "step": 4569 }, { "epoch": 1.0398179749715586, "grad_norm": 1.6546290657725133, "learning_rate": 1.1213417576050267e-06, "loss": 0.1005, "step": 4570 }, { "epoch": 1.0400455062571103, "grad_norm": 1.3885969596375243, "learning_rate": 1.1212874514122669e-06, "loss": 0.1047, "step": 4571 }, { "epoch": 1.040273037542662, "grad_norm": 2.017166875593496, "learning_rate": 1.1212331350763007e-06, "loss": 0.1456, "step": 4572 }, { "epoch": 1.0405005688282138, "grad_norm": 1.444948131842632, "learning_rate": 1.1211788085982381e-06, "loss": 0.0579, "step": 4573 }, { "epoch": 1.0407281001137656, "grad_norm": 2.406951373648501, "learning_rate": 1.1211244719791892e-06, "loss": 0.1583, "step": 4574 }, { "epoch": 1.0409556313993173, "grad_norm": 2.197884382980465, "learning_rate": 1.1210701252202647e-06, "loss": 0.1243, "step": 4575 }, { "epoch": 1.041183162684869, "grad_norm": 1.4181821249909716, "learning_rate": 1.1210157683225753e-06, "loss": 0.0774, "step": 4576 }, { "epoch": 1.0414106939704209, "grad_norm": 1.6354137634346002, "learning_rate": 1.1209614012872323e-06, "loss": 0.0879, "step": 4577 }, { "epoch": 1.0416382252559726, "grad_norm": 1.0303262963114748, "learning_rate": 1.1209070241153462e-06, "loss": 0.104, "step": 4578 }, { "epoch": 1.0418657565415244, "grad_norm": 2.1044621651764723, "learning_rate": 1.1208526368080288e-06, "loss": 0.1179, "step": 4579 }, { "epoch": 1.0420932878270763, "grad_norm": 1.2487244960480326, "learning_rate": 1.120798239366392e-06, "loss": 0.0926, "step": 4580 }, { "epoch": 1.042320819112628, "grad_norm": 1.224762304659932, "learning_rate": 1.1207438317915468e-06, "loss": 0.0584, "step": 4581 }, { "epoch": 1.0425483503981798, "grad_norm": 1.4128646708017816, "learning_rate": 1.1206894140846055e-06, "loss": 0.0498, "step": 4582 }, { "epoch": 1.0427758816837316, "grad_norm": 2.6167846385891567, "learning_rate": 1.1206349862466807e-06, "loss": 0.1441, "step": 4583 }, { "epoch": 1.0430034129692833, "grad_norm": 1.3737857439282581, "learning_rate": 1.1205805482788846e-06, "loss": 0.0846, "step": 4584 }, { "epoch": 1.043230944254835, "grad_norm": 1.4524682790222339, "learning_rate": 1.1205261001823293e-06, "loss": 0.1068, "step": 4585 }, { "epoch": 1.0434584755403868, "grad_norm": 1.4167951979507394, "learning_rate": 1.1204716419581281e-06, "loss": 0.0669, "step": 4586 }, { "epoch": 1.0436860068259386, "grad_norm": 1.9626607976722603, "learning_rate": 1.1204171736073942e-06, "loss": 0.0596, "step": 4587 }, { "epoch": 1.0439135381114903, "grad_norm": 0.965468994366607, "learning_rate": 1.1203626951312405e-06, "loss": 0.0621, "step": 4588 }, { "epoch": 1.044141069397042, "grad_norm": 1.0583807127720506, "learning_rate": 1.1203082065307805e-06, "loss": 0.0468, "step": 4589 }, { "epoch": 1.0443686006825939, "grad_norm": 1.353491198352836, "learning_rate": 1.1202537078071277e-06, "loss": 0.0972, "step": 4590 }, { "epoch": 1.0445961319681456, "grad_norm": 2.135114467092782, "learning_rate": 1.1201991989613963e-06, "loss": 0.0813, "step": 4591 }, { "epoch": 1.0448236632536974, "grad_norm": 1.7028217782174653, "learning_rate": 1.1201446799947003e-06, "loss": 0.0968, "step": 4592 }, { "epoch": 1.045051194539249, "grad_norm": 1.1670795075657887, "learning_rate": 1.1200901509081537e-06, "loss": 0.0451, "step": 4593 }, { "epoch": 1.0452787258248009, "grad_norm": 0.9396890176885602, "learning_rate": 1.120035611702871e-06, "loss": 0.0931, "step": 4594 }, { "epoch": 1.0455062571103526, "grad_norm": 1.7193114496757371, "learning_rate": 1.1199810623799673e-06, "loss": 0.1, "step": 4595 }, { "epoch": 1.0457337883959044, "grad_norm": 1.6764946052943284, "learning_rate": 1.119926502940557e-06, "loss": 0.0632, "step": 4596 }, { "epoch": 1.0459613196814561, "grad_norm": 1.7852800076694235, "learning_rate": 1.1198719333857555e-06, "loss": 0.0833, "step": 4597 }, { "epoch": 1.0461888509670079, "grad_norm": 2.3234885020060094, "learning_rate": 1.119817353716678e-06, "loss": 0.0766, "step": 4598 }, { "epoch": 1.0464163822525596, "grad_norm": 1.4797020097779663, "learning_rate": 1.11976276393444e-06, "loss": 0.055, "step": 4599 }, { "epoch": 1.0466439135381116, "grad_norm": 1.634600829488423, "learning_rate": 1.1197081640401572e-06, "loss": 0.1146, "step": 4600 }, { "epoch": 1.0468714448236633, "grad_norm": 1.8746220464401466, "learning_rate": 1.1196535540349453e-06, "loss": 0.1229, "step": 4601 }, { "epoch": 1.047098976109215, "grad_norm": 2.3559999951268624, "learning_rate": 1.119598933919921e-06, "loss": 0.1277, "step": 4602 }, { "epoch": 1.0473265073947668, "grad_norm": 1.0083505534534876, "learning_rate": 1.1195443036962002e-06, "loss": 0.067, "step": 4603 }, { "epoch": 1.0475540386803186, "grad_norm": 0.8680772321300652, "learning_rate": 1.1194896633648996e-06, "loss": 0.0546, "step": 4604 }, { "epoch": 1.0477815699658704, "grad_norm": 2.427145418238581, "learning_rate": 1.1194350129271358e-06, "loss": 0.1341, "step": 4605 }, { "epoch": 1.048009101251422, "grad_norm": 2.3008553524730933, "learning_rate": 1.119380352384026e-06, "loss": 0.1438, "step": 4606 }, { "epoch": 1.0482366325369739, "grad_norm": 2.2562366914461376, "learning_rate": 1.1193256817366871e-06, "loss": 0.0817, "step": 4607 }, { "epoch": 1.0484641638225256, "grad_norm": 2.020216249876879, "learning_rate": 1.1192710009862365e-06, "loss": 0.0862, "step": 4608 }, { "epoch": 1.0486916951080774, "grad_norm": 1.2123623538229429, "learning_rate": 1.1192163101337921e-06, "loss": 0.0589, "step": 4609 }, { "epoch": 1.0489192263936291, "grad_norm": 0.7241673359429386, "learning_rate": 1.1191616091804712e-06, "loss": 0.0505, "step": 4610 }, { "epoch": 1.0491467576791809, "grad_norm": 1.552740662211025, "learning_rate": 1.1191068981273919e-06, "loss": 0.09, "step": 4611 }, { "epoch": 1.0493742889647326, "grad_norm": 1.036959423467112, "learning_rate": 1.1190521769756729e-06, "loss": 0.0897, "step": 4612 }, { "epoch": 1.0496018202502844, "grad_norm": 1.3205129521300534, "learning_rate": 1.118997445726432e-06, "loss": 0.1081, "step": 4613 }, { "epoch": 1.0498293515358361, "grad_norm": 2.0114633754619655, "learning_rate": 1.118942704380788e-06, "loss": 0.1166, "step": 4614 }, { "epoch": 1.0500568828213879, "grad_norm": 2.426555596440509, "learning_rate": 1.11888795293986e-06, "loss": 0.1125, "step": 4615 }, { "epoch": 1.0502844141069396, "grad_norm": 1.8060961422070485, "learning_rate": 1.1188331914047666e-06, "loss": 0.0715, "step": 4616 }, { "epoch": 1.0505119453924914, "grad_norm": 1.7144346832473707, "learning_rate": 1.1187784197766269e-06, "loss": 0.0914, "step": 4617 }, { "epoch": 1.0507394766780431, "grad_norm": 1.3408767090161975, "learning_rate": 1.1187236380565608e-06, "loss": 0.1256, "step": 4618 }, { "epoch": 1.050967007963595, "grad_norm": 1.5665147033583955, "learning_rate": 1.1186688462456879e-06, "loss": 0.0745, "step": 4619 }, { "epoch": 1.0511945392491469, "grad_norm": 1.3016358691203176, "learning_rate": 1.118614044345128e-06, "loss": 0.0716, "step": 4620 }, { "epoch": 1.0514220705346986, "grad_norm": 2.045746614166097, "learning_rate": 1.1185592323560006e-06, "loss": 0.1064, "step": 4621 }, { "epoch": 1.0516496018202504, "grad_norm": 1.7391658273056083, "learning_rate": 1.1185044102794267e-06, "loss": 0.1341, "step": 4622 }, { "epoch": 1.051877133105802, "grad_norm": 2.219721072354361, "learning_rate": 1.1184495781165263e-06, "loss": 0.133, "step": 4623 }, { "epoch": 1.0521046643913539, "grad_norm": 3.8225784468179542, "learning_rate": 1.1183947358684203e-06, "loss": 0.0639, "step": 4624 }, { "epoch": 1.0523321956769056, "grad_norm": 1.2881445992416602, "learning_rate": 1.1183398835362298e-06, "loss": 0.0725, "step": 4625 }, { "epoch": 1.0525597269624574, "grad_norm": 2.304920176139941, "learning_rate": 1.1182850211210752e-06, "loss": 0.0916, "step": 4626 }, { "epoch": 1.0527872582480091, "grad_norm": 1.837079698598604, "learning_rate": 1.1182301486240782e-06, "loss": 0.1102, "step": 4627 }, { "epoch": 1.0530147895335609, "grad_norm": 1.8399238573897383, "learning_rate": 1.1181752660463604e-06, "loss": 0.0992, "step": 4628 }, { "epoch": 1.0532423208191126, "grad_norm": 1.1035148907341084, "learning_rate": 1.1181203733890433e-06, "loss": 0.0705, "step": 4629 }, { "epoch": 1.0534698521046644, "grad_norm": 1.284026604785757, "learning_rate": 1.118065470653249e-06, "loss": 0.1644, "step": 4630 }, { "epoch": 1.0536973833902161, "grad_norm": 1.390049950226029, "learning_rate": 1.1180105578400993e-06, "loss": 0.1564, "step": 4631 }, { "epoch": 1.0539249146757679, "grad_norm": 2.007397098246824, "learning_rate": 1.117955634950717e-06, "loss": 0.1507, "step": 4632 }, { "epoch": 1.0541524459613196, "grad_norm": 2.3767977233264457, "learning_rate": 1.117900701986224e-06, "loss": 0.2561, "step": 4633 }, { "epoch": 1.0543799772468714, "grad_norm": 1.4294941525905513, "learning_rate": 1.1178457589477434e-06, "loss": 0.0564, "step": 4634 }, { "epoch": 1.0546075085324231, "grad_norm": 1.5732365617118949, "learning_rate": 1.1177908058363984e-06, "loss": 0.0443, "step": 4635 }, { "epoch": 1.0548350398179749, "grad_norm": 2.193495953897933, "learning_rate": 1.1177358426533115e-06, "loss": 0.1299, "step": 4636 }, { "epoch": 1.0550625711035266, "grad_norm": 2.2874034231596765, "learning_rate": 1.1176808693996067e-06, "loss": 0.1652, "step": 4637 }, { "epoch": 1.0552901023890784, "grad_norm": 1.9529291419382124, "learning_rate": 1.117625886076407e-06, "loss": 0.0731, "step": 4638 }, { "epoch": 1.0555176336746304, "grad_norm": 2.759454689334842, "learning_rate": 1.1175708926848363e-06, "loss": 0.0728, "step": 4639 }, { "epoch": 1.0557451649601821, "grad_norm": 1.5338896104841968, "learning_rate": 1.1175158892260187e-06, "loss": 0.1026, "step": 4640 }, { "epoch": 1.0559726962457339, "grad_norm": 1.4378649009540245, "learning_rate": 1.1174608757010785e-06, "loss": 0.0894, "step": 4641 }, { "epoch": 1.0562002275312856, "grad_norm": 1.5134147490681178, "learning_rate": 1.1174058521111398e-06, "loss": 0.0924, "step": 4642 }, { "epoch": 1.0564277588168374, "grad_norm": 1.917950365533836, "learning_rate": 1.1173508184573273e-06, "loss": 0.0958, "step": 4643 }, { "epoch": 1.0566552901023891, "grad_norm": 1.3789830119471285, "learning_rate": 1.1172957747407657e-06, "loss": 0.0816, "step": 4644 }, { "epoch": 1.0568828213879409, "grad_norm": 1.2790394403403582, "learning_rate": 1.11724072096258e-06, "loss": 0.1122, "step": 4645 }, { "epoch": 1.0571103526734926, "grad_norm": 1.0135911519508813, "learning_rate": 1.1171856571238958e-06, "loss": 0.0646, "step": 4646 }, { "epoch": 1.0573378839590444, "grad_norm": 1.8567305234547644, "learning_rate": 1.1171305832258378e-06, "loss": 0.1032, "step": 4647 }, { "epoch": 1.0575654152445961, "grad_norm": 2.4359360899436098, "learning_rate": 1.117075499269532e-06, "loss": 0.0981, "step": 4648 }, { "epoch": 1.0577929465301479, "grad_norm": 1.5734015373702166, "learning_rate": 1.1170204052561045e-06, "loss": 0.0963, "step": 4649 }, { "epoch": 1.0580204778156996, "grad_norm": 1.515846702271059, "learning_rate": 1.1169653011866806e-06, "loss": 0.1188, "step": 4650 }, { "epoch": 1.0582480091012514, "grad_norm": 1.4229982355792368, "learning_rate": 1.1169101870623872e-06, "loss": 0.1127, "step": 4651 }, { "epoch": 1.0584755403868031, "grad_norm": 1.6554966357036334, "learning_rate": 1.1168550628843506e-06, "loss": 0.0789, "step": 4652 }, { "epoch": 1.058703071672355, "grad_norm": 1.5192728010457008, "learning_rate": 1.116799928653697e-06, "loss": 0.1155, "step": 4653 }, { "epoch": 1.0589306029579066, "grad_norm": 2.2933314149526898, "learning_rate": 1.1167447843715536e-06, "loss": 0.1181, "step": 4654 }, { "epoch": 1.0591581342434584, "grad_norm": 1.2625384296045075, "learning_rate": 1.1166896300390475e-06, "loss": 0.0763, "step": 4655 }, { "epoch": 1.0593856655290101, "grad_norm": 1.2148579431146604, "learning_rate": 1.1166344656573058e-06, "loss": 0.0759, "step": 4656 }, { "epoch": 1.059613196814562, "grad_norm": 1.2800038842620556, "learning_rate": 1.116579291227456e-06, "loss": 0.0766, "step": 4657 }, { "epoch": 1.0598407281001139, "grad_norm": 1.8062981732112615, "learning_rate": 1.1165241067506258e-06, "loss": 0.1418, "step": 4658 }, { "epoch": 1.0600682593856656, "grad_norm": 1.4847559035451179, "learning_rate": 1.116468912227943e-06, "loss": 0.1106, "step": 4659 }, { "epoch": 1.0602957906712174, "grad_norm": 1.8952358982035167, "learning_rate": 1.1164137076605359e-06, "loss": 0.0678, "step": 4660 }, { "epoch": 1.0605233219567691, "grad_norm": 1.4381274024858606, "learning_rate": 1.1163584930495323e-06, "loss": 0.1189, "step": 4661 }, { "epoch": 1.0607508532423209, "grad_norm": 2.7251709950763447, "learning_rate": 1.1163032683960612e-06, "loss": 0.1013, "step": 4662 }, { "epoch": 1.0609783845278726, "grad_norm": 1.0847987069830758, "learning_rate": 1.116248033701251e-06, "loss": 0.0759, "step": 4663 }, { "epoch": 1.0612059158134244, "grad_norm": 1.8578929914423556, "learning_rate": 1.1161927889662307e-06, "loss": 0.0724, "step": 4664 }, { "epoch": 1.0614334470989761, "grad_norm": 1.1291431011699526, "learning_rate": 1.1161375341921293e-06, "loss": 0.0809, "step": 4665 }, { "epoch": 1.0616609783845279, "grad_norm": 1.9028105528288284, "learning_rate": 1.1160822693800761e-06, "loss": 0.0788, "step": 4666 }, { "epoch": 1.0618885096700796, "grad_norm": 1.5906930716104764, "learning_rate": 1.116026994531201e-06, "loss": 0.1806, "step": 4667 }, { "epoch": 1.0621160409556314, "grad_norm": 1.4806073894025749, "learning_rate": 1.1159717096466332e-06, "loss": 0.0917, "step": 4668 }, { "epoch": 1.0623435722411831, "grad_norm": 1.7555748075182174, "learning_rate": 1.1159164147275026e-06, "loss": 0.1221, "step": 4669 }, { "epoch": 1.062571103526735, "grad_norm": 1.9388103998351425, "learning_rate": 1.11586110977494e-06, "loss": 0.101, "step": 4670 }, { "epoch": 1.0627986348122866, "grad_norm": 2.54858320636725, "learning_rate": 1.1158057947900749e-06, "loss": 0.0826, "step": 4671 }, { "epoch": 1.0630261660978384, "grad_norm": 1.7593201330973203, "learning_rate": 1.1157504697740384e-06, "loss": 0.0921, "step": 4672 }, { "epoch": 1.0632536973833902, "grad_norm": 1.3601408281439702, "learning_rate": 1.1156951347279612e-06, "loss": 0.0909, "step": 4673 }, { "epoch": 1.063481228668942, "grad_norm": 2.4921469575180923, "learning_rate": 1.1156397896529739e-06, "loss": 0.1152, "step": 4674 }, { "epoch": 1.0637087599544937, "grad_norm": 2.61680060550728, "learning_rate": 1.1155844345502079e-06, "loss": 0.1493, "step": 4675 }, { "epoch": 1.0639362912400454, "grad_norm": 1.2304215041389976, "learning_rate": 1.1155290694207946e-06, "loss": 0.0948, "step": 4676 }, { "epoch": 1.0641638225255972, "grad_norm": 1.499666016464916, "learning_rate": 1.1154736942658655e-06, "loss": 0.0505, "step": 4677 }, { "epoch": 1.0643913538111491, "grad_norm": 1.5895614920307246, "learning_rate": 1.1154183090865523e-06, "loss": 0.102, "step": 4678 }, { "epoch": 1.0646188850967009, "grad_norm": 2.115605875809715, "learning_rate": 1.1153629138839869e-06, "loss": 0.1225, "step": 4679 }, { "epoch": 1.0648464163822526, "grad_norm": 2.3561395200123747, "learning_rate": 1.115307508659302e-06, "loss": 0.1031, "step": 4680 }, { "epoch": 1.0650739476678044, "grad_norm": 1.1554650460211398, "learning_rate": 1.115252093413629e-06, "loss": 0.1216, "step": 4681 }, { "epoch": 1.0653014789533561, "grad_norm": 1.4823986847762305, "learning_rate": 1.1151966681481013e-06, "loss": 0.0594, "step": 4682 }, { "epoch": 1.065529010238908, "grad_norm": 1.6251842447572193, "learning_rate": 1.1151412328638516e-06, "loss": 0.0907, "step": 4683 }, { "epoch": 1.0657565415244596, "grad_norm": 1.1507819894156535, "learning_rate": 1.1150857875620129e-06, "loss": 0.0472, "step": 4684 }, { "epoch": 1.0659840728100114, "grad_norm": 1.3805150312721923, "learning_rate": 1.1150303322437179e-06, "loss": 0.0771, "step": 4685 }, { "epoch": 1.0662116040955631, "grad_norm": 1.358932906756745, "learning_rate": 1.1149748669101005e-06, "loss": 0.0513, "step": 4686 }, { "epoch": 1.066439135381115, "grad_norm": 5.71481532720155, "learning_rate": 1.1149193915622942e-06, "loss": 0.1539, "step": 4687 }, { "epoch": 1.0666666666666667, "grad_norm": 2.6297199766031865, "learning_rate": 1.1148639062014325e-06, "loss": 0.1656, "step": 4688 }, { "epoch": 1.0668941979522184, "grad_norm": 1.9080409573155523, "learning_rate": 1.11480841082865e-06, "loss": 0.0762, "step": 4689 }, { "epoch": 1.0671217292377702, "grad_norm": 1.7438819672239148, "learning_rate": 1.1147529054450805e-06, "loss": 0.0885, "step": 4690 }, { "epoch": 1.067349260523322, "grad_norm": 2.0574645907461955, "learning_rate": 1.1146973900518587e-06, "loss": 0.1442, "step": 4691 }, { "epoch": 1.0675767918088737, "grad_norm": 2.6252395753862294, "learning_rate": 1.1146418646501189e-06, "loss": 0.1878, "step": 4692 }, { "epoch": 1.0678043230944254, "grad_norm": 1.0913326597489377, "learning_rate": 1.114586329240996e-06, "loss": 0.0378, "step": 4693 }, { "epoch": 1.0680318543799772, "grad_norm": 1.9442927380841992, "learning_rate": 1.1145307838256255e-06, "loss": 0.0636, "step": 4694 }, { "epoch": 1.068259385665529, "grad_norm": 1.2746222339178395, "learning_rate": 1.1144752284051422e-06, "loss": 0.0999, "step": 4695 }, { "epoch": 1.068486916951081, "grad_norm": 1.5469092535365685, "learning_rate": 1.1144196629806817e-06, "loss": 0.1484, "step": 4696 }, { "epoch": 1.0687144482366326, "grad_norm": 1.8149614702646155, "learning_rate": 1.1143640875533795e-06, "loss": 0.0656, "step": 4697 }, { "epoch": 1.0689419795221844, "grad_norm": 1.952760861877504, "learning_rate": 1.1143085021243717e-06, "loss": 0.116, "step": 4698 }, { "epoch": 1.0691695108077361, "grad_norm": 1.5095899894504656, "learning_rate": 1.1142529066947941e-06, "loss": 0.1141, "step": 4699 }, { "epoch": 1.069397042093288, "grad_norm": 1.2954537974569234, "learning_rate": 1.1141973012657834e-06, "loss": 0.0521, "step": 4700 }, { "epoch": 1.0696245733788396, "grad_norm": 1.8070083827165602, "learning_rate": 1.1141416858384753e-06, "loss": 0.1679, "step": 4701 }, { "epoch": 1.0698521046643914, "grad_norm": 1.1006465944591577, "learning_rate": 1.1140860604140076e-06, "loss": 0.1058, "step": 4702 }, { "epoch": 1.0700796359499432, "grad_norm": 1.1929894713879348, "learning_rate": 1.114030424993516e-06, "loss": 0.0839, "step": 4703 }, { "epoch": 1.070307167235495, "grad_norm": 1.8098296886220957, "learning_rate": 1.1139747795781382e-06, "loss": 0.0751, "step": 4704 }, { "epoch": 1.0705346985210467, "grad_norm": 1.3608491383009365, "learning_rate": 1.1139191241690116e-06, "loss": 0.0636, "step": 4705 }, { "epoch": 1.0707622298065984, "grad_norm": 0.7968044916321982, "learning_rate": 1.1138634587672734e-06, "loss": 0.0196, "step": 4706 }, { "epoch": 1.0709897610921502, "grad_norm": 1.1618137365747236, "learning_rate": 1.1138077833740616e-06, "loss": 0.0543, "step": 4707 }, { "epoch": 1.071217292377702, "grad_norm": 1.6636727239611144, "learning_rate": 1.1137520979905138e-06, "loss": 0.078, "step": 4708 }, { "epoch": 1.0714448236632537, "grad_norm": 1.3224939982871424, "learning_rate": 1.1136964026177683e-06, "loss": 0.081, "step": 4709 }, { "epoch": 1.0716723549488054, "grad_norm": 2.102064361920128, "learning_rate": 1.113640697256963e-06, "loss": 0.0756, "step": 4710 }, { "epoch": 1.0718998862343572, "grad_norm": 1.5869491881927147, "learning_rate": 1.113584981909237e-06, "loss": 0.0977, "step": 4711 }, { "epoch": 1.072127417519909, "grad_norm": 2.528352862438852, "learning_rate": 1.1135292565757288e-06, "loss": 0.0922, "step": 4712 }, { "epoch": 1.0723549488054607, "grad_norm": 2.1890348604827152, "learning_rate": 1.1134735212575772e-06, "loss": 0.0748, "step": 4713 }, { "epoch": 1.0725824800910124, "grad_norm": 2.721031491992015, "learning_rate": 1.1134177759559216e-06, "loss": 0.166, "step": 4714 }, { "epoch": 1.0728100113765642, "grad_norm": 1.2699457735676007, "learning_rate": 1.1133620206719011e-06, "loss": 0.1368, "step": 4715 }, { "epoch": 1.073037542662116, "grad_norm": 1.4567637517882654, "learning_rate": 1.1133062554066551e-06, "loss": 0.0957, "step": 4716 }, { "epoch": 1.073265073947668, "grad_norm": 1.1981840138040833, "learning_rate": 1.1132504801613237e-06, "loss": 0.0996, "step": 4717 }, { "epoch": 1.0734926052332197, "grad_norm": 2.5605965751924016, "learning_rate": 1.1131946949370467e-06, "loss": 0.1288, "step": 4718 }, { "epoch": 1.0737201365187714, "grad_norm": 2.2199917939261424, "learning_rate": 1.113138899734964e-06, "loss": 0.1176, "step": 4719 }, { "epoch": 1.0739476678043232, "grad_norm": 1.8349868522475592, "learning_rate": 1.1130830945562165e-06, "loss": 0.1546, "step": 4720 }, { "epoch": 1.074175199089875, "grad_norm": 2.835134050444113, "learning_rate": 1.1130272794019442e-06, "loss": 0.1291, "step": 4721 }, { "epoch": 1.0744027303754267, "grad_norm": 1.009967448557906, "learning_rate": 1.1129714542732882e-06, "loss": 0.0669, "step": 4722 }, { "epoch": 1.0746302616609784, "grad_norm": 1.529305478280692, "learning_rate": 1.1129156191713893e-06, "loss": 0.0727, "step": 4723 }, { "epoch": 1.0748577929465302, "grad_norm": 1.3960850231566015, "learning_rate": 1.1128597740973886e-06, "loss": 0.1294, "step": 4724 }, { "epoch": 1.075085324232082, "grad_norm": 1.5662589297405252, "learning_rate": 1.1128039190524278e-06, "loss": 0.0441, "step": 4725 }, { "epoch": 1.0753128555176337, "grad_norm": 2.147391485773349, "learning_rate": 1.112748054037648e-06, "loss": 0.0715, "step": 4726 }, { "epoch": 1.0755403868031854, "grad_norm": 1.9537494356887783, "learning_rate": 1.1126921790541915e-06, "loss": 0.1266, "step": 4727 }, { "epoch": 1.0757679180887372, "grad_norm": 1.807870084752669, "learning_rate": 1.1126362941032e-06, "loss": 0.0663, "step": 4728 }, { "epoch": 1.075995449374289, "grad_norm": 1.2731792994483404, "learning_rate": 1.1125803991858156e-06, "loss": 0.0777, "step": 4729 }, { "epoch": 1.0762229806598407, "grad_norm": 1.9875691409773595, "learning_rate": 1.1125244943031809e-06, "loss": 0.0595, "step": 4730 }, { "epoch": 1.0764505119453924, "grad_norm": 1.3445874567355482, "learning_rate": 1.1124685794564383e-06, "loss": 0.1031, "step": 4731 }, { "epoch": 1.0766780432309442, "grad_norm": 1.2246092409806042, "learning_rate": 1.1124126546467307e-06, "loss": 0.0939, "step": 4732 }, { "epoch": 1.076905574516496, "grad_norm": 1.431137650749334, "learning_rate": 1.1123567198752012e-06, "loss": 0.1475, "step": 4733 }, { "epoch": 1.0771331058020477, "grad_norm": 1.8994402495648997, "learning_rate": 1.1123007751429928e-06, "loss": 0.1429, "step": 4734 }, { "epoch": 1.0773606370875997, "grad_norm": 1.4085164315858043, "learning_rate": 1.1122448204512493e-06, "loss": 0.1428, "step": 4735 }, { "epoch": 1.0775881683731514, "grad_norm": 1.2999518219276889, "learning_rate": 1.1121888558011136e-06, "loss": 0.0662, "step": 4736 }, { "epoch": 1.0778156996587032, "grad_norm": 1.7946331927576185, "learning_rate": 1.11213288119373e-06, "loss": 0.0765, "step": 4737 }, { "epoch": 1.078043230944255, "grad_norm": 1.6859375154233005, "learning_rate": 1.1120768966302424e-06, "loss": 0.0686, "step": 4738 }, { "epoch": 1.0782707622298067, "grad_norm": 0.9077920170925504, "learning_rate": 1.1120209021117953e-06, "loss": 0.0337, "step": 4739 }, { "epoch": 1.0784982935153584, "grad_norm": 1.5316954108177494, "learning_rate": 1.111964897639533e-06, "loss": 0.0663, "step": 4740 }, { "epoch": 1.0787258248009102, "grad_norm": 1.4238197221056483, "learning_rate": 1.1119088832145999e-06, "loss": 0.0609, "step": 4741 }, { "epoch": 1.078953356086462, "grad_norm": 1.8622699266174956, "learning_rate": 1.1118528588381408e-06, "loss": 0.0967, "step": 4742 }, { "epoch": 1.0791808873720137, "grad_norm": 2.132771649775128, "learning_rate": 1.111796824511301e-06, "loss": 0.0949, "step": 4743 }, { "epoch": 1.0794084186575654, "grad_norm": 1.4389905550232829, "learning_rate": 1.1117407802352257e-06, "loss": 0.0592, "step": 4744 }, { "epoch": 1.0796359499431172, "grad_norm": 2.313842970080908, "learning_rate": 1.11168472601106e-06, "loss": 0.11, "step": 4745 }, { "epoch": 1.079863481228669, "grad_norm": 1.189342164666975, "learning_rate": 1.1116286618399502e-06, "loss": 0.1139, "step": 4746 }, { "epoch": 1.0800910125142207, "grad_norm": 1.5878813013229367, "learning_rate": 1.1115725877230416e-06, "loss": 0.0941, "step": 4747 }, { "epoch": 1.0803185437997724, "grad_norm": 1.5708323312118704, "learning_rate": 1.1115165036614803e-06, "loss": 0.0805, "step": 4748 }, { "epoch": 1.0805460750853242, "grad_norm": 1.7412513143173347, "learning_rate": 1.1114604096564128e-06, "loss": 0.0927, "step": 4749 }, { "epoch": 1.080773606370876, "grad_norm": 2.1069394060272932, "learning_rate": 1.1114043057089855e-06, "loss": 0.0689, "step": 4750 }, { "epoch": 1.0810011376564277, "grad_norm": 1.584268007677841, "learning_rate": 1.1113481918203447e-06, "loss": 0.1189, "step": 4751 }, { "epoch": 1.0812286689419794, "grad_norm": 1.6608102806453529, "learning_rate": 1.111292067991638e-06, "loss": 0.0689, "step": 4752 }, { "epoch": 1.0814562002275312, "grad_norm": 1.5725937241650605, "learning_rate": 1.1112359342240118e-06, "loss": 0.0956, "step": 4753 }, { "epoch": 1.081683731513083, "grad_norm": 3.407327630763881, "learning_rate": 1.1111797905186137e-06, "loss": 0.1318, "step": 4754 }, { "epoch": 1.0819112627986347, "grad_norm": 2.3742849467462017, "learning_rate": 1.111123636876591e-06, "loss": 0.1672, "step": 4755 }, { "epoch": 1.0821387940841867, "grad_norm": 2.436154151939714, "learning_rate": 1.1110674732990915e-06, "loss": 0.0952, "step": 4756 }, { "epoch": 1.0823663253697384, "grad_norm": 1.9025140854878257, "learning_rate": 1.1110112997872627e-06, "loss": 0.0782, "step": 4757 }, { "epoch": 1.0825938566552902, "grad_norm": 1.8868683604933614, "learning_rate": 1.1109551163422535e-06, "loss": 0.0948, "step": 4758 }, { "epoch": 1.082821387940842, "grad_norm": 1.3681510858841002, "learning_rate": 1.1108989229652115e-06, "loss": 0.0732, "step": 4759 }, { "epoch": 1.0830489192263937, "grad_norm": 2.282564072440466, "learning_rate": 1.1108427196572854e-06, "loss": 0.0941, "step": 4760 }, { "epoch": 1.0832764505119454, "grad_norm": 1.185382354142743, "learning_rate": 1.110786506419624e-06, "loss": 0.1061, "step": 4761 }, { "epoch": 1.0835039817974972, "grad_norm": 1.2750556200490737, "learning_rate": 1.110730283253376e-06, "loss": 0.0983, "step": 4762 }, { "epoch": 1.083731513083049, "grad_norm": 0.9432019459962149, "learning_rate": 1.1106740501596904e-06, "loss": 0.0719, "step": 4763 }, { "epoch": 1.0839590443686007, "grad_norm": 2.399313506752063, "learning_rate": 1.110617807139717e-06, "loss": 0.1592, "step": 4764 }, { "epoch": 1.0841865756541524, "grad_norm": 2.0585624525235673, "learning_rate": 1.1105615541946049e-06, "loss": 0.1019, "step": 4765 }, { "epoch": 1.0844141069397042, "grad_norm": 1.5509608506343646, "learning_rate": 1.1105052913255038e-06, "loss": 0.063, "step": 4766 }, { "epoch": 1.084641638225256, "grad_norm": 2.3875790847090896, "learning_rate": 1.1104490185335638e-06, "loss": 0.0617, "step": 4767 }, { "epoch": 1.0848691695108077, "grad_norm": 1.5526617818466677, "learning_rate": 1.1103927358199349e-06, "loss": 0.0636, "step": 4768 }, { "epoch": 1.0850967007963594, "grad_norm": 2.070040492410767, "learning_rate": 1.1103364431857672e-06, "loss": 0.1196, "step": 4769 }, { "epoch": 1.0853242320819112, "grad_norm": 0.9037860659580849, "learning_rate": 1.1102801406322118e-06, "loss": 0.0788, "step": 4770 }, { "epoch": 1.085551763367463, "grad_norm": 2.51446051197986, "learning_rate": 1.110223828160419e-06, "loss": 0.1056, "step": 4771 }, { "epoch": 1.0857792946530147, "grad_norm": 1.5056718047772528, "learning_rate": 1.1101675057715396e-06, "loss": 0.0553, "step": 4772 }, { "epoch": 1.0860068259385665, "grad_norm": 1.3450576783641792, "learning_rate": 1.110111173466725e-06, "loss": 0.114, "step": 4773 }, { "epoch": 1.0862343572241184, "grad_norm": 2.3282598728110564, "learning_rate": 1.1100548312471266e-06, "loss": 0.1398, "step": 4774 }, { "epoch": 1.0864618885096702, "grad_norm": 1.5835969433641823, "learning_rate": 1.1099984791138957e-06, "loss": 0.0563, "step": 4775 }, { "epoch": 1.086689419795222, "grad_norm": 1.7042160190222275, "learning_rate": 1.109942117068184e-06, "loss": 0.0595, "step": 4776 }, { "epoch": 1.0869169510807737, "grad_norm": 1.8015845399975852, "learning_rate": 1.1098857451111437e-06, "loss": 0.1093, "step": 4777 }, { "epoch": 1.0871444823663254, "grad_norm": 1.4929033187735148, "learning_rate": 1.1098293632439267e-06, "loss": 0.0568, "step": 4778 }, { "epoch": 1.0873720136518772, "grad_norm": 1.819585409380252, "learning_rate": 1.1097729714676855e-06, "loss": 0.0784, "step": 4779 }, { "epoch": 1.087599544937429, "grad_norm": 1.6093861745924347, "learning_rate": 1.1097165697835726e-06, "loss": 0.0815, "step": 4780 }, { "epoch": 1.0878270762229807, "grad_norm": 2.3915450012148947, "learning_rate": 1.1096601581927407e-06, "loss": 0.0843, "step": 4781 }, { "epoch": 1.0880546075085324, "grad_norm": 1.9249173368001073, "learning_rate": 1.109603736696343e-06, "loss": 0.1324, "step": 4782 }, { "epoch": 1.0882821387940842, "grad_norm": 1.8896040569960282, "learning_rate": 1.1095473052955322e-06, "loss": 0.1745, "step": 4783 }, { "epoch": 1.088509670079636, "grad_norm": 1.7358772955397874, "learning_rate": 1.1094908639914617e-06, "loss": 0.0989, "step": 4784 }, { "epoch": 1.0887372013651877, "grad_norm": 1.5331795064228024, "learning_rate": 1.1094344127852855e-06, "loss": 0.1397, "step": 4785 }, { "epoch": 1.0889647326507395, "grad_norm": 1.3003409141659286, "learning_rate": 1.1093779516781571e-06, "loss": 0.0485, "step": 4786 }, { "epoch": 1.0891922639362912, "grad_norm": 1.6700571373067126, "learning_rate": 1.1093214806712305e-06, "loss": 0.097, "step": 4787 }, { "epoch": 1.089419795221843, "grad_norm": 1.9278343656449526, "learning_rate": 1.1092649997656597e-06, "loss": 0.0512, "step": 4788 }, { "epoch": 1.0896473265073947, "grad_norm": 1.577450330940266, "learning_rate": 1.1092085089625992e-06, "loss": 0.0609, "step": 4789 }, { "epoch": 1.0898748577929465, "grad_norm": 2.1296342852219263, "learning_rate": 1.1091520082632037e-06, "loss": 0.0782, "step": 4790 }, { "epoch": 1.0901023890784982, "grad_norm": 1.0783695776737507, "learning_rate": 1.1090954976686277e-06, "loss": 0.0638, "step": 4791 }, { "epoch": 1.09032992036405, "grad_norm": 2.154161769490288, "learning_rate": 1.1090389771800264e-06, "loss": 0.1817, "step": 4792 }, { "epoch": 1.0905574516496017, "grad_norm": 1.5962990513677988, "learning_rate": 1.1089824467985549e-06, "loss": 0.0733, "step": 4793 }, { "epoch": 1.0907849829351535, "grad_norm": 1.4177139267549546, "learning_rate": 1.1089259065253684e-06, "loss": 0.0906, "step": 4794 }, { "epoch": 1.0910125142207054, "grad_norm": 1.5152987959640343, "learning_rate": 1.1088693563616226e-06, "loss": 0.0775, "step": 4795 }, { "epoch": 1.0912400455062572, "grad_norm": 1.0343892482754946, "learning_rate": 1.1088127963084736e-06, "loss": 0.092, "step": 4796 }, { "epoch": 1.091467576791809, "grad_norm": 1.3749328966275285, "learning_rate": 1.108756226367077e-06, "loss": 0.0552, "step": 4797 }, { "epoch": 1.0916951080773607, "grad_norm": 1.8028526464917305, "learning_rate": 1.108699646538589e-06, "loss": 0.1074, "step": 4798 }, { "epoch": 1.0919226393629125, "grad_norm": 1.511012234308768, "learning_rate": 1.108643056824166e-06, "loss": 0.1182, "step": 4799 }, { "epoch": 1.0921501706484642, "grad_norm": 1.8866591465514666, "learning_rate": 1.108586457224965e-06, "loss": 0.0742, "step": 4800 }, { "epoch": 1.092377701934016, "grad_norm": 2.1241623003632992, "learning_rate": 1.1085298477421421e-06, "loss": 0.0651, "step": 4801 }, { "epoch": 1.0926052332195677, "grad_norm": 1.9682663221505046, "learning_rate": 1.1084732283768548e-06, "loss": 0.0589, "step": 4802 }, { "epoch": 1.0928327645051195, "grad_norm": 1.7770689808885827, "learning_rate": 1.1084165991302601e-06, "loss": 0.0624, "step": 4803 }, { "epoch": 1.0930602957906712, "grad_norm": 1.9487102696775727, "learning_rate": 1.1083599600035155e-06, "loss": 0.2643, "step": 4804 }, { "epoch": 1.093287827076223, "grad_norm": 1.3820178073566898, "learning_rate": 1.1083033109977787e-06, "loss": 0.1225, "step": 4805 }, { "epoch": 1.0935153583617747, "grad_norm": 2.6803891273727563, "learning_rate": 1.1082466521142072e-06, "loss": 0.0959, "step": 4806 }, { "epoch": 1.0937428896473265, "grad_norm": 2.7679540832371603, "learning_rate": 1.1081899833539592e-06, "loss": 0.1187, "step": 4807 }, { "epoch": 1.0939704209328782, "grad_norm": 1.4189385820383673, "learning_rate": 1.1081333047181928e-06, "loss": 0.0551, "step": 4808 }, { "epoch": 1.09419795221843, "grad_norm": 2.407210728799108, "learning_rate": 1.1080766162080664e-06, "loss": 0.2183, "step": 4809 }, { "epoch": 1.0944254835039817, "grad_norm": 2.190691343555159, "learning_rate": 1.1080199178247388e-06, "loss": 0.1554, "step": 4810 }, { "epoch": 1.0946530147895335, "grad_norm": 1.9158986499607824, "learning_rate": 1.1079632095693688e-06, "loss": 0.1014, "step": 4811 }, { "epoch": 1.0948805460750852, "grad_norm": 2.1376153598636334, "learning_rate": 1.107906491443115e-06, "loss": 0.0867, "step": 4812 }, { "epoch": 1.0951080773606372, "grad_norm": 1.640434083574093, "learning_rate": 1.1078497634471373e-06, "loss": 0.119, "step": 4813 }, { "epoch": 1.095335608646189, "grad_norm": 1.953361284918329, "learning_rate": 1.1077930255825944e-06, "loss": 0.0685, "step": 4814 }, { "epoch": 1.0955631399317407, "grad_norm": 2.2145384548118496, "learning_rate": 1.1077362778506464e-06, "loss": 0.0773, "step": 4815 }, { "epoch": 1.0957906712172925, "grad_norm": 1.3681951995706059, "learning_rate": 1.107679520252453e-06, "loss": 0.0696, "step": 4816 }, { "epoch": 1.0960182025028442, "grad_norm": 1.720391413339496, "learning_rate": 1.107622752789174e-06, "loss": 0.0927, "step": 4817 }, { "epoch": 1.096245733788396, "grad_norm": 2.535960750420784, "learning_rate": 1.10756597546197e-06, "loss": 0.1101, "step": 4818 }, { "epoch": 1.0964732650739477, "grad_norm": 1.3722685641226562, "learning_rate": 1.1075091882720012e-06, "loss": 0.0326, "step": 4819 }, { "epoch": 1.0967007963594995, "grad_norm": 2.7001720504024527, "learning_rate": 1.1074523912204282e-06, "loss": 0.0871, "step": 4820 }, { "epoch": 1.0969283276450512, "grad_norm": 1.9075168583854094, "learning_rate": 1.107395584308412e-06, "loss": 0.1269, "step": 4821 }, { "epoch": 1.097155858930603, "grad_norm": 2.205540615959917, "learning_rate": 1.1073387675371134e-06, "loss": 0.1026, "step": 4822 }, { "epoch": 1.0973833902161547, "grad_norm": 1.0718686815433365, "learning_rate": 1.1072819409076937e-06, "loss": 0.0546, "step": 4823 }, { "epoch": 1.0976109215017065, "grad_norm": 1.2739063256637186, "learning_rate": 1.1072251044213146e-06, "loss": 0.1489, "step": 4824 }, { "epoch": 1.0978384527872582, "grad_norm": 1.6711917421442544, "learning_rate": 1.1071682580791375e-06, "loss": 0.1218, "step": 4825 }, { "epoch": 1.09806598407281, "grad_norm": 1.0808000148116539, "learning_rate": 1.107111401882324e-06, "loss": 0.0937, "step": 4826 }, { "epoch": 1.0982935153583617, "grad_norm": 1.3984069618118502, "learning_rate": 1.1070545358320367e-06, "loss": 0.081, "step": 4827 }, { "epoch": 1.0985210466439135, "grad_norm": 1.6817172368352458, "learning_rate": 1.1069976599294374e-06, "loss": 0.0446, "step": 4828 }, { "epoch": 1.0987485779294652, "grad_norm": 2.9953765468149873, "learning_rate": 1.1069407741756884e-06, "loss": 0.121, "step": 4829 }, { "epoch": 1.098976109215017, "grad_norm": 1.5977627847753106, "learning_rate": 1.106883878571953e-06, "loss": 0.1372, "step": 4830 }, { "epoch": 1.0992036405005687, "grad_norm": 1.473455640746034, "learning_rate": 1.1068269731193936e-06, "loss": 0.0628, "step": 4831 }, { "epoch": 1.0994311717861205, "grad_norm": 2.574413671871508, "learning_rate": 1.106770057819173e-06, "loss": 0.0773, "step": 4832 }, { "epoch": 1.0996587030716722, "grad_norm": 1.7305196200030186, "learning_rate": 1.1067131326724551e-06, "loss": 0.1119, "step": 4833 }, { "epoch": 1.0998862343572242, "grad_norm": 1.9296560592971308, "learning_rate": 1.106656197680403e-06, "loss": 0.0751, "step": 4834 }, { "epoch": 1.100113765642776, "grad_norm": 1.2661901992123141, "learning_rate": 1.10659925284418e-06, "loss": 0.1527, "step": 4835 }, { "epoch": 1.1003412969283277, "grad_norm": 1.4346695408957206, "learning_rate": 1.1065422981649506e-06, "loss": 0.0821, "step": 4836 }, { "epoch": 1.1005688282138795, "grad_norm": 1.522864316512361, "learning_rate": 1.1064853336438782e-06, "loss": 0.1456, "step": 4837 }, { "epoch": 1.1007963594994312, "grad_norm": 1.674140320495389, "learning_rate": 1.1064283592821276e-06, "loss": 0.0824, "step": 4838 }, { "epoch": 1.101023890784983, "grad_norm": 1.8199138679313875, "learning_rate": 1.106371375080863e-06, "loss": 0.0821, "step": 4839 }, { "epoch": 1.1012514220705347, "grad_norm": 1.0988975008672435, "learning_rate": 1.106314381041249e-06, "loss": 0.0747, "step": 4840 }, { "epoch": 1.1014789533560865, "grad_norm": 2.2473800940435344, "learning_rate": 1.1062573771644506e-06, "loss": 0.1135, "step": 4841 }, { "epoch": 1.1017064846416382, "grad_norm": 2.173377398488759, "learning_rate": 1.106200363451633e-06, "loss": 0.1225, "step": 4842 }, { "epoch": 1.10193401592719, "grad_norm": 1.0237409844466847, "learning_rate": 1.1061433399039608e-06, "loss": 0.0913, "step": 4843 }, { "epoch": 1.1021615472127417, "grad_norm": 1.4883802918021127, "learning_rate": 1.1060863065226002e-06, "loss": 0.0718, "step": 4844 }, { "epoch": 1.1023890784982935, "grad_norm": 2.0993117460282273, "learning_rate": 1.1060292633087167e-06, "loss": 0.1518, "step": 4845 }, { "epoch": 1.1026166097838452, "grad_norm": 2.228132250689892, "learning_rate": 1.1059722102634756e-06, "loss": 0.092, "step": 4846 }, { "epoch": 1.102844141069397, "grad_norm": 2.3453851765604954, "learning_rate": 1.1059151473880439e-06, "loss": 0.2292, "step": 4847 }, { "epoch": 1.1030716723549487, "grad_norm": 2.1780310675452053, "learning_rate": 1.105858074683587e-06, "loss": 0.0824, "step": 4848 }, { "epoch": 1.1032992036405005, "grad_norm": 2.3050276311535005, "learning_rate": 1.1058009921512717e-06, "loss": 0.0875, "step": 4849 }, { "epoch": 1.1035267349260522, "grad_norm": 1.9644576717502578, "learning_rate": 1.1057438997922648e-06, "loss": 0.0938, "step": 4850 }, { "epoch": 1.103754266211604, "grad_norm": 1.910478387408807, "learning_rate": 1.105686797607733e-06, "loss": 0.1052, "step": 4851 }, { "epoch": 1.103981797497156, "grad_norm": 1.4309370002920787, "learning_rate": 1.1056296855988432e-06, "loss": 0.0575, "step": 4852 }, { "epoch": 1.1042093287827077, "grad_norm": 1.4145434776922763, "learning_rate": 1.105572563766763e-06, "loss": 0.1068, "step": 4853 }, { "epoch": 1.1044368600682595, "grad_norm": 1.6308808817252087, "learning_rate": 1.1055154321126597e-06, "loss": 0.1075, "step": 4854 }, { "epoch": 1.1046643913538112, "grad_norm": 1.261091948999157, "learning_rate": 1.105458290637701e-06, "loss": 0.0849, "step": 4855 }, { "epoch": 1.104891922639363, "grad_norm": 1.3215074790429198, "learning_rate": 1.105401139343055e-06, "loss": 0.0793, "step": 4856 }, { "epoch": 1.1051194539249147, "grad_norm": 1.6985162376269767, "learning_rate": 1.105343978229889e-06, "loss": 0.1139, "step": 4857 }, { "epoch": 1.1053469852104665, "grad_norm": 2.510127015289047, "learning_rate": 1.1052868072993723e-06, "loss": 0.1154, "step": 4858 }, { "epoch": 1.1055745164960182, "grad_norm": 1.4859308252397974, "learning_rate": 1.1052296265526726e-06, "loss": 0.0858, "step": 4859 }, { "epoch": 1.10580204778157, "grad_norm": 1.5618239549892472, "learning_rate": 1.105172435990959e-06, "loss": 0.1463, "step": 4860 }, { "epoch": 1.1060295790671217, "grad_norm": 2.5953777991374887, "learning_rate": 1.1051152356154e-06, "loss": 0.1413, "step": 4861 }, { "epoch": 1.1062571103526735, "grad_norm": 1.7585866088824806, "learning_rate": 1.105058025427165e-06, "loss": 0.0762, "step": 4862 }, { "epoch": 1.1064846416382252, "grad_norm": 1.480281395602001, "learning_rate": 1.105000805427423e-06, "loss": 0.0773, "step": 4863 }, { "epoch": 1.106712172923777, "grad_norm": 1.0381473831114791, "learning_rate": 1.1049435756173439e-06, "loss": 0.0798, "step": 4864 }, { "epoch": 1.1069397042093287, "grad_norm": 1.1214002462532684, "learning_rate": 1.104886335998097e-06, "loss": 0.0458, "step": 4865 }, { "epoch": 1.1071672354948805, "grad_norm": 1.3717563405461803, "learning_rate": 1.104829086570852e-06, "loss": 0.0844, "step": 4866 }, { "epoch": 1.1073947667804322, "grad_norm": 1.5047887960440787, "learning_rate": 1.1047718273367794e-06, "loss": 0.0806, "step": 4867 }, { "epoch": 1.107622298065984, "grad_norm": 1.3342232875764293, "learning_rate": 1.1047145582970494e-06, "loss": 0.0361, "step": 4868 }, { "epoch": 1.1078498293515358, "grad_norm": 1.3652330753382278, "learning_rate": 1.1046572794528324e-06, "loss": 0.1085, "step": 4869 }, { "epoch": 1.1080773606370875, "grad_norm": 1.3247715704309602, "learning_rate": 1.104599990805299e-06, "loss": 0.0697, "step": 4870 }, { "epoch": 1.1083048919226393, "grad_norm": 1.8015325943851643, "learning_rate": 1.1045426923556198e-06, "loss": 0.0804, "step": 4871 }, { "epoch": 1.108532423208191, "grad_norm": 1.7563135798847476, "learning_rate": 1.1044853841049668e-06, "loss": 0.0822, "step": 4872 }, { "epoch": 1.108759954493743, "grad_norm": 0.8710308246253357, "learning_rate": 1.1044280660545103e-06, "loss": 0.0206, "step": 4873 }, { "epoch": 1.1089874857792947, "grad_norm": 2.005582754489557, "learning_rate": 1.1043707382054223e-06, "loss": 0.0684, "step": 4874 }, { "epoch": 1.1092150170648465, "grad_norm": 0.869109478461898, "learning_rate": 1.1043134005588743e-06, "loss": 0.0478, "step": 4875 }, { "epoch": 1.1094425483503982, "grad_norm": 1.298794429145746, "learning_rate": 1.1042560531160381e-06, "loss": 0.105, "step": 4876 }, { "epoch": 1.10967007963595, "grad_norm": 2.4479802256736893, "learning_rate": 1.104198695878086e-06, "loss": 0.0964, "step": 4877 }, { "epoch": 1.1098976109215017, "grad_norm": 1.2851994456273843, "learning_rate": 1.1041413288461903e-06, "loss": 0.0592, "step": 4878 }, { "epoch": 1.1101251422070535, "grad_norm": 2.100558559057352, "learning_rate": 1.1040839520215233e-06, "loss": 0.0617, "step": 4879 }, { "epoch": 1.1103526734926052, "grad_norm": 1.160798247725018, "learning_rate": 1.1040265654052575e-06, "loss": 0.0901, "step": 4880 }, { "epoch": 1.110580204778157, "grad_norm": 1.120661149863947, "learning_rate": 1.103969168998566e-06, "loss": 0.121, "step": 4881 }, { "epoch": 1.1108077360637088, "grad_norm": 4.950864682787037, "learning_rate": 1.1039117628026222e-06, "loss": 0.1655, "step": 4882 }, { "epoch": 1.1110352673492605, "grad_norm": 1.364699813740917, "learning_rate": 1.1038543468185988e-06, "loss": 0.0854, "step": 4883 }, { "epoch": 1.1112627986348123, "grad_norm": 1.1250871225008297, "learning_rate": 1.1037969210476696e-06, "loss": 0.1017, "step": 4884 }, { "epoch": 1.111490329920364, "grad_norm": 2.983700929393962, "learning_rate": 1.1037394854910082e-06, "loss": 0.1118, "step": 4885 }, { "epoch": 1.1117178612059158, "grad_norm": 1.8781961530183755, "learning_rate": 1.1036820401497884e-06, "loss": 0.0519, "step": 4886 }, { "epoch": 1.1119453924914675, "grad_norm": 2.069070944004058, "learning_rate": 1.1036245850251844e-06, "loss": 0.0479, "step": 4887 }, { "epoch": 1.1121729237770193, "grad_norm": 1.712184545237977, "learning_rate": 1.1035671201183706e-06, "loss": 0.0939, "step": 4888 }, { "epoch": 1.112400455062571, "grad_norm": 2.189959357572597, "learning_rate": 1.1035096454305213e-06, "loss": 0.151, "step": 4889 }, { "epoch": 1.1126279863481228, "grad_norm": 1.3078629490348257, "learning_rate": 1.103452160962811e-06, "loss": 0.0482, "step": 4890 }, { "epoch": 1.1128555176336747, "grad_norm": 2.860153538637598, "learning_rate": 1.103394666716415e-06, "loss": 0.1244, "step": 4891 }, { "epoch": 1.1130830489192265, "grad_norm": 1.722875065107273, "learning_rate": 1.1033371626925079e-06, "loss": 0.0584, "step": 4892 }, { "epoch": 1.1133105802047782, "grad_norm": 1.801918674856795, "learning_rate": 1.1032796488922653e-06, "loss": 0.0536, "step": 4893 }, { "epoch": 1.11353811149033, "grad_norm": 1.779178738655481, "learning_rate": 1.103222125316863e-06, "loss": 0.0569, "step": 4894 }, { "epoch": 1.1137656427758817, "grad_norm": 2.4375614426254204, "learning_rate": 1.1031645919674758e-06, "loss": 0.1431, "step": 4895 }, { "epoch": 1.1139931740614335, "grad_norm": 0.9387408943236797, "learning_rate": 1.10310704884528e-06, "loss": 0.0793, "step": 4896 }, { "epoch": 1.1142207053469853, "grad_norm": 1.7562606828790148, "learning_rate": 1.1030494959514521e-06, "loss": 0.0955, "step": 4897 }, { "epoch": 1.114448236632537, "grad_norm": 1.6211572762717632, "learning_rate": 1.1029919332871678e-06, "loss": 0.1339, "step": 4898 }, { "epoch": 1.1146757679180888, "grad_norm": 1.682319009818202, "learning_rate": 1.102934360853604e-06, "loss": 0.092, "step": 4899 }, { "epoch": 1.1149032992036405, "grad_norm": 1.2468699954267968, "learning_rate": 1.1028767786519368e-06, "loss": 0.0494, "step": 4900 }, { "epoch": 1.1151308304891923, "grad_norm": 1.152573256589869, "learning_rate": 1.1028191866833438e-06, "loss": 0.0447, "step": 4901 }, { "epoch": 1.115358361774744, "grad_norm": 1.2871716357390797, "learning_rate": 1.1027615849490014e-06, "loss": 0.0545, "step": 4902 }, { "epoch": 1.1155858930602958, "grad_norm": 2.0888356191297532, "learning_rate": 1.1027039734500872e-06, "loss": 0.1307, "step": 4903 }, { "epoch": 1.1158134243458475, "grad_norm": 1.5339979897809541, "learning_rate": 1.102646352187779e-06, "loss": 0.1462, "step": 4904 }, { "epoch": 1.1160409556313993, "grad_norm": 1.6652585638651645, "learning_rate": 1.1025887211632538e-06, "loss": 0.0727, "step": 4905 }, { "epoch": 1.116268486916951, "grad_norm": 1.585173064754058, "learning_rate": 1.1025310803776898e-06, "loss": 0.1625, "step": 4906 }, { "epoch": 1.1164960182025028, "grad_norm": 1.888781057343968, "learning_rate": 1.1024734298322655e-06, "loss": 0.1054, "step": 4907 }, { "epoch": 1.1167235494880545, "grad_norm": 1.425278772221824, "learning_rate": 1.1024157695281582e-06, "loss": 0.119, "step": 4908 }, { "epoch": 1.1169510807736063, "grad_norm": 1.8240200320037065, "learning_rate": 1.1023580994665472e-06, "loss": 0.0669, "step": 4909 }, { "epoch": 1.117178612059158, "grad_norm": 1.5457442923166032, "learning_rate": 1.1023004196486108e-06, "loss": 0.1373, "step": 4910 }, { "epoch": 1.11740614334471, "grad_norm": 2.094866405122345, "learning_rate": 1.102242730075528e-06, "loss": 0.1202, "step": 4911 }, { "epoch": 1.1176336746302618, "grad_norm": 2.445145034318404, "learning_rate": 1.1021850307484776e-06, "loss": 0.133, "step": 4912 }, { "epoch": 1.1178612059158135, "grad_norm": 1.7994599245951428, "learning_rate": 1.1021273216686397e-06, "loss": 0.0912, "step": 4913 }, { "epoch": 1.1180887372013653, "grad_norm": 3.0642622839592706, "learning_rate": 1.1020696028371926e-06, "loss": 0.1026, "step": 4914 }, { "epoch": 1.118316268486917, "grad_norm": 1.5494532571576725, "learning_rate": 1.1020118742553166e-06, "loss": 0.1116, "step": 4915 }, { "epoch": 1.1185437997724688, "grad_norm": 2.504252666932219, "learning_rate": 1.1019541359241917e-06, "loss": 0.0939, "step": 4916 }, { "epoch": 1.1187713310580205, "grad_norm": 2.331022199171125, "learning_rate": 1.1018963878449976e-06, "loss": 0.0996, "step": 4917 }, { "epoch": 1.1189988623435723, "grad_norm": 1.7064165985683355, "learning_rate": 1.1018386300189148e-06, "loss": 0.0452, "step": 4918 }, { "epoch": 1.119226393629124, "grad_norm": 1.495724359531087, "learning_rate": 1.1017808624471237e-06, "loss": 0.0973, "step": 4919 }, { "epoch": 1.1194539249146758, "grad_norm": 1.3478931502531566, "learning_rate": 1.101723085130805e-06, "loss": 0.0929, "step": 4920 }, { "epoch": 1.1196814562002275, "grad_norm": 1.8942149767063106, "learning_rate": 1.1016652980711392e-06, "loss": 0.0939, "step": 4921 }, { "epoch": 1.1199089874857793, "grad_norm": 1.1254758558845417, "learning_rate": 1.1016075012693082e-06, "loss": 0.0788, "step": 4922 }, { "epoch": 1.120136518771331, "grad_norm": 1.2756441216407504, "learning_rate": 1.1015496947264923e-06, "loss": 0.0834, "step": 4923 }, { "epoch": 1.1203640500568828, "grad_norm": 2.911374051671654, "learning_rate": 1.1014918784438736e-06, "loss": 0.0624, "step": 4924 }, { "epoch": 1.1205915813424345, "grad_norm": 1.3868160433273433, "learning_rate": 1.1014340524226337e-06, "loss": 0.1127, "step": 4925 }, { "epoch": 1.1208191126279863, "grad_norm": 1.3222460616762193, "learning_rate": 1.101376216663954e-06, "loss": 0.0991, "step": 4926 }, { "epoch": 1.121046643913538, "grad_norm": 1.8165175156132298, "learning_rate": 1.101318371169017e-06, "loss": 0.204, "step": 4927 }, { "epoch": 1.1212741751990898, "grad_norm": 1.1690083635578878, "learning_rate": 1.1012605159390048e-06, "loss": 0.0939, "step": 4928 }, { "epoch": 1.1215017064846415, "grad_norm": 1.5839171650253496, "learning_rate": 1.1012026509751e-06, "loss": 0.0811, "step": 4929 }, { "epoch": 1.1217292377701935, "grad_norm": 1.3688530836003974, "learning_rate": 1.1011447762784849e-06, "loss": 0.0628, "step": 4930 }, { "epoch": 1.1219567690557453, "grad_norm": 1.682646501606769, "learning_rate": 1.1010868918503429e-06, "loss": 0.1209, "step": 4931 }, { "epoch": 1.122184300341297, "grad_norm": 1.1263472398197065, "learning_rate": 1.1010289976918565e-06, "loss": 0.0512, "step": 4932 }, { "epoch": 1.1224118316268488, "grad_norm": 1.8786745580953335, "learning_rate": 1.1009710938042093e-06, "loss": 0.0829, "step": 4933 }, { "epoch": 1.1226393629124005, "grad_norm": 1.7691231961554044, "learning_rate": 1.1009131801885848e-06, "loss": 0.1705, "step": 4934 }, { "epoch": 1.1228668941979523, "grad_norm": 1.2152009575549836, "learning_rate": 1.100855256846166e-06, "loss": 0.0517, "step": 4935 }, { "epoch": 1.123094425483504, "grad_norm": 1.4082011663537433, "learning_rate": 1.1007973237781377e-06, "loss": 0.0915, "step": 4936 }, { "epoch": 1.1233219567690558, "grad_norm": 1.7851853742678743, "learning_rate": 1.1007393809856834e-06, "loss": 0.0666, "step": 4937 }, { "epoch": 1.1235494880546075, "grad_norm": 1.3679603490528933, "learning_rate": 1.1006814284699873e-06, "loss": 0.187, "step": 4938 }, { "epoch": 1.1237770193401593, "grad_norm": 1.0869679752100778, "learning_rate": 1.100623466232234e-06, "loss": 0.0979, "step": 4939 }, { "epoch": 1.124004550625711, "grad_norm": 1.8324197313337576, "learning_rate": 1.1005654942736082e-06, "loss": 0.0991, "step": 4940 }, { "epoch": 1.1242320819112628, "grad_norm": 0.8916797514793634, "learning_rate": 1.1005075125952946e-06, "loss": 0.0449, "step": 4941 }, { "epoch": 1.1244596131968145, "grad_norm": 1.4439535008292297, "learning_rate": 1.1004495211984783e-06, "loss": 0.1386, "step": 4942 }, { "epoch": 1.1246871444823663, "grad_norm": 1.7320674419620372, "learning_rate": 1.1003915200843446e-06, "loss": 0.1045, "step": 4943 }, { "epoch": 1.124914675767918, "grad_norm": 2.086445312550206, "learning_rate": 1.1003335092540787e-06, "loss": 0.0756, "step": 4944 }, { "epoch": 1.1251422070534698, "grad_norm": 1.188570368927323, "learning_rate": 1.1002754887088665e-06, "loss": 0.0578, "step": 4945 }, { "epoch": 1.1253697383390215, "grad_norm": 1.7752270711874487, "learning_rate": 1.1002174584498938e-06, "loss": 0.0948, "step": 4946 }, { "epoch": 1.1255972696245733, "grad_norm": 2.3256358668512687, "learning_rate": 1.1001594184783464e-06, "loss": 0.1521, "step": 4947 }, { "epoch": 1.125824800910125, "grad_norm": 1.5836370395195216, "learning_rate": 1.1001013687954109e-06, "loss": 0.0716, "step": 4948 }, { "epoch": 1.1260523321956768, "grad_norm": 0.8926228009452495, "learning_rate": 1.1000433094022735e-06, "loss": 0.0785, "step": 4949 }, { "epoch": 1.1262798634812285, "grad_norm": 1.2143047544290266, "learning_rate": 1.0999852403001208e-06, "loss": 0.0755, "step": 4950 }, { "epoch": 1.1265073947667805, "grad_norm": 1.6120702109468077, "learning_rate": 1.0999271614901396e-06, "loss": 0.079, "step": 4951 }, { "epoch": 1.1267349260523323, "grad_norm": 1.0700572670291029, "learning_rate": 1.099869072973517e-06, "loss": 0.0459, "step": 4952 }, { "epoch": 1.126962457337884, "grad_norm": 1.406661725254882, "learning_rate": 1.0998109747514404e-06, "loss": 0.1368, "step": 4953 }, { "epoch": 1.1271899886234358, "grad_norm": 1.8428127725262113, "learning_rate": 1.099752866825097e-06, "loss": 0.0559, "step": 4954 }, { "epoch": 1.1274175199089875, "grad_norm": 1.6669400062763435, "learning_rate": 1.0996947491956745e-06, "loss": 0.0721, "step": 4955 }, { "epoch": 1.1276450511945393, "grad_norm": 1.563884717527578, "learning_rate": 1.0996366218643607e-06, "loss": 0.0622, "step": 4956 }, { "epoch": 1.127872582480091, "grad_norm": 1.8893173163715162, "learning_rate": 1.0995784848323434e-06, "loss": 0.0927, "step": 4957 }, { "epoch": 1.1281001137656428, "grad_norm": 1.631214871572094, "learning_rate": 1.0995203381008112e-06, "loss": 0.0658, "step": 4958 }, { "epoch": 1.1283276450511945, "grad_norm": 2.057566048669985, "learning_rate": 1.099462181670952e-06, "loss": 0.1078, "step": 4959 }, { "epoch": 1.1285551763367463, "grad_norm": 2.9901712732479093, "learning_rate": 1.0994040155439553e-06, "loss": 0.1499, "step": 4960 }, { "epoch": 1.128782707622298, "grad_norm": 1.8173197795146472, "learning_rate": 1.0993458397210092e-06, "loss": 0.0909, "step": 4961 }, { "epoch": 1.1290102389078498, "grad_norm": 1.4406862960819018, "learning_rate": 1.0992876542033026e-06, "loss": 0.0679, "step": 4962 }, { "epoch": 1.1292377701934015, "grad_norm": 2.0147220621119724, "learning_rate": 1.0992294589920252e-06, "loss": 0.0979, "step": 4963 }, { "epoch": 1.1294653014789533, "grad_norm": 1.7427158658250603, "learning_rate": 1.099171254088366e-06, "loss": 0.0578, "step": 4964 }, { "epoch": 1.129692832764505, "grad_norm": 2.3163769351990084, "learning_rate": 1.0991130394935148e-06, "loss": 0.0706, "step": 4965 }, { "epoch": 1.1299203640500568, "grad_norm": 2.9626845696330824, "learning_rate": 1.0990548152086616e-06, "loss": 0.1327, "step": 4966 }, { "epoch": 1.1301478953356086, "grad_norm": 1.4455808915658892, "learning_rate": 1.098996581234996e-06, "loss": 0.0883, "step": 4967 }, { "epoch": 1.1303754266211605, "grad_norm": 1.6512236234603601, "learning_rate": 1.0989383375737081e-06, "loss": 0.0543, "step": 4968 }, { "epoch": 1.1306029579067123, "grad_norm": 1.533650029665724, "learning_rate": 1.098880084225989e-06, "loss": 0.0964, "step": 4969 }, { "epoch": 1.130830489192264, "grad_norm": 1.604433883847564, "learning_rate": 1.0988218211930285e-06, "loss": 0.0791, "step": 4970 }, { "epoch": 1.1310580204778158, "grad_norm": 1.7409770933966728, "learning_rate": 1.0987635484760178e-06, "loss": 0.13, "step": 4971 }, { "epoch": 1.1312855517633675, "grad_norm": 2.7884517331400325, "learning_rate": 1.098705266076148e-06, "loss": 0.0671, "step": 4972 }, { "epoch": 1.1315130830489193, "grad_norm": 1.630625307393665, "learning_rate": 1.0986469739946102e-06, "loss": 0.0855, "step": 4973 }, { "epoch": 1.131740614334471, "grad_norm": 1.2094350075349947, "learning_rate": 1.0985886722325954e-06, "loss": 0.0368, "step": 4974 }, { "epoch": 1.1319681456200228, "grad_norm": 1.3111484263137263, "learning_rate": 1.0985303607912956e-06, "loss": 0.0511, "step": 4975 }, { "epoch": 1.1321956769055745, "grad_norm": 2.657021119411178, "learning_rate": 1.0984720396719024e-06, "loss": 0.1313, "step": 4976 }, { "epoch": 1.1324232081911263, "grad_norm": 1.891785343782106, "learning_rate": 1.098413708875608e-06, "loss": 0.0948, "step": 4977 }, { "epoch": 1.132650739476678, "grad_norm": 1.5850598764256127, "learning_rate": 1.098355368403604e-06, "loss": 0.0744, "step": 4978 }, { "epoch": 1.1328782707622298, "grad_norm": 1.8115705441515912, "learning_rate": 1.0982970182570837e-06, "loss": 0.0817, "step": 4979 }, { "epoch": 1.1331058020477816, "grad_norm": 1.8766599987971402, "learning_rate": 1.098238658437239e-06, "loss": 0.0903, "step": 4980 }, { "epoch": 1.1333333333333333, "grad_norm": 1.4992947077118386, "learning_rate": 1.0981802889452627e-06, "loss": 0.075, "step": 4981 }, { "epoch": 1.133560864618885, "grad_norm": 3.497579577636057, "learning_rate": 1.0981219097823479e-06, "loss": 0.1198, "step": 4982 }, { "epoch": 1.1337883959044368, "grad_norm": 1.7734853138248883, "learning_rate": 1.0980635209496878e-06, "loss": 0.0982, "step": 4983 }, { "epoch": 1.1340159271899886, "grad_norm": 2.5972880470148616, "learning_rate": 1.0980051224484756e-06, "loss": 0.1034, "step": 4984 }, { "epoch": 1.1342434584755403, "grad_norm": 1.6807793940071352, "learning_rate": 1.0979467142799052e-06, "loss": 0.1123, "step": 4985 }, { "epoch": 1.134470989761092, "grad_norm": 1.167512015914663, "learning_rate": 1.0978882964451698e-06, "loss": 0.1098, "step": 4986 }, { "epoch": 1.1346985210466438, "grad_norm": 1.1705606733035332, "learning_rate": 1.097829868945464e-06, "loss": 0.0673, "step": 4987 }, { "epoch": 1.1349260523321956, "grad_norm": 2.2907339208973925, "learning_rate": 1.0977714317819812e-06, "loss": 0.1176, "step": 4988 }, { "epoch": 1.1351535836177473, "grad_norm": 2.121070478697396, "learning_rate": 1.0977129849559165e-06, "loss": 0.0845, "step": 4989 }, { "epoch": 1.1353811149032993, "grad_norm": 1.806954543632207, "learning_rate": 1.0976545284684642e-06, "loss": 0.0599, "step": 4990 }, { "epoch": 1.135608646188851, "grad_norm": 2.5598944741988734, "learning_rate": 1.0975960623208188e-06, "loss": 0.1371, "step": 4991 }, { "epoch": 1.1358361774744028, "grad_norm": 2.0091610638634583, "learning_rate": 1.0975375865141753e-06, "loss": 0.0653, "step": 4992 }, { "epoch": 1.1360637087599545, "grad_norm": 1.7957636193915902, "learning_rate": 1.097479101049729e-06, "loss": 0.0551, "step": 4993 }, { "epoch": 1.1362912400455063, "grad_norm": 1.1892795380116103, "learning_rate": 1.0974206059286752e-06, "loss": 0.1151, "step": 4994 }, { "epoch": 1.136518771331058, "grad_norm": 2.1464705127422543, "learning_rate": 1.0973621011522096e-06, "loss": 0.0761, "step": 4995 }, { "epoch": 1.1367463026166098, "grad_norm": 1.9966944001252394, "learning_rate": 1.0973035867215276e-06, "loss": 0.1168, "step": 4996 }, { "epoch": 1.1369738339021616, "grad_norm": 1.020794945372424, "learning_rate": 1.0972450626378254e-06, "loss": 0.0485, "step": 4997 }, { "epoch": 1.1372013651877133, "grad_norm": 1.64511125117566, "learning_rate": 1.0971865289022988e-06, "loss": 0.0694, "step": 4998 }, { "epoch": 1.137428896473265, "grad_norm": 1.175913889162089, "learning_rate": 1.0971279855161442e-06, "loss": 0.084, "step": 4999 }, { "epoch": 1.1376564277588168, "grad_norm": 2.8231213648251714, "learning_rate": 1.0970694324805586e-06, "loss": 0.1428, "step": 5000 }, { "epoch": 1.1378839590443686, "grad_norm": 2.6932229520099846, "learning_rate": 1.0970108697967382e-06, "loss": 0.1014, "step": 5001 }, { "epoch": 1.1381114903299203, "grad_norm": 1.3526729503758106, "learning_rate": 1.09695229746588e-06, "loss": 0.0988, "step": 5002 }, { "epoch": 1.138339021615472, "grad_norm": 3.6679630171563193, "learning_rate": 1.0968937154891812e-06, "loss": 0.1297, "step": 5003 }, { "epoch": 1.1385665529010238, "grad_norm": 2.0787582994902976, "learning_rate": 1.096835123867839e-06, "loss": 0.0786, "step": 5004 }, { "epoch": 1.1387940841865756, "grad_norm": 2.0738995997206207, "learning_rate": 1.0967765226030512e-06, "loss": 0.096, "step": 5005 }, { "epoch": 1.1390216154721273, "grad_norm": 1.792981831800975, "learning_rate": 1.0967179116960153e-06, "loss": 0.108, "step": 5006 }, { "epoch": 1.1392491467576793, "grad_norm": 1.7850160079944348, "learning_rate": 1.096659291147929e-06, "loss": 0.1949, "step": 5007 }, { "epoch": 1.139476678043231, "grad_norm": 1.4390443615551238, "learning_rate": 1.0966006609599908e-06, "loss": 0.05, "step": 5008 }, { "epoch": 1.1397042093287828, "grad_norm": 2.2005543377996886, "learning_rate": 1.0965420211333984e-06, "loss": 0.0938, "step": 5009 }, { "epoch": 1.1399317406143346, "grad_norm": 1.3374633377814356, "learning_rate": 1.0964833716693512e-06, "loss": 0.1373, "step": 5010 }, { "epoch": 1.1401592718998863, "grad_norm": 2.1028444721908586, "learning_rate": 1.096424712569047e-06, "loss": 0.0684, "step": 5011 }, { "epoch": 1.140386803185438, "grad_norm": 1.7106971142516763, "learning_rate": 1.0963660438336851e-06, "loss": 0.1593, "step": 5012 }, { "epoch": 1.1406143344709898, "grad_norm": 1.2864010761726647, "learning_rate": 1.0963073654644645e-06, "loss": 0.0538, "step": 5013 }, { "epoch": 1.1408418657565416, "grad_norm": 1.3129074175693134, "learning_rate": 1.0962486774625847e-06, "loss": 0.0949, "step": 5014 }, { "epoch": 1.1410693970420933, "grad_norm": 1.4503139865982977, "learning_rate": 1.096189979829245e-06, "loss": 0.0662, "step": 5015 }, { "epoch": 1.141296928327645, "grad_norm": 1.3609039708489337, "learning_rate": 1.096131272565645e-06, "loss": 0.0492, "step": 5016 }, { "epoch": 1.1415244596131968, "grad_norm": 1.5885653226656653, "learning_rate": 1.0960725556729845e-06, "loss": 0.047, "step": 5017 }, { "epoch": 1.1417519908987486, "grad_norm": 1.4277301607595434, "learning_rate": 1.0960138291524637e-06, "loss": 0.0681, "step": 5018 }, { "epoch": 1.1419795221843003, "grad_norm": 1.7749885462374024, "learning_rate": 1.095955093005283e-06, "loss": 0.1022, "step": 5019 }, { "epoch": 1.142207053469852, "grad_norm": 1.9945055261306845, "learning_rate": 1.0958963472326426e-06, "loss": 0.0992, "step": 5020 }, { "epoch": 1.1424345847554038, "grad_norm": 1.7159767521687925, "learning_rate": 1.0958375918357433e-06, "loss": 0.0699, "step": 5021 }, { "epoch": 1.1426621160409556, "grad_norm": 0.9252880714466132, "learning_rate": 1.095778826815786e-06, "loss": 0.0426, "step": 5022 }, { "epoch": 1.1428896473265073, "grad_norm": 2.3010291974952604, "learning_rate": 1.0957200521739715e-06, "loss": 0.1163, "step": 5023 }, { "epoch": 1.143117178612059, "grad_norm": 1.7201270965579583, "learning_rate": 1.0956612679115012e-06, "loss": 0.174, "step": 5024 }, { "epoch": 1.1433447098976108, "grad_norm": 1.2170020491679054, "learning_rate": 1.0956024740295767e-06, "loss": 0.1368, "step": 5025 }, { "epoch": 1.1435722411831626, "grad_norm": 1.6673945996132118, "learning_rate": 1.0955436705293996e-06, "loss": 0.1039, "step": 5026 }, { "epoch": 1.1437997724687143, "grad_norm": 1.4526736263426159, "learning_rate": 1.0954848574121715e-06, "loss": 0.148, "step": 5027 }, { "epoch": 1.144027303754266, "grad_norm": 1.000929358914613, "learning_rate": 1.0954260346790944e-06, "loss": 0.063, "step": 5028 }, { "epoch": 1.144254835039818, "grad_norm": 2.004003133169068, "learning_rate": 1.0953672023313709e-06, "loss": 0.1182, "step": 5029 }, { "epoch": 1.1444823663253698, "grad_norm": 2.8747884612525074, "learning_rate": 1.0953083603702031e-06, "loss": 0.0799, "step": 5030 }, { "epoch": 1.1447098976109216, "grad_norm": 2.616654201453664, "learning_rate": 1.0952495087967939e-06, "loss": 0.0952, "step": 5031 }, { "epoch": 1.1449374288964733, "grad_norm": 1.1926571603629328, "learning_rate": 1.095190647612346e-06, "loss": 0.1146, "step": 5032 }, { "epoch": 1.145164960182025, "grad_norm": 2.012002754656629, "learning_rate": 1.0951317768180623e-06, "loss": 0.0832, "step": 5033 }, { "epoch": 1.1453924914675768, "grad_norm": 1.7498100999489232, "learning_rate": 1.0950728964151457e-06, "loss": 0.1023, "step": 5034 }, { "epoch": 1.1456200227531286, "grad_norm": 1.1549230270434567, "learning_rate": 1.0950140064048005e-06, "loss": 0.0307, "step": 5035 }, { "epoch": 1.1458475540386803, "grad_norm": 1.3442692312701563, "learning_rate": 1.0949551067882297e-06, "loss": 0.0996, "step": 5036 }, { "epoch": 1.146075085324232, "grad_norm": 1.2566411523687606, "learning_rate": 1.094896197566637e-06, "loss": 0.0358, "step": 5037 }, { "epoch": 1.1463026166097838, "grad_norm": 1.5156306393603887, "learning_rate": 1.0948372787412267e-06, "loss": 0.0861, "step": 5038 }, { "epoch": 1.1465301478953356, "grad_norm": 1.8535248219358293, "learning_rate": 1.094778350313203e-06, "loss": 0.0702, "step": 5039 }, { "epoch": 1.1467576791808873, "grad_norm": 1.2766493100453777, "learning_rate": 1.09471941228377e-06, "loss": 0.0863, "step": 5040 }, { "epoch": 1.146985210466439, "grad_norm": 1.798776404954717, "learning_rate": 1.0946604646541327e-06, "loss": 0.1255, "step": 5041 }, { "epoch": 1.1472127417519908, "grad_norm": 1.5012306399931543, "learning_rate": 1.0946015074254957e-06, "loss": 0.0504, "step": 5042 }, { "epoch": 1.1474402730375426, "grad_norm": 1.7885741561851027, "learning_rate": 1.0945425405990636e-06, "loss": 0.1188, "step": 5043 }, { "epoch": 1.1476678043230943, "grad_norm": 1.830862543786872, "learning_rate": 1.094483564176042e-06, "loss": 0.151, "step": 5044 }, { "epoch": 1.147895335608646, "grad_norm": 2.6040357792438322, "learning_rate": 1.0944245781576363e-06, "loss": 0.1065, "step": 5045 }, { "epoch": 1.148122866894198, "grad_norm": 2.0581342138073175, "learning_rate": 1.0943655825450517e-06, "loss": 0.0851, "step": 5046 }, { "epoch": 1.1483503981797498, "grad_norm": 2.143603515071713, "learning_rate": 1.0943065773394943e-06, "loss": 0.137, "step": 5047 }, { "epoch": 1.1485779294653016, "grad_norm": 1.352482485940918, "learning_rate": 1.0942475625421701e-06, "loss": 0.0726, "step": 5048 }, { "epoch": 1.1488054607508533, "grad_norm": 1.6964232784625224, "learning_rate": 1.094188538154285e-06, "loss": 0.0994, "step": 5049 }, { "epoch": 1.149032992036405, "grad_norm": 2.775135542145203, "learning_rate": 1.0941295041770453e-06, "loss": 0.099, "step": 5050 }, { "epoch": 1.1492605233219568, "grad_norm": 1.5050400089796847, "learning_rate": 1.0940704606116578e-06, "loss": 0.0574, "step": 5051 }, { "epoch": 1.1494880546075086, "grad_norm": 2.721899773786621, "learning_rate": 1.0940114074593292e-06, "loss": 0.0886, "step": 5052 }, { "epoch": 1.1497155858930603, "grad_norm": 1.356790292993199, "learning_rate": 1.0939523447212665e-06, "loss": 0.0632, "step": 5053 }, { "epoch": 1.149943117178612, "grad_norm": 1.3503374996306448, "learning_rate": 1.0938932723986766e-06, "loss": 0.0816, "step": 5054 }, { "epoch": 1.1501706484641638, "grad_norm": 1.481659233232767, "learning_rate": 1.0938341904927669e-06, "loss": 0.0719, "step": 5055 }, { "epoch": 1.1503981797497156, "grad_norm": 0.8694013815410049, "learning_rate": 1.093775099004745e-06, "loss": 0.0424, "step": 5056 }, { "epoch": 1.1506257110352673, "grad_norm": 1.919365341975163, "learning_rate": 1.0937159979358186e-06, "loss": 0.1127, "step": 5057 }, { "epoch": 1.150853242320819, "grad_norm": 2.0981902421175773, "learning_rate": 1.0936568872871958e-06, "loss": 0.0914, "step": 5058 }, { "epoch": 1.1510807736063708, "grad_norm": 1.5675678492995555, "learning_rate": 1.0935977670600843e-06, "loss": 0.1131, "step": 5059 }, { "epoch": 1.1513083048919226, "grad_norm": 1.9064072576465214, "learning_rate": 1.0935386372556928e-06, "loss": 0.0772, "step": 5060 }, { "epoch": 1.1515358361774743, "grad_norm": 1.4104845636818022, "learning_rate": 1.0934794978752295e-06, "loss": 0.0684, "step": 5061 }, { "epoch": 1.151763367463026, "grad_norm": 2.5853584445718676, "learning_rate": 1.0934203489199033e-06, "loss": 0.0998, "step": 5062 }, { "epoch": 1.1519908987485779, "grad_norm": 1.9121176842785628, "learning_rate": 1.093361190390923e-06, "loss": 0.1038, "step": 5063 }, { "epoch": 1.1522184300341296, "grad_norm": 2.0243437314386616, "learning_rate": 1.0933020222894978e-06, "loss": 0.1361, "step": 5064 }, { "epoch": 1.1524459613196814, "grad_norm": 1.7972592627695714, "learning_rate": 1.0932428446168369e-06, "loss": 0.0427, "step": 5065 }, { "epoch": 1.152673492605233, "grad_norm": 1.6741471355620121, "learning_rate": 1.0931836573741498e-06, "loss": 0.1559, "step": 5066 }, { "epoch": 1.1529010238907849, "grad_norm": 2.3765148471218462, "learning_rate": 1.093124460562646e-06, "loss": 0.0764, "step": 5067 }, { "epoch": 1.1531285551763368, "grad_norm": 1.5942557601662923, "learning_rate": 1.0930652541835357e-06, "loss": 0.1573, "step": 5068 }, { "epoch": 1.1533560864618886, "grad_norm": 3.08185190716079, "learning_rate": 1.093006038238029e-06, "loss": 0.141, "step": 5069 }, { "epoch": 1.1535836177474403, "grad_norm": 1.414273885302686, "learning_rate": 1.0929468127273357e-06, "loss": 0.1006, "step": 5070 }, { "epoch": 1.153811149032992, "grad_norm": 2.080210442470953, "learning_rate": 1.0928875776526667e-06, "loss": 0.1276, "step": 5071 }, { "epoch": 1.1540386803185438, "grad_norm": 1.6679057714706864, "learning_rate": 1.0928283330152325e-06, "loss": 0.1001, "step": 5072 }, { "epoch": 1.1542662116040956, "grad_norm": 1.4223736080513678, "learning_rate": 1.092769078816244e-06, "loss": 0.1408, "step": 5073 }, { "epoch": 1.1544937428896473, "grad_norm": 1.8966004842746413, "learning_rate": 1.092709815056912e-06, "loss": 0.0953, "step": 5074 }, { "epoch": 1.154721274175199, "grad_norm": 1.1682597216102555, "learning_rate": 1.0926505417384482e-06, "loss": 0.0513, "step": 5075 }, { "epoch": 1.1549488054607508, "grad_norm": 2.2910856841946154, "learning_rate": 1.0925912588620637e-06, "loss": 0.0945, "step": 5076 }, { "epoch": 1.1551763367463026, "grad_norm": 1.0720514021970478, "learning_rate": 1.0925319664289703e-06, "loss": 0.0764, "step": 5077 }, { "epoch": 1.1554038680318544, "grad_norm": 1.5743332758446995, "learning_rate": 1.0924726644403797e-06, "loss": 0.059, "step": 5078 }, { "epoch": 1.155631399317406, "grad_norm": 1.3255429568097516, "learning_rate": 1.0924133528975039e-06, "loss": 0.0919, "step": 5079 }, { "epoch": 1.1558589306029579, "grad_norm": 1.5332950391438218, "learning_rate": 1.0923540318015552e-06, "loss": 0.0913, "step": 5080 }, { "epoch": 1.1560864618885096, "grad_norm": 2.6893499621057306, "learning_rate": 1.092294701153746e-06, "loss": 0.126, "step": 5081 }, { "epoch": 1.1563139931740614, "grad_norm": 0.9148314458809074, "learning_rate": 1.092235360955289e-06, "loss": 0.1184, "step": 5082 }, { "epoch": 1.1565415244596131, "grad_norm": 1.1437794949654425, "learning_rate": 1.092176011207397e-06, "loss": 0.0367, "step": 5083 }, { "epoch": 1.1567690557451649, "grad_norm": 1.8172588347866085, "learning_rate": 1.0921166519112828e-06, "loss": 0.1471, "step": 5084 }, { "epoch": 1.1569965870307168, "grad_norm": 1.1447836349023166, "learning_rate": 1.0920572830681597e-06, "loss": 0.0537, "step": 5085 }, { "epoch": 1.1572241183162686, "grad_norm": 1.5357107672237025, "learning_rate": 1.0919979046792411e-06, "loss": 0.0537, "step": 5086 }, { "epoch": 1.1574516496018203, "grad_norm": 1.0688283394230311, "learning_rate": 1.0919385167457408e-06, "loss": 0.0354, "step": 5087 }, { "epoch": 1.157679180887372, "grad_norm": 2.0639977539582595, "learning_rate": 1.0918791192688722e-06, "loss": 0.09, "step": 5088 }, { "epoch": 1.1579067121729238, "grad_norm": 1.3518105035209564, "learning_rate": 1.0918197122498495e-06, "loss": 0.0798, "step": 5089 }, { "epoch": 1.1581342434584756, "grad_norm": 1.5164367559828507, "learning_rate": 1.0917602956898867e-06, "loss": 0.0943, "step": 5090 }, { "epoch": 1.1583617747440274, "grad_norm": 1.792644442256943, "learning_rate": 1.0917008695901985e-06, "loss": 0.0656, "step": 5091 }, { "epoch": 1.158589306029579, "grad_norm": 2.1479763014207953, "learning_rate": 1.091641433951999e-06, "loss": 0.1433, "step": 5092 }, { "epoch": 1.1588168373151309, "grad_norm": 2.053907620353767, "learning_rate": 1.0915819887765034e-06, "loss": 0.1345, "step": 5093 }, { "epoch": 1.1590443686006826, "grad_norm": 1.2848859703626283, "learning_rate": 1.0915225340649264e-06, "loss": 0.0808, "step": 5094 }, { "epoch": 1.1592718998862344, "grad_norm": 1.6760046409461768, "learning_rate": 1.091463069818483e-06, "loss": 0.0932, "step": 5095 }, { "epoch": 1.159499431171786, "grad_norm": 1.9797530016618736, "learning_rate": 1.091403596038389e-06, "loss": 0.1296, "step": 5096 }, { "epoch": 1.1597269624573379, "grad_norm": 1.372540390789414, "learning_rate": 1.0913441127258596e-06, "loss": 0.0416, "step": 5097 }, { "epoch": 1.1599544937428896, "grad_norm": 1.770026041242371, "learning_rate": 1.0912846198821105e-06, "loss": 0.1109, "step": 5098 }, { "epoch": 1.1601820250284414, "grad_norm": 2.3255312369703427, "learning_rate": 1.091225117508358e-06, "loss": 0.1967, "step": 5099 }, { "epoch": 1.1604095563139931, "grad_norm": 1.922123825453258, "learning_rate": 1.0911656056058175e-06, "loss": 0.1104, "step": 5100 }, { "epoch": 1.1606370875995449, "grad_norm": 1.5508865361545927, "learning_rate": 1.0911060841757063e-06, "loss": 0.1008, "step": 5101 }, { "epoch": 1.1608646188850966, "grad_norm": 3.3431646625962825, "learning_rate": 1.09104655321924e-06, "loss": 0.1382, "step": 5102 }, { "epoch": 1.1610921501706484, "grad_norm": 1.699504224482545, "learning_rate": 1.0909870127376358e-06, "loss": 0.1004, "step": 5103 }, { "epoch": 1.1613196814562001, "grad_norm": 2.462992773726134, "learning_rate": 1.0909274627321106e-06, "loss": 0.1523, "step": 5104 }, { "epoch": 1.1615472127417519, "grad_norm": 1.4732070097539725, "learning_rate": 1.090867903203881e-06, "loss": 0.0655, "step": 5105 }, { "epoch": 1.1617747440273036, "grad_norm": 1.85057272922146, "learning_rate": 1.090808334154165e-06, "loss": 0.1804, "step": 5106 }, { "epoch": 1.1620022753128556, "grad_norm": 1.4961803957728892, "learning_rate": 1.0907487555841797e-06, "loss": 0.0582, "step": 5107 }, { "epoch": 1.1622298065984074, "grad_norm": 2.43515890824142, "learning_rate": 1.0906891674951426e-06, "loss": 0.1159, "step": 5108 }, { "epoch": 1.162457337883959, "grad_norm": 2.4092226646171033, "learning_rate": 1.090629569888272e-06, "loss": 0.1874, "step": 5109 }, { "epoch": 1.1626848691695109, "grad_norm": 1.430724702642088, "learning_rate": 1.0905699627647857e-06, "loss": 0.0464, "step": 5110 }, { "epoch": 1.1629124004550626, "grad_norm": 2.0043701924808364, "learning_rate": 1.090510346125902e-06, "loss": 0.1058, "step": 5111 }, { "epoch": 1.1631399317406144, "grad_norm": 1.6881125359192517, "learning_rate": 1.0904507199728392e-06, "loss": 0.0975, "step": 5112 }, { "epoch": 1.1633674630261661, "grad_norm": 2.020904439641956, "learning_rate": 1.0903910843068163e-06, "loss": 0.1164, "step": 5113 }, { "epoch": 1.1635949943117179, "grad_norm": 1.4662243703806388, "learning_rate": 1.090331439129052e-06, "loss": 0.0449, "step": 5114 }, { "epoch": 1.1638225255972696, "grad_norm": 2.3936294807872502, "learning_rate": 1.0902717844407651e-06, "loss": 0.1056, "step": 5115 }, { "epoch": 1.1640500568828214, "grad_norm": 1.402339373672487, "learning_rate": 1.0902121202431754e-06, "loss": 0.0984, "step": 5116 }, { "epoch": 1.1642775881683731, "grad_norm": 1.1601403443745117, "learning_rate": 1.0901524465375015e-06, "loss": 0.0618, "step": 5117 }, { "epoch": 1.1645051194539249, "grad_norm": 2.3796698078699827, "learning_rate": 1.0900927633249638e-06, "loss": 0.2121, "step": 5118 }, { "epoch": 1.1647326507394766, "grad_norm": 2.111092663401694, "learning_rate": 1.0900330706067818e-06, "loss": 0.1236, "step": 5119 }, { "epoch": 1.1649601820250284, "grad_norm": 2.4073374503765286, "learning_rate": 1.0899733683841753e-06, "loss": 0.064, "step": 5120 }, { "epoch": 1.1651877133105801, "grad_norm": 1.4765861177514648, "learning_rate": 1.0899136566583647e-06, "loss": 0.0787, "step": 5121 }, { "epoch": 1.1654152445961319, "grad_norm": 1.8206625324312107, "learning_rate": 1.0898539354305706e-06, "loss": 0.0589, "step": 5122 }, { "epoch": 1.1656427758816836, "grad_norm": 1.2742725949565343, "learning_rate": 1.0897942047020131e-06, "loss": 0.0823, "step": 5123 }, { "epoch": 1.1658703071672356, "grad_norm": 2.317344383776149, "learning_rate": 1.0897344644739139e-06, "loss": 0.0802, "step": 5124 }, { "epoch": 1.1660978384527874, "grad_norm": 2.082018147741833, "learning_rate": 1.089674714747493e-06, "loss": 0.0879, "step": 5125 }, { "epoch": 1.1663253697383391, "grad_norm": 1.017078316760799, "learning_rate": 1.0896149555239717e-06, "loss": 0.0675, "step": 5126 }, { "epoch": 1.1665529010238909, "grad_norm": 1.1599218032685075, "learning_rate": 1.0895551868045718e-06, "loss": 0.0357, "step": 5127 }, { "epoch": 1.1667804323094426, "grad_norm": 1.3348293242411355, "learning_rate": 1.0894954085905147e-06, "loss": 0.0986, "step": 5128 }, { "epoch": 1.1670079635949944, "grad_norm": 1.1294710535163477, "learning_rate": 1.0894356208830223e-06, "loss": 0.098, "step": 5129 }, { "epoch": 1.1672354948805461, "grad_norm": 1.6825900072808717, "learning_rate": 1.089375823683316e-06, "loss": 0.0856, "step": 5130 }, { "epoch": 1.1674630261660979, "grad_norm": 1.4078748724249117, "learning_rate": 1.0893160169926186e-06, "loss": 0.0397, "step": 5131 }, { "epoch": 1.1676905574516496, "grad_norm": 2.2607172341356963, "learning_rate": 1.0892562008121522e-06, "loss": 0.1146, "step": 5132 }, { "epoch": 1.1679180887372014, "grad_norm": 2.137903831657451, "learning_rate": 1.0891963751431392e-06, "loss": 0.0831, "step": 5133 }, { "epoch": 1.1681456200227531, "grad_norm": 1.4391610604694447, "learning_rate": 1.0891365399868022e-06, "loss": 0.1239, "step": 5134 }, { "epoch": 1.1683731513083049, "grad_norm": 1.3650904209209243, "learning_rate": 1.0890766953443646e-06, "loss": 0.0555, "step": 5135 }, { "epoch": 1.1686006825938566, "grad_norm": 1.0371856273982027, "learning_rate": 1.0890168412170493e-06, "loss": 0.0951, "step": 5136 }, { "epoch": 1.1688282138794084, "grad_norm": 1.5565752560490373, "learning_rate": 1.0889569776060796e-06, "loss": 0.0835, "step": 5137 }, { "epoch": 1.1690557451649601, "grad_norm": 1.6651305013672462, "learning_rate": 1.088897104512679e-06, "loss": 0.1987, "step": 5138 }, { "epoch": 1.1692832764505119, "grad_norm": 2.3900689357963825, "learning_rate": 1.0888372219380709e-06, "loss": 0.0795, "step": 5139 }, { "epoch": 1.1695108077360636, "grad_norm": 2.432864936838184, "learning_rate": 1.0887773298834798e-06, "loss": 0.1243, "step": 5140 }, { "epoch": 1.1697383390216154, "grad_norm": 1.185319864549767, "learning_rate": 1.0887174283501293e-06, "loss": 0.0477, "step": 5141 }, { "epoch": 1.1699658703071671, "grad_norm": 1.8287546810857624, "learning_rate": 1.0886575173392435e-06, "loss": 0.1251, "step": 5142 }, { "epoch": 1.170193401592719, "grad_norm": 2.177633518642662, "learning_rate": 1.0885975968520476e-06, "loss": 0.0893, "step": 5143 }, { "epoch": 1.1704209328782706, "grad_norm": 1.3130463339730047, "learning_rate": 1.0885376668897656e-06, "loss": 0.0855, "step": 5144 }, { "epoch": 1.1706484641638226, "grad_norm": 1.5628072719826096, "learning_rate": 1.0884777274536228e-06, "loss": 0.0856, "step": 5145 }, { "epoch": 1.1708759954493744, "grad_norm": 2.4402271387318017, "learning_rate": 1.0884177785448441e-06, "loss": 0.1261, "step": 5146 }, { "epoch": 1.1711035267349261, "grad_norm": 1.1284689217760129, "learning_rate": 1.0883578201646546e-06, "loss": 0.0861, "step": 5147 }, { "epoch": 1.1713310580204779, "grad_norm": 1.808467621183764, "learning_rate": 1.08829785231428e-06, "loss": 0.0825, "step": 5148 }, { "epoch": 1.1715585893060296, "grad_norm": 2.542285399099055, "learning_rate": 1.0882378749949456e-06, "loss": 0.1083, "step": 5149 }, { "epoch": 1.1717861205915814, "grad_norm": 2.021913721897118, "learning_rate": 1.0881778882078774e-06, "loss": 0.0901, "step": 5150 }, { "epoch": 1.1720136518771331, "grad_norm": 1.406045953578652, "learning_rate": 1.0881178919543016e-06, "loss": 0.0773, "step": 5151 }, { "epoch": 1.1722411831626849, "grad_norm": 2.0040086875362944, "learning_rate": 1.0880578862354444e-06, "loss": 0.1644, "step": 5152 }, { "epoch": 1.1724687144482366, "grad_norm": 1.8294788787626037, "learning_rate": 1.087997871052532e-06, "loss": 0.0948, "step": 5153 }, { "epoch": 1.1726962457337884, "grad_norm": 1.736926598359245, "learning_rate": 1.0879378464067906e-06, "loss": 0.1223, "step": 5154 }, { "epoch": 1.1729237770193401, "grad_norm": 1.91009580821313, "learning_rate": 1.0878778122994477e-06, "loss": 0.1093, "step": 5155 }, { "epoch": 1.173151308304892, "grad_norm": 1.6708279487986484, "learning_rate": 1.0878177687317302e-06, "loss": 0.0982, "step": 5156 }, { "epoch": 1.1733788395904436, "grad_norm": 1.7121086391611304, "learning_rate": 1.0877577157048648e-06, "loss": 0.0841, "step": 5157 }, { "epoch": 1.1736063708759954, "grad_norm": 1.5932913712350176, "learning_rate": 1.0876976532200797e-06, "loss": 0.1199, "step": 5158 }, { "epoch": 1.1738339021615471, "grad_norm": 1.9929519320083202, "learning_rate": 1.0876375812786017e-06, "loss": 0.094, "step": 5159 }, { "epoch": 1.174061433447099, "grad_norm": 1.0817988622931909, "learning_rate": 1.0875774998816586e-06, "loss": 0.0748, "step": 5160 }, { "epoch": 1.1742889647326507, "grad_norm": 1.2130835603422332, "learning_rate": 1.087517409030479e-06, "loss": 0.0383, "step": 5161 }, { "epoch": 1.1745164960182026, "grad_norm": 1.7517362332183557, "learning_rate": 1.0874573087262902e-06, "loss": 0.0986, "step": 5162 }, { "epoch": 1.1747440273037544, "grad_norm": 1.0166108394008488, "learning_rate": 1.087397198970321e-06, "loss": 0.0846, "step": 5163 }, { "epoch": 1.1749715585893061, "grad_norm": 1.9628550032769965, "learning_rate": 1.0873370797638002e-06, "loss": 0.1405, "step": 5164 }, { "epoch": 1.1751990898748579, "grad_norm": 1.8776527771081926, "learning_rate": 1.0872769511079561e-06, "loss": 0.1075, "step": 5165 }, { "epoch": 1.1754266211604096, "grad_norm": 1.0899004955073874, "learning_rate": 1.0872168130040175e-06, "loss": 0.0422, "step": 5166 }, { "epoch": 1.1756541524459614, "grad_norm": 1.3832930528440743, "learning_rate": 1.087156665453214e-06, "loss": 0.0768, "step": 5167 }, { "epoch": 1.1758816837315131, "grad_norm": 1.9657143389773364, "learning_rate": 1.0870965084567748e-06, "loss": 0.1274, "step": 5168 }, { "epoch": 1.176109215017065, "grad_norm": 1.5211869967581584, "learning_rate": 1.087036342015929e-06, "loss": 0.0845, "step": 5169 }, { "epoch": 1.1763367463026166, "grad_norm": 1.143246238134219, "learning_rate": 1.0869761661319064e-06, "loss": 0.0973, "step": 5170 }, { "epoch": 1.1765642775881684, "grad_norm": 1.9263291809785696, "learning_rate": 1.0869159808059373e-06, "loss": 0.0769, "step": 5171 }, { "epoch": 1.1767918088737201, "grad_norm": 2.2676787630517787, "learning_rate": 1.0868557860392516e-06, "loss": 0.1097, "step": 5172 }, { "epoch": 1.177019340159272, "grad_norm": 2.0019023188667764, "learning_rate": 1.0867955818330792e-06, "loss": 0.1312, "step": 5173 }, { "epoch": 1.1772468714448237, "grad_norm": 1.9525647770660846, "learning_rate": 1.086735368188651e-06, "loss": 0.1152, "step": 5174 }, { "epoch": 1.1774744027303754, "grad_norm": 1.096166997406837, "learning_rate": 1.0866751451071974e-06, "loss": 0.0966, "step": 5175 }, { "epoch": 1.1777019340159272, "grad_norm": 1.796427540212248, "learning_rate": 1.0866149125899495e-06, "loss": 0.1346, "step": 5176 }, { "epoch": 1.177929465301479, "grad_norm": 2.0245892091737177, "learning_rate": 1.086554670638138e-06, "loss": 0.1421, "step": 5177 }, { "epoch": 1.1781569965870307, "grad_norm": 1.7782356641667965, "learning_rate": 1.0864944192529946e-06, "loss": 0.1021, "step": 5178 }, { "epoch": 1.1783845278725824, "grad_norm": 1.1463385215583979, "learning_rate": 1.08643415843575e-06, "loss": 0.0348, "step": 5179 }, { "epoch": 1.1786120591581342, "grad_norm": 2.176529725496333, "learning_rate": 1.0863738881876369e-06, "loss": 0.1201, "step": 5180 }, { "epoch": 1.178839590443686, "grad_norm": 2.5057338760003276, "learning_rate": 1.0863136085098862e-06, "loss": 0.2077, "step": 5181 }, { "epoch": 1.1790671217292377, "grad_norm": 1.5428397804224192, "learning_rate": 1.08625331940373e-06, "loss": 0.0771, "step": 5182 }, { "epoch": 1.1792946530147894, "grad_norm": 1.8434351161312814, "learning_rate": 1.086193020870401e-06, "loss": 0.1054, "step": 5183 }, { "epoch": 1.1795221843003414, "grad_norm": 1.9271362170030948, "learning_rate": 1.0861327129111313e-06, "loss": 0.0839, "step": 5184 }, { "epoch": 1.1797497155858931, "grad_norm": 1.5531813526055755, "learning_rate": 1.0860723955271533e-06, "loss": 0.0622, "step": 5185 }, { "epoch": 1.179977246871445, "grad_norm": 1.759159281092021, "learning_rate": 1.0860120687196998e-06, "loss": 0.1115, "step": 5186 }, { "epoch": 1.1802047781569966, "grad_norm": 1.8647371747744599, "learning_rate": 1.0859517324900042e-06, "loss": 0.0861, "step": 5187 }, { "epoch": 1.1804323094425484, "grad_norm": 1.805897808366817, "learning_rate": 1.0858913868392993e-06, "loss": 0.1431, "step": 5188 }, { "epoch": 1.1806598407281002, "grad_norm": 1.6704627128473584, "learning_rate": 1.0858310317688184e-06, "loss": 0.0873, "step": 5189 }, { "epoch": 1.180887372013652, "grad_norm": 1.343094360546439, "learning_rate": 1.0857706672797954e-06, "loss": 0.0859, "step": 5190 }, { "epoch": 1.1811149032992037, "grad_norm": 2.066147911559209, "learning_rate": 1.0857102933734636e-06, "loss": 0.1149, "step": 5191 }, { "epoch": 1.1813424345847554, "grad_norm": 1.4544321870028036, "learning_rate": 1.0856499100510575e-06, "loss": 0.0601, "step": 5192 }, { "epoch": 1.1815699658703072, "grad_norm": 2.1031693123515574, "learning_rate": 1.0855895173138107e-06, "loss": 0.1274, "step": 5193 }, { "epoch": 1.181797497155859, "grad_norm": 1.7573489864066676, "learning_rate": 1.0855291151629576e-06, "loss": 0.0615, "step": 5194 }, { "epoch": 1.1820250284414107, "grad_norm": 1.9854988027981146, "learning_rate": 1.0854687035997329e-06, "loss": 0.097, "step": 5195 }, { "epoch": 1.1822525597269624, "grad_norm": 1.5268785940107066, "learning_rate": 1.0854082826253712e-06, "loss": 0.0491, "step": 5196 }, { "epoch": 1.1824800910125142, "grad_norm": 1.1410295227600973, "learning_rate": 1.0853478522411072e-06, "loss": 0.0546, "step": 5197 }, { "epoch": 1.182707622298066, "grad_norm": 1.4884685353979168, "learning_rate": 1.0852874124481764e-06, "loss": 0.0544, "step": 5198 }, { "epoch": 1.1829351535836177, "grad_norm": 1.3165143158682053, "learning_rate": 1.0852269632478138e-06, "loss": 0.0916, "step": 5199 }, { "epoch": 1.1831626848691694, "grad_norm": 1.3572510385460108, "learning_rate": 1.085166504641255e-06, "loss": 0.0509, "step": 5200 }, { "epoch": 1.1833902161547214, "grad_norm": 0.9661066398138327, "learning_rate": 1.0851060366297356e-06, "loss": 0.0875, "step": 5201 }, { "epoch": 1.1836177474402731, "grad_norm": 2.333836932538073, "learning_rate": 1.0850455592144916e-06, "loss": 0.1679, "step": 5202 }, { "epoch": 1.183845278725825, "grad_norm": 1.445056359388779, "learning_rate": 1.0849850723967585e-06, "loss": 0.071, "step": 5203 }, { "epoch": 1.1840728100113767, "grad_norm": 1.6295801416897442, "learning_rate": 1.0849245761777733e-06, "loss": 0.0938, "step": 5204 }, { "epoch": 1.1843003412969284, "grad_norm": 2.3549017722464316, "learning_rate": 1.0848640705587718e-06, "loss": 0.124, "step": 5205 }, { "epoch": 1.1845278725824802, "grad_norm": 2.050952800493849, "learning_rate": 1.0848035555409911e-06, "loss": 0.0691, "step": 5206 }, { "epoch": 1.184755403868032, "grad_norm": 2.077817689629389, "learning_rate": 1.0847430311256676e-06, "loss": 0.0818, "step": 5207 }, { "epoch": 1.1849829351535837, "grad_norm": 2.253900685722683, "learning_rate": 1.0846824973140388e-06, "loss": 0.1127, "step": 5208 }, { "epoch": 1.1852104664391354, "grad_norm": 1.9840592846831089, "learning_rate": 1.0846219541073417e-06, "loss": 0.0618, "step": 5209 }, { "epoch": 1.1854379977246872, "grad_norm": 1.6704261322608704, "learning_rate": 1.084561401506813e-06, "loss": 0.1531, "step": 5210 }, { "epoch": 1.185665529010239, "grad_norm": 2.1725141580654896, "learning_rate": 1.0845008395136915e-06, "loss": 0.2325, "step": 5211 }, { "epoch": 1.1858930602957907, "grad_norm": 2.393322470314106, "learning_rate": 1.0844402681292144e-06, "loss": 0.2173, "step": 5212 }, { "epoch": 1.1861205915813424, "grad_norm": 1.6261443486893132, "learning_rate": 1.084379687354619e-06, "loss": 0.0728, "step": 5213 }, { "epoch": 1.1863481228668942, "grad_norm": 2.2205983994574523, "learning_rate": 1.0843190971911447e-06, "loss": 0.1077, "step": 5214 }, { "epoch": 1.186575654152446, "grad_norm": 1.6223311298818315, "learning_rate": 1.0842584976400292e-06, "loss": 0.169, "step": 5215 }, { "epoch": 1.1868031854379977, "grad_norm": 1.5664039317333338, "learning_rate": 1.084197888702511e-06, "loss": 0.1082, "step": 5216 }, { "epoch": 1.1870307167235494, "grad_norm": 2.1952187183985354, "learning_rate": 1.0841372703798289e-06, "loss": 0.1031, "step": 5217 }, { "epoch": 1.1872582480091012, "grad_norm": 1.5272178345552438, "learning_rate": 1.0840766426732219e-06, "loss": 0.0822, "step": 5218 }, { "epoch": 1.187485779294653, "grad_norm": 1.4542703091629854, "learning_rate": 1.0840160055839291e-06, "loss": 0.1352, "step": 5219 }, { "epoch": 1.1877133105802047, "grad_norm": 1.6566468578059188, "learning_rate": 1.08395535911319e-06, "loss": 0.0599, "step": 5220 }, { "epoch": 1.1879408418657564, "grad_norm": 1.4369561966167148, "learning_rate": 1.0838947032622436e-06, "loss": 0.0588, "step": 5221 }, { "epoch": 1.1881683731513082, "grad_norm": 1.6911609691995355, "learning_rate": 1.0838340380323297e-06, "loss": 0.107, "step": 5222 }, { "epoch": 1.1883959044368602, "grad_norm": 2.7120228637912085, "learning_rate": 1.083773363424689e-06, "loss": 0.1183, "step": 5223 }, { "epoch": 1.188623435722412, "grad_norm": 1.4822450492657908, "learning_rate": 1.0837126794405603e-06, "loss": 0.0733, "step": 5224 }, { "epoch": 1.1888509670079637, "grad_norm": 2.189919934484601, "learning_rate": 1.083651986081185e-06, "loss": 0.1001, "step": 5225 }, { "epoch": 1.1890784982935154, "grad_norm": 1.3261475366877535, "learning_rate": 1.0835912833478029e-06, "loss": 0.1536, "step": 5226 }, { "epoch": 1.1893060295790672, "grad_norm": 2.0355476056994553, "learning_rate": 1.0835305712416546e-06, "loss": 0.0915, "step": 5227 }, { "epoch": 1.189533560864619, "grad_norm": 1.9337187468158625, "learning_rate": 1.0834698497639817e-06, "loss": 0.1797, "step": 5228 }, { "epoch": 1.1897610921501707, "grad_norm": 1.528534377609125, "learning_rate": 1.0834091189160243e-06, "loss": 0.0623, "step": 5229 }, { "epoch": 1.1899886234357224, "grad_norm": 1.7978417621819656, "learning_rate": 1.0833483786990243e-06, "loss": 0.1616, "step": 5230 }, { "epoch": 1.1902161547212742, "grad_norm": 1.3180132857072435, "learning_rate": 1.0832876291142228e-06, "loss": 0.0656, "step": 5231 }, { "epoch": 1.190443686006826, "grad_norm": 1.5694544783138118, "learning_rate": 1.0832268701628613e-06, "loss": 0.1214, "step": 5232 }, { "epoch": 1.1906712172923777, "grad_norm": 2.7018835662805047, "learning_rate": 1.083166101846182e-06, "loss": 0.2433, "step": 5233 }, { "epoch": 1.1908987485779294, "grad_norm": 1.2165773857409203, "learning_rate": 1.0831053241654265e-06, "loss": 0.0302, "step": 5234 }, { "epoch": 1.1911262798634812, "grad_norm": 1.4873073484241224, "learning_rate": 1.0830445371218374e-06, "loss": 0.1109, "step": 5235 }, { "epoch": 1.191353811149033, "grad_norm": 2.169001416121012, "learning_rate": 1.0829837407166565e-06, "loss": 0.1875, "step": 5236 }, { "epoch": 1.1915813424345847, "grad_norm": 2.2992957388980924, "learning_rate": 1.082922934951127e-06, "loss": 0.1762, "step": 5237 }, { "epoch": 1.1918088737201364, "grad_norm": 3.0098436793577776, "learning_rate": 1.082862119826491e-06, "loss": 0.1161, "step": 5238 }, { "epoch": 1.1920364050056882, "grad_norm": 1.0473761856310173, "learning_rate": 1.0828012953439921e-06, "loss": 0.151, "step": 5239 }, { "epoch": 1.1922639362912402, "grad_norm": 2.200991609795066, "learning_rate": 1.082740461504873e-06, "loss": 0.049, "step": 5240 }, { "epoch": 1.192491467576792, "grad_norm": 2.1341044427621654, "learning_rate": 1.0826796183103774e-06, "loss": 0.0995, "step": 5241 }, { "epoch": 1.1927189988623437, "grad_norm": 1.797138107236956, "learning_rate": 1.0826187657617484e-06, "loss": 0.0836, "step": 5242 }, { "epoch": 1.1929465301478954, "grad_norm": 1.5361734716911093, "learning_rate": 1.0825579038602298e-06, "loss": 0.0956, "step": 5243 }, { "epoch": 1.1931740614334472, "grad_norm": 1.3830245145453257, "learning_rate": 1.082497032607066e-06, "loss": 0.0859, "step": 5244 }, { "epoch": 1.193401592718999, "grad_norm": 1.653730545070457, "learning_rate": 1.0824361520035004e-06, "loss": 0.1367, "step": 5245 }, { "epoch": 1.1936291240045507, "grad_norm": 1.5316553149024767, "learning_rate": 1.0823752620507778e-06, "loss": 0.0923, "step": 5246 }, { "epoch": 1.1938566552901024, "grad_norm": 1.2318860231960242, "learning_rate": 1.0823143627501423e-06, "loss": 0.0465, "step": 5247 }, { "epoch": 1.1940841865756542, "grad_norm": 1.368501769833903, "learning_rate": 1.082253454102839e-06, "loss": 0.0527, "step": 5248 }, { "epoch": 1.194311717861206, "grad_norm": 1.688804518608022, "learning_rate": 1.0821925361101124e-06, "loss": 0.0794, "step": 5249 }, { "epoch": 1.1945392491467577, "grad_norm": 1.350823772473527, "learning_rate": 1.0821316087732075e-06, "loss": 0.0687, "step": 5250 }, { "epoch": 1.1947667804323094, "grad_norm": 1.1567294535302972, "learning_rate": 1.0820706720933698e-06, "loss": 0.104, "step": 5251 }, { "epoch": 1.1949943117178612, "grad_norm": 1.6015117566025172, "learning_rate": 1.0820097260718448e-06, "loss": 0.0855, "step": 5252 }, { "epoch": 1.195221843003413, "grad_norm": 2.133718516574482, "learning_rate": 1.081948770709878e-06, "loss": 0.1237, "step": 5253 }, { "epoch": 1.1954493742889647, "grad_norm": 2.192934701634061, "learning_rate": 1.0818878060087151e-06, "loss": 0.1113, "step": 5254 }, { "epoch": 1.1956769055745164, "grad_norm": 1.5826077427944494, "learning_rate": 1.081826831969602e-06, "loss": 0.122, "step": 5255 }, { "epoch": 1.1959044368600682, "grad_norm": 1.9016171603713237, "learning_rate": 1.0817658485937855e-06, "loss": 0.1475, "step": 5256 }, { "epoch": 1.19613196814562, "grad_norm": 1.574823856287606, "learning_rate": 1.0817048558825114e-06, "loss": 0.1361, "step": 5257 }, { "epoch": 1.1963594994311717, "grad_norm": 1.8062796035655573, "learning_rate": 1.0816438538370262e-06, "loss": 0.0838, "step": 5258 }, { "epoch": 1.1965870307167235, "grad_norm": 2.150669189308933, "learning_rate": 1.0815828424585772e-06, "loss": 0.0727, "step": 5259 }, { "epoch": 1.1968145620022752, "grad_norm": 2.37299043188769, "learning_rate": 1.081521821748411e-06, "loss": 0.0777, "step": 5260 }, { "epoch": 1.197042093287827, "grad_norm": 1.5748977460726736, "learning_rate": 1.081460791707775e-06, "loss": 0.0713, "step": 5261 }, { "epoch": 1.197269624573379, "grad_norm": 1.845039914221183, "learning_rate": 1.0813997523379163e-06, "loss": 0.1783, "step": 5262 }, { "epoch": 1.1974971558589307, "grad_norm": 1.2418153199696063, "learning_rate": 1.0813387036400825e-06, "loss": 0.0838, "step": 5263 }, { "epoch": 1.1977246871444824, "grad_norm": 1.601138886177542, "learning_rate": 1.081277645615521e-06, "loss": 0.0561, "step": 5264 }, { "epoch": 1.1979522184300342, "grad_norm": 2.477792000861364, "learning_rate": 1.0812165782654806e-06, "loss": 0.0802, "step": 5265 }, { "epoch": 1.198179749715586, "grad_norm": 1.8339107520724027, "learning_rate": 1.0811555015912086e-06, "loss": 0.0638, "step": 5266 }, { "epoch": 1.1984072810011377, "grad_norm": 1.1686725493823096, "learning_rate": 1.0810944155939536e-06, "loss": 0.059, "step": 5267 }, { "epoch": 1.1986348122866894, "grad_norm": 1.9602369962972666, "learning_rate": 1.081033320274964e-06, "loss": 0.1053, "step": 5268 }, { "epoch": 1.1988623435722412, "grad_norm": 1.1474755283420095, "learning_rate": 1.0809722156354884e-06, "loss": 0.1005, "step": 5269 }, { "epoch": 1.199089874857793, "grad_norm": 0.9917236908020932, "learning_rate": 1.0809111016767762e-06, "loss": 0.0622, "step": 5270 }, { "epoch": 1.1993174061433447, "grad_norm": 1.4830981024424121, "learning_rate": 1.0808499784000756e-06, "loss": 0.0561, "step": 5271 }, { "epoch": 1.1995449374288965, "grad_norm": 1.2794179990446428, "learning_rate": 1.0807888458066364e-06, "loss": 0.0547, "step": 5272 }, { "epoch": 1.1997724687144482, "grad_norm": 2.4510983663131745, "learning_rate": 1.0807277038977083e-06, "loss": 0.2331, "step": 5273 }, { "epoch": 1.2, "grad_norm": 3.2059009973371646, "learning_rate": 1.0806665526745403e-06, "loss": 0.2011, "step": 5274 }, { "epoch": 1.2002275312855517, "grad_norm": 1.603764392751713, "learning_rate": 1.0806053921383823e-06, "loss": 0.0736, "step": 5275 }, { "epoch": 1.2004550625711035, "grad_norm": 1.098547269397925, "learning_rate": 1.0805442222904846e-06, "loss": 0.071, "step": 5276 }, { "epoch": 1.2006825938566552, "grad_norm": 2.2506163203297875, "learning_rate": 1.0804830431320972e-06, "loss": 0.1489, "step": 5277 }, { "epoch": 1.200910125142207, "grad_norm": 1.904831313890037, "learning_rate": 1.0804218546644708e-06, "loss": 0.128, "step": 5278 }, { "epoch": 1.201137656427759, "grad_norm": 1.3427616236849924, "learning_rate": 1.0803606568888557e-06, "loss": 0.0641, "step": 5279 }, { "epoch": 1.2013651877133107, "grad_norm": 2.695821206487897, "learning_rate": 1.0802994498065027e-06, "loss": 0.1282, "step": 5280 }, { "epoch": 1.2015927189988624, "grad_norm": 2.0080613955222235, "learning_rate": 1.0802382334186627e-06, "loss": 0.0821, "step": 5281 }, { "epoch": 1.2018202502844142, "grad_norm": 1.6911329459727755, "learning_rate": 1.080177007726587e-06, "loss": 0.1019, "step": 5282 }, { "epoch": 1.202047781569966, "grad_norm": 1.6381634754276007, "learning_rate": 1.080115772731527e-06, "loss": 0.0434, "step": 5283 }, { "epoch": 1.2022753128555177, "grad_norm": 1.1969099245269164, "learning_rate": 1.0800545284347338e-06, "loss": 0.0616, "step": 5284 }, { "epoch": 1.2025028441410694, "grad_norm": 1.7741375061533398, "learning_rate": 1.0799932748374598e-06, "loss": 0.0436, "step": 5285 }, { "epoch": 1.2027303754266212, "grad_norm": 2.264571806309411, "learning_rate": 1.0799320119409562e-06, "loss": 0.104, "step": 5286 }, { "epoch": 1.202957906712173, "grad_norm": 1.848273839594112, "learning_rate": 1.0798707397464756e-06, "loss": 0.117, "step": 5287 }, { "epoch": 1.2031854379977247, "grad_norm": 2.262303139744276, "learning_rate": 1.0798094582552703e-06, "loss": 0.1028, "step": 5288 }, { "epoch": 1.2034129692832765, "grad_norm": 2.0589025509516263, "learning_rate": 1.0797481674685925e-06, "loss": 0.2736, "step": 5289 }, { "epoch": 1.2036405005688282, "grad_norm": 1.7761896760428244, "learning_rate": 1.0796868673876947e-06, "loss": 0.087, "step": 5290 }, { "epoch": 1.20386803185438, "grad_norm": 1.7782779357046952, "learning_rate": 1.0796255580138303e-06, "loss": 0.0899, "step": 5291 }, { "epoch": 1.2040955631399317, "grad_norm": 1.1500037983509879, "learning_rate": 1.0795642393482523e-06, "loss": 0.118, "step": 5292 }, { "epoch": 1.2043230944254835, "grad_norm": 1.842925587308416, "learning_rate": 1.0795029113922136e-06, "loss": 0.0426, "step": 5293 }, { "epoch": 1.2045506257110352, "grad_norm": 2.1630846104313783, "learning_rate": 1.0794415741469677e-06, "loss": 0.0854, "step": 5294 }, { "epoch": 1.204778156996587, "grad_norm": 1.9984928427035924, "learning_rate": 1.0793802276137683e-06, "loss": 0.0758, "step": 5295 }, { "epoch": 1.2050056882821387, "grad_norm": 1.7270916728186407, "learning_rate": 1.0793188717938693e-06, "loss": 0.0984, "step": 5296 }, { "epoch": 1.2052332195676905, "grad_norm": 2.562446895041978, "learning_rate": 1.0792575066885245e-06, "loss": 0.0848, "step": 5297 }, { "epoch": 1.2054607508532422, "grad_norm": 1.0211875489183757, "learning_rate": 1.0791961322989882e-06, "loss": 0.0809, "step": 5298 }, { "epoch": 1.205688282138794, "grad_norm": 1.4354948603513233, "learning_rate": 1.0791347486265147e-06, "loss": 0.0578, "step": 5299 }, { "epoch": 1.2059158134243457, "grad_norm": 1.7350825872789628, "learning_rate": 1.0790733556723589e-06, "loss": 0.0941, "step": 5300 }, { "epoch": 1.2061433447098977, "grad_norm": 1.8738498200816718, "learning_rate": 1.079011953437775e-06, "loss": 0.0693, "step": 5301 }, { "epoch": 1.2063708759954495, "grad_norm": 1.3923030676619426, "learning_rate": 1.0789505419240185e-06, "loss": 0.0453, "step": 5302 }, { "epoch": 1.2065984072810012, "grad_norm": 1.5273840730643755, "learning_rate": 1.0788891211323442e-06, "loss": 0.1353, "step": 5303 }, { "epoch": 1.206825938566553, "grad_norm": 1.33456271047386, "learning_rate": 1.0788276910640074e-06, "loss": 0.0269, "step": 5304 }, { "epoch": 1.2070534698521047, "grad_norm": 1.3783261991509468, "learning_rate": 1.0787662517202641e-06, "loss": 0.0564, "step": 5305 }, { "epoch": 1.2072810011376565, "grad_norm": 1.2413093608926025, "learning_rate": 1.0787048031023693e-06, "loss": 0.0514, "step": 5306 }, { "epoch": 1.2075085324232082, "grad_norm": 2.067632414807228, "learning_rate": 1.0786433452115794e-06, "loss": 0.0852, "step": 5307 }, { "epoch": 1.20773606370876, "grad_norm": 1.6493210193338594, "learning_rate": 1.0785818780491502e-06, "loss": 0.0957, "step": 5308 }, { "epoch": 1.2079635949943117, "grad_norm": 2.335430233427145, "learning_rate": 1.0785204016163384e-06, "loss": 0.1113, "step": 5309 }, { "epoch": 1.2081911262798635, "grad_norm": 1.8914362366278195, "learning_rate": 1.0784589159143999e-06, "loss": 0.0587, "step": 5310 }, { "epoch": 1.2084186575654152, "grad_norm": 1.8389745120373866, "learning_rate": 1.0783974209445915e-06, "loss": 0.0582, "step": 5311 }, { "epoch": 1.208646188850967, "grad_norm": 1.2870683840945183, "learning_rate": 1.0783359167081705e-06, "loss": 0.0769, "step": 5312 }, { "epoch": 1.2088737201365187, "grad_norm": 1.9375950528547237, "learning_rate": 1.0782744032063935e-06, "loss": 0.1055, "step": 5313 }, { "epoch": 1.2091012514220705, "grad_norm": 2.9149019557260667, "learning_rate": 1.078212880440518e-06, "loss": 0.0749, "step": 5314 }, { "epoch": 1.2093287827076222, "grad_norm": 1.4032344304927438, "learning_rate": 1.0781513484118008e-06, "loss": 0.0953, "step": 5315 }, { "epoch": 1.209556313993174, "grad_norm": 2.021712881413012, "learning_rate": 1.0780898071215004e-06, "loss": 0.074, "step": 5316 }, { "epoch": 1.2097838452787257, "grad_norm": 1.295514380352357, "learning_rate": 1.0780282565708738e-06, "loss": 0.1227, "step": 5317 }, { "epoch": 1.2100113765642777, "grad_norm": 2.7681513279773555, "learning_rate": 1.0779666967611796e-06, "loss": 0.187, "step": 5318 }, { "epoch": 1.2102389078498295, "grad_norm": 1.8640645938184524, "learning_rate": 1.0779051276936755e-06, "loss": 0.0711, "step": 5319 }, { "epoch": 1.2104664391353812, "grad_norm": 1.3066357408288654, "learning_rate": 1.0778435493696202e-06, "loss": 0.1392, "step": 5320 }, { "epoch": 1.210693970420933, "grad_norm": 2.2361490684160663, "learning_rate": 1.0777819617902718e-06, "loss": 0.1587, "step": 5321 }, { "epoch": 1.2109215017064847, "grad_norm": 2.28890369536384, "learning_rate": 1.0777203649568896e-06, "loss": 0.1427, "step": 5322 }, { "epoch": 1.2111490329920365, "grad_norm": 2.443743866077574, "learning_rate": 1.077658758870732e-06, "loss": 0.0844, "step": 5323 }, { "epoch": 1.2113765642775882, "grad_norm": 2.5045475861456246, "learning_rate": 1.0775971435330588e-06, "loss": 0.0822, "step": 5324 }, { "epoch": 1.21160409556314, "grad_norm": 1.4419614896477466, "learning_rate": 1.0775355189451286e-06, "loss": 0.1231, "step": 5325 }, { "epoch": 1.2118316268486917, "grad_norm": 2.1607954202798574, "learning_rate": 1.0774738851082011e-06, "loss": 0.0656, "step": 5326 }, { "epoch": 1.2120591581342435, "grad_norm": 2.0076172738726563, "learning_rate": 1.0774122420235363e-06, "loss": 0.1213, "step": 5327 }, { "epoch": 1.2122866894197952, "grad_norm": 2.243065656502911, "learning_rate": 1.0773505896923936e-06, "loss": 0.2355, "step": 5328 }, { "epoch": 1.212514220705347, "grad_norm": 1.5464308593607745, "learning_rate": 1.0772889281160335e-06, "loss": 0.1193, "step": 5329 }, { "epoch": 1.2127417519908987, "grad_norm": 2.0341247889416456, "learning_rate": 1.0772272572957158e-06, "loss": 0.0715, "step": 5330 }, { "epoch": 1.2129692832764505, "grad_norm": 1.638874906761963, "learning_rate": 1.0771655772327013e-06, "loss": 0.0915, "step": 5331 }, { "epoch": 1.2131968145620022, "grad_norm": 1.8261492962825066, "learning_rate": 1.0771038879282505e-06, "loss": 0.1693, "step": 5332 }, { "epoch": 1.213424345847554, "grad_norm": 1.6573900903132563, "learning_rate": 1.0770421893836243e-06, "loss": 0.1321, "step": 5333 }, { "epoch": 1.2136518771331057, "grad_norm": 1.0977836462853496, "learning_rate": 1.0769804816000835e-06, "loss": 0.0332, "step": 5334 }, { "epoch": 1.2138794084186575, "grad_norm": 2.5558904100663935, "learning_rate": 1.0769187645788895e-06, "loss": 0.1563, "step": 5335 }, { "epoch": 1.2141069397042092, "grad_norm": 1.6661784494218759, "learning_rate": 1.0768570383213035e-06, "loss": 0.0554, "step": 5336 }, { "epoch": 1.214334470989761, "grad_norm": 2.081264112380425, "learning_rate": 1.0767953028285872e-06, "loss": 0.0795, "step": 5337 }, { "epoch": 1.2145620022753127, "grad_norm": 1.3727735429346972, "learning_rate": 1.0767335581020024e-06, "loss": 0.0684, "step": 5338 }, { "epoch": 1.2147895335608645, "grad_norm": 1.2415036393799737, "learning_rate": 1.076671804142811e-06, "loss": 0.065, "step": 5339 }, { "epoch": 1.2150170648464165, "grad_norm": 2.1447181222162635, "learning_rate": 1.076610040952275e-06, "loss": 0.1577, "step": 5340 }, { "epoch": 1.2152445961319682, "grad_norm": 1.8197546232930755, "learning_rate": 1.076548268531657e-06, "loss": 0.0763, "step": 5341 }, { "epoch": 1.21547212741752, "grad_norm": 1.5817686653534704, "learning_rate": 1.0764864868822194e-06, "loss": 0.1499, "step": 5342 }, { "epoch": 1.2156996587030717, "grad_norm": 1.7955937776313586, "learning_rate": 1.0764246960052247e-06, "loss": 0.0699, "step": 5343 }, { "epoch": 1.2159271899886235, "grad_norm": 1.5998182027119985, "learning_rate": 1.0763628959019359e-06, "loss": 0.0873, "step": 5344 }, { "epoch": 1.2161547212741752, "grad_norm": 2.0206122711914953, "learning_rate": 1.076301086573616e-06, "loss": 0.0591, "step": 5345 }, { "epoch": 1.216382252559727, "grad_norm": 1.7209121866538086, "learning_rate": 1.076239268021529e-06, "loss": 0.1182, "step": 5346 }, { "epoch": 1.2166097838452787, "grad_norm": 1.6012738381710532, "learning_rate": 1.0761774402469375e-06, "loss": 0.1265, "step": 5347 }, { "epoch": 1.2168373151308305, "grad_norm": 1.5190529580939407, "learning_rate": 1.0761156032511052e-06, "loss": 0.0509, "step": 5348 }, { "epoch": 1.2170648464163822, "grad_norm": 1.2081251429678452, "learning_rate": 1.0760537570352963e-06, "loss": 0.0699, "step": 5349 }, { "epoch": 1.217292377701934, "grad_norm": 1.270701433256153, "learning_rate": 1.0759919016007747e-06, "loss": 0.0988, "step": 5350 }, { "epoch": 1.2175199089874857, "grad_norm": 1.7911755711480872, "learning_rate": 1.0759300369488046e-06, "loss": 0.1166, "step": 5351 }, { "epoch": 1.2177474402730375, "grad_norm": 1.9513325042672363, "learning_rate": 1.0758681630806502e-06, "loss": 0.1296, "step": 5352 }, { "epoch": 1.2179749715585892, "grad_norm": 1.7136195442158118, "learning_rate": 1.0758062799975765e-06, "loss": 0.1126, "step": 5353 }, { "epoch": 1.218202502844141, "grad_norm": 1.744883740836297, "learning_rate": 1.075744387700848e-06, "loss": 0.1597, "step": 5354 }, { "epoch": 1.2184300341296928, "grad_norm": 1.417163512342875, "learning_rate": 1.0756824861917294e-06, "loss": 0.0616, "step": 5355 }, { "epoch": 1.2186575654152445, "grad_norm": 2.5446233172568244, "learning_rate": 1.0756205754714866e-06, "loss": 0.1226, "step": 5356 }, { "epoch": 1.2188850967007965, "grad_norm": 1.2377188221566995, "learning_rate": 1.0755586555413845e-06, "loss": 0.0689, "step": 5357 }, { "epoch": 1.2191126279863482, "grad_norm": 0.8909679121651022, "learning_rate": 1.0754967264026882e-06, "loss": 0.0289, "step": 5358 }, { "epoch": 1.2193401592719, "grad_norm": 2.0352234100842055, "learning_rate": 1.0754347880566643e-06, "loss": 0.0828, "step": 5359 }, { "epoch": 1.2195676905574517, "grad_norm": 1.0940962413492108, "learning_rate": 1.075372840504578e-06, "loss": 0.0727, "step": 5360 }, { "epoch": 1.2197952218430035, "grad_norm": 2.270597989691131, "learning_rate": 1.0753108837476958e-06, "loss": 0.0725, "step": 5361 }, { "epoch": 1.2200227531285552, "grad_norm": 3.189546972379516, "learning_rate": 1.0752489177872839e-06, "loss": 0.1068, "step": 5362 }, { "epoch": 1.220250284414107, "grad_norm": 1.2750167089262343, "learning_rate": 1.0751869426246086e-06, "loss": 0.1154, "step": 5363 }, { "epoch": 1.2204778156996587, "grad_norm": 2.135052433897321, "learning_rate": 1.0751249582609368e-06, "loss": 0.0865, "step": 5364 }, { "epoch": 1.2207053469852105, "grad_norm": 1.244270521244428, "learning_rate": 1.075062964697535e-06, "loss": 0.0383, "step": 5365 }, { "epoch": 1.2209328782707622, "grad_norm": 2.7250882706023827, "learning_rate": 1.0750009619356706e-06, "loss": 0.1476, "step": 5366 }, { "epoch": 1.221160409556314, "grad_norm": 1.9790334045682223, "learning_rate": 1.0749389499766106e-06, "loss": 0.1329, "step": 5367 }, { "epoch": 1.2213879408418657, "grad_norm": 2.648242876474357, "learning_rate": 1.0748769288216226e-06, "loss": 0.1221, "step": 5368 }, { "epoch": 1.2216154721274175, "grad_norm": 1.8748774083050568, "learning_rate": 1.074814898471974e-06, "loss": 0.1707, "step": 5369 }, { "epoch": 1.2218430034129693, "grad_norm": 1.8555297714441998, "learning_rate": 1.0747528589289327e-06, "loss": 0.0599, "step": 5370 }, { "epoch": 1.222070534698521, "grad_norm": 0.9556357019620195, "learning_rate": 1.0746908101937666e-06, "loss": 0.0234, "step": 5371 }, { "epoch": 1.2222980659840728, "grad_norm": 1.5483602186289163, "learning_rate": 1.0746287522677439e-06, "loss": 0.098, "step": 5372 }, { "epoch": 1.2225255972696245, "grad_norm": 1.484110142108006, "learning_rate": 1.074566685152133e-06, "loss": 0.1136, "step": 5373 }, { "epoch": 1.2227531285551763, "grad_norm": 0.860555011969557, "learning_rate": 1.0745046088482025e-06, "loss": 0.0212, "step": 5374 }, { "epoch": 1.222980659840728, "grad_norm": 2.0013686983091783, "learning_rate": 1.074442523357221e-06, "loss": 0.0731, "step": 5375 }, { "epoch": 1.2232081911262798, "grad_norm": 2.4325343000739887, "learning_rate": 1.0743804286804573e-06, "loss": 0.1035, "step": 5376 }, { "epoch": 1.2234357224118315, "grad_norm": 1.9106606707808065, "learning_rate": 1.0743183248191806e-06, "loss": 0.0576, "step": 5377 }, { "epoch": 1.2236632536973833, "grad_norm": 0.5769559312003509, "learning_rate": 1.0742562117746604e-06, "loss": 0.0339, "step": 5378 }, { "epoch": 1.2238907849829352, "grad_norm": 1.2801763987646924, "learning_rate": 1.074194089548166e-06, "loss": 0.0796, "step": 5379 }, { "epoch": 1.224118316268487, "grad_norm": 1.2678528066846348, "learning_rate": 1.0741319581409667e-06, "loss": 0.0785, "step": 5380 }, { "epoch": 1.2243458475540387, "grad_norm": 1.866996912283029, "learning_rate": 1.0740698175543332e-06, "loss": 0.0697, "step": 5381 }, { "epoch": 1.2245733788395905, "grad_norm": 1.5828941718449743, "learning_rate": 1.0740076677895348e-06, "loss": 0.095, "step": 5382 }, { "epoch": 1.2248009101251423, "grad_norm": 2.0245338544598757, "learning_rate": 1.0739455088478422e-06, "loss": 0.0893, "step": 5383 }, { "epoch": 1.225028441410694, "grad_norm": 1.9376486203134127, "learning_rate": 1.0738833407305254e-06, "loss": 0.125, "step": 5384 }, { "epoch": 1.2252559726962458, "grad_norm": 1.2831894786317846, "learning_rate": 1.0738211634388554e-06, "loss": 0.061, "step": 5385 }, { "epoch": 1.2254835039817975, "grad_norm": 2.28610050767493, "learning_rate": 1.0737589769741025e-06, "loss": 0.0829, "step": 5386 }, { "epoch": 1.2257110352673493, "grad_norm": 2.079256703978429, "learning_rate": 1.0736967813375382e-06, "loss": 0.0933, "step": 5387 }, { "epoch": 1.225938566552901, "grad_norm": 1.4133775667119193, "learning_rate": 1.0736345765304335e-06, "loss": 0.1665, "step": 5388 }, { "epoch": 1.2261660978384528, "grad_norm": 1.8025993666918807, "learning_rate": 1.0735723625540596e-06, "loss": 0.1953, "step": 5389 }, { "epoch": 1.2263936291240045, "grad_norm": 1.6168058770721976, "learning_rate": 1.073510139409688e-06, "loss": 0.0958, "step": 5390 }, { "epoch": 1.2266211604095563, "grad_norm": 0.8413217564117436, "learning_rate": 1.0734479070985908e-06, "loss": 0.0231, "step": 5391 }, { "epoch": 1.226848691695108, "grad_norm": 1.7545268518006023, "learning_rate": 1.0733856656220396e-06, "loss": 0.1106, "step": 5392 }, { "epoch": 1.2270762229806598, "grad_norm": 0.9684484368779872, "learning_rate": 1.0733234149813065e-06, "loss": 0.0525, "step": 5393 }, { "epoch": 1.2273037542662115, "grad_norm": 1.118301840198287, "learning_rate": 1.0732611551776639e-06, "loss": 0.0526, "step": 5394 }, { "epoch": 1.2275312855517633, "grad_norm": 2.4499203280689064, "learning_rate": 1.0731988862123841e-06, "loss": 0.0808, "step": 5395 }, { "epoch": 1.2277588168373152, "grad_norm": 1.8673149754872067, "learning_rate": 1.07313660808674e-06, "loss": 0.081, "step": 5396 }, { "epoch": 1.227986348122867, "grad_norm": 1.187493720900378, "learning_rate": 1.0730743208020044e-06, "loss": 0.0467, "step": 5397 }, { "epoch": 1.2282138794084188, "grad_norm": 1.5388729878525773, "learning_rate": 1.0730120243594504e-06, "loss": 0.0801, "step": 5398 }, { "epoch": 1.2284414106939705, "grad_norm": 1.3665636981078553, "learning_rate": 1.0729497187603508e-06, "loss": 0.1399, "step": 5399 }, { "epoch": 1.2286689419795223, "grad_norm": 2.1297488683384715, "learning_rate": 1.0728874040059798e-06, "loss": 0.0788, "step": 5400 }, { "epoch": 1.228896473265074, "grad_norm": 2.7263173378574153, "learning_rate": 1.07282508009761e-06, "loss": 0.0686, "step": 5401 }, { "epoch": 1.2291240045506258, "grad_norm": 1.2951500192928536, "learning_rate": 1.072762747036516e-06, "loss": 0.0842, "step": 5402 }, { "epoch": 1.2293515358361775, "grad_norm": 1.952069542680283, "learning_rate": 1.0727004048239715e-06, "loss": 0.1264, "step": 5403 }, { "epoch": 1.2295790671217293, "grad_norm": 1.6010547640917303, "learning_rate": 1.0726380534612507e-06, "loss": 0.0853, "step": 5404 }, { "epoch": 1.229806598407281, "grad_norm": 1.9483808000546532, "learning_rate": 1.0725756929496277e-06, "loss": 0.0829, "step": 5405 }, { "epoch": 1.2300341296928328, "grad_norm": 2.4565704934042647, "learning_rate": 1.0725133232903773e-06, "loss": 0.2031, "step": 5406 }, { "epoch": 1.2302616609783845, "grad_norm": 1.1049855430303344, "learning_rate": 1.0724509444847741e-06, "loss": 0.1009, "step": 5407 }, { "epoch": 1.2304891922639363, "grad_norm": 1.9135004329441547, "learning_rate": 1.0723885565340933e-06, "loss": 0.1154, "step": 5408 }, { "epoch": 1.230716723549488, "grad_norm": 1.914997345530264, "learning_rate": 1.0723261594396095e-06, "loss": 0.1121, "step": 5409 }, { "epoch": 1.2309442548350398, "grad_norm": 1.443690506471369, "learning_rate": 1.0722637532025984e-06, "loss": 0.1022, "step": 5410 }, { "epoch": 1.2311717861205915, "grad_norm": 2.6024216163733582, "learning_rate": 1.0722013378243354e-06, "loss": 0.1794, "step": 5411 }, { "epoch": 1.2313993174061433, "grad_norm": 3.20516486682456, "learning_rate": 1.0721389133060958e-06, "loss": 0.0982, "step": 5412 }, { "epoch": 1.231626848691695, "grad_norm": 2.3908403467267445, "learning_rate": 1.0720764796491559e-06, "loss": 0.0561, "step": 5413 }, { "epoch": 1.2318543799772468, "grad_norm": 2.498040600232711, "learning_rate": 1.0720140368547915e-06, "loss": 0.0705, "step": 5414 }, { "epoch": 1.2320819112627985, "grad_norm": 1.9676099266840996, "learning_rate": 1.0719515849242787e-06, "loss": 0.1109, "step": 5415 }, { "epoch": 1.2323094425483503, "grad_norm": 1.3576426657142715, "learning_rate": 1.0718891238588943e-06, "loss": 0.0617, "step": 5416 }, { "epoch": 1.232536973833902, "grad_norm": 1.8753810587976327, "learning_rate": 1.0718266536599145e-06, "loss": 0.1394, "step": 5417 }, { "epoch": 1.232764505119454, "grad_norm": 1.8597389433553986, "learning_rate": 1.0717641743286163e-06, "loss": 0.0849, "step": 5418 }, { "epoch": 1.2329920364050058, "grad_norm": 1.922637481586932, "learning_rate": 1.0717016858662766e-06, "loss": 0.1043, "step": 5419 }, { "epoch": 1.2332195676905575, "grad_norm": 1.620832607357832, "learning_rate": 1.0716391882741722e-06, "loss": 0.1114, "step": 5420 }, { "epoch": 1.2334470989761093, "grad_norm": 2.0014578199306343, "learning_rate": 1.0715766815535812e-06, "loss": 0.1206, "step": 5421 }, { "epoch": 1.233674630261661, "grad_norm": 1.2240310028768215, "learning_rate": 1.0715141657057805e-06, "loss": 0.137, "step": 5422 }, { "epoch": 1.2339021615472128, "grad_norm": 2.138110039795511, "learning_rate": 1.0714516407320482e-06, "loss": 0.1787, "step": 5423 }, { "epoch": 1.2341296928327645, "grad_norm": 2.316015876424966, "learning_rate": 1.0713891066336619e-06, "loss": 0.1035, "step": 5424 }, { "epoch": 1.2343572241183163, "grad_norm": 1.9765888798589069, "learning_rate": 1.0713265634118998e-06, "loss": 0.1094, "step": 5425 }, { "epoch": 1.234584755403868, "grad_norm": 1.7144437579313214, "learning_rate": 1.0712640110680398e-06, "loss": 0.0621, "step": 5426 }, { "epoch": 1.2348122866894198, "grad_norm": 1.4697474263995853, "learning_rate": 1.071201449603361e-06, "loss": 0.1143, "step": 5427 }, { "epoch": 1.2350398179749715, "grad_norm": 2.4457858543323154, "learning_rate": 1.0711388790191418e-06, "loss": 0.0991, "step": 5428 }, { "epoch": 1.2352673492605233, "grad_norm": 1.4119744703675479, "learning_rate": 1.071076299316661e-06, "loss": 0.0973, "step": 5429 }, { "epoch": 1.235494880546075, "grad_norm": 1.4595760481057491, "learning_rate": 1.0710137104971973e-06, "loss": 0.0999, "step": 5430 }, { "epoch": 1.2357224118316268, "grad_norm": 1.5583570046816027, "learning_rate": 1.0709511125620306e-06, "loss": 0.124, "step": 5431 }, { "epoch": 1.2359499431171785, "grad_norm": 1.843599923959497, "learning_rate": 1.0708885055124396e-06, "loss": 0.0858, "step": 5432 }, { "epoch": 1.2361774744027303, "grad_norm": 1.1999122894890224, "learning_rate": 1.0708258893497043e-06, "loss": 0.0479, "step": 5433 }, { "epoch": 1.236405005688282, "grad_norm": 1.457239578244025, "learning_rate": 1.0707632640751042e-06, "loss": 0.0449, "step": 5434 }, { "epoch": 1.236632536973834, "grad_norm": 1.6945912200555417, "learning_rate": 1.0707006296899194e-06, "loss": 0.081, "step": 5435 }, { "epoch": 1.2368600682593858, "grad_norm": 2.6001020024335815, "learning_rate": 1.0706379861954299e-06, "loss": 0.2526, "step": 5436 }, { "epoch": 1.2370875995449375, "grad_norm": 1.3492096197419317, "learning_rate": 1.0705753335929162e-06, "loss": 0.1496, "step": 5437 }, { "epoch": 1.2373151308304893, "grad_norm": 1.6010914508078664, "learning_rate": 1.070512671883659e-06, "loss": 0.0601, "step": 5438 }, { "epoch": 1.237542662116041, "grad_norm": 0.9988092205638144, "learning_rate": 1.0704500010689383e-06, "loss": 0.0708, "step": 5439 }, { "epoch": 1.2377701934015928, "grad_norm": 2.0272592593408856, "learning_rate": 1.0703873211500356e-06, "loss": 0.0981, "step": 5440 }, { "epoch": 1.2379977246871445, "grad_norm": 1.5814448790855402, "learning_rate": 1.0703246321282316e-06, "loss": 0.0647, "step": 5441 }, { "epoch": 1.2382252559726963, "grad_norm": 1.3583333002765134, "learning_rate": 1.0702619340048077e-06, "loss": 0.0937, "step": 5442 }, { "epoch": 1.238452787258248, "grad_norm": 1.6595104474776738, "learning_rate": 1.0701992267810454e-06, "loss": 0.0559, "step": 5443 }, { "epoch": 1.2386803185437998, "grad_norm": 1.6197295051794525, "learning_rate": 1.0701365104582262e-06, "loss": 0.154, "step": 5444 }, { "epoch": 1.2389078498293515, "grad_norm": 1.2894140286449491, "learning_rate": 1.070073785037632e-06, "loss": 0.0704, "step": 5445 }, { "epoch": 1.2391353811149033, "grad_norm": 1.5941117277473875, "learning_rate": 1.0700110505205447e-06, "loss": 0.0839, "step": 5446 }, { "epoch": 1.239362912400455, "grad_norm": 1.7878642941689529, "learning_rate": 1.0699483069082468e-06, "loss": 0.0649, "step": 5447 }, { "epoch": 1.2395904436860068, "grad_norm": 2.599750455430904, "learning_rate": 1.0698855542020201e-06, "loss": 0.1301, "step": 5448 }, { "epoch": 1.2398179749715585, "grad_norm": 2.0450552321843025, "learning_rate": 1.0698227924031474e-06, "loss": 0.1969, "step": 5449 }, { "epoch": 1.2400455062571103, "grad_norm": 1.2281153831660558, "learning_rate": 1.0697600215129113e-06, "loss": 0.0613, "step": 5450 }, { "epoch": 1.240273037542662, "grad_norm": 2.143751448330337, "learning_rate": 1.0696972415325954e-06, "loss": 0.0722, "step": 5451 }, { "epoch": 1.2405005688282138, "grad_norm": 1.322311765084235, "learning_rate": 1.069634452463482e-06, "loss": 0.1549, "step": 5452 }, { "epoch": 1.2407281001137656, "grad_norm": 1.6004401205919885, "learning_rate": 1.0695716543068548e-06, "loss": 0.1183, "step": 5453 }, { "epoch": 1.2409556313993173, "grad_norm": 1.3942986796389523, "learning_rate": 1.0695088470639973e-06, "loss": 0.0864, "step": 5454 }, { "epoch": 1.241183162684869, "grad_norm": 2.578779304276581, "learning_rate": 1.069446030736193e-06, "loss": 0.1039, "step": 5455 }, { "epoch": 1.2414106939704208, "grad_norm": 1.504902639716046, "learning_rate": 1.0693832053247256e-06, "loss": 0.0512, "step": 5456 }, { "epoch": 1.2416382252559728, "grad_norm": 2.6715606976374646, "learning_rate": 1.0693203708308792e-06, "loss": 0.0914, "step": 5457 }, { "epoch": 1.2418657565415245, "grad_norm": 1.8134513201348859, "learning_rate": 1.0692575272559385e-06, "loss": 0.0939, "step": 5458 }, { "epoch": 1.2420932878270763, "grad_norm": 2.353744451889051, "learning_rate": 1.0691946746011874e-06, "loss": 0.0906, "step": 5459 }, { "epoch": 1.242320819112628, "grad_norm": 2.066531174712776, "learning_rate": 1.0691318128679107e-06, "loss": 0.1075, "step": 5460 }, { "epoch": 1.2425483503981798, "grad_norm": 1.8513813952978166, "learning_rate": 1.0690689420573933e-06, "loss": 0.1163, "step": 5461 }, { "epoch": 1.2427758816837315, "grad_norm": 2.4822445733516143, "learning_rate": 1.0690060621709198e-06, "loss": 0.0855, "step": 5462 }, { "epoch": 1.2430034129692833, "grad_norm": 1.9977462685240035, "learning_rate": 1.0689431732097754e-06, "loss": 0.1, "step": 5463 }, { "epoch": 1.243230944254835, "grad_norm": 1.6816887574312476, "learning_rate": 1.0688802751752458e-06, "loss": 0.1155, "step": 5464 }, { "epoch": 1.2434584755403868, "grad_norm": 1.6544785097209609, "learning_rate": 1.0688173680686164e-06, "loss": 0.0555, "step": 5465 }, { "epoch": 1.2436860068259386, "grad_norm": 4.858334551394226, "learning_rate": 1.0687544518911726e-06, "loss": 0.0815, "step": 5466 }, { "epoch": 1.2439135381114903, "grad_norm": 2.2928222165455883, "learning_rate": 1.0686915266442005e-06, "loss": 0.0874, "step": 5467 }, { "epoch": 1.244141069397042, "grad_norm": 1.7968322866387734, "learning_rate": 1.0686285923289863e-06, "loss": 0.1786, "step": 5468 }, { "epoch": 1.2443686006825938, "grad_norm": 1.8341969945123524, "learning_rate": 1.0685656489468161e-06, "loss": 0.0728, "step": 5469 }, { "epoch": 1.2445961319681456, "grad_norm": 1.9808057129983412, "learning_rate": 1.0685026964989764e-06, "loss": 0.0614, "step": 5470 }, { "epoch": 1.2448236632536973, "grad_norm": 2.3603917978939686, "learning_rate": 1.0684397349867537e-06, "loss": 0.12, "step": 5471 }, { "epoch": 1.245051194539249, "grad_norm": 1.3367535002517599, "learning_rate": 1.068376764411435e-06, "loss": 0.0885, "step": 5472 }, { "epoch": 1.2452787258248008, "grad_norm": 1.397713582074191, "learning_rate": 1.0683137847743076e-06, "loss": 0.142, "step": 5473 }, { "epoch": 1.2455062571103528, "grad_norm": 1.6484425230118132, "learning_rate": 1.0682507960766578e-06, "loss": 0.0859, "step": 5474 }, { "epoch": 1.2457337883959045, "grad_norm": 1.4069345626600442, "learning_rate": 1.0681877983197738e-06, "loss": 0.1161, "step": 5475 }, { "epoch": 1.2459613196814563, "grad_norm": 1.4998871720536815, "learning_rate": 1.0681247915049428e-06, "loss": 0.0772, "step": 5476 }, { "epoch": 1.246188850967008, "grad_norm": 1.9389078200268701, "learning_rate": 1.0680617756334527e-06, "loss": 0.0716, "step": 5477 }, { "epoch": 1.2464163822525598, "grad_norm": 2.7660431376565695, "learning_rate": 1.0679987507065912e-06, "loss": 0.1567, "step": 5478 }, { "epoch": 1.2466439135381115, "grad_norm": 2.4466457466496205, "learning_rate": 1.0679357167256465e-06, "loss": 0.0861, "step": 5479 }, { "epoch": 1.2468714448236633, "grad_norm": 2.243817072525832, "learning_rate": 1.067872673691907e-06, "loss": 0.06, "step": 5480 }, { "epoch": 1.247098976109215, "grad_norm": 1.1074974938970048, "learning_rate": 1.0678096216066611e-06, "loss": 0.0617, "step": 5481 }, { "epoch": 1.2473265073947668, "grad_norm": 1.5816842034349576, "learning_rate": 1.0677465604711975e-06, "loss": 0.1165, "step": 5482 }, { "epoch": 1.2475540386803186, "grad_norm": 1.6815250673847648, "learning_rate": 1.0676834902868051e-06, "loss": 0.0966, "step": 5483 }, { "epoch": 1.2477815699658703, "grad_norm": 1.8948162149491237, "learning_rate": 1.067620411054773e-06, "loss": 0.1998, "step": 5484 }, { "epoch": 1.248009101251422, "grad_norm": 1.5218119585260081, "learning_rate": 1.0675573227763903e-06, "loss": 0.1333, "step": 5485 }, { "epoch": 1.2482366325369738, "grad_norm": 1.894461051388274, "learning_rate": 1.0674942254529463e-06, "loss": 0.1003, "step": 5486 }, { "epoch": 1.2484641638225256, "grad_norm": 2.2504857522339297, "learning_rate": 1.0674311190857308e-06, "loss": 0.1372, "step": 5487 }, { "epoch": 1.2486916951080773, "grad_norm": 1.5857559144795927, "learning_rate": 1.0673680036760333e-06, "loss": 0.1743, "step": 5488 }, { "epoch": 1.248919226393629, "grad_norm": 1.7267319127098426, "learning_rate": 1.0673048792251443e-06, "loss": 0.0452, "step": 5489 }, { "epoch": 1.2491467576791808, "grad_norm": 1.7855570682733017, "learning_rate": 1.0672417457343535e-06, "loss": 0.0699, "step": 5490 }, { "epoch": 1.2493742889647326, "grad_norm": 1.775936905731107, "learning_rate": 1.0671786032049512e-06, "loss": 0.0854, "step": 5491 }, { "epoch": 1.2496018202502843, "grad_norm": 2.2449973755366126, "learning_rate": 1.0671154516382283e-06, "loss": 0.0941, "step": 5492 }, { "epoch": 1.249829351535836, "grad_norm": 1.4681093724936798, "learning_rate": 1.067052291035475e-06, "loss": 0.0744, "step": 5493 }, { "epoch": 1.2500568828213878, "grad_norm": 1.4102566771482665, "learning_rate": 1.0669891213979826e-06, "loss": 0.0794, "step": 5494 }, { "epoch": 1.2502844141069396, "grad_norm": 1.7355330983101092, "learning_rate": 1.066925942727042e-06, "loss": 0.0578, "step": 5495 }, { "epoch": 1.2505119453924913, "grad_norm": 2.3854746229810497, "learning_rate": 1.0668627550239444e-06, "loss": 0.0943, "step": 5496 }, { "epoch": 1.2507394766780433, "grad_norm": 1.5833507298430867, "learning_rate": 1.0667995582899815e-06, "loss": 0.0441, "step": 5497 }, { "epoch": 1.250967007963595, "grad_norm": 2.0083629771890057, "learning_rate": 1.0667363525264446e-06, "loss": 0.0733, "step": 5498 }, { "epoch": 1.2511945392491468, "grad_norm": 1.2971360177807068, "learning_rate": 1.0666731377346257e-06, "loss": 0.1036, "step": 5499 }, { "epoch": 1.2514220705346986, "grad_norm": 2.800119901797372, "learning_rate": 1.0666099139158168e-06, "loss": 0.1237, "step": 5500 }, { "epoch": 1.2516496018202503, "grad_norm": 1.8870897807924603, "learning_rate": 1.06654668107131e-06, "loss": 0.086, "step": 5501 }, { "epoch": 1.251877133105802, "grad_norm": 2.290616338788931, "learning_rate": 1.0664834392023975e-06, "loss": 0.1499, "step": 5502 }, { "epoch": 1.2521046643913538, "grad_norm": 2.0803939807047613, "learning_rate": 1.0664201883103722e-06, "loss": 0.1016, "step": 5503 }, { "epoch": 1.2523321956769056, "grad_norm": 1.9461188419368005, "learning_rate": 1.066356928396527e-06, "loss": 0.0614, "step": 5504 }, { "epoch": 1.2525597269624573, "grad_norm": 2.2124110257952405, "learning_rate": 1.066293659462154e-06, "loss": 0.0948, "step": 5505 }, { "epoch": 1.252787258248009, "grad_norm": 2.0599721227236443, "learning_rate": 1.066230381508547e-06, "loss": 0.1591, "step": 5506 }, { "epoch": 1.2530147895335608, "grad_norm": 1.8004176359521435, "learning_rate": 1.0661670945369991e-06, "loss": 0.1559, "step": 5507 }, { "epoch": 1.2532423208191126, "grad_norm": 1.1888930303857044, "learning_rate": 1.0661037985488037e-06, "loss": 0.0518, "step": 5508 }, { "epoch": 1.2534698521046643, "grad_norm": 1.8289874247099218, "learning_rate": 1.0660404935452545e-06, "loss": 0.0546, "step": 5509 }, { "epoch": 1.253697383390216, "grad_norm": 1.1560218852925717, "learning_rate": 1.0659771795276451e-06, "loss": 0.1254, "step": 5510 }, { "epoch": 1.253924914675768, "grad_norm": 1.2249808857764073, "learning_rate": 1.06591385649727e-06, "loss": 0.0676, "step": 5511 }, { "epoch": 1.2541524459613198, "grad_norm": 0.85050849951928, "learning_rate": 1.0658505244554233e-06, "loss": 0.093, "step": 5512 }, { "epoch": 1.2543799772468716, "grad_norm": 1.5438548423850245, "learning_rate": 1.0657871834033992e-06, "loss": 0.125, "step": 5513 }, { "epoch": 1.2546075085324233, "grad_norm": 0.9078275355366267, "learning_rate": 1.0657238333424922e-06, "loss": 0.0376, "step": 5514 }, { "epoch": 1.254835039817975, "grad_norm": 2.2143429786465063, "learning_rate": 1.0656604742739974e-06, "loss": 0.0724, "step": 5515 }, { "epoch": 1.2550625711035268, "grad_norm": 2.0486216037329146, "learning_rate": 1.0655971061992093e-06, "loss": 0.1584, "step": 5516 }, { "epoch": 1.2552901023890786, "grad_norm": 1.997450488802889, "learning_rate": 1.0655337291194235e-06, "loss": 0.0802, "step": 5517 }, { "epoch": 1.2555176336746303, "grad_norm": 1.1050951081736293, "learning_rate": 1.0654703430359348e-06, "loss": 0.077, "step": 5518 }, { "epoch": 1.255745164960182, "grad_norm": 2.039605618688286, "learning_rate": 1.065406947950039e-06, "loss": 0.0795, "step": 5519 }, { "epoch": 1.2559726962457338, "grad_norm": 1.518327874025393, "learning_rate": 1.065343543863032e-06, "loss": 0.0779, "step": 5520 }, { "epoch": 1.2562002275312856, "grad_norm": 1.3387099786697993, "learning_rate": 1.0652801307762093e-06, "loss": 0.0931, "step": 5521 }, { "epoch": 1.2564277588168373, "grad_norm": 3.272032880936236, "learning_rate": 1.065216708690867e-06, "loss": 0.1039, "step": 5522 }, { "epoch": 1.256655290102389, "grad_norm": 1.9746026723150696, "learning_rate": 1.0651532776083014e-06, "loss": 0.1066, "step": 5523 }, { "epoch": 1.2568828213879408, "grad_norm": 1.5931847333482678, "learning_rate": 1.0650898375298088e-06, "loss": 0.0763, "step": 5524 }, { "epoch": 1.2571103526734926, "grad_norm": 1.8732431674607817, "learning_rate": 1.0650263884566863e-06, "loss": 0.0763, "step": 5525 }, { "epoch": 1.2573378839590443, "grad_norm": 1.8693038945153335, "learning_rate": 1.06496293039023e-06, "loss": 0.1149, "step": 5526 }, { "epoch": 1.257565415244596, "grad_norm": 1.4481248834894889, "learning_rate": 1.0648994633317373e-06, "loss": 0.1457, "step": 5527 }, { "epoch": 1.2577929465301478, "grad_norm": 1.862432920570466, "learning_rate": 1.064835987282505e-06, "loss": 0.1352, "step": 5528 }, { "epoch": 1.2580204778156996, "grad_norm": 1.2450473886682687, "learning_rate": 1.0647725022438307e-06, "loss": 0.0673, "step": 5529 }, { "epoch": 1.2582480091012513, "grad_norm": 1.752937419466306, "learning_rate": 1.0647090082170118e-06, "loss": 0.0803, "step": 5530 }, { "epoch": 1.258475540386803, "grad_norm": 1.7576675405029405, "learning_rate": 1.0646455052033463e-06, "loss": 0.1399, "step": 5531 }, { "epoch": 1.2587030716723548, "grad_norm": 1.3799958444972384, "learning_rate": 1.0645819932041317e-06, "loss": 0.1049, "step": 5532 }, { "epoch": 1.2589306029579066, "grad_norm": 1.0385789823026492, "learning_rate": 1.064518472220666e-06, "loss": 0.0457, "step": 5533 }, { "epoch": 1.2591581342434583, "grad_norm": 2.019081092458579, "learning_rate": 1.064454942254248e-06, "loss": 0.0607, "step": 5534 }, { "epoch": 1.25938566552901, "grad_norm": 2.6223562992885108, "learning_rate": 1.0643914033061757e-06, "loss": 0.0976, "step": 5535 }, { "epoch": 1.259613196814562, "grad_norm": 1.1669553650719042, "learning_rate": 1.0643278553777477e-06, "loss": 0.0836, "step": 5536 }, { "epoch": 1.2598407281001138, "grad_norm": 3.0375633482678013, "learning_rate": 1.0642642984702632e-06, "loss": 0.1243, "step": 5537 }, { "epoch": 1.2600682593856656, "grad_norm": 1.168545824136467, "learning_rate": 1.0642007325850207e-06, "loss": 0.083, "step": 5538 }, { "epoch": 1.2602957906712173, "grad_norm": 2.2256486202114676, "learning_rate": 1.0641371577233197e-06, "loss": 0.1316, "step": 5539 }, { "epoch": 1.260523321956769, "grad_norm": 2.136519087012331, "learning_rate": 1.0640735738864594e-06, "loss": 0.071, "step": 5540 }, { "epoch": 1.2607508532423208, "grad_norm": 1.6283300874870004, "learning_rate": 1.0640099810757394e-06, "loss": 0.0911, "step": 5541 }, { "epoch": 1.2609783845278726, "grad_norm": 1.1416528868312568, "learning_rate": 1.0639463792924592e-06, "loss": 0.0657, "step": 5542 }, { "epoch": 1.2612059158134243, "grad_norm": 3.4581463177480702, "learning_rate": 1.0638827685379191e-06, "loss": 0.0751, "step": 5543 }, { "epoch": 1.261433447098976, "grad_norm": 1.6347126585276193, "learning_rate": 1.063819148813419e-06, "loss": 0.0988, "step": 5544 }, { "epoch": 1.2616609783845278, "grad_norm": 1.9297479475314918, "learning_rate": 1.063755520120259e-06, "loss": 0.1323, "step": 5545 }, { "epoch": 1.2618885096700796, "grad_norm": 1.4945227351451722, "learning_rate": 1.0636918824597397e-06, "loss": 0.074, "step": 5546 }, { "epoch": 1.2621160409556313, "grad_norm": 1.2720191090234823, "learning_rate": 1.063628235833162e-06, "loss": 0.0211, "step": 5547 }, { "epoch": 1.262343572241183, "grad_norm": 2.0454700040014284, "learning_rate": 1.0635645802418263e-06, "loss": 0.1092, "step": 5548 }, { "epoch": 1.2625711035267349, "grad_norm": 3.526245551948375, "learning_rate": 1.0635009156870338e-06, "loss": 0.122, "step": 5549 }, { "epoch": 1.2627986348122868, "grad_norm": 1.0127431108984934, "learning_rate": 1.0634372421700858e-06, "loss": 0.0495, "step": 5550 }, { "epoch": 1.2630261660978386, "grad_norm": 1.5378058015868816, "learning_rate": 1.0633735596922834e-06, "loss": 0.1009, "step": 5551 }, { "epoch": 1.2632536973833903, "grad_norm": 1.0282183974082975, "learning_rate": 1.0633098682549282e-06, "loss": 0.0469, "step": 5552 }, { "epoch": 1.263481228668942, "grad_norm": 1.402805359819934, "learning_rate": 1.063246167859322e-06, "loss": 0.0961, "step": 5553 }, { "epoch": 1.2637087599544938, "grad_norm": 1.6230015516881557, "learning_rate": 1.0631824585067668e-06, "loss": 0.0797, "step": 5554 }, { "epoch": 1.2639362912400456, "grad_norm": 1.2909297831561177, "learning_rate": 1.0631187401985647e-06, "loss": 0.0845, "step": 5555 }, { "epoch": 1.2641638225255973, "grad_norm": 1.6465161376952524, "learning_rate": 1.0630550129360179e-06, "loss": 0.0516, "step": 5556 }, { "epoch": 1.264391353811149, "grad_norm": 1.3793066474762286, "learning_rate": 1.062991276720429e-06, "loss": 0.0632, "step": 5557 }, { "epoch": 1.2646188850967008, "grad_norm": 1.8076204975362697, "learning_rate": 1.0629275315531005e-06, "loss": 0.0838, "step": 5558 }, { "epoch": 1.2648464163822526, "grad_norm": 1.4158184088750396, "learning_rate": 1.0628637774353351e-06, "loss": 0.0835, "step": 5559 }, { "epoch": 1.2650739476678043, "grad_norm": 1.4013673117505225, "learning_rate": 1.062800014368436e-06, "loss": 0.0642, "step": 5560 }, { "epoch": 1.265301478953356, "grad_norm": 1.391664380207402, "learning_rate": 1.0627362423537065e-06, "loss": 0.0977, "step": 5561 }, { "epoch": 1.2655290102389078, "grad_norm": 2.030166330980807, "learning_rate": 1.06267246139245e-06, "loss": 0.1186, "step": 5562 }, { "epoch": 1.2657565415244596, "grad_norm": 1.677270778893888, "learning_rate": 1.06260867148597e-06, "loss": 0.0582, "step": 5563 }, { "epoch": 1.2659840728100114, "grad_norm": 2.386010353958561, "learning_rate": 1.06254487263557e-06, "loss": 0.0916, "step": 5564 }, { "epoch": 1.266211604095563, "grad_norm": 1.2929316919365015, "learning_rate": 1.062481064842554e-06, "loss": 0.0915, "step": 5565 }, { "epoch": 1.2664391353811149, "grad_norm": 1.8947340385343405, "learning_rate": 1.0624172481082265e-06, "loss": 0.073, "step": 5566 }, { "epoch": 1.2666666666666666, "grad_norm": 1.9806391604348212, "learning_rate": 1.0623534224338916e-06, "loss": 0.1069, "step": 5567 }, { "epoch": 1.2668941979522184, "grad_norm": 2.0768859821030805, "learning_rate": 1.0622895878208535e-06, "loss": 0.1271, "step": 5568 }, { "epoch": 1.2671217292377701, "grad_norm": 1.7245435012523789, "learning_rate": 1.0622257442704174e-06, "loss": 0.1509, "step": 5569 }, { "epoch": 1.2673492605233219, "grad_norm": 1.7895301814757045, "learning_rate": 1.0621618917838874e-06, "loss": 0.1585, "step": 5570 }, { "epoch": 1.2675767918088736, "grad_norm": 1.6904234075905629, "learning_rate": 1.062098030362569e-06, "loss": 0.0695, "step": 5571 }, { "epoch": 1.2678043230944254, "grad_norm": 2.4457978209810913, "learning_rate": 1.0620341600077675e-06, "loss": 0.1008, "step": 5572 }, { "epoch": 1.2680318543799771, "grad_norm": 2.161084791259615, "learning_rate": 1.0619702807207881e-06, "loss": 0.0829, "step": 5573 }, { "epoch": 1.268259385665529, "grad_norm": 2.2600901965292937, "learning_rate": 1.0619063925029367e-06, "loss": 0.068, "step": 5574 }, { "epoch": 1.2684869169510808, "grad_norm": 1.0840331512965, "learning_rate": 1.0618424953555186e-06, "loss": 0.0616, "step": 5575 }, { "epoch": 1.2687144482366326, "grad_norm": 1.2319354830540028, "learning_rate": 1.0617785892798399e-06, "loss": 0.0385, "step": 5576 }, { "epoch": 1.2689419795221843, "grad_norm": 1.5534769449009878, "learning_rate": 1.061714674277207e-06, "loss": 0.0855, "step": 5577 }, { "epoch": 1.269169510807736, "grad_norm": 2.4306350480501657, "learning_rate": 1.0616507503489255e-06, "loss": 0.0877, "step": 5578 }, { "epoch": 1.2693970420932879, "grad_norm": 1.7942644238062382, "learning_rate": 1.0615868174963025e-06, "loss": 0.0649, "step": 5579 }, { "epoch": 1.2696245733788396, "grad_norm": 2.5654321765997885, "learning_rate": 1.0615228757206448e-06, "loss": 0.1688, "step": 5580 }, { "epoch": 1.2698521046643914, "grad_norm": 1.6812385027743317, "learning_rate": 1.0614589250232588e-06, "loss": 0.1631, "step": 5581 }, { "epoch": 1.270079635949943, "grad_norm": 1.4665264164731382, "learning_rate": 1.0613949654054518e-06, "loss": 0.0838, "step": 5582 }, { "epoch": 1.2703071672354949, "grad_norm": 2.560767700352155, "learning_rate": 1.0613309968685308e-06, "loss": 0.1086, "step": 5583 }, { "epoch": 1.2705346985210466, "grad_norm": 1.5508544451796749, "learning_rate": 1.0612670194138033e-06, "loss": 0.0702, "step": 5584 }, { "epoch": 1.2707622298065984, "grad_norm": 2.1461634004729837, "learning_rate": 1.061203033042577e-06, "loss": 0.0986, "step": 5585 }, { "epoch": 1.2709897610921501, "grad_norm": 2.4218915402318193, "learning_rate": 1.0611390377561596e-06, "loss": 0.1215, "step": 5586 }, { "epoch": 1.2712172923777019, "grad_norm": 0.6658643110079683, "learning_rate": 1.0610750335558589e-06, "loss": 0.0471, "step": 5587 }, { "epoch": 1.2714448236632536, "grad_norm": 1.3141548979843396, "learning_rate": 1.0610110204429832e-06, "loss": 0.071, "step": 5588 }, { "epoch": 1.2716723549488056, "grad_norm": 1.7639431438675843, "learning_rate": 1.060946998418841e-06, "loss": 0.1114, "step": 5589 }, { "epoch": 1.2718998862343573, "grad_norm": 1.506178749665914, "learning_rate": 1.06088296748474e-06, "loss": 0.0562, "step": 5590 }, { "epoch": 1.272127417519909, "grad_norm": 1.9579583267558633, "learning_rate": 1.0608189276419898e-06, "loss": 0.0871, "step": 5591 }, { "epoch": 1.2723549488054609, "grad_norm": 2.0110213837918134, "learning_rate": 1.0607548788918989e-06, "loss": 0.1181, "step": 5592 }, { "epoch": 1.2725824800910126, "grad_norm": 2.0737318256837542, "learning_rate": 1.0606908212357764e-06, "loss": 0.1301, "step": 5593 }, { "epoch": 1.2728100113765644, "grad_norm": 1.9325897325011154, "learning_rate": 1.0606267546749312e-06, "loss": 0.0946, "step": 5594 }, { "epoch": 1.273037542662116, "grad_norm": 1.9916873026614195, "learning_rate": 1.0605626792106729e-06, "loss": 0.0904, "step": 5595 }, { "epoch": 1.2732650739476679, "grad_norm": 2.584223969309277, "learning_rate": 1.0604985948443115e-06, "loss": 0.2029, "step": 5596 }, { "epoch": 1.2734926052332196, "grad_norm": 2.407891422443583, "learning_rate": 1.0604345015771561e-06, "loss": 0.097, "step": 5597 }, { "epoch": 1.2737201365187714, "grad_norm": 1.8811406288698242, "learning_rate": 1.060370399410517e-06, "loss": 0.1304, "step": 5598 }, { "epoch": 1.2739476678043231, "grad_norm": 1.0291908406249004, "learning_rate": 1.0603062883457044e-06, "loss": 0.0576, "step": 5599 }, { "epoch": 1.2741751990898749, "grad_norm": 1.760148573333007, "learning_rate": 1.0602421683840283e-06, "loss": 0.0556, "step": 5600 }, { "epoch": 1.2744027303754266, "grad_norm": 3.241467227070589, "learning_rate": 1.0601780395267997e-06, "loss": 0.0584, "step": 5601 }, { "epoch": 1.2746302616609784, "grad_norm": 1.5240599185793644, "learning_rate": 1.0601139017753286e-06, "loss": 0.0954, "step": 5602 }, { "epoch": 1.2748577929465301, "grad_norm": 1.5993856935822532, "learning_rate": 1.0600497551309263e-06, "loss": 0.0654, "step": 5603 }, { "epoch": 1.2750853242320819, "grad_norm": 1.174588757527884, "learning_rate": 1.0599855995949038e-06, "loss": 0.1085, "step": 5604 }, { "epoch": 1.2753128555176336, "grad_norm": 1.5969271473918978, "learning_rate": 1.0599214351685724e-06, "loss": 0.0532, "step": 5605 }, { "epoch": 1.2755403868031854, "grad_norm": 1.2741242300770832, "learning_rate": 1.0598572618532433e-06, "loss": 0.0927, "step": 5606 }, { "epoch": 1.2757679180887371, "grad_norm": 1.970426944694558, "learning_rate": 1.0597930796502282e-06, "loss": 0.059, "step": 5607 }, { "epoch": 1.2759954493742889, "grad_norm": 1.7974688284252667, "learning_rate": 1.059728888560839e-06, "loss": 0.0546, "step": 5608 }, { "epoch": 1.2762229806598406, "grad_norm": 2.347519835833904, "learning_rate": 1.059664688586387e-06, "loss": 0.0678, "step": 5609 }, { "epoch": 1.2764505119453924, "grad_norm": 1.9966964077823932, "learning_rate": 1.0596004797281853e-06, "loss": 0.1171, "step": 5610 }, { "epoch": 1.2766780432309441, "grad_norm": 2.3005969263882973, "learning_rate": 1.0595362619875455e-06, "loss": 0.1085, "step": 5611 }, { "epoch": 1.276905574516496, "grad_norm": 1.6333485760567956, "learning_rate": 1.0594720353657802e-06, "loss": 0.117, "step": 5612 }, { "epoch": 1.2771331058020479, "grad_norm": 1.9147011409795431, "learning_rate": 1.0594077998642025e-06, "loss": 0.0874, "step": 5613 }, { "epoch": 1.2773606370875996, "grad_norm": 2.1078634316924187, "learning_rate": 1.0593435554841247e-06, "loss": 0.2271, "step": 5614 }, { "epoch": 1.2775881683731514, "grad_norm": 2.022126628668214, "learning_rate": 1.0592793022268603e-06, "loss": 0.0842, "step": 5615 }, { "epoch": 1.2778156996587031, "grad_norm": 1.9052082685253122, "learning_rate": 1.0592150400937222e-06, "loss": 0.099, "step": 5616 }, { "epoch": 1.2780432309442549, "grad_norm": 1.4084668092686947, "learning_rate": 1.059150769086024e-06, "loss": 0.1706, "step": 5617 }, { "epoch": 1.2782707622298066, "grad_norm": 2.5181732763637887, "learning_rate": 1.059086489205079e-06, "loss": 0.1101, "step": 5618 }, { "epoch": 1.2784982935153584, "grad_norm": 1.438424800867154, "learning_rate": 1.0590222004522012e-06, "loss": 0.0597, "step": 5619 }, { "epoch": 1.2787258248009101, "grad_norm": 2.542196122167591, "learning_rate": 1.0589579028287045e-06, "loss": 0.1012, "step": 5620 }, { "epoch": 1.2789533560864619, "grad_norm": 1.9099915285362725, "learning_rate": 1.0588935963359032e-06, "loss": 0.123, "step": 5621 }, { "epoch": 1.2791808873720136, "grad_norm": 1.409727510364223, "learning_rate": 1.0588292809751112e-06, "loss": 0.1016, "step": 5622 }, { "epoch": 1.2794084186575654, "grad_norm": 1.3690280829539982, "learning_rate": 1.0587649567476434e-06, "loss": 0.0966, "step": 5623 }, { "epoch": 1.2796359499431171, "grad_norm": 1.5389585621222681, "learning_rate": 1.0587006236548142e-06, "loss": 0.0743, "step": 5624 }, { "epoch": 1.2798634812286689, "grad_norm": 1.7026203900857915, "learning_rate": 1.0586362816979383e-06, "loss": 0.0732, "step": 5625 }, { "epoch": 1.2800910125142206, "grad_norm": 1.5673530833581029, "learning_rate": 1.0585719308783316e-06, "loss": 0.0803, "step": 5626 }, { "epoch": 1.2803185437997724, "grad_norm": 2.7308896344498383, "learning_rate": 1.058507571197308e-06, "loss": 0.1711, "step": 5627 }, { "epoch": 1.2805460750853244, "grad_norm": 0.9517054919299557, "learning_rate": 1.058443202656184e-06, "loss": 0.0311, "step": 5628 }, { "epoch": 1.2807736063708761, "grad_norm": 1.891198334894284, "learning_rate": 1.0583788252562745e-06, "loss": 0.1027, "step": 5629 }, { "epoch": 1.2810011376564279, "grad_norm": 2.3500902017479754, "learning_rate": 1.058314438998896e-06, "loss": 0.2345, "step": 5630 }, { "epoch": 1.2812286689419796, "grad_norm": 1.5797405508748195, "learning_rate": 1.0582500438853631e-06, "loss": 0.1152, "step": 5631 }, { "epoch": 1.2814562002275314, "grad_norm": 1.5413701487828582, "learning_rate": 1.0581856399169934e-06, "loss": 0.0678, "step": 5632 }, { "epoch": 1.2816837315130831, "grad_norm": 1.078485797497606, "learning_rate": 1.0581212270951024e-06, "loss": 0.0311, "step": 5633 }, { "epoch": 1.2819112627986349, "grad_norm": 2.2121037992989, "learning_rate": 1.058056805421007e-06, "loss": 0.1267, "step": 5634 }, { "epoch": 1.2821387940841866, "grad_norm": 2.0746165641618486, "learning_rate": 1.057992374896023e-06, "loss": 0.0901, "step": 5635 }, { "epoch": 1.2823663253697384, "grad_norm": 1.2046274771811845, "learning_rate": 1.0579279355214683e-06, "loss": 0.0671, "step": 5636 }, { "epoch": 1.2825938566552901, "grad_norm": 2.0273686949332292, "learning_rate": 1.0578634872986592e-06, "loss": 0.0875, "step": 5637 }, { "epoch": 1.2828213879408419, "grad_norm": 1.8866997216058947, "learning_rate": 1.0577990302289136e-06, "loss": 0.0966, "step": 5638 }, { "epoch": 1.2830489192263936, "grad_norm": 1.5093028228498133, "learning_rate": 1.0577345643135482e-06, "loss": 0.1298, "step": 5639 }, { "epoch": 1.2832764505119454, "grad_norm": 1.9771803724128194, "learning_rate": 1.0576700895538809e-06, "loss": 0.1155, "step": 5640 }, { "epoch": 1.2835039817974971, "grad_norm": 1.7222932217295666, "learning_rate": 1.0576056059512292e-06, "loss": 0.0774, "step": 5641 }, { "epoch": 1.283731513083049, "grad_norm": 1.3538690259349349, "learning_rate": 1.057541113506911e-06, "loss": 0.0756, "step": 5642 }, { "epoch": 1.2839590443686006, "grad_norm": 1.8641486336878552, "learning_rate": 1.057476612222245e-06, "loss": 0.0695, "step": 5643 }, { "epoch": 1.2841865756541524, "grad_norm": 1.2831640838979976, "learning_rate": 1.057412102098549e-06, "loss": 0.1428, "step": 5644 }, { "epoch": 1.2844141069397041, "grad_norm": 1.3642696261409177, "learning_rate": 1.0573475831371416e-06, "loss": 0.0862, "step": 5645 }, { "epoch": 1.284641638225256, "grad_norm": 1.9245965484657077, "learning_rate": 1.0572830553393412e-06, "loss": 0.0904, "step": 5646 }, { "epoch": 1.2848691695108077, "grad_norm": 1.2171403811385195, "learning_rate": 1.057218518706467e-06, "loss": 0.0336, "step": 5647 }, { "epoch": 1.2850967007963594, "grad_norm": 1.3095189728109227, "learning_rate": 1.0571539732398378e-06, "loss": 0.0418, "step": 5648 }, { "epoch": 1.2853242320819112, "grad_norm": 1.918484794100106, "learning_rate": 1.057089418940773e-06, "loss": 0.0619, "step": 5649 }, { "epoch": 1.285551763367463, "grad_norm": 1.1443578318237353, "learning_rate": 1.0570248558105915e-06, "loss": 0.0674, "step": 5650 }, { "epoch": 1.2857792946530147, "grad_norm": 1.606788398963902, "learning_rate": 1.0569602838506136e-06, "loss": 0.0614, "step": 5651 }, { "epoch": 1.2860068259385666, "grad_norm": 1.5390260189373348, "learning_rate": 1.0568957030621582e-06, "loss": 0.0453, "step": 5652 }, { "epoch": 1.2862343572241184, "grad_norm": 1.4702477528553395, "learning_rate": 1.0568311134465457e-06, "loss": 0.1226, "step": 5653 }, { "epoch": 1.2864618885096701, "grad_norm": 1.46344076229504, "learning_rate": 1.056766515005096e-06, "loss": 0.0922, "step": 5654 }, { "epoch": 1.286689419795222, "grad_norm": 1.9431330097306456, "learning_rate": 1.0567019077391296e-06, "loss": 0.063, "step": 5655 }, { "epoch": 1.2869169510807736, "grad_norm": 1.444515304458717, "learning_rate": 1.056637291649967e-06, "loss": 0.0745, "step": 5656 }, { "epoch": 1.2871444823663254, "grad_norm": 2.0390282770012718, "learning_rate": 1.0565726667389284e-06, "loss": 0.1238, "step": 5657 }, { "epoch": 1.2873720136518771, "grad_norm": 1.6617432624086061, "learning_rate": 1.056508033007335e-06, "loss": 0.0546, "step": 5658 }, { "epoch": 1.287599544937429, "grad_norm": 2.318932450133174, "learning_rate": 1.0564433904565078e-06, "loss": 0.1079, "step": 5659 }, { "epoch": 1.2878270762229806, "grad_norm": 1.650902114497156, "learning_rate": 1.0563787390877677e-06, "loss": 0.0884, "step": 5660 }, { "epoch": 1.2880546075085324, "grad_norm": 2.147322191567513, "learning_rate": 1.0563140789024363e-06, "loss": 0.0977, "step": 5661 }, { "epoch": 1.2882821387940842, "grad_norm": 1.4183546752619831, "learning_rate": 1.0562494099018346e-06, "loss": 0.0749, "step": 5662 }, { "epoch": 1.288509670079636, "grad_norm": 1.8061494369968136, "learning_rate": 1.0561847320872853e-06, "loss": 0.093, "step": 5663 }, { "epoch": 1.2887372013651877, "grad_norm": 2.081733636480595, "learning_rate": 1.0561200454601097e-06, "loss": 0.1204, "step": 5664 }, { "epoch": 1.2889647326507394, "grad_norm": 1.2291622298614695, "learning_rate": 1.0560553500216298e-06, "loss": 0.0756, "step": 5665 }, { "epoch": 1.2891922639362912, "grad_norm": 2.9727470638088787, "learning_rate": 1.0559906457731678e-06, "loss": 0.0822, "step": 5666 }, { "epoch": 1.2894197952218431, "grad_norm": 2.481036835543893, "learning_rate": 1.0559259327160465e-06, "loss": 0.0894, "step": 5667 }, { "epoch": 1.2896473265073949, "grad_norm": 0.96010237753671, "learning_rate": 1.0558612108515883e-06, "loss": 0.0232, "step": 5668 }, { "epoch": 1.2898748577929466, "grad_norm": 1.2978970896725612, "learning_rate": 1.0557964801811162e-06, "loss": 0.0443, "step": 5669 }, { "epoch": 1.2901023890784984, "grad_norm": 1.7666859988255192, "learning_rate": 1.0557317407059529e-06, "loss": 0.0789, "step": 5670 }, { "epoch": 1.2903299203640501, "grad_norm": 1.9566224899598423, "learning_rate": 1.0556669924274217e-06, "loss": 0.0995, "step": 5671 }, { "epoch": 1.290557451649602, "grad_norm": 2.3581648947367713, "learning_rate": 1.0556022353468459e-06, "loss": 0.0985, "step": 5672 }, { "epoch": 1.2907849829351536, "grad_norm": 1.8768821502934563, "learning_rate": 1.055537469465549e-06, "loss": 0.1041, "step": 5673 }, { "epoch": 1.2910125142207054, "grad_norm": 1.0645494201548409, "learning_rate": 1.0554726947848545e-06, "loss": 0.0558, "step": 5674 }, { "epoch": 1.2912400455062572, "grad_norm": 1.56718896783486, "learning_rate": 1.0554079113060869e-06, "loss": 0.0923, "step": 5675 }, { "epoch": 1.291467576791809, "grad_norm": 1.488761679619322, "learning_rate": 1.0553431190305695e-06, "loss": 0.082, "step": 5676 }, { "epoch": 1.2916951080773607, "grad_norm": 1.5383291552422151, "learning_rate": 1.055278317959627e-06, "loss": 0.0508, "step": 5677 }, { "epoch": 1.2919226393629124, "grad_norm": 1.3268434182473585, "learning_rate": 1.0552135080945839e-06, "loss": 0.0828, "step": 5678 }, { "epoch": 1.2921501706484642, "grad_norm": 1.2228706582710298, "learning_rate": 1.0551486894367643e-06, "loss": 0.1055, "step": 5679 }, { "epoch": 1.292377701934016, "grad_norm": 0.9920593257927844, "learning_rate": 1.0550838619874933e-06, "loss": 0.0755, "step": 5680 }, { "epoch": 1.2926052332195677, "grad_norm": 2.1725969925810866, "learning_rate": 1.055019025748096e-06, "loss": 0.1201, "step": 5681 }, { "epoch": 1.2928327645051194, "grad_norm": 1.660596292271756, "learning_rate": 1.0549541807198974e-06, "loss": 0.0754, "step": 5682 }, { "epoch": 1.2930602957906712, "grad_norm": 2.723536633803254, "learning_rate": 1.0548893269042226e-06, "loss": 0.0805, "step": 5683 }, { "epoch": 1.293287827076223, "grad_norm": 1.5224476485394507, "learning_rate": 1.0548244643023972e-06, "loss": 0.1975, "step": 5684 }, { "epoch": 1.2935153583617747, "grad_norm": 1.7245323067588136, "learning_rate": 1.054759592915747e-06, "loss": 0.0894, "step": 5685 }, { "epoch": 1.2937428896473264, "grad_norm": 2.351366087052394, "learning_rate": 1.054694712745598e-06, "loss": 0.0762, "step": 5686 }, { "epoch": 1.2939704209328782, "grad_norm": 2.481113435758225, "learning_rate": 1.0546298237932757e-06, "loss": 0.1448, "step": 5687 }, { "epoch": 1.29419795221843, "grad_norm": 1.7881104717575749, "learning_rate": 1.0545649260601068e-06, "loss": 0.1129, "step": 5688 }, { "epoch": 1.2944254835039817, "grad_norm": 1.6533591614039695, "learning_rate": 1.0545000195474175e-06, "loss": 0.0931, "step": 5689 }, { "epoch": 1.2946530147895334, "grad_norm": 2.977524456984651, "learning_rate": 1.0544351042565344e-06, "loss": 0.0972, "step": 5690 }, { "epoch": 1.2948805460750854, "grad_norm": 2.6291144709864818, "learning_rate": 1.0543701801887842e-06, "loss": 0.1104, "step": 5691 }, { "epoch": 1.2951080773606372, "grad_norm": 2.1587512273916354, "learning_rate": 1.054305247345494e-06, "loss": 0.0771, "step": 5692 }, { "epoch": 1.295335608646189, "grad_norm": 2.151718306148393, "learning_rate": 1.0542403057279907e-06, "loss": 0.0545, "step": 5693 }, { "epoch": 1.2955631399317407, "grad_norm": 1.2060203503251004, "learning_rate": 1.0541753553376016e-06, "loss": 0.1352, "step": 5694 }, { "epoch": 1.2957906712172924, "grad_norm": 1.6546320164361477, "learning_rate": 1.0541103961756543e-06, "loss": 0.0775, "step": 5695 }, { "epoch": 1.2960182025028442, "grad_norm": 1.5435451270919913, "learning_rate": 1.0540454282434765e-06, "loss": 0.1312, "step": 5696 }, { "epoch": 1.296245733788396, "grad_norm": 3.3085895333931234, "learning_rate": 1.0539804515423955e-06, "loss": 0.2124, "step": 5697 }, { "epoch": 1.2964732650739477, "grad_norm": 1.2152294810518844, "learning_rate": 1.0539154660737401e-06, "loss": 0.1194, "step": 5698 }, { "epoch": 1.2967007963594994, "grad_norm": 1.885666654668208, "learning_rate": 1.053850471838838e-06, "loss": 0.0736, "step": 5699 }, { "epoch": 1.2969283276450512, "grad_norm": 1.0678926873042414, "learning_rate": 1.0537854688390175e-06, "loss": 0.0816, "step": 5700 }, { "epoch": 1.297155858930603, "grad_norm": 1.56587703849235, "learning_rate": 1.0537204570756076e-06, "loss": 0.062, "step": 5701 }, { "epoch": 1.2973833902161547, "grad_norm": 1.806050287456521, "learning_rate": 1.0536554365499367e-06, "loss": 0.0823, "step": 5702 }, { "epoch": 1.2976109215017064, "grad_norm": 3.053384552284598, "learning_rate": 1.0535904072633334e-06, "loss": 0.2154, "step": 5703 }, { "epoch": 1.2978384527872582, "grad_norm": 2.837347986125031, "learning_rate": 1.0535253692171273e-06, "loss": 0.1194, "step": 5704 }, { "epoch": 1.29806598407281, "grad_norm": 1.0783174429070619, "learning_rate": 1.0534603224126474e-06, "loss": 0.1224, "step": 5705 }, { "epoch": 1.298293515358362, "grad_norm": 1.4126873741655113, "learning_rate": 1.0533952668512231e-06, "loss": 0.0402, "step": 5706 }, { "epoch": 1.2985210466439137, "grad_norm": 1.2900165683757403, "learning_rate": 1.0533302025341843e-06, "loss": 0.0818, "step": 5707 }, { "epoch": 1.2987485779294654, "grad_norm": 1.5354168050539385, "learning_rate": 1.0532651294628607e-06, "loss": 0.1133, "step": 5708 }, { "epoch": 1.2989761092150172, "grad_norm": 2.3982642364346876, "learning_rate": 1.053200047638582e-06, "loss": 0.134, "step": 5709 }, { "epoch": 1.299203640500569, "grad_norm": 2.049947095929528, "learning_rate": 1.0531349570626787e-06, "loss": 0.129, "step": 5710 }, { "epoch": 1.2994311717861207, "grad_norm": 1.530794000421886, "learning_rate": 1.0530698577364807e-06, "loss": 0.0717, "step": 5711 }, { "epoch": 1.2996587030716724, "grad_norm": 1.3695000986945034, "learning_rate": 1.053004749661319e-06, "loss": 0.0917, "step": 5712 }, { "epoch": 1.2998862343572242, "grad_norm": 1.382838780567727, "learning_rate": 1.0529396328385238e-06, "loss": 0.0727, "step": 5713 }, { "epoch": 1.300113765642776, "grad_norm": 2.002717365583164, "learning_rate": 1.0528745072694266e-06, "loss": 0.237, "step": 5714 }, { "epoch": 1.3003412969283277, "grad_norm": 2.643373983806965, "learning_rate": 1.052809372955358e-06, "loss": 0.0887, "step": 5715 }, { "epoch": 1.3005688282138794, "grad_norm": 2.232032108317541, "learning_rate": 1.0527442298976492e-06, "loss": 0.0973, "step": 5716 }, { "epoch": 1.3007963594994312, "grad_norm": 1.776755863040562, "learning_rate": 1.0526790780976318e-06, "loss": 0.1761, "step": 5717 }, { "epoch": 1.301023890784983, "grad_norm": 2.101698342878711, "learning_rate": 1.052613917556637e-06, "loss": 0.099, "step": 5718 }, { "epoch": 1.3012514220705347, "grad_norm": 2.301450668536455, "learning_rate": 1.0525487482759975e-06, "loss": 0.1094, "step": 5719 }, { "epoch": 1.3014789533560864, "grad_norm": 1.226004654267289, "learning_rate": 1.052483570257044e-06, "loss": 0.1364, "step": 5720 }, { "epoch": 1.3017064846416382, "grad_norm": 2.0134123240196744, "learning_rate": 1.0524183835011095e-06, "loss": 0.0869, "step": 5721 }, { "epoch": 1.30193401592719, "grad_norm": 1.534286561621179, "learning_rate": 1.052353188009526e-06, "loss": 0.0985, "step": 5722 }, { "epoch": 1.3021615472127417, "grad_norm": 1.9316225583331454, "learning_rate": 1.052287983783626e-06, "loss": 0.101, "step": 5723 }, { "epoch": 1.3023890784982934, "grad_norm": 2.2788899795164443, "learning_rate": 1.052222770824742e-06, "loss": 0.1353, "step": 5724 }, { "epoch": 1.3026166097838452, "grad_norm": 1.8692663859425822, "learning_rate": 1.0521575491342074e-06, "loss": 0.0766, "step": 5725 }, { "epoch": 1.302844141069397, "grad_norm": 1.8953396577295798, "learning_rate": 1.0520923187133544e-06, "loss": 0.1077, "step": 5726 }, { "epoch": 1.3030716723549487, "grad_norm": 2.517469477681662, "learning_rate": 1.0520270795635167e-06, "loss": 0.1913, "step": 5727 }, { "epoch": 1.3032992036405004, "grad_norm": 1.4886467817121716, "learning_rate": 1.0519618316860274e-06, "loss": 0.0373, "step": 5728 }, { "epoch": 1.3035267349260522, "grad_norm": 2.0300452591888134, "learning_rate": 1.0518965750822204e-06, "loss": 0.1017, "step": 5729 }, { "epoch": 1.3037542662116042, "grad_norm": 1.2890598526750778, "learning_rate": 1.0518313097534292e-06, "loss": 0.0604, "step": 5730 }, { "epoch": 1.303981797497156, "grad_norm": 0.9524365472372076, "learning_rate": 1.0517660357009877e-06, "loss": 0.1005, "step": 5731 }, { "epoch": 1.3042093287827077, "grad_norm": 1.2925513698786057, "learning_rate": 1.0517007529262301e-06, "loss": 0.0721, "step": 5732 }, { "epoch": 1.3044368600682594, "grad_norm": 1.969070459589164, "learning_rate": 1.0516354614304905e-06, "loss": 0.1578, "step": 5733 }, { "epoch": 1.3046643913538112, "grad_norm": 1.9412894909614369, "learning_rate": 1.0515701612151035e-06, "loss": 0.1085, "step": 5734 }, { "epoch": 1.304891922639363, "grad_norm": 2.1131197697011195, "learning_rate": 1.0515048522814034e-06, "loss": 0.0863, "step": 5735 }, { "epoch": 1.3051194539249147, "grad_norm": 1.2923421707711995, "learning_rate": 1.0514395346307254e-06, "loss": 0.073, "step": 5736 }, { "epoch": 1.3053469852104664, "grad_norm": 1.8321089482558766, "learning_rate": 1.0513742082644043e-06, "loss": 0.1271, "step": 5737 }, { "epoch": 1.3055745164960182, "grad_norm": 1.485078710256677, "learning_rate": 1.0513088731837753e-06, "loss": 0.0854, "step": 5738 }, { "epoch": 1.30580204778157, "grad_norm": 1.3330940238964653, "learning_rate": 1.0512435293901737e-06, "loss": 0.079, "step": 5739 }, { "epoch": 1.3060295790671217, "grad_norm": 1.536444213687335, "learning_rate": 1.051178176884935e-06, "loss": 0.0992, "step": 5740 }, { "epoch": 1.3062571103526734, "grad_norm": 2.2452413636951554, "learning_rate": 1.0511128156693947e-06, "loss": 0.1075, "step": 5741 }, { "epoch": 1.3064846416382252, "grad_norm": 1.4528356683489017, "learning_rate": 1.0510474457448888e-06, "loss": 0.0695, "step": 5742 }, { "epoch": 1.306712172923777, "grad_norm": 1.7443487526246482, "learning_rate": 1.0509820671127535e-06, "loss": 0.08, "step": 5743 }, { "epoch": 1.3069397042093287, "grad_norm": 1.5210003023160206, "learning_rate": 1.050916679774325e-06, "loss": 0.0447, "step": 5744 }, { "epoch": 1.3071672354948807, "grad_norm": 1.5217878952043518, "learning_rate": 1.0508512837309394e-06, "loss": 0.0469, "step": 5745 }, { "epoch": 1.3073947667804324, "grad_norm": 1.569608575831104, "learning_rate": 1.0507858789839336e-06, "loss": 0.0461, "step": 5746 }, { "epoch": 1.3076222980659842, "grad_norm": 1.7294471484004552, "learning_rate": 1.0507204655346442e-06, "loss": 0.132, "step": 5747 }, { "epoch": 1.307849829351536, "grad_norm": 1.6430994302512345, "learning_rate": 1.050655043384408e-06, "loss": 0.1076, "step": 5748 }, { "epoch": 1.3080773606370877, "grad_norm": 1.2577545749106074, "learning_rate": 1.0505896125345624e-06, "loss": 0.0643, "step": 5749 }, { "epoch": 1.3083048919226394, "grad_norm": 2.2746902540206464, "learning_rate": 1.0505241729864446e-06, "loss": 0.1662, "step": 5750 }, { "epoch": 1.3085324232081912, "grad_norm": 1.7990261754234909, "learning_rate": 1.050458724741392e-06, "loss": 0.0451, "step": 5751 }, { "epoch": 1.308759954493743, "grad_norm": 2.0834042000512145, "learning_rate": 1.0503932678007423e-06, "loss": 0.147, "step": 5752 }, { "epoch": 1.3089874857792947, "grad_norm": 1.7939490285705315, "learning_rate": 1.0503278021658331e-06, "loss": 0.0655, "step": 5753 }, { "epoch": 1.3092150170648464, "grad_norm": 1.5956435196651289, "learning_rate": 1.0502623278380025e-06, "loss": 0.0505, "step": 5754 }, { "epoch": 1.3094425483503982, "grad_norm": 1.936354732487317, "learning_rate": 1.050196844818589e-06, "loss": 0.081, "step": 5755 }, { "epoch": 1.30967007963595, "grad_norm": 1.912194809038067, "learning_rate": 1.0501313531089306e-06, "loss": 0.0677, "step": 5756 }, { "epoch": 1.3098976109215017, "grad_norm": 2.168014313975063, "learning_rate": 1.050065852710366e-06, "loss": 0.1209, "step": 5757 }, { "epoch": 1.3101251422070535, "grad_norm": 2.32550846320349, "learning_rate": 1.0500003436242338e-06, "loss": 0.1123, "step": 5758 }, { "epoch": 1.3103526734926052, "grad_norm": 2.358664055020327, "learning_rate": 1.049934825851873e-06, "loss": 0.1254, "step": 5759 }, { "epoch": 1.310580204778157, "grad_norm": 1.8343721616257782, "learning_rate": 1.0498692993946225e-06, "loss": 0.1464, "step": 5760 }, { "epoch": 1.3108077360637087, "grad_norm": 1.9635602038856228, "learning_rate": 1.0498037642538215e-06, "loss": 0.0844, "step": 5761 }, { "epoch": 1.3110352673492605, "grad_norm": 2.0496799670315857, "learning_rate": 1.0497382204308099e-06, "loss": 0.0733, "step": 5762 }, { "epoch": 1.3112627986348122, "grad_norm": 1.210777013561021, "learning_rate": 1.0496726679269265e-06, "loss": 0.085, "step": 5763 }, { "epoch": 1.311490329920364, "grad_norm": 1.7241240680481036, "learning_rate": 1.0496071067435118e-06, "loss": 0.0803, "step": 5764 }, { "epoch": 1.3117178612059157, "grad_norm": 2.0878916046381977, "learning_rate": 1.0495415368819057e-06, "loss": 0.09, "step": 5765 }, { "epoch": 1.3119453924914675, "grad_norm": 1.7632105499111685, "learning_rate": 1.0494759583434478e-06, "loss": 0.1059, "step": 5766 }, { "epoch": 1.3121729237770192, "grad_norm": 1.5672705509207427, "learning_rate": 1.0494103711294786e-06, "loss": 0.0851, "step": 5767 }, { "epoch": 1.312400455062571, "grad_norm": 1.4938195763871733, "learning_rate": 1.049344775241339e-06, "loss": 0.0979, "step": 5768 }, { "epoch": 1.312627986348123, "grad_norm": 1.7176031546340367, "learning_rate": 1.049279170680369e-06, "loss": 0.0921, "step": 5769 }, { "epoch": 1.3128555176336747, "grad_norm": 1.1557472694595412, "learning_rate": 1.0492135574479097e-06, "loss": 0.161, "step": 5770 }, { "epoch": 1.3130830489192264, "grad_norm": 2.1783183998983797, "learning_rate": 1.0491479355453026e-06, "loss": 0.1091, "step": 5771 }, { "epoch": 1.3133105802047782, "grad_norm": 1.8021717680419203, "learning_rate": 1.0490823049738884e-06, "loss": 0.0778, "step": 5772 }, { "epoch": 1.31353811149033, "grad_norm": 1.3721710815692068, "learning_rate": 1.0490166657350084e-06, "loss": 0.0703, "step": 5773 }, { "epoch": 1.3137656427758817, "grad_norm": 2.074316848775226, "learning_rate": 1.0489510178300043e-06, "loss": 0.0835, "step": 5774 }, { "epoch": 1.3139931740614335, "grad_norm": 3.4881842631245656, "learning_rate": 1.0488853612602178e-06, "loss": 0.0772, "step": 5775 }, { "epoch": 1.3142207053469852, "grad_norm": 1.8006810241587943, "learning_rate": 1.0488196960269912e-06, "loss": 0.114, "step": 5776 }, { "epoch": 1.314448236632537, "grad_norm": 1.7856100748843726, "learning_rate": 1.0487540221316659e-06, "loss": 0.0555, "step": 5777 }, { "epoch": 1.3146757679180887, "grad_norm": 1.067332294646937, "learning_rate": 1.0486883395755845e-06, "loss": 0.1084, "step": 5778 }, { "epoch": 1.3149032992036405, "grad_norm": 1.70673215150491, "learning_rate": 1.0486226483600894e-06, "loss": 0.1335, "step": 5779 }, { "epoch": 1.3151308304891922, "grad_norm": 3.4711332840947757, "learning_rate": 1.0485569484865231e-06, "loss": 0.1033, "step": 5780 }, { "epoch": 1.315358361774744, "grad_norm": 2.0387320114180114, "learning_rate": 1.0484912399562285e-06, "loss": 0.076, "step": 5781 }, { "epoch": 1.3155858930602957, "grad_norm": 1.8189138137721752, "learning_rate": 1.0484255227705487e-06, "loss": 0.0809, "step": 5782 }, { "epoch": 1.3158134243458475, "grad_norm": 2.0535694365802715, "learning_rate": 1.0483597969308266e-06, "loss": 0.0621, "step": 5783 }, { "epoch": 1.3160409556313994, "grad_norm": 1.946344026400018, "learning_rate": 1.0482940624384054e-06, "loss": 0.0685, "step": 5784 }, { "epoch": 1.3162684869169512, "grad_norm": 1.1032176541225551, "learning_rate": 1.048228319294629e-06, "loss": 0.0996, "step": 5785 }, { "epoch": 1.316496018202503, "grad_norm": 1.7898000777406582, "learning_rate": 1.0481625675008409e-06, "loss": 0.1119, "step": 5786 }, { "epoch": 1.3167235494880547, "grad_norm": 2.206016130040065, "learning_rate": 1.048096807058385e-06, "loss": 0.1098, "step": 5787 }, { "epoch": 1.3169510807736065, "grad_norm": 1.8133767218889647, "learning_rate": 1.0480310379686048e-06, "loss": 0.1035, "step": 5788 }, { "epoch": 1.3171786120591582, "grad_norm": 1.4796571765990723, "learning_rate": 1.0479652602328453e-06, "loss": 0.0605, "step": 5789 }, { "epoch": 1.31740614334471, "grad_norm": 1.1968819875341754, "learning_rate": 1.0478994738524504e-06, "loss": 0.0683, "step": 5790 }, { "epoch": 1.3176336746302617, "grad_norm": 1.2404482616708883, "learning_rate": 1.047833678828765e-06, "loss": 0.0509, "step": 5791 }, { "epoch": 1.3178612059158135, "grad_norm": 2.1533741321795046, "learning_rate": 1.0477678751631332e-06, "loss": 0.1336, "step": 5792 }, { "epoch": 1.3180887372013652, "grad_norm": 1.5517989824699339, "learning_rate": 1.0477020628569005e-06, "loss": 0.0844, "step": 5793 }, { "epoch": 1.318316268486917, "grad_norm": 1.4786711835589954, "learning_rate": 1.0476362419114117e-06, "loss": 0.1779, "step": 5794 }, { "epoch": 1.3185437997724687, "grad_norm": 1.6768383320832863, "learning_rate": 1.047570412328012e-06, "loss": 0.0618, "step": 5795 }, { "epoch": 1.3187713310580205, "grad_norm": 1.5663335122379476, "learning_rate": 1.0475045741080473e-06, "loss": 0.1298, "step": 5796 }, { "epoch": 1.3189988623435722, "grad_norm": 2.4337391656673364, "learning_rate": 1.0474387272528627e-06, "loss": 0.1788, "step": 5797 }, { "epoch": 1.319226393629124, "grad_norm": 2.66491053649857, "learning_rate": 1.047372871763804e-06, "loss": 0.0773, "step": 5798 }, { "epoch": 1.3194539249146757, "grad_norm": 1.8928878472670687, "learning_rate": 1.0473070076422176e-06, "loss": 0.0908, "step": 5799 }, { "epoch": 1.3196814562002275, "grad_norm": 2.276868624611035, "learning_rate": 1.0472411348894492e-06, "loss": 0.1277, "step": 5800 }, { "epoch": 1.3199089874857792, "grad_norm": 1.5594575132733213, "learning_rate": 1.0471752535068455e-06, "loss": 0.1108, "step": 5801 }, { "epoch": 1.320136518771331, "grad_norm": 1.3384265244459481, "learning_rate": 1.0471093634957528e-06, "loss": 0.0729, "step": 5802 }, { "epoch": 1.3203640500568827, "grad_norm": 1.931742246823199, "learning_rate": 1.0470434648575175e-06, "loss": 0.0799, "step": 5803 }, { "epoch": 1.3205915813424345, "grad_norm": 1.2351662386947617, "learning_rate": 1.046977557593487e-06, "loss": 0.0879, "step": 5804 }, { "epoch": 1.3208191126279862, "grad_norm": 1.5595235915936758, "learning_rate": 1.0469116417050078e-06, "loss": 0.1352, "step": 5805 }, { "epoch": 1.321046643913538, "grad_norm": 3.0284116645875083, "learning_rate": 1.0468457171934276e-06, "loss": 0.1001, "step": 5806 }, { "epoch": 1.3212741751990897, "grad_norm": 1.1006401738487668, "learning_rate": 1.0467797840600934e-06, "loss": 0.1256, "step": 5807 }, { "epoch": 1.3215017064846417, "grad_norm": 2.048683668619229, "learning_rate": 1.0467138423063529e-06, "loss": 0.1206, "step": 5808 }, { "epoch": 1.3217292377701935, "grad_norm": 2.721602472652372, "learning_rate": 1.0466478919335538e-06, "loss": 0.1185, "step": 5809 }, { "epoch": 1.3219567690557452, "grad_norm": 1.4264521873334444, "learning_rate": 1.0465819329430439e-06, "loss": 0.1311, "step": 5810 }, { "epoch": 1.322184300341297, "grad_norm": 1.6730255927051096, "learning_rate": 1.0465159653361716e-06, "loss": 0.0815, "step": 5811 }, { "epoch": 1.3224118316268487, "grad_norm": 1.2282421184268042, "learning_rate": 1.0464499891142847e-06, "loss": 0.0617, "step": 5812 }, { "epoch": 1.3226393629124005, "grad_norm": 1.54847311265808, "learning_rate": 1.046384004278732e-06, "loss": 0.0799, "step": 5813 }, { "epoch": 1.3228668941979522, "grad_norm": 1.621833713328949, "learning_rate": 1.0463180108308615e-06, "loss": 0.0712, "step": 5814 }, { "epoch": 1.323094425483504, "grad_norm": 1.9271344527036025, "learning_rate": 1.0462520087720231e-06, "loss": 0.0931, "step": 5815 }, { "epoch": 1.3233219567690557, "grad_norm": 1.2022404430865519, "learning_rate": 1.0461859981035649e-06, "loss": 0.054, "step": 5816 }, { "epoch": 1.3235494880546075, "grad_norm": 1.3792412126497369, "learning_rate": 1.0461199788268364e-06, "loss": 0.0906, "step": 5817 }, { "epoch": 1.3237770193401592, "grad_norm": 1.3530300025052608, "learning_rate": 1.0460539509431865e-06, "loss": 0.0482, "step": 5818 }, { "epoch": 1.324004550625711, "grad_norm": 1.3311479250314509, "learning_rate": 1.045987914453965e-06, "loss": 0.0517, "step": 5819 }, { "epoch": 1.3242320819112627, "grad_norm": 2.197848133474945, "learning_rate": 1.0459218693605216e-06, "loss": 0.1283, "step": 5820 }, { "epoch": 1.3244596131968145, "grad_norm": 1.7362479038886882, "learning_rate": 1.0458558156642063e-06, "loss": 0.088, "step": 5821 }, { "epoch": 1.3246871444823665, "grad_norm": 1.8362008981229307, "learning_rate": 1.0457897533663686e-06, "loss": 0.117, "step": 5822 }, { "epoch": 1.3249146757679182, "grad_norm": 1.4992468656529518, "learning_rate": 1.0457236824683592e-06, "loss": 0.0961, "step": 5823 }, { "epoch": 1.32514220705347, "grad_norm": 2.0624079614706985, "learning_rate": 1.045657602971528e-06, "loss": 0.1186, "step": 5824 }, { "epoch": 1.3253697383390217, "grad_norm": 1.387226086927801, "learning_rate": 1.0455915148772262e-06, "loss": 0.0644, "step": 5825 }, { "epoch": 1.3255972696245735, "grad_norm": 1.451657775323959, "learning_rate": 1.0455254181868037e-06, "loss": 0.084, "step": 5826 }, { "epoch": 1.3258248009101252, "grad_norm": 3.203594309996943, "learning_rate": 1.0454593129016121e-06, "loss": 0.1307, "step": 5827 }, { "epoch": 1.326052332195677, "grad_norm": 1.2359629926153486, "learning_rate": 1.045393199023002e-06, "loss": 0.071, "step": 5828 }, { "epoch": 1.3262798634812287, "grad_norm": 2.09626696688845, "learning_rate": 1.0453270765523247e-06, "loss": 0.1149, "step": 5829 }, { "epoch": 1.3265073947667805, "grad_norm": 1.1510394557592603, "learning_rate": 1.045260945490932e-06, "loss": 0.074, "step": 5830 }, { "epoch": 1.3267349260523322, "grad_norm": 2.8998180438743857, "learning_rate": 1.045194805840175e-06, "loss": 0.1232, "step": 5831 }, { "epoch": 1.326962457337884, "grad_norm": 2.0371831357255625, "learning_rate": 1.045128657601406e-06, "loss": 0.148, "step": 5832 }, { "epoch": 1.3271899886234357, "grad_norm": 1.6823112018670245, "learning_rate": 1.0450625007759765e-06, "loss": 0.0794, "step": 5833 }, { "epoch": 1.3274175199089875, "grad_norm": 1.378507230734654, "learning_rate": 1.044996335365239e-06, "loss": 0.0473, "step": 5834 }, { "epoch": 1.3276450511945392, "grad_norm": 2.501713620892651, "learning_rate": 1.0449301613705453e-06, "loss": 0.1352, "step": 5835 }, { "epoch": 1.327872582480091, "grad_norm": 1.310982383924543, "learning_rate": 1.0448639787932482e-06, "loss": 0.0629, "step": 5836 }, { "epoch": 1.3281001137656427, "grad_norm": 1.4961669986775104, "learning_rate": 1.0447977876347005e-06, "loss": 0.0505, "step": 5837 }, { "epoch": 1.3283276450511945, "grad_norm": 2.0946957506049677, "learning_rate": 1.0447315878962547e-06, "loss": 0.0564, "step": 5838 }, { "epoch": 1.3285551763367462, "grad_norm": 1.3448895548736886, "learning_rate": 1.044665379579264e-06, "loss": 0.072, "step": 5839 }, { "epoch": 1.328782707622298, "grad_norm": 1.6449819772089789, "learning_rate": 1.0445991626850816e-06, "loss": 0.1275, "step": 5840 }, { "epoch": 1.3290102389078498, "grad_norm": 1.3217135874023973, "learning_rate": 1.0445329372150607e-06, "loss": 0.1001, "step": 5841 }, { "epoch": 1.3292377701934015, "grad_norm": 1.3083326572244676, "learning_rate": 1.0444667031705549e-06, "loss": 0.0633, "step": 5842 }, { "epoch": 1.3294653014789533, "grad_norm": 1.6851774826419728, "learning_rate": 1.0444004605529178e-06, "loss": 0.1603, "step": 5843 }, { "epoch": 1.329692832764505, "grad_norm": 1.4502489521313568, "learning_rate": 1.0443342093635036e-06, "loss": 0.0826, "step": 5844 }, { "epoch": 1.3299203640500568, "grad_norm": 1.9821203054145495, "learning_rate": 1.044267949603666e-06, "loss": 0.1617, "step": 5845 }, { "epoch": 1.3301478953356085, "grad_norm": 1.8051039972539313, "learning_rate": 1.0442016812747594e-06, "loss": 0.0716, "step": 5846 }, { "epoch": 1.3303754266211605, "grad_norm": 2.129526007883718, "learning_rate": 1.0441354043781381e-06, "loss": 0.0649, "step": 5847 }, { "epoch": 1.3306029579067122, "grad_norm": 1.4233191957308409, "learning_rate": 1.0440691189151567e-06, "loss": 0.0607, "step": 5848 }, { "epoch": 1.330830489192264, "grad_norm": 1.6736105046051897, "learning_rate": 1.0440028248871702e-06, "loss": 0.0712, "step": 5849 }, { "epoch": 1.3310580204778157, "grad_norm": 1.4967468018831813, "learning_rate": 1.0439365222955332e-06, "loss": 0.0454, "step": 5850 }, { "epoch": 1.3312855517633675, "grad_norm": 0.9536612302057513, "learning_rate": 1.043870211141601e-06, "loss": 0.0591, "step": 5851 }, { "epoch": 1.3315130830489192, "grad_norm": 2.1762186577819103, "learning_rate": 1.0438038914267287e-06, "loss": 0.141, "step": 5852 }, { "epoch": 1.331740614334471, "grad_norm": 1.4591366909505499, "learning_rate": 1.043737563152272e-06, "loss": 0.0627, "step": 5853 }, { "epoch": 1.3319681456200227, "grad_norm": 4.350121279354156, "learning_rate": 1.0436712263195862e-06, "loss": 0.1319, "step": 5854 }, { "epoch": 1.3321956769055745, "grad_norm": 1.7016647523273083, "learning_rate": 1.0436048809300273e-06, "loss": 0.1901, "step": 5855 }, { "epoch": 1.3324232081911263, "grad_norm": 1.1948459627791288, "learning_rate": 1.0435385269849515e-06, "loss": 0.0885, "step": 5856 }, { "epoch": 1.332650739476678, "grad_norm": 2.231584916495476, "learning_rate": 1.0434721644857146e-06, "loss": 0.0842, "step": 5857 }, { "epoch": 1.3328782707622298, "grad_norm": 1.3919515812269696, "learning_rate": 1.043405793433673e-06, "loss": 0.0733, "step": 5858 }, { "epoch": 1.3331058020477815, "grad_norm": 0.9859935017445751, "learning_rate": 1.0433394138301835e-06, "loss": 0.0439, "step": 5859 }, { "epoch": 1.3333333333333333, "grad_norm": 2.1136452812594873, "learning_rate": 1.0432730256766022e-06, "loss": 0.1696, "step": 5860 }, { "epoch": 1.3335608646188852, "grad_norm": 1.7286776962693724, "learning_rate": 1.0432066289742864e-06, "loss": 0.1255, "step": 5861 }, { "epoch": 1.333788395904437, "grad_norm": 1.8643295717261203, "learning_rate": 1.0431402237245932e-06, "loss": 0.077, "step": 5862 }, { "epoch": 1.3340159271899887, "grad_norm": 1.0528362516615255, "learning_rate": 1.0430738099288794e-06, "loss": 0.0466, "step": 5863 }, { "epoch": 1.3342434584755405, "grad_norm": 1.51341037677569, "learning_rate": 1.0430073875885026e-06, "loss": 0.0691, "step": 5864 }, { "epoch": 1.3344709897610922, "grad_norm": 1.699780878797244, "learning_rate": 1.0429409567048205e-06, "loss": 0.0593, "step": 5865 }, { "epoch": 1.334698521046644, "grad_norm": 0.9013097690218285, "learning_rate": 1.0428745172791905e-06, "loss": 0.1108, "step": 5866 }, { "epoch": 1.3349260523321957, "grad_norm": 2.5759017069295944, "learning_rate": 1.0428080693129708e-06, "loss": 0.1563, "step": 5867 }, { "epoch": 1.3351535836177475, "grad_norm": 1.3870463099298531, "learning_rate": 1.0427416128075192e-06, "loss": 0.0459, "step": 5868 }, { "epoch": 1.3353811149032992, "grad_norm": 1.220961536090874, "learning_rate": 1.0426751477641941e-06, "loss": 0.1204, "step": 5869 }, { "epoch": 1.335608646188851, "grad_norm": 1.2909291825710791, "learning_rate": 1.042608674184354e-06, "loss": 0.0746, "step": 5870 }, { "epoch": 1.3358361774744028, "grad_norm": 2.606205725169108, "learning_rate": 1.0425421920693575e-06, "loss": 0.179, "step": 5871 }, { "epoch": 1.3360637087599545, "grad_norm": 0.8052339430093359, "learning_rate": 1.042475701420563e-06, "loss": 0.0446, "step": 5872 }, { "epoch": 1.3362912400455063, "grad_norm": 1.5627987415782247, "learning_rate": 1.04240920223933e-06, "loss": 0.0587, "step": 5873 }, { "epoch": 1.336518771331058, "grad_norm": 1.5997474951882902, "learning_rate": 1.0423426945270174e-06, "loss": 0.0876, "step": 5874 }, { "epoch": 1.3367463026166098, "grad_norm": 1.1449459131825908, "learning_rate": 1.0422761782849842e-06, "loss": 0.0739, "step": 5875 }, { "epoch": 1.3369738339021615, "grad_norm": 1.851697392588764, "learning_rate": 1.0422096535145902e-06, "loss": 0.1552, "step": 5876 }, { "epoch": 1.3372013651877133, "grad_norm": 2.3632246540349593, "learning_rate": 1.042143120217195e-06, "loss": 0.1272, "step": 5877 }, { "epoch": 1.337428896473265, "grad_norm": 1.6754416909482417, "learning_rate": 1.0420765783941586e-06, "loss": 0.0454, "step": 5878 }, { "epoch": 1.3376564277588168, "grad_norm": 2.32343084336057, "learning_rate": 1.0420100280468404e-06, "loss": 0.1528, "step": 5879 }, { "epoch": 1.3378839590443685, "grad_norm": 1.9816599470429406, "learning_rate": 1.0419434691766012e-06, "loss": 0.1561, "step": 5880 }, { "epoch": 1.3381114903299203, "grad_norm": 1.5497562642603393, "learning_rate": 1.041876901784801e-06, "loss": 0.0758, "step": 5881 }, { "epoch": 1.338339021615472, "grad_norm": 1.9049260514033595, "learning_rate": 1.0418103258728001e-06, "loss": 0.0979, "step": 5882 }, { "epoch": 1.3385665529010238, "grad_norm": 1.3282399491488064, "learning_rate": 1.04174374144196e-06, "loss": 0.0717, "step": 5883 }, { "epoch": 1.3387940841865755, "grad_norm": 1.4546413694820024, "learning_rate": 1.0416771484936409e-06, "loss": 0.0364, "step": 5884 }, { "epoch": 1.3390216154721273, "grad_norm": 1.380966775798661, "learning_rate": 1.041610547029204e-06, "loss": 0.1469, "step": 5885 }, { "epoch": 1.3392491467576793, "grad_norm": 1.4838765642178313, "learning_rate": 1.0415439370500104e-06, "loss": 0.095, "step": 5886 }, { "epoch": 1.339476678043231, "grad_norm": 0.9893478877017928, "learning_rate": 1.0414773185574214e-06, "loss": 0.0988, "step": 5887 }, { "epoch": 1.3397042093287828, "grad_norm": 2.552307381411876, "learning_rate": 1.0414106915527991e-06, "loss": 0.0769, "step": 5888 }, { "epoch": 1.3399317406143345, "grad_norm": 1.5341685999322272, "learning_rate": 1.041344056037505e-06, "loss": 0.163, "step": 5889 }, { "epoch": 1.3401592718998863, "grad_norm": 2.050981202480822, "learning_rate": 1.0412774120129004e-06, "loss": 0.1326, "step": 5890 }, { "epoch": 1.340386803185438, "grad_norm": 1.0449449099860342, "learning_rate": 1.0412107594803484e-06, "loss": 0.1196, "step": 5891 }, { "epoch": 1.3406143344709898, "grad_norm": 1.9352276977320042, "learning_rate": 1.0411440984412103e-06, "loss": 0.1095, "step": 5892 }, { "epoch": 1.3408418657565415, "grad_norm": 2.5332880240233955, "learning_rate": 1.0410774288968492e-06, "loss": 0.1012, "step": 5893 }, { "epoch": 1.3410693970420933, "grad_norm": 1.6964443954344153, "learning_rate": 1.0410107508486272e-06, "loss": 0.0618, "step": 5894 }, { "epoch": 1.341296928327645, "grad_norm": 1.6754493818964415, "learning_rate": 1.0409440642979077e-06, "loss": 0.0646, "step": 5895 }, { "epoch": 1.3415244596131968, "grad_norm": 1.2216423929052722, "learning_rate": 1.040877369246053e-06, "loss": 0.1081, "step": 5896 }, { "epoch": 1.3417519908987485, "grad_norm": 1.5997557392502906, "learning_rate": 1.0408106656944267e-06, "loss": 0.0664, "step": 5897 }, { "epoch": 1.3419795221843003, "grad_norm": 1.8788278907156983, "learning_rate": 1.0407439536443919e-06, "loss": 0.1048, "step": 5898 }, { "epoch": 1.342207053469852, "grad_norm": 1.7938675169169382, "learning_rate": 1.040677233097312e-06, "loss": 0.0697, "step": 5899 }, { "epoch": 1.342434584755404, "grad_norm": 0.9355624853300721, "learning_rate": 1.0406105040545509e-06, "loss": 0.0284, "step": 5900 }, { "epoch": 1.3426621160409558, "grad_norm": 1.5042553303858235, "learning_rate": 1.0405437665174719e-06, "loss": 0.1187, "step": 5901 }, { "epoch": 1.3428896473265075, "grad_norm": 1.4026846157680415, "learning_rate": 1.0404770204874396e-06, "loss": 0.0765, "step": 5902 }, { "epoch": 1.3431171786120593, "grad_norm": 1.8439416331349125, "learning_rate": 1.040410265965818e-06, "loss": 0.0685, "step": 5903 }, { "epoch": 1.343344709897611, "grad_norm": 1.315124752696636, "learning_rate": 1.0403435029539711e-06, "loss": 0.076, "step": 5904 }, { "epoch": 1.3435722411831628, "grad_norm": 2.8348421300082554, "learning_rate": 1.0402767314532638e-06, "loss": 0.1332, "step": 5905 }, { "epoch": 1.3437997724687145, "grad_norm": 1.2357365501581734, "learning_rate": 1.0402099514650607e-06, "loss": 0.0785, "step": 5906 }, { "epoch": 1.3440273037542663, "grad_norm": 1.062333481303275, "learning_rate": 1.0401431629907267e-06, "loss": 0.0453, "step": 5907 }, { "epoch": 1.344254835039818, "grad_norm": 1.9191232064798869, "learning_rate": 1.0400763660316265e-06, "loss": 0.0642, "step": 5908 }, { "epoch": 1.3444823663253698, "grad_norm": 1.8190539631092906, "learning_rate": 1.0400095605891258e-06, "loss": 0.1386, "step": 5909 }, { "epoch": 1.3447098976109215, "grad_norm": 2.321732624283394, "learning_rate": 1.0399427466645895e-06, "loss": 0.0609, "step": 5910 }, { "epoch": 1.3449374288964733, "grad_norm": 2.707788483644483, "learning_rate": 1.0398759242593834e-06, "loss": 0.1499, "step": 5911 }, { "epoch": 1.345164960182025, "grad_norm": 1.664041570190974, "learning_rate": 1.0398090933748733e-06, "loss": 0.1073, "step": 5912 }, { "epoch": 1.3453924914675768, "grad_norm": 2.4540612257121603, "learning_rate": 1.039742254012425e-06, "loss": 0.099, "step": 5913 }, { "epoch": 1.3456200227531285, "grad_norm": 1.219808928274102, "learning_rate": 1.0396754061734047e-06, "loss": 0.1104, "step": 5914 }, { "epoch": 1.3458475540386803, "grad_norm": 2.7416891076755023, "learning_rate": 1.0396085498591785e-06, "loss": 0.1198, "step": 5915 }, { "epoch": 1.346075085324232, "grad_norm": 1.578816398295786, "learning_rate": 1.0395416850711127e-06, "loss": 0.1303, "step": 5916 }, { "epoch": 1.3463026166097838, "grad_norm": 1.6764518514726592, "learning_rate": 1.0394748118105743e-06, "loss": 0.0917, "step": 5917 }, { "epoch": 1.3465301478953355, "grad_norm": 2.803240136856215, "learning_rate": 1.0394079300789296e-06, "loss": 0.0922, "step": 5918 }, { "epoch": 1.3467576791808873, "grad_norm": 2.438798620238522, "learning_rate": 1.0393410398775459e-06, "loss": 0.148, "step": 5919 }, { "epoch": 1.346985210466439, "grad_norm": 2.025396852926161, "learning_rate": 1.03927414120779e-06, "loss": 0.0781, "step": 5920 }, { "epoch": 1.3472127417519908, "grad_norm": 1.7022905945417979, "learning_rate": 1.0392072340710296e-06, "loss": 0.1126, "step": 5921 }, { "epoch": 1.3474402730375425, "grad_norm": 1.820096766632113, "learning_rate": 1.0391403184686318e-06, "loss": 0.113, "step": 5922 }, { "epoch": 1.3476678043230943, "grad_norm": 1.445494347851026, "learning_rate": 1.0390733944019645e-06, "loss": 0.0577, "step": 5923 }, { "epoch": 1.347895335608646, "grad_norm": 1.6969833902189329, "learning_rate": 1.0390064618723952e-06, "loss": 0.0563, "step": 5924 }, { "epoch": 1.348122866894198, "grad_norm": 2.5942714651292698, "learning_rate": 1.038939520881292e-06, "loss": 0.0867, "step": 5925 }, { "epoch": 1.3483503981797498, "grad_norm": 1.5855460417612113, "learning_rate": 1.038872571430023e-06, "loss": 0.1554, "step": 5926 }, { "epoch": 1.3485779294653015, "grad_norm": 1.6885697182726835, "learning_rate": 1.038805613519957e-06, "loss": 0.1151, "step": 5927 }, { "epoch": 1.3488054607508533, "grad_norm": 1.1799583949182342, "learning_rate": 1.038738647152462e-06, "loss": 0.0321, "step": 5928 }, { "epoch": 1.349032992036405, "grad_norm": 1.5007571959530954, "learning_rate": 1.0386716723289063e-06, "loss": 0.0903, "step": 5929 }, { "epoch": 1.3492605233219568, "grad_norm": 1.0886205302970793, "learning_rate": 1.0386046890506596e-06, "loss": 0.0503, "step": 5930 }, { "epoch": 1.3494880546075085, "grad_norm": 2.5513820854170914, "learning_rate": 1.0385376973190906e-06, "loss": 0.0841, "step": 5931 }, { "epoch": 1.3497155858930603, "grad_norm": 1.6875766939167716, "learning_rate": 1.0384706971355683e-06, "loss": 0.1215, "step": 5932 }, { "epoch": 1.349943117178612, "grad_norm": 5.082685707073952, "learning_rate": 1.038403688501462e-06, "loss": 0.1221, "step": 5933 }, { "epoch": 1.3501706484641638, "grad_norm": 1.5607491563614442, "learning_rate": 1.0383366714181419e-06, "loss": 0.0603, "step": 5934 }, { "epoch": 1.3503981797497155, "grad_norm": 1.6201503065498213, "learning_rate": 1.038269645886977e-06, "loss": 0.0782, "step": 5935 }, { "epoch": 1.3506257110352673, "grad_norm": 2.0996414301663333, "learning_rate": 1.0382026119093372e-06, "loss": 0.1101, "step": 5936 }, { "epoch": 1.350853242320819, "grad_norm": 2.1247499282829443, "learning_rate": 1.038135569486593e-06, "loss": 0.0509, "step": 5937 }, { "epoch": 1.3510807736063708, "grad_norm": 3.3212216068998774, "learning_rate": 1.0380685186201143e-06, "loss": 0.1659, "step": 5938 }, { "epoch": 1.3513083048919228, "grad_norm": 1.7561604700268703, "learning_rate": 1.0380014593112714e-06, "loss": 0.1308, "step": 5939 }, { "epoch": 1.3515358361774745, "grad_norm": 1.1220194869116855, "learning_rate": 1.0379343915614354e-06, "loss": 0.065, "step": 5940 }, { "epoch": 1.3517633674630263, "grad_norm": 1.83273858306615, "learning_rate": 1.0378673153719764e-06, "loss": 0.1116, "step": 5941 }, { "epoch": 1.351990898748578, "grad_norm": 2.0716912267355942, "learning_rate": 1.0378002307442659e-06, "loss": 0.0669, "step": 5942 }, { "epoch": 1.3522184300341298, "grad_norm": 0.8977133455560301, "learning_rate": 1.0377331376796745e-06, "loss": 0.0999, "step": 5943 }, { "epoch": 1.3524459613196815, "grad_norm": 1.2511184360608965, "learning_rate": 1.0376660361795738e-06, "loss": 0.1479, "step": 5944 }, { "epoch": 1.3526734926052333, "grad_norm": 1.3086756806996873, "learning_rate": 1.0375989262453348e-06, "loss": 0.1577, "step": 5945 }, { "epoch": 1.352901023890785, "grad_norm": 1.4033184337593902, "learning_rate": 1.0375318078783294e-06, "loss": 0.1543, "step": 5946 }, { "epoch": 1.3531285551763368, "grad_norm": 1.3177544568938675, "learning_rate": 1.0374646810799297e-06, "loss": 0.1321, "step": 5947 }, { "epoch": 1.3533560864618885, "grad_norm": 1.6253035269611373, "learning_rate": 1.037397545851507e-06, "loss": 0.1215, "step": 5948 }, { "epoch": 1.3535836177474403, "grad_norm": 2.0350270154931365, "learning_rate": 1.0373304021944338e-06, "loss": 0.0774, "step": 5949 }, { "epoch": 1.353811149032992, "grad_norm": 1.5562479173929296, "learning_rate": 1.0372632501100826e-06, "loss": 0.0434, "step": 5950 }, { "epoch": 1.3540386803185438, "grad_norm": 1.9639000027131648, "learning_rate": 1.0371960895998252e-06, "loss": 0.1173, "step": 5951 }, { "epoch": 1.3542662116040955, "grad_norm": 2.83162310264471, "learning_rate": 1.0371289206650349e-06, "loss": 0.1132, "step": 5952 }, { "epoch": 1.3544937428896473, "grad_norm": 1.6006614306610998, "learning_rate": 1.0370617433070842e-06, "loss": 0.0796, "step": 5953 }, { "epoch": 1.354721274175199, "grad_norm": 1.2529908090115869, "learning_rate": 1.036994557527346e-06, "loss": 0.0691, "step": 5954 }, { "epoch": 1.3549488054607508, "grad_norm": 3.8878650458910258, "learning_rate": 1.0369273633271936e-06, "loss": 0.1068, "step": 5955 }, { "epoch": 1.3551763367463026, "grad_norm": 1.504373255205242, "learning_rate": 1.0368601607080004e-06, "loss": 0.0542, "step": 5956 }, { "epoch": 1.3554038680318543, "grad_norm": 2.1975103651204004, "learning_rate": 1.0367929496711397e-06, "loss": 0.1272, "step": 5957 }, { "epoch": 1.355631399317406, "grad_norm": 1.4461832329122462, "learning_rate": 1.0367257302179853e-06, "loss": 0.0621, "step": 5958 }, { "epoch": 1.3558589306029578, "grad_norm": 1.8573245201747381, "learning_rate": 1.036658502349911e-06, "loss": 0.101, "step": 5959 }, { "epoch": 1.3560864618885096, "grad_norm": 1.567187644910704, "learning_rate": 1.0365912660682908e-06, "loss": 0.0984, "step": 5960 }, { "epoch": 1.3563139931740613, "grad_norm": 2.162745505227273, "learning_rate": 1.036524021374499e-06, "loss": 0.0802, "step": 5961 }, { "epoch": 1.356541524459613, "grad_norm": 2.3326169965757884, "learning_rate": 1.0364567682699098e-06, "loss": 0.1185, "step": 5962 }, { "epoch": 1.356769055745165, "grad_norm": 1.5145485778181955, "learning_rate": 1.036389506755898e-06, "loss": 0.0715, "step": 5963 }, { "epoch": 1.3569965870307168, "grad_norm": 1.3551941984066445, "learning_rate": 1.036322236833838e-06, "loss": 0.0768, "step": 5964 }, { "epoch": 1.3572241183162685, "grad_norm": 1.1211006521431286, "learning_rate": 1.0362549585051046e-06, "loss": 0.0894, "step": 5965 }, { "epoch": 1.3574516496018203, "grad_norm": 1.9996402931539106, "learning_rate": 1.0361876717710731e-06, "loss": 0.1432, "step": 5966 }, { "epoch": 1.357679180887372, "grad_norm": 1.522756728382017, "learning_rate": 1.0361203766331187e-06, "loss": 0.0979, "step": 5967 }, { "epoch": 1.3579067121729238, "grad_norm": 1.1331201405305724, "learning_rate": 1.036053073092617e-06, "loss": 0.0397, "step": 5968 }, { "epoch": 1.3581342434584756, "grad_norm": 2.497612271094558, "learning_rate": 1.0359857611509428e-06, "loss": 0.1579, "step": 5969 }, { "epoch": 1.3583617747440273, "grad_norm": 1.9306593556926885, "learning_rate": 1.0359184408094726e-06, "loss": 0.089, "step": 5970 }, { "epoch": 1.358589306029579, "grad_norm": 1.6257772134491746, "learning_rate": 1.0358511120695819e-06, "loss": 0.0781, "step": 5971 }, { "epoch": 1.3588168373151308, "grad_norm": 2.0008785362216304, "learning_rate": 1.0357837749326471e-06, "loss": 0.0939, "step": 5972 }, { "epoch": 1.3590443686006826, "grad_norm": 1.510087734989364, "learning_rate": 1.0357164294000442e-06, "loss": 0.0556, "step": 5973 }, { "epoch": 1.3592718998862343, "grad_norm": 1.329932710456839, "learning_rate": 1.0356490754731496e-06, "loss": 0.1131, "step": 5974 }, { "epoch": 1.359499431171786, "grad_norm": 2.409170005936539, "learning_rate": 1.03558171315334e-06, "loss": 0.0818, "step": 5975 }, { "epoch": 1.3597269624573378, "grad_norm": 1.2096360812807305, "learning_rate": 1.0355143424419922e-06, "loss": 0.0421, "step": 5976 }, { "epoch": 1.3599544937428896, "grad_norm": 1.6720121080102, "learning_rate": 1.035446963340483e-06, "loss": 0.1711, "step": 5977 }, { "epoch": 1.3601820250284415, "grad_norm": 1.4457125424363366, "learning_rate": 1.0353795758501894e-06, "loss": 0.1127, "step": 5978 }, { "epoch": 1.3604095563139933, "grad_norm": 2.109294137925049, "learning_rate": 1.0353121799724892e-06, "loss": 0.1251, "step": 5979 }, { "epoch": 1.360637087599545, "grad_norm": 1.463533989091872, "learning_rate": 1.0352447757087592e-06, "loss": 0.0521, "step": 5980 }, { "epoch": 1.3608646188850968, "grad_norm": 2.139182529319199, "learning_rate": 1.0351773630603774e-06, "loss": 0.0889, "step": 5981 }, { "epoch": 1.3610921501706486, "grad_norm": 1.6032438033790135, "learning_rate": 1.0351099420287213e-06, "loss": 0.0731, "step": 5982 }, { "epoch": 1.3613196814562003, "grad_norm": 1.5891744053001757, "learning_rate": 1.0350425126151694e-06, "loss": 0.1427, "step": 5983 }, { "epoch": 1.361547212741752, "grad_norm": 1.7389302044250665, "learning_rate": 1.0349750748210994e-06, "loss": 0.0719, "step": 5984 }, { "epoch": 1.3617747440273038, "grad_norm": 1.716288886034002, "learning_rate": 1.0349076286478897e-06, "loss": 0.0841, "step": 5985 }, { "epoch": 1.3620022753128556, "grad_norm": 1.5231208210255824, "learning_rate": 1.0348401740969188e-06, "loss": 0.0888, "step": 5986 }, { "epoch": 1.3622298065984073, "grad_norm": 1.9618341869885971, "learning_rate": 1.0347727111695652e-06, "loss": 0.0703, "step": 5987 }, { "epoch": 1.362457337883959, "grad_norm": 2.119675238920378, "learning_rate": 1.0347052398672079e-06, "loss": 0.1164, "step": 5988 }, { "epoch": 1.3626848691695108, "grad_norm": 1.4923771093419005, "learning_rate": 1.034637760191226e-06, "loss": 0.1033, "step": 5989 }, { "epoch": 1.3629124004550626, "grad_norm": 1.4764363477830744, "learning_rate": 1.0345702721429982e-06, "loss": 0.0973, "step": 5990 }, { "epoch": 1.3631399317406143, "grad_norm": 1.070134987056995, "learning_rate": 1.0345027757239044e-06, "loss": 0.053, "step": 5991 }, { "epoch": 1.363367463026166, "grad_norm": 1.1915968190738013, "learning_rate": 1.0344352709353237e-06, "loss": 0.081, "step": 5992 }, { "epoch": 1.3635949943117178, "grad_norm": 1.0769956353353518, "learning_rate": 1.034367757778636e-06, "loss": 0.0696, "step": 5993 }, { "epoch": 1.3638225255972696, "grad_norm": 1.928908351422348, "learning_rate": 1.034300236255221e-06, "loss": 0.0729, "step": 5994 }, { "epoch": 1.3640500568828213, "grad_norm": 2.8515506900628194, "learning_rate": 1.0342327063664587e-06, "loss": 0.0955, "step": 5995 }, { "epoch": 1.364277588168373, "grad_norm": 1.8960944981081354, "learning_rate": 1.0341651681137293e-06, "loss": 0.0725, "step": 5996 }, { "epoch": 1.3645051194539248, "grad_norm": 1.4003406696741232, "learning_rate": 1.0340976214984136e-06, "loss": 0.0408, "step": 5997 }, { "epoch": 1.3647326507394766, "grad_norm": 1.6929433228108035, "learning_rate": 1.0340300665218913e-06, "loss": 0.0823, "step": 5998 }, { "epoch": 1.3649601820250283, "grad_norm": 1.3416521952685851, "learning_rate": 1.0339625031855438e-06, "loss": 0.0755, "step": 5999 }, { "epoch": 1.36518771331058, "grad_norm": 1.3736008241682673, "learning_rate": 1.0338949314907515e-06, "loss": 0.1204, "step": 6000 }, { "epoch": 1.3654152445961318, "grad_norm": 1.1265827930392238, "learning_rate": 1.0338273514388958e-06, "loss": 0.0705, "step": 6001 }, { "epoch": 1.3656427758816838, "grad_norm": 2.0430120786187835, "learning_rate": 1.0337597630313578e-06, "loss": 0.1022, "step": 6002 }, { "epoch": 1.3658703071672356, "grad_norm": 1.556682501798468, "learning_rate": 1.0336921662695188e-06, "loss": 0.1093, "step": 6003 }, { "epoch": 1.3660978384527873, "grad_norm": 1.7795318504483018, "learning_rate": 1.0336245611547605e-06, "loss": 0.0873, "step": 6004 }, { "epoch": 1.366325369738339, "grad_norm": 0.7929012709354717, "learning_rate": 1.0335569476884643e-06, "loss": 0.0532, "step": 6005 }, { "epoch": 1.3665529010238908, "grad_norm": 1.6984974466407434, "learning_rate": 1.0334893258720124e-06, "loss": 0.122, "step": 6006 }, { "epoch": 1.3667804323094426, "grad_norm": 1.7720537171200441, "learning_rate": 1.033421695706787e-06, "loss": 0.0573, "step": 6007 }, { "epoch": 1.3670079635949943, "grad_norm": 1.915680831871843, "learning_rate": 1.03335405719417e-06, "loss": 0.0656, "step": 6008 }, { "epoch": 1.367235494880546, "grad_norm": 1.7862258212947102, "learning_rate": 1.0332864103355438e-06, "loss": 0.0953, "step": 6009 }, { "epoch": 1.3674630261660978, "grad_norm": 1.6810109121162469, "learning_rate": 1.0332187551322914e-06, "loss": 0.0922, "step": 6010 }, { "epoch": 1.3676905574516496, "grad_norm": 1.7563798308384426, "learning_rate": 1.0331510915857951e-06, "loss": 0.0441, "step": 6011 }, { "epoch": 1.3679180887372013, "grad_norm": 1.605413277057477, "learning_rate": 1.0330834196974378e-06, "loss": 0.0719, "step": 6012 }, { "epoch": 1.368145620022753, "grad_norm": 1.2335738036590032, "learning_rate": 1.0330157394686031e-06, "loss": 0.1298, "step": 6013 }, { "epoch": 1.3683731513083048, "grad_norm": 2.883850918158107, "learning_rate": 1.0329480509006738e-06, "loss": 0.1038, "step": 6014 }, { "epoch": 1.3686006825938566, "grad_norm": 2.689802752529953, "learning_rate": 1.0328803539950332e-06, "loss": 0.0876, "step": 6015 }, { "epoch": 1.3688282138794083, "grad_norm": 4.58606537572728, "learning_rate": 1.0328126487530657e-06, "loss": 0.0673, "step": 6016 }, { "epoch": 1.3690557451649603, "grad_norm": 1.6526437618261232, "learning_rate": 1.0327449351761542e-06, "loss": 0.0868, "step": 6017 }, { "epoch": 1.369283276450512, "grad_norm": 1.6946562162827543, "learning_rate": 1.0326772132656828e-06, "loss": 0.0715, "step": 6018 }, { "epoch": 1.3695108077360638, "grad_norm": 3.007213425143118, "learning_rate": 1.0326094830230362e-06, "loss": 0.1021, "step": 6019 }, { "epoch": 1.3697383390216156, "grad_norm": 2.611585168793522, "learning_rate": 1.032541744449598e-06, "loss": 0.2578, "step": 6020 }, { "epoch": 1.3699658703071673, "grad_norm": 1.4761408630263304, "learning_rate": 1.0324739975467529e-06, "loss": 0.1518, "step": 6021 }, { "epoch": 1.370193401592719, "grad_norm": 1.3284428317452652, "learning_rate": 1.0324062423158857e-06, "loss": 0.0949, "step": 6022 }, { "epoch": 1.3704209328782708, "grad_norm": 1.2642919336112104, "learning_rate": 1.0323384787583809e-06, "loss": 0.087, "step": 6023 }, { "epoch": 1.3706484641638226, "grad_norm": 2.504112434393499, "learning_rate": 1.0322707068756238e-06, "loss": 0.0953, "step": 6024 }, { "epoch": 1.3708759954493743, "grad_norm": 1.1108553042003824, "learning_rate": 1.0322029266689992e-06, "loss": 0.112, "step": 6025 }, { "epoch": 1.371103526734926, "grad_norm": 1.9589141312824296, "learning_rate": 1.0321351381398926e-06, "loss": 0.1449, "step": 6026 }, { "epoch": 1.3713310580204778, "grad_norm": 1.5284346282963852, "learning_rate": 1.0320673412896891e-06, "loss": 0.0704, "step": 6027 }, { "epoch": 1.3715585893060296, "grad_norm": 1.6360808959018123, "learning_rate": 1.0319995361197752e-06, "loss": 0.1075, "step": 6028 }, { "epoch": 1.3717861205915813, "grad_norm": 1.0651159743295313, "learning_rate": 1.0319317226315358e-06, "loss": 0.1076, "step": 6029 }, { "epoch": 1.372013651877133, "grad_norm": 1.4465076881759498, "learning_rate": 1.0318639008263572e-06, "loss": 0.0904, "step": 6030 }, { "epoch": 1.3722411831626848, "grad_norm": 1.5828337267735475, "learning_rate": 1.0317960707056256e-06, "loss": 0.106, "step": 6031 }, { "epoch": 1.3724687144482366, "grad_norm": 1.3535896802540421, "learning_rate": 1.0317282322707275e-06, "loss": 0.0623, "step": 6032 }, { "epoch": 1.3726962457337883, "grad_norm": 0.6064410036946437, "learning_rate": 1.0316603855230492e-06, "loss": 0.0251, "step": 6033 }, { "epoch": 1.37292377701934, "grad_norm": 1.9326954205683844, "learning_rate": 1.0315925304639773e-06, "loss": 0.0851, "step": 6034 }, { "epoch": 1.3731513083048918, "grad_norm": 1.6547627103908698, "learning_rate": 1.0315246670948988e-06, "loss": 0.066, "step": 6035 }, { "epoch": 1.3733788395904436, "grad_norm": 1.521791506208634, "learning_rate": 1.0314567954172006e-06, "loss": 0.1252, "step": 6036 }, { "epoch": 1.3736063708759954, "grad_norm": 1.5173057147246578, "learning_rate": 1.03138891543227e-06, "loss": 0.1001, "step": 6037 }, { "epoch": 1.373833902161547, "grad_norm": 1.6935760456434947, "learning_rate": 1.031321027141494e-06, "loss": 0.0764, "step": 6038 }, { "epoch": 1.3740614334470989, "grad_norm": 2.1148518007647645, "learning_rate": 1.0312531305462607e-06, "loss": 0.2322, "step": 6039 }, { "epoch": 1.3742889647326506, "grad_norm": 1.5431113869692263, "learning_rate": 1.031185225647957e-06, "loss": 0.0815, "step": 6040 }, { "epoch": 1.3745164960182026, "grad_norm": 1.7571930584978857, "learning_rate": 1.0311173124479715e-06, "loss": 0.0729, "step": 6041 }, { "epoch": 1.3747440273037543, "grad_norm": 1.6709560157545251, "learning_rate": 1.0310493909476916e-06, "loss": 0.0888, "step": 6042 }, { "epoch": 1.374971558589306, "grad_norm": 0.9656044017250746, "learning_rate": 1.0309814611485062e-06, "loss": 0.0697, "step": 6043 }, { "epoch": 1.3751990898748578, "grad_norm": 1.7934133654712006, "learning_rate": 1.0309135230518028e-06, "loss": 0.0923, "step": 6044 }, { "epoch": 1.3754266211604096, "grad_norm": 1.7460975815711577, "learning_rate": 1.0308455766589706e-06, "loss": 0.0788, "step": 6045 }, { "epoch": 1.3756541524459613, "grad_norm": 2.3162330853750124, "learning_rate": 1.030777621971398e-06, "loss": 0.066, "step": 6046 }, { "epoch": 1.375881683731513, "grad_norm": 2.2361579800366034, "learning_rate": 1.0307096589904742e-06, "loss": 0.0959, "step": 6047 }, { "epoch": 1.3761092150170648, "grad_norm": 2.2482212896632454, "learning_rate": 1.030641687717588e-06, "loss": 0.0972, "step": 6048 }, { "epoch": 1.3763367463026166, "grad_norm": 3.063555572971071, "learning_rate": 1.0305737081541283e-06, "loss": 0.0879, "step": 6049 }, { "epoch": 1.3765642775881684, "grad_norm": 1.1688880694396802, "learning_rate": 1.0305057203014848e-06, "loss": 0.0986, "step": 6050 }, { "epoch": 1.37679180887372, "grad_norm": 1.220418623684574, "learning_rate": 1.0304377241610472e-06, "loss": 0.1351, "step": 6051 }, { "epoch": 1.3770193401592719, "grad_norm": 1.6807893089278623, "learning_rate": 1.030369719734205e-06, "loss": 0.1322, "step": 6052 }, { "epoch": 1.3772468714448236, "grad_norm": 1.9365626811701335, "learning_rate": 1.0303017070223482e-06, "loss": 0.1129, "step": 6053 }, { "epoch": 1.3774744027303754, "grad_norm": 0.8533626037721147, "learning_rate": 1.0302336860268667e-06, "loss": 0.0496, "step": 6054 }, { "epoch": 1.377701934015927, "grad_norm": 1.5261483411582581, "learning_rate": 1.0301656567491507e-06, "loss": 0.121, "step": 6055 }, { "epoch": 1.377929465301479, "grad_norm": 2.511979075018236, "learning_rate": 1.0300976191905907e-06, "loss": 0.0824, "step": 6056 }, { "epoch": 1.3781569965870308, "grad_norm": 1.38134265855808, "learning_rate": 1.0300295733525774e-06, "loss": 0.1248, "step": 6057 }, { "epoch": 1.3783845278725826, "grad_norm": 1.5095526065881761, "learning_rate": 1.0299615192365015e-06, "loss": 0.0656, "step": 6058 }, { "epoch": 1.3786120591581343, "grad_norm": 1.244350640420765, "learning_rate": 1.0298934568437535e-06, "loss": 0.0809, "step": 6059 }, { "epoch": 1.378839590443686, "grad_norm": 2.00551764047578, "learning_rate": 1.029825386175725e-06, "loss": 0.1414, "step": 6060 }, { "epoch": 1.3790671217292378, "grad_norm": 0.9661969179329034, "learning_rate": 1.029757307233807e-06, "loss": 0.0969, "step": 6061 }, { "epoch": 1.3792946530147896, "grad_norm": 2.2025332944612495, "learning_rate": 1.0296892200193908e-06, "loss": 0.169, "step": 6062 }, { "epoch": 1.3795221843003413, "grad_norm": 1.7254179219751133, "learning_rate": 1.029621124533868e-06, "loss": 0.0718, "step": 6063 }, { "epoch": 1.379749715585893, "grad_norm": 1.7478260517346846, "learning_rate": 1.0295530207786307e-06, "loss": 0.0525, "step": 6064 }, { "epoch": 1.3799772468714449, "grad_norm": 2.139388267720325, "learning_rate": 1.0294849087550703e-06, "loss": 0.1528, "step": 6065 }, { "epoch": 1.3802047781569966, "grad_norm": 2.303671631702034, "learning_rate": 1.0294167884645795e-06, "loss": 0.1523, "step": 6066 }, { "epoch": 1.3804323094425484, "grad_norm": 3.0251883633272167, "learning_rate": 1.02934865990855e-06, "loss": 0.1039, "step": 6067 }, { "epoch": 1.3806598407281, "grad_norm": 1.845259206551197, "learning_rate": 1.0292805230883743e-06, "loss": 0.0584, "step": 6068 }, { "epoch": 1.3808873720136519, "grad_norm": 2.085602227170942, "learning_rate": 1.0292123780054452e-06, "loss": 0.0759, "step": 6069 }, { "epoch": 1.3811149032992036, "grad_norm": 1.5485285983889232, "learning_rate": 1.0291442246611555e-06, "loss": 0.1108, "step": 6070 }, { "epoch": 1.3813424345847554, "grad_norm": 1.9756390644683244, "learning_rate": 1.029076063056898e-06, "loss": 0.1089, "step": 6071 }, { "epoch": 1.3815699658703071, "grad_norm": 1.210855828233612, "learning_rate": 1.0290078931940656e-06, "loss": 0.0958, "step": 6072 }, { "epoch": 1.3817974971558589, "grad_norm": 0.8802448365353132, "learning_rate": 1.028939715074052e-06, "loss": 0.0334, "step": 6073 }, { "epoch": 1.3820250284414106, "grad_norm": 1.3463530588489843, "learning_rate": 1.0288715286982504e-06, "loss": 0.0677, "step": 6074 }, { "epoch": 1.3822525597269624, "grad_norm": 0.7421571324570394, "learning_rate": 1.0288033340680543e-06, "loss": 0.026, "step": 6075 }, { "epoch": 1.3824800910125141, "grad_norm": 1.0235714825965652, "learning_rate": 1.0287351311848574e-06, "loss": 0.0874, "step": 6076 }, { "epoch": 1.3827076222980659, "grad_norm": 2.778457322941933, "learning_rate": 1.028666920050054e-06, "loss": 0.0906, "step": 6077 }, { "epoch": 1.3829351535836176, "grad_norm": 1.0504053551900818, "learning_rate": 1.0285987006650381e-06, "loss": 0.0382, "step": 6078 }, { "epoch": 1.3831626848691694, "grad_norm": 1.5439276989638069, "learning_rate": 1.028530473031204e-06, "loss": 0.1201, "step": 6079 }, { "epoch": 1.3833902161547214, "grad_norm": 2.2472197379141927, "learning_rate": 1.0284622371499457e-06, "loss": 0.0875, "step": 6080 }, { "epoch": 1.383617747440273, "grad_norm": 1.3277316421883985, "learning_rate": 1.0283939930226584e-06, "loss": 0.1003, "step": 6081 }, { "epoch": 1.3838452787258249, "grad_norm": 1.8061757451329605, "learning_rate": 1.0283257406507366e-06, "loss": 0.0628, "step": 6082 }, { "epoch": 1.3840728100113766, "grad_norm": 1.2434351612293517, "learning_rate": 1.0282574800355755e-06, "loss": 0.0762, "step": 6083 }, { "epoch": 1.3843003412969284, "grad_norm": 2.1571207801279955, "learning_rate": 1.0281892111785699e-06, "loss": 0.1653, "step": 6084 }, { "epoch": 1.3845278725824801, "grad_norm": 0.7925456249329077, "learning_rate": 1.0281209340811151e-06, "loss": 0.0503, "step": 6085 }, { "epoch": 1.3847554038680319, "grad_norm": 1.6782816202693709, "learning_rate": 1.0280526487446069e-06, "loss": 0.0482, "step": 6086 }, { "epoch": 1.3849829351535836, "grad_norm": 2.1389563232253104, "learning_rate": 1.0279843551704409e-06, "loss": 0.0586, "step": 6087 }, { "epoch": 1.3852104664391354, "grad_norm": 1.5865791625267798, "learning_rate": 1.0279160533600121e-06, "loss": 0.1161, "step": 6088 }, { "epoch": 1.3854379977246871, "grad_norm": 1.4237771848423213, "learning_rate": 1.0278477433147176e-06, "loss": 0.1359, "step": 6089 }, { "epoch": 1.3856655290102389, "grad_norm": 1.5778953854419226, "learning_rate": 1.0277794250359529e-06, "loss": 0.0863, "step": 6090 }, { "epoch": 1.3858930602957906, "grad_norm": 2.5592814368264274, "learning_rate": 1.0277110985251142e-06, "loss": 0.1515, "step": 6091 }, { "epoch": 1.3861205915813424, "grad_norm": 1.5625510631280042, "learning_rate": 1.0276427637835984e-06, "loss": 0.1457, "step": 6092 }, { "epoch": 1.3863481228668941, "grad_norm": 1.4218575035452607, "learning_rate": 1.0275744208128019e-06, "loss": 0.0434, "step": 6093 }, { "epoch": 1.3865756541524459, "grad_norm": 1.3700974293267705, "learning_rate": 1.0275060696141215e-06, "loss": 0.0574, "step": 6094 }, { "epoch": 1.3868031854379979, "grad_norm": 2.074735787061168, "learning_rate": 1.027437710188954e-06, "loss": 0.078, "step": 6095 }, { "epoch": 1.3870307167235496, "grad_norm": 2.2617424180316648, "learning_rate": 1.027369342538697e-06, "loss": 0.16, "step": 6096 }, { "epoch": 1.3872582480091014, "grad_norm": 1.6506933315691088, "learning_rate": 1.0273009666647472e-06, "loss": 0.1009, "step": 6097 }, { "epoch": 1.387485779294653, "grad_norm": 2.093753106006641, "learning_rate": 1.0272325825685028e-06, "loss": 0.0834, "step": 6098 }, { "epoch": 1.3877133105802049, "grad_norm": 1.4578100498874582, "learning_rate": 1.027164190251361e-06, "loss": 0.1295, "step": 6099 }, { "epoch": 1.3879408418657566, "grad_norm": 0.8839198324440494, "learning_rate": 1.0270957897147196e-06, "loss": 0.0752, "step": 6100 }, { "epoch": 1.3881683731513084, "grad_norm": 1.8268726950343663, "learning_rate": 1.0270273809599764e-06, "loss": 0.0755, "step": 6101 }, { "epoch": 1.3883959044368601, "grad_norm": 1.7221146859777103, "learning_rate": 1.0269589639885302e-06, "loss": 0.0634, "step": 6102 }, { "epoch": 1.3886234357224119, "grad_norm": 2.2959019107378036, "learning_rate": 1.0268905388017788e-06, "loss": 0.1049, "step": 6103 }, { "epoch": 1.3888509670079636, "grad_norm": 0.9734172611334406, "learning_rate": 1.0268221054011208e-06, "loss": 0.0407, "step": 6104 }, { "epoch": 1.3890784982935154, "grad_norm": 1.6445095549398978, "learning_rate": 1.026753663787955e-06, "loss": 0.0779, "step": 6105 }, { "epoch": 1.3893060295790671, "grad_norm": 1.7481612724663955, "learning_rate": 1.0266852139636799e-06, "loss": 0.0655, "step": 6106 }, { "epoch": 1.3895335608646189, "grad_norm": 2.387058614741269, "learning_rate": 1.0266167559296946e-06, "loss": 0.1849, "step": 6107 }, { "epoch": 1.3897610921501706, "grad_norm": 1.749430126521713, "learning_rate": 1.0265482896873986e-06, "loss": 0.0571, "step": 6108 }, { "epoch": 1.3899886234357224, "grad_norm": 2.154234273156198, "learning_rate": 1.0264798152381907e-06, "loss": 0.0949, "step": 6109 }, { "epoch": 1.3902161547212741, "grad_norm": 2.0717344001521942, "learning_rate": 1.026411332583471e-06, "loss": 0.0803, "step": 6110 }, { "epoch": 1.3904436860068259, "grad_norm": 1.9152543686027883, "learning_rate": 1.0263428417246385e-06, "loss": 0.109, "step": 6111 }, { "epoch": 1.3906712172923776, "grad_norm": 1.4284369217746564, "learning_rate": 1.0262743426630935e-06, "loss": 0.0646, "step": 6112 }, { "epoch": 1.3908987485779294, "grad_norm": 1.5184843974473854, "learning_rate": 1.0262058354002357e-06, "loss": 0.1204, "step": 6113 }, { "epoch": 1.3911262798634811, "grad_norm": 1.4056897180706915, "learning_rate": 1.0261373199374655e-06, "loss": 0.0607, "step": 6114 }, { "epoch": 1.391353811149033, "grad_norm": 2.348101018413372, "learning_rate": 1.026068796276183e-06, "loss": 0.066, "step": 6115 }, { "epoch": 1.3915813424345846, "grad_norm": 1.9137612245987456, "learning_rate": 1.0260002644177892e-06, "loss": 0.0845, "step": 6116 }, { "epoch": 1.3918088737201364, "grad_norm": 2.3856609440966827, "learning_rate": 1.025931724363684e-06, "loss": 0.1256, "step": 6117 }, { "epoch": 1.3920364050056881, "grad_norm": 1.3816079520128584, "learning_rate": 1.0258631761152687e-06, "loss": 0.094, "step": 6118 }, { "epoch": 1.3922639362912401, "grad_norm": 1.433005286962213, "learning_rate": 1.0257946196739444e-06, "loss": 0.1512, "step": 6119 }, { "epoch": 1.3924914675767919, "grad_norm": 1.2323580910038312, "learning_rate": 1.025726055041112e-06, "loss": 0.0493, "step": 6120 }, { "epoch": 1.3927189988623436, "grad_norm": 1.7407247244496606, "learning_rate": 1.0256574822181727e-06, "loss": 0.1078, "step": 6121 }, { "epoch": 1.3929465301478954, "grad_norm": 1.5994369802036565, "learning_rate": 1.0255889012065285e-06, "loss": 0.0655, "step": 6122 }, { "epoch": 1.3931740614334471, "grad_norm": 1.7930383624127975, "learning_rate": 1.025520312007581e-06, "loss": 0.0875, "step": 6123 }, { "epoch": 1.3934015927189989, "grad_norm": 2.1120758522904453, "learning_rate": 1.0254517146227314e-06, "loss": 0.1572, "step": 6124 }, { "epoch": 1.3936291240045506, "grad_norm": 2.5678979874544856, "learning_rate": 1.0253831090533823e-06, "loss": 0.1329, "step": 6125 }, { "epoch": 1.3938566552901024, "grad_norm": 2.1764102691534175, "learning_rate": 1.0253144953009357e-06, "loss": 0.1409, "step": 6126 }, { "epoch": 1.3940841865756541, "grad_norm": 1.3223191843329276, "learning_rate": 1.025245873366794e-06, "loss": 0.0666, "step": 6127 }, { "epoch": 1.394311717861206, "grad_norm": 1.9410808222093632, "learning_rate": 1.0251772432523596e-06, "loss": 0.2062, "step": 6128 }, { "epoch": 1.3945392491467576, "grad_norm": 1.8157179540741168, "learning_rate": 1.0251086049590355e-06, "loss": 0.1529, "step": 6129 }, { "epoch": 1.3947667804323094, "grad_norm": 1.7518458655719975, "learning_rate": 1.0250399584882239e-06, "loss": 0.077, "step": 6130 }, { "epoch": 1.3949943117178611, "grad_norm": 2.1802078459114016, "learning_rate": 1.0249713038413285e-06, "loss": 0.0698, "step": 6131 }, { "epoch": 1.395221843003413, "grad_norm": 1.4485958979135534, "learning_rate": 1.024902641019752e-06, "loss": 0.0684, "step": 6132 }, { "epoch": 1.3954493742889647, "grad_norm": 1.9433569593797686, "learning_rate": 1.024833970024898e-06, "loss": 0.101, "step": 6133 }, { "epoch": 1.3956769055745166, "grad_norm": 2.3198698152365664, "learning_rate": 1.0247652908581697e-06, "loss": 0.1356, "step": 6134 }, { "epoch": 1.3959044368600684, "grad_norm": 1.9234892150048948, "learning_rate": 1.0246966035209712e-06, "loss": 0.1432, "step": 6135 }, { "epoch": 1.3961319681456201, "grad_norm": 2.227200428647969, "learning_rate": 1.024627908014706e-06, "loss": 0.0873, "step": 6136 }, { "epoch": 1.3963594994311719, "grad_norm": 1.3512910037945858, "learning_rate": 1.0245592043407784e-06, "loss": 0.0561, "step": 6137 }, { "epoch": 1.3965870307167236, "grad_norm": 1.5050664712520612, "learning_rate": 1.0244904925005924e-06, "loss": 0.0631, "step": 6138 }, { "epoch": 1.3968145620022754, "grad_norm": 1.3276673190831771, "learning_rate": 1.0244217724955523e-06, "loss": 0.1134, "step": 6139 }, { "epoch": 1.3970420932878271, "grad_norm": 2.11597419896245, "learning_rate": 1.0243530443270627e-06, "loss": 0.0511, "step": 6140 }, { "epoch": 1.3972696245733789, "grad_norm": 1.383620506367811, "learning_rate": 1.0242843079965281e-06, "loss": 0.0396, "step": 6141 }, { "epoch": 1.3974971558589306, "grad_norm": 2.0579977081129304, "learning_rate": 1.024215563505354e-06, "loss": 0.0488, "step": 6142 }, { "epoch": 1.3977246871444824, "grad_norm": 1.6176146828457385, "learning_rate": 1.0241468108549443e-06, "loss": 0.0657, "step": 6143 }, { "epoch": 1.3979522184300341, "grad_norm": 1.1605364666028937, "learning_rate": 1.024078050046705e-06, "loss": 0.1009, "step": 6144 }, { "epoch": 1.398179749715586, "grad_norm": 1.9390494144680035, "learning_rate": 1.0240092810820412e-06, "loss": 0.1228, "step": 6145 }, { "epoch": 1.3984072810011376, "grad_norm": 2.2751796101471444, "learning_rate": 1.0239405039623585e-06, "loss": 0.1679, "step": 6146 }, { "epoch": 1.3986348122866894, "grad_norm": 1.4446668035390078, "learning_rate": 1.0238717186890625e-06, "loss": 0.047, "step": 6147 }, { "epoch": 1.3988623435722412, "grad_norm": 1.2522080333454766, "learning_rate": 1.0238029252635591e-06, "loss": 0.0541, "step": 6148 }, { "epoch": 1.399089874857793, "grad_norm": 1.206361926946461, "learning_rate": 1.0237341236872544e-06, "loss": 0.0262, "step": 6149 }, { "epoch": 1.3993174061433447, "grad_norm": 1.3900768598387714, "learning_rate": 1.0236653139615542e-06, "loss": 0.0632, "step": 6150 }, { "epoch": 1.3995449374288964, "grad_norm": 1.446506693950247, "learning_rate": 1.0235964960878655e-06, "loss": 0.135, "step": 6151 }, { "epoch": 1.3997724687144482, "grad_norm": 1.2859328173017583, "learning_rate": 1.023527670067594e-06, "loss": 0.0906, "step": 6152 }, { "epoch": 1.4, "grad_norm": 1.8510910035348387, "learning_rate": 1.023458835902147e-06, "loss": 0.1158, "step": 6153 }, { "epoch": 1.4002275312855517, "grad_norm": 2.006291037320859, "learning_rate": 1.0233899935929311e-06, "loss": 0.0972, "step": 6154 }, { "epoch": 1.4004550625711034, "grad_norm": 1.9171493600614367, "learning_rate": 1.0233211431413534e-06, "loss": 0.1664, "step": 6155 }, { "epoch": 1.4006825938566552, "grad_norm": 1.9981434882673672, "learning_rate": 1.023252284548821e-06, "loss": 0.1034, "step": 6156 }, { "epoch": 1.400910125142207, "grad_norm": 1.6198664782069099, "learning_rate": 1.0231834178167412e-06, "loss": 0.049, "step": 6157 }, { "epoch": 1.401137656427759, "grad_norm": 2.1959481687542817, "learning_rate": 1.0231145429465216e-06, "loss": 0.0896, "step": 6158 }, { "epoch": 1.4013651877133106, "grad_norm": 1.5186673742926442, "learning_rate": 1.02304565993957e-06, "loss": 0.0876, "step": 6159 }, { "epoch": 1.4015927189988624, "grad_norm": 1.6726004894471778, "learning_rate": 1.022976768797294e-06, "loss": 0.0649, "step": 6160 }, { "epoch": 1.4018202502844141, "grad_norm": 1.7616787253173187, "learning_rate": 1.0229078695211015e-06, "loss": 0.0689, "step": 6161 }, { "epoch": 1.402047781569966, "grad_norm": 2.101238409018286, "learning_rate": 1.0228389621124011e-06, "loss": 0.0962, "step": 6162 }, { "epoch": 1.4022753128555177, "grad_norm": 18.423779024002584, "learning_rate": 1.022770046572601e-06, "loss": 0.1132, "step": 6163 }, { "epoch": 1.4025028441410694, "grad_norm": 1.3626382690966223, "learning_rate": 1.0227011229031095e-06, "loss": 0.0923, "step": 6164 }, { "epoch": 1.4027303754266212, "grad_norm": 1.5664192058369393, "learning_rate": 1.0226321911053353e-06, "loss": 0.0883, "step": 6165 }, { "epoch": 1.402957906712173, "grad_norm": 2.1407280846260557, "learning_rate": 1.0225632511806873e-06, "loss": 0.0813, "step": 6166 }, { "epoch": 1.4031854379977247, "grad_norm": 1.9126432331266539, "learning_rate": 1.0224943031305747e-06, "loss": 0.083, "step": 6167 }, { "epoch": 1.4034129692832764, "grad_norm": 1.2929386744191405, "learning_rate": 1.0224253469564067e-06, "loss": 0.0842, "step": 6168 }, { "epoch": 1.4036405005688282, "grad_norm": 1.5870857265163638, "learning_rate": 1.0223563826595923e-06, "loss": 0.1172, "step": 6169 }, { "epoch": 1.40386803185438, "grad_norm": 1.1823753570324391, "learning_rate": 1.0222874102415412e-06, "loss": 0.0551, "step": 6170 }, { "epoch": 1.4040955631399317, "grad_norm": 2.996937816725621, "learning_rate": 1.0222184297036628e-06, "loss": 0.1564, "step": 6171 }, { "epoch": 1.4043230944254836, "grad_norm": 2.2927668992732997, "learning_rate": 1.0221494410473674e-06, "loss": 0.1045, "step": 6172 }, { "epoch": 1.4045506257110354, "grad_norm": 1.1281259385923086, "learning_rate": 1.0220804442740648e-06, "loss": 0.042, "step": 6173 }, { "epoch": 1.4047781569965871, "grad_norm": 1.9469775916421443, "learning_rate": 1.022011439385165e-06, "loss": 0.0999, "step": 6174 }, { "epoch": 1.405005688282139, "grad_norm": 1.6529622749650812, "learning_rate": 1.0219424263820784e-06, "loss": 0.0777, "step": 6175 }, { "epoch": 1.4052332195676907, "grad_norm": 1.803447728801388, "learning_rate": 1.0218734052662158e-06, "loss": 0.0877, "step": 6176 }, { "epoch": 1.4054607508532424, "grad_norm": 1.0877665384054194, "learning_rate": 1.0218043760389875e-06, "loss": 0.1214, "step": 6177 }, { "epoch": 1.4056882821387942, "grad_norm": 3.036932254264831, "learning_rate": 1.0217353387018045e-06, "loss": 0.1388, "step": 6178 }, { "epoch": 1.405915813424346, "grad_norm": 1.6432118346236038, "learning_rate": 1.0216662932560779e-06, "loss": 0.091, "step": 6179 }, { "epoch": 1.4061433447098977, "grad_norm": 1.4641635382861418, "learning_rate": 1.0215972397032185e-06, "loss": 0.0498, "step": 6180 }, { "epoch": 1.4063708759954494, "grad_norm": 2.2055527595278366, "learning_rate": 1.0215281780446378e-06, "loss": 0.1207, "step": 6181 }, { "epoch": 1.4065984072810012, "grad_norm": 1.3294688000691555, "learning_rate": 1.0214591082817477e-06, "loss": 0.1173, "step": 6182 }, { "epoch": 1.406825938566553, "grad_norm": 2.179512895860987, "learning_rate": 1.0213900304159592e-06, "loss": 0.0584, "step": 6183 }, { "epoch": 1.4070534698521047, "grad_norm": 1.6222476017901606, "learning_rate": 1.0213209444486844e-06, "loss": 0.0875, "step": 6184 }, { "epoch": 1.4072810011376564, "grad_norm": 1.93817268709916, "learning_rate": 1.0212518503813356e-06, "loss": 0.1459, "step": 6185 }, { "epoch": 1.4075085324232082, "grad_norm": 1.741064264886961, "learning_rate": 1.0211827482153244e-06, "loss": 0.0728, "step": 6186 }, { "epoch": 1.40773606370876, "grad_norm": 2.5576686669016864, "learning_rate": 1.0211136379520636e-06, "loss": 0.2464, "step": 6187 }, { "epoch": 1.4079635949943117, "grad_norm": 1.187363532717576, "learning_rate": 1.0210445195929653e-06, "loss": 0.0429, "step": 6188 }, { "epoch": 1.4081911262798634, "grad_norm": 1.7031117991008564, "learning_rate": 1.0209753931394426e-06, "loss": 0.1466, "step": 6189 }, { "epoch": 1.4084186575654152, "grad_norm": 1.4426123688536225, "learning_rate": 1.0209062585929077e-06, "loss": 0.0296, "step": 6190 }, { "epoch": 1.408646188850967, "grad_norm": 1.9144448784443266, "learning_rate": 1.0208371159547742e-06, "loss": 0.1058, "step": 6191 }, { "epoch": 1.4088737201365187, "grad_norm": 1.244136782652026, "learning_rate": 1.020767965226455e-06, "loss": 0.1711, "step": 6192 }, { "epoch": 1.4091012514220704, "grad_norm": 1.452605807632957, "learning_rate": 1.0206988064093633e-06, "loss": 0.0815, "step": 6193 }, { "epoch": 1.4093287827076222, "grad_norm": 1.8442597312009432, "learning_rate": 1.0206296395049128e-06, "loss": 0.0674, "step": 6194 }, { "epoch": 1.409556313993174, "grad_norm": 1.7262145580460375, "learning_rate": 1.020560464514517e-06, "loss": 0.0657, "step": 6195 }, { "epoch": 1.4097838452787257, "grad_norm": 1.5207284157163985, "learning_rate": 1.0204912814395898e-06, "loss": 0.105, "step": 6196 }, { "epoch": 1.4100113765642777, "grad_norm": 2.3815556969730314, "learning_rate": 1.020422090281545e-06, "loss": 0.0772, "step": 6197 }, { "epoch": 1.4102389078498294, "grad_norm": 2.268950151122507, "learning_rate": 1.0203528910417967e-06, "loss": 0.1148, "step": 6198 }, { "epoch": 1.4104664391353812, "grad_norm": 1.7550413594498013, "learning_rate": 1.0202836837217597e-06, "loss": 0.0923, "step": 6199 }, { "epoch": 1.410693970420933, "grad_norm": 1.1355924309874208, "learning_rate": 1.0202144683228478e-06, "loss": 0.065, "step": 6200 }, { "epoch": 1.4109215017064847, "grad_norm": 1.3913974009246857, "learning_rate": 1.0201452448464762e-06, "loss": 0.073, "step": 6201 }, { "epoch": 1.4111490329920364, "grad_norm": 5.592058527828976, "learning_rate": 1.0200760132940597e-06, "loss": 0.2201, "step": 6202 }, { "epoch": 1.4113765642775882, "grad_norm": 1.1062997370223235, "learning_rate": 1.0200067736670125e-06, "loss": 0.0628, "step": 6203 }, { "epoch": 1.41160409556314, "grad_norm": 0.8021573016810204, "learning_rate": 1.0199375259667505e-06, "loss": 0.0557, "step": 6204 }, { "epoch": 1.4118316268486917, "grad_norm": 1.344669498905401, "learning_rate": 1.0198682701946889e-06, "loss": 0.1438, "step": 6205 }, { "epoch": 1.4120591581342434, "grad_norm": 2.0484954391129833, "learning_rate": 1.0197990063522428e-06, "loss": 0.0764, "step": 6206 }, { "epoch": 1.4122866894197952, "grad_norm": 1.1107361142156864, "learning_rate": 1.0197297344408284e-06, "loss": 0.0598, "step": 6207 }, { "epoch": 1.412514220705347, "grad_norm": 1.818289355783258, "learning_rate": 1.0196604544618607e-06, "loss": 0.0911, "step": 6208 }, { "epoch": 1.4127417519908987, "grad_norm": 2.3440454116714324, "learning_rate": 1.0195911664167562e-06, "loss": 0.075, "step": 6209 }, { "epoch": 1.4129692832764504, "grad_norm": 1.4981628212882372, "learning_rate": 1.0195218703069311e-06, "loss": 0.1064, "step": 6210 }, { "epoch": 1.4131968145620024, "grad_norm": 1.6748847246596112, "learning_rate": 1.0194525661338014e-06, "loss": 0.0789, "step": 6211 }, { "epoch": 1.4134243458475542, "grad_norm": 1.6005524221534388, "learning_rate": 1.0193832538987838e-06, "loss": 0.0713, "step": 6212 }, { "epoch": 1.413651877133106, "grad_norm": 1.737186728394442, "learning_rate": 1.0193139336032945e-06, "loss": 0.0587, "step": 6213 }, { "epoch": 1.4138794084186577, "grad_norm": 1.0915457381923228, "learning_rate": 1.0192446052487505e-06, "loss": 0.0754, "step": 6214 }, { "epoch": 1.4141069397042094, "grad_norm": 1.7435000498468016, "learning_rate": 1.0191752688365691e-06, "loss": 0.0765, "step": 6215 }, { "epoch": 1.4143344709897612, "grad_norm": 1.8752909733484666, "learning_rate": 1.019105924368167e-06, "loss": 0.0922, "step": 6216 }, { "epoch": 1.414562002275313, "grad_norm": 1.4392973922132166, "learning_rate": 1.0190365718449616e-06, "loss": 0.0568, "step": 6217 }, { "epoch": 1.4147895335608647, "grad_norm": 2.2754532688396485, "learning_rate": 1.0189672112683704e-06, "loss": 0.1008, "step": 6218 }, { "epoch": 1.4150170648464164, "grad_norm": 2.431173380429492, "learning_rate": 1.0188978426398107e-06, "loss": 0.099, "step": 6219 }, { "epoch": 1.4152445961319682, "grad_norm": 1.5991864100743545, "learning_rate": 1.0188284659607007e-06, "loss": 0.143, "step": 6220 }, { "epoch": 1.41547212741752, "grad_norm": 1.3594759675442543, "learning_rate": 1.018759081232458e-06, "loss": 0.1335, "step": 6221 }, { "epoch": 1.4156996587030717, "grad_norm": 0.7702332161662481, "learning_rate": 1.0186896884565005e-06, "loss": 0.031, "step": 6222 }, { "epoch": 1.4159271899886234, "grad_norm": 1.7864404602384039, "learning_rate": 1.0186202876342473e-06, "loss": 0.0831, "step": 6223 }, { "epoch": 1.4161547212741752, "grad_norm": 1.484634480862312, "learning_rate": 1.0185508787671162e-06, "loss": 0.0801, "step": 6224 }, { "epoch": 1.416382252559727, "grad_norm": 1.7693126707625348, "learning_rate": 1.0184814618565257e-06, "loss": 0.0877, "step": 6225 }, { "epoch": 1.4166097838452787, "grad_norm": 2.12446577736148, "learning_rate": 1.0184120369038948e-06, "loss": 0.1375, "step": 6226 }, { "epoch": 1.4168373151308304, "grad_norm": 2.71536098313585, "learning_rate": 1.0183426039106425e-06, "loss": 0.0992, "step": 6227 }, { "epoch": 1.4170648464163822, "grad_norm": 2.3403843742524977, "learning_rate": 1.0182731628781876e-06, "loss": 0.1272, "step": 6228 }, { "epoch": 1.417292377701934, "grad_norm": 0.8873199147810173, "learning_rate": 1.0182037138079494e-06, "loss": 0.0724, "step": 6229 }, { "epoch": 1.4175199089874857, "grad_norm": 2.1962588500843285, "learning_rate": 1.0181342567013477e-06, "loss": 0.0874, "step": 6230 }, { "epoch": 1.4177474402730375, "grad_norm": 1.046799205589163, "learning_rate": 1.0180647915598017e-06, "loss": 0.0615, "step": 6231 }, { "epoch": 1.4179749715585892, "grad_norm": 1.559125762785821, "learning_rate": 1.017995318384731e-06, "loss": 0.0967, "step": 6232 }, { "epoch": 1.418202502844141, "grad_norm": 2.221222240769814, "learning_rate": 1.017925837177556e-06, "loss": 0.1052, "step": 6233 }, { "epoch": 1.4184300341296927, "grad_norm": 1.148681278966676, "learning_rate": 1.0178563479396964e-06, "loss": 0.0888, "step": 6234 }, { "epoch": 1.4186575654152445, "grad_norm": 2.368699801178719, "learning_rate": 1.0177868506725725e-06, "loss": 0.0786, "step": 6235 }, { "epoch": 1.4188850967007964, "grad_norm": 1.8803457207584942, "learning_rate": 1.0177173453776047e-06, "loss": 0.1253, "step": 6236 }, { "epoch": 1.4191126279863482, "grad_norm": 1.8625616067005561, "learning_rate": 1.0176478320562136e-06, "loss": 0.1561, "step": 6237 }, { "epoch": 1.4193401592719, "grad_norm": 1.4241783140991238, "learning_rate": 1.01757831070982e-06, "loss": 0.1423, "step": 6238 }, { "epoch": 1.4195676905574517, "grad_norm": 1.1462264802545075, "learning_rate": 1.0175087813398446e-06, "loss": 0.1293, "step": 6239 }, { "epoch": 1.4197952218430034, "grad_norm": 1.7660807457711354, "learning_rate": 1.0174392439477087e-06, "loss": 0.1749, "step": 6240 }, { "epoch": 1.4200227531285552, "grad_norm": 1.731636472675248, "learning_rate": 1.0173696985348333e-06, "loss": 0.2122, "step": 6241 }, { "epoch": 1.420250284414107, "grad_norm": 3.206125566573037, "learning_rate": 1.0173001451026396e-06, "loss": 0.176, "step": 6242 }, { "epoch": 1.4204778156996587, "grad_norm": 1.7861193001189515, "learning_rate": 1.0172305836525498e-06, "loss": 0.1038, "step": 6243 }, { "epoch": 1.4207053469852104, "grad_norm": 1.564303100927579, "learning_rate": 1.017161014185985e-06, "loss": 0.0623, "step": 6244 }, { "epoch": 1.4209328782707622, "grad_norm": 2.39397601371307, "learning_rate": 1.0170914367043672e-06, "loss": 0.0967, "step": 6245 }, { "epoch": 1.421160409556314, "grad_norm": 2.2487565032595995, "learning_rate": 1.0170218512091188e-06, "loss": 0.1099, "step": 6246 }, { "epoch": 1.4213879408418657, "grad_norm": 2.4116148271697786, "learning_rate": 1.0169522577016614e-06, "loss": 0.1188, "step": 6247 }, { "epoch": 1.4216154721274175, "grad_norm": 1.2570520555601248, "learning_rate": 1.016882656183418e-06, "loss": 0.0385, "step": 6248 }, { "epoch": 1.4218430034129692, "grad_norm": 2.6411908340836767, "learning_rate": 1.0168130466558106e-06, "loss": 0.0763, "step": 6249 }, { "epoch": 1.4220705346985212, "grad_norm": 1.7982694079050958, "learning_rate": 1.0167434291202622e-06, "loss": 0.0911, "step": 6250 }, { "epoch": 1.422298065984073, "grad_norm": 1.3577959316062924, "learning_rate": 1.0166738035781954e-06, "loss": 0.0727, "step": 6251 }, { "epoch": 1.4225255972696247, "grad_norm": 1.8897772791113623, "learning_rate": 1.0166041700310334e-06, "loss": 0.0667, "step": 6252 }, { "epoch": 1.4227531285551764, "grad_norm": 1.2512416221815659, "learning_rate": 1.0165345284801995e-06, "loss": 0.0558, "step": 6253 }, { "epoch": 1.4229806598407282, "grad_norm": 1.1610234015826426, "learning_rate": 1.0164648789271167e-06, "loss": 0.1102, "step": 6254 }, { "epoch": 1.42320819112628, "grad_norm": 2.7364696648328106, "learning_rate": 1.016395221373209e-06, "loss": 0.1659, "step": 6255 }, { "epoch": 1.4234357224118317, "grad_norm": 2.492044091165344, "learning_rate": 1.0163255558198995e-06, "loss": 0.1253, "step": 6256 }, { "epoch": 1.4236632536973834, "grad_norm": 1.4141996293268364, "learning_rate": 1.0162558822686123e-06, "loss": 0.063, "step": 6257 }, { "epoch": 1.4238907849829352, "grad_norm": 2.1623801632900137, "learning_rate": 1.0161862007207715e-06, "loss": 0.2044, "step": 6258 }, { "epoch": 1.424118316268487, "grad_norm": 3.0022631890111215, "learning_rate": 1.0161165111778013e-06, "loss": 0.071, "step": 6259 }, { "epoch": 1.4243458475540387, "grad_norm": 1.5430074698777752, "learning_rate": 1.0160468136411258e-06, "loss": 0.0437, "step": 6260 }, { "epoch": 1.4245733788395905, "grad_norm": 1.4418875571900045, "learning_rate": 1.0159771081121697e-06, "loss": 0.1592, "step": 6261 }, { "epoch": 1.4248009101251422, "grad_norm": 1.1023568443010037, "learning_rate": 1.0159073945923575e-06, "loss": 0.0544, "step": 6262 }, { "epoch": 1.425028441410694, "grad_norm": 2.2051068207070386, "learning_rate": 1.015837673083114e-06, "loss": 0.1451, "step": 6263 }, { "epoch": 1.4252559726962457, "grad_norm": 1.8003514085030992, "learning_rate": 1.0157679435858643e-06, "loss": 0.1134, "step": 6264 }, { "epoch": 1.4254835039817975, "grad_norm": 1.671768891780443, "learning_rate": 1.0156982061020335e-06, "loss": 0.0697, "step": 6265 }, { "epoch": 1.4257110352673492, "grad_norm": 1.691638560811478, "learning_rate": 1.0156284606330468e-06, "loss": 0.1116, "step": 6266 }, { "epoch": 1.425938566552901, "grad_norm": 1.668218037713917, "learning_rate": 1.0155587071803298e-06, "loss": 0.0734, "step": 6267 }, { "epoch": 1.4261660978384527, "grad_norm": 1.4322553363365562, "learning_rate": 1.0154889457453082e-06, "loss": 0.0602, "step": 6268 }, { "epoch": 1.4263936291240045, "grad_norm": 1.15268242289466, "learning_rate": 1.0154191763294077e-06, "loss": 0.0818, "step": 6269 }, { "epoch": 1.4266211604095562, "grad_norm": 1.80984807928571, "learning_rate": 1.015349398934054e-06, "loss": 0.0739, "step": 6270 }, { "epoch": 1.426848691695108, "grad_norm": 2.6026819179736687, "learning_rate": 1.0152796135606739e-06, "loss": 0.1506, "step": 6271 }, { "epoch": 1.4270762229806597, "grad_norm": 1.5071088896936038, "learning_rate": 1.015209820210693e-06, "loss": 0.1376, "step": 6272 }, { "epoch": 1.4273037542662115, "grad_norm": 1.4586819449421182, "learning_rate": 1.015140018885538e-06, "loss": 0.1053, "step": 6273 }, { "epoch": 1.4275312855517632, "grad_norm": 1.1403111332425095, "learning_rate": 1.0150702095866354e-06, "loss": 0.0494, "step": 6274 }, { "epoch": 1.4277588168373152, "grad_norm": 1.626188422438232, "learning_rate": 1.0150003923154124e-06, "loss": 0.1072, "step": 6275 }, { "epoch": 1.427986348122867, "grad_norm": 2.510844293567412, "learning_rate": 1.0149305670732953e-06, "loss": 0.1233, "step": 6276 }, { "epoch": 1.4282138794084187, "grad_norm": 1.1258865254836934, "learning_rate": 1.0148607338617118e-06, "loss": 0.0837, "step": 6277 }, { "epoch": 1.4284414106939705, "grad_norm": 2.059538582647301, "learning_rate": 1.0147908926820887e-06, "loss": 0.1354, "step": 6278 }, { "epoch": 1.4286689419795222, "grad_norm": 2.2333199746531194, "learning_rate": 1.014721043535854e-06, "loss": 0.0965, "step": 6279 }, { "epoch": 1.428896473265074, "grad_norm": 2.348990396070376, "learning_rate": 1.0146511864244344e-06, "loss": 0.117, "step": 6280 }, { "epoch": 1.4291240045506257, "grad_norm": 2.579726909586119, "learning_rate": 1.0145813213492587e-06, "loss": 0.0875, "step": 6281 }, { "epoch": 1.4293515358361775, "grad_norm": 2.491278810072405, "learning_rate": 1.0145114483117539e-06, "loss": 0.0861, "step": 6282 }, { "epoch": 1.4295790671217292, "grad_norm": 1.785513438132783, "learning_rate": 1.0144415673133485e-06, "loss": 0.0973, "step": 6283 }, { "epoch": 1.429806598407281, "grad_norm": 1.43857963983858, "learning_rate": 1.0143716783554709e-06, "loss": 0.0474, "step": 6284 }, { "epoch": 1.4300341296928327, "grad_norm": 1.3085731530788816, "learning_rate": 1.0143017814395489e-06, "loss": 0.084, "step": 6285 }, { "epoch": 1.4302616609783845, "grad_norm": 1.8064962742353994, "learning_rate": 1.0142318765670117e-06, "loss": 0.077, "step": 6286 }, { "epoch": 1.4304891922639362, "grad_norm": 1.878771887521387, "learning_rate": 1.0141619637392878e-06, "loss": 0.1095, "step": 6287 }, { "epoch": 1.430716723549488, "grad_norm": 2.343408941947069, "learning_rate": 1.0140920429578061e-06, "loss": 0.1102, "step": 6288 }, { "epoch": 1.43094425483504, "grad_norm": 2.0560219852914607, "learning_rate": 1.0140221142239957e-06, "loss": 0.0991, "step": 6289 }, { "epoch": 1.4311717861205917, "grad_norm": 0.795792793486454, "learning_rate": 1.0139521775392856e-06, "loss": 0.0553, "step": 6290 }, { "epoch": 1.4313993174061435, "grad_norm": 1.8826815884172625, "learning_rate": 1.0138822329051052e-06, "loss": 0.1291, "step": 6291 }, { "epoch": 1.4316268486916952, "grad_norm": 1.8726101517248754, "learning_rate": 1.0138122803228843e-06, "loss": 0.0669, "step": 6292 }, { "epoch": 1.431854379977247, "grad_norm": 2.3660511817894196, "learning_rate": 1.0137423197940527e-06, "loss": 0.1365, "step": 6293 }, { "epoch": 1.4320819112627987, "grad_norm": 1.7241008303823981, "learning_rate": 1.0136723513200396e-06, "loss": 0.1627, "step": 6294 }, { "epoch": 1.4323094425483505, "grad_norm": 1.4719757229629116, "learning_rate": 1.0136023749022759e-06, "loss": 0.157, "step": 6295 }, { "epoch": 1.4325369738339022, "grad_norm": 1.5457067737199763, "learning_rate": 1.013532390542191e-06, "loss": 0.0758, "step": 6296 }, { "epoch": 1.432764505119454, "grad_norm": 1.563734167251608, "learning_rate": 1.0134623982412156e-06, "loss": 0.0911, "step": 6297 }, { "epoch": 1.4329920364050057, "grad_norm": 1.9585239556119394, "learning_rate": 1.0133923980007804e-06, "loss": 0.0684, "step": 6298 }, { "epoch": 1.4332195676905575, "grad_norm": 1.3489415515735705, "learning_rate": 1.013322389822316e-06, "loss": 0.05, "step": 6299 }, { "epoch": 1.4334470989761092, "grad_norm": 1.7767579179072022, "learning_rate": 1.0132523737072528e-06, "loss": 0.1488, "step": 6300 }, { "epoch": 1.433674630261661, "grad_norm": 1.8711312931697477, "learning_rate": 1.0131823496570222e-06, "loss": 0.0709, "step": 6301 }, { "epoch": 1.4339021615472127, "grad_norm": 1.9155497814506361, "learning_rate": 1.0131123176730555e-06, "loss": 0.0941, "step": 6302 }, { "epoch": 1.4341296928327645, "grad_norm": 2.3525073153549143, "learning_rate": 1.0130422777567837e-06, "loss": 0.1123, "step": 6303 }, { "epoch": 1.4343572241183162, "grad_norm": 1.7137222313794656, "learning_rate": 1.0129722299096385e-06, "loss": 0.0805, "step": 6304 }, { "epoch": 1.434584755403868, "grad_norm": 1.7017112708859887, "learning_rate": 1.0129021741330514e-06, "loss": 0.0906, "step": 6305 }, { "epoch": 1.4348122866894197, "grad_norm": 2.550190726939763, "learning_rate": 1.0128321104284541e-06, "loss": 0.1724, "step": 6306 }, { "epoch": 1.4350398179749715, "grad_norm": 1.950569258241582, "learning_rate": 1.0127620387972789e-06, "loss": 0.0888, "step": 6307 }, { "epoch": 1.4352673492605232, "grad_norm": 1.0716771624591401, "learning_rate": 1.0126919592409578e-06, "loss": 0.0818, "step": 6308 }, { "epoch": 1.435494880546075, "grad_norm": 2.719604541403, "learning_rate": 1.012621871760923e-06, "loss": 0.1134, "step": 6309 }, { "epoch": 1.4357224118316267, "grad_norm": 1.5418381863106374, "learning_rate": 1.012551776358607e-06, "loss": 0.0622, "step": 6310 }, { "epoch": 1.4359499431171785, "grad_norm": 1.094940212737539, "learning_rate": 1.0124816730354428e-06, "loss": 0.0349, "step": 6311 }, { "epoch": 1.4361774744027302, "grad_norm": 2.214067429631255, "learning_rate": 1.0124115617928626e-06, "loss": 0.1129, "step": 6312 }, { "epoch": 1.4364050056882822, "grad_norm": 1.9161547207577005, "learning_rate": 1.0123414426322995e-06, "loss": 0.0811, "step": 6313 }, { "epoch": 1.436632536973834, "grad_norm": 1.4061438449920354, "learning_rate": 1.0122713155551867e-06, "loss": 0.1189, "step": 6314 }, { "epoch": 1.4368600682593857, "grad_norm": 2.1150900833574315, "learning_rate": 1.0122011805629574e-06, "loss": 0.1468, "step": 6315 }, { "epoch": 1.4370875995449375, "grad_norm": 1.3331813768009924, "learning_rate": 1.0121310376570454e-06, "loss": 0.1308, "step": 6316 }, { "epoch": 1.4373151308304892, "grad_norm": 2.505967190170033, "learning_rate": 1.0120608868388837e-06, "loss": 0.1062, "step": 6317 }, { "epoch": 1.437542662116041, "grad_norm": 1.9193512163717878, "learning_rate": 1.0119907281099066e-06, "loss": 0.1135, "step": 6318 }, { "epoch": 1.4377701934015927, "grad_norm": 1.6143484552181144, "learning_rate": 1.0119205614715476e-06, "loss": 0.0827, "step": 6319 }, { "epoch": 1.4379977246871445, "grad_norm": 1.6262807068571428, "learning_rate": 1.011850386925241e-06, "loss": 0.065, "step": 6320 }, { "epoch": 1.4382252559726962, "grad_norm": 3.4130385631807187, "learning_rate": 1.011780204472421e-06, "loss": 0.0972, "step": 6321 }, { "epoch": 1.438452787258248, "grad_norm": 1.7183448758361486, "learning_rate": 1.0117100141145217e-06, "loss": 0.087, "step": 6322 }, { "epoch": 1.4386803185437997, "grad_norm": 1.7894363102296018, "learning_rate": 1.0116398158529782e-06, "loss": 0.0868, "step": 6323 }, { "epoch": 1.4389078498293515, "grad_norm": 1.3683813014203625, "learning_rate": 1.0115696096892247e-06, "loss": 0.1473, "step": 6324 }, { "epoch": 1.4391353811149032, "grad_norm": 2.167526729040326, "learning_rate": 1.0114993956246968e-06, "loss": 0.0716, "step": 6325 }, { "epoch": 1.439362912400455, "grad_norm": 1.9967306110570626, "learning_rate": 1.0114291736608289e-06, "loss": 0.1219, "step": 6326 }, { "epoch": 1.4395904436860067, "grad_norm": 1.9523121423220702, "learning_rate": 1.0113589437990562e-06, "loss": 0.1192, "step": 6327 }, { "epoch": 1.4398179749715587, "grad_norm": 1.4074354798186564, "learning_rate": 1.0112887060408144e-06, "loss": 0.1657, "step": 6328 }, { "epoch": 1.4400455062571105, "grad_norm": 2.224946954285757, "learning_rate": 1.0112184603875387e-06, "loss": 0.1528, "step": 6329 }, { "epoch": 1.4402730375426622, "grad_norm": 1.7404203745348572, "learning_rate": 1.0111482068406655e-06, "loss": 0.0488, "step": 6330 }, { "epoch": 1.440500568828214, "grad_norm": 2.139255550161412, "learning_rate": 1.0110779454016299e-06, "loss": 0.0723, "step": 6331 }, { "epoch": 1.4407281001137657, "grad_norm": 0.9791070299648054, "learning_rate": 1.011007676071868e-06, "loss": 0.0234, "step": 6332 }, { "epoch": 1.4409556313993175, "grad_norm": 1.255456216854933, "learning_rate": 1.0109373988528161e-06, "loss": 0.0931, "step": 6333 }, { "epoch": 1.4411831626848692, "grad_norm": 2.415122633007749, "learning_rate": 1.010867113745911e-06, "loss": 0.0976, "step": 6334 }, { "epoch": 1.441410693970421, "grad_norm": 1.1072274876147699, "learning_rate": 1.0107968207525884e-06, "loss": 0.0799, "step": 6335 }, { "epoch": 1.4416382252559727, "grad_norm": 2.294103829148771, "learning_rate": 1.0107265198742855e-06, "loss": 0.1874, "step": 6336 }, { "epoch": 1.4418657565415245, "grad_norm": 1.6789846212867021, "learning_rate": 1.010656211112439e-06, "loss": 0.127, "step": 6337 }, { "epoch": 1.4420932878270762, "grad_norm": 1.7387215182234683, "learning_rate": 1.0105858944684856e-06, "loss": 0.1075, "step": 6338 }, { "epoch": 1.442320819112628, "grad_norm": 1.758435671842687, "learning_rate": 1.010515569943863e-06, "loss": 0.1135, "step": 6339 }, { "epoch": 1.4425483503981797, "grad_norm": 1.875372603637353, "learning_rate": 1.0104452375400078e-06, "loss": 0.1024, "step": 6340 }, { "epoch": 1.4427758816837315, "grad_norm": 1.1327411263887621, "learning_rate": 1.0103748972583582e-06, "loss": 0.1037, "step": 6341 }, { "epoch": 1.4430034129692833, "grad_norm": 1.6265307975280838, "learning_rate": 1.0103045491003514e-06, "loss": 0.0978, "step": 6342 }, { "epoch": 1.443230944254835, "grad_norm": 0.9532939564545074, "learning_rate": 1.0102341930674252e-06, "loss": 0.0985, "step": 6343 }, { "epoch": 1.4434584755403868, "grad_norm": 2.1579266950046216, "learning_rate": 1.0101638291610176e-06, "loss": 0.0841, "step": 6344 }, { "epoch": 1.4436860068259385, "grad_norm": 1.912319337135995, "learning_rate": 1.0100934573825668e-06, "loss": 0.159, "step": 6345 }, { "epoch": 1.4439135381114903, "grad_norm": 1.6177628381389577, "learning_rate": 1.010023077733511e-06, "loss": 0.118, "step": 6346 }, { "epoch": 1.444141069397042, "grad_norm": 2.01085232773475, "learning_rate": 1.0099526902152886e-06, "loss": 0.1013, "step": 6347 }, { "epoch": 1.4443686006825938, "grad_norm": 1.8693082354619368, "learning_rate": 1.0098822948293382e-06, "loss": 0.1215, "step": 6348 }, { "epoch": 1.4445961319681455, "grad_norm": 1.9616406323151174, "learning_rate": 1.0098118915770985e-06, "loss": 0.1025, "step": 6349 }, { "epoch": 1.4448236632536973, "grad_norm": 1.74247396817358, "learning_rate": 1.0097414804600087e-06, "loss": 0.1232, "step": 6350 }, { "epoch": 1.445051194539249, "grad_norm": 1.706851733043719, "learning_rate": 1.0096710614795077e-06, "loss": 0.0759, "step": 6351 }, { "epoch": 1.445278725824801, "grad_norm": 2.4531940640602996, "learning_rate": 1.0096006346370344e-06, "loss": 0.1526, "step": 6352 }, { "epoch": 1.4455062571103527, "grad_norm": 2.3321099387624473, "learning_rate": 1.0095301999340285e-06, "loss": 0.1587, "step": 6353 }, { "epoch": 1.4457337883959045, "grad_norm": 1.5497257332095864, "learning_rate": 1.0094597573719297e-06, "loss": 0.1221, "step": 6354 }, { "epoch": 1.4459613196814562, "grad_norm": 1.7921451438519347, "learning_rate": 1.0093893069521777e-06, "loss": 0.0806, "step": 6355 }, { "epoch": 1.446188850967008, "grad_norm": 1.4730470898269366, "learning_rate": 1.009318848676212e-06, "loss": 0.0486, "step": 6356 }, { "epoch": 1.4464163822525598, "grad_norm": 1.5931102826996517, "learning_rate": 1.0092483825454729e-06, "loss": 0.1791, "step": 6357 }, { "epoch": 1.4466439135381115, "grad_norm": 1.882918083630657, "learning_rate": 1.0091779085614006e-06, "loss": 0.0707, "step": 6358 }, { "epoch": 1.4468714448236633, "grad_norm": 1.5885687355433298, "learning_rate": 1.0091074267254355e-06, "loss": 0.0796, "step": 6359 }, { "epoch": 1.447098976109215, "grad_norm": 2.2821278057106476, "learning_rate": 1.009036937039018e-06, "loss": 0.1604, "step": 6360 }, { "epoch": 1.4473265073947668, "grad_norm": 1.4470125949831585, "learning_rate": 1.008966439503589e-06, "loss": 0.0735, "step": 6361 }, { "epoch": 1.4475540386803185, "grad_norm": 1.4464111232881574, "learning_rate": 1.008895934120589e-06, "loss": 0.048, "step": 6362 }, { "epoch": 1.4477815699658703, "grad_norm": 1.7415639336355635, "learning_rate": 1.0088254208914593e-06, "loss": 0.0542, "step": 6363 }, { "epoch": 1.448009101251422, "grad_norm": 2.709852589489752, "learning_rate": 1.0087548998176409e-06, "loss": 0.0975, "step": 6364 }, { "epoch": 1.4482366325369738, "grad_norm": 1.6286911486133107, "learning_rate": 1.008684370900575e-06, "loss": 0.141, "step": 6365 }, { "epoch": 1.4484641638225255, "grad_norm": 2.054520820963295, "learning_rate": 1.0086138341417035e-06, "loss": 0.0991, "step": 6366 }, { "epoch": 1.4486916951080775, "grad_norm": 1.511113359825543, "learning_rate": 1.0085432895424674e-06, "loss": 0.0743, "step": 6367 }, { "epoch": 1.4489192263936292, "grad_norm": 1.7275913423025957, "learning_rate": 1.0084727371043094e-06, "loss": 0.0957, "step": 6368 }, { "epoch": 1.449146757679181, "grad_norm": 2.4101408073521893, "learning_rate": 1.0084021768286706e-06, "loss": 0.0752, "step": 6369 }, { "epoch": 1.4493742889647327, "grad_norm": 1.139924427258152, "learning_rate": 1.0083316087169935e-06, "loss": 0.1332, "step": 6370 }, { "epoch": 1.4496018202502845, "grad_norm": 1.4397211555456666, "learning_rate": 1.0082610327707204e-06, "loss": 0.0654, "step": 6371 }, { "epoch": 1.4498293515358363, "grad_norm": 1.7349452038054234, "learning_rate": 1.0081904489912937e-06, "loss": 0.1151, "step": 6372 }, { "epoch": 1.450056882821388, "grad_norm": 2.177613333168079, "learning_rate": 1.008119857380156e-06, "loss": 0.1433, "step": 6373 }, { "epoch": 1.4502844141069398, "grad_norm": 2.313688898466905, "learning_rate": 1.0080492579387503e-06, "loss": 0.1393, "step": 6374 }, { "epoch": 1.4505119453924915, "grad_norm": 2.2444922001845287, "learning_rate": 1.007978650668519e-06, "loss": 0.0651, "step": 6375 }, { "epoch": 1.4507394766780433, "grad_norm": 1.1098239666150351, "learning_rate": 1.0079080355709057e-06, "loss": 0.0886, "step": 6376 }, { "epoch": 1.450967007963595, "grad_norm": 2.197433600973728, "learning_rate": 1.007837412647353e-06, "loss": 0.0504, "step": 6377 }, { "epoch": 1.4511945392491468, "grad_norm": 1.9050357892618024, "learning_rate": 1.007766781899305e-06, "loss": 0.0663, "step": 6378 }, { "epoch": 1.4514220705346985, "grad_norm": 2.020215521457231, "learning_rate": 1.0076961433282051e-06, "loss": 0.0671, "step": 6379 }, { "epoch": 1.4516496018202503, "grad_norm": 1.6555451394956837, "learning_rate": 1.0076254969354967e-06, "loss": 0.052, "step": 6380 }, { "epoch": 1.451877133105802, "grad_norm": 1.0205024219802799, "learning_rate": 1.0075548427226241e-06, "loss": 0.0818, "step": 6381 }, { "epoch": 1.4521046643913538, "grad_norm": 1.110373279767029, "learning_rate": 1.007484180691031e-06, "loss": 0.0618, "step": 6382 }, { "epoch": 1.4523321956769055, "grad_norm": 1.1903813445076674, "learning_rate": 1.0074135108421616e-06, "loss": 0.0493, "step": 6383 }, { "epoch": 1.4525597269624573, "grad_norm": 1.4692733192683496, "learning_rate": 1.0073428331774605e-06, "loss": 0.0586, "step": 6384 }, { "epoch": 1.452787258248009, "grad_norm": 1.7369951256720448, "learning_rate": 1.007272147698372e-06, "loss": 0.1068, "step": 6385 }, { "epoch": 1.4530147895335608, "grad_norm": 3.0366934736471203, "learning_rate": 1.0072014544063412e-06, "loss": 0.1184, "step": 6386 }, { "epoch": 1.4532423208191125, "grad_norm": 1.438235770345755, "learning_rate": 1.0071307533028125e-06, "loss": 0.1207, "step": 6387 }, { "epoch": 1.4534698521046643, "grad_norm": 1.5692102048952972, "learning_rate": 1.007060044389231e-06, "loss": 0.1218, "step": 6388 }, { "epoch": 1.453697383390216, "grad_norm": 1.3218250523299544, "learning_rate": 1.006989327667042e-06, "loss": 0.0731, "step": 6389 }, { "epoch": 1.4539249146757678, "grad_norm": 2.189807535289241, "learning_rate": 1.0069186031376906e-06, "loss": 0.0947, "step": 6390 }, { "epoch": 1.4541524459613198, "grad_norm": 1.9105068604592736, "learning_rate": 1.0068478708026224e-06, "loss": 0.1026, "step": 6391 }, { "epoch": 1.4543799772468715, "grad_norm": 0.5964990943736891, "learning_rate": 1.0067771306632832e-06, "loss": 0.0295, "step": 6392 }, { "epoch": 1.4546075085324233, "grad_norm": 1.8016037599952333, "learning_rate": 1.0067063827211184e-06, "loss": 0.0669, "step": 6393 }, { "epoch": 1.454835039817975, "grad_norm": 1.125517173679015, "learning_rate": 1.0066356269775744e-06, "loss": 0.0479, "step": 6394 }, { "epoch": 1.4550625711035268, "grad_norm": 1.4902876720368317, "learning_rate": 1.0065648634340971e-06, "loss": 0.0559, "step": 6395 }, { "epoch": 1.4552901023890785, "grad_norm": 3.2050237634656478, "learning_rate": 1.0064940920921328e-06, "loss": 0.0683, "step": 6396 }, { "epoch": 1.4555176336746303, "grad_norm": 1.5099892690669368, "learning_rate": 1.006423312953128e-06, "loss": 0.1018, "step": 6397 }, { "epoch": 1.455745164960182, "grad_norm": 2.861502444994587, "learning_rate": 1.0063525260185288e-06, "loss": 0.1235, "step": 6398 }, { "epoch": 1.4559726962457338, "grad_norm": 1.5797883956515744, "learning_rate": 1.0062817312897826e-06, "loss": 0.0837, "step": 6399 }, { "epoch": 1.4562002275312855, "grad_norm": 2.0656400096946554, "learning_rate": 1.0062109287683364e-06, "loss": 0.1421, "step": 6400 }, { "epoch": 1.4564277588168373, "grad_norm": 2.3153617321211897, "learning_rate": 1.0061401184556366e-06, "loss": 0.0708, "step": 6401 }, { "epoch": 1.456655290102389, "grad_norm": 1.5512424090621548, "learning_rate": 1.006069300353131e-06, "loss": 0.0609, "step": 6402 }, { "epoch": 1.4568828213879408, "grad_norm": 2.546713550355769, "learning_rate": 1.005998474462267e-06, "loss": 0.1593, "step": 6403 }, { "epoch": 1.4571103526734925, "grad_norm": 1.691309762382103, "learning_rate": 1.0059276407844915e-06, "loss": 0.0659, "step": 6404 }, { "epoch": 1.4573378839590443, "grad_norm": 1.7505170604113123, "learning_rate": 1.0058567993212528e-06, "loss": 0.099, "step": 6405 }, { "epoch": 1.4575654152445963, "grad_norm": 1.1854966704428127, "learning_rate": 1.005785950073999e-06, "loss": 0.0727, "step": 6406 }, { "epoch": 1.457792946530148, "grad_norm": 1.0488854883043695, "learning_rate": 1.0057150930441772e-06, "loss": 0.0772, "step": 6407 }, { "epoch": 1.4580204778156998, "grad_norm": 1.513523792992345, "learning_rate": 1.0056442282332365e-06, "loss": 0.0548, "step": 6408 }, { "epoch": 1.4582480091012515, "grad_norm": 1.1397568424774098, "learning_rate": 1.0055733556426248e-06, "loss": 0.0925, "step": 6409 }, { "epoch": 1.4584755403868033, "grad_norm": 1.595106295663973, "learning_rate": 1.0055024752737906e-06, "loss": 0.1603, "step": 6410 }, { "epoch": 1.458703071672355, "grad_norm": 2.1292594035873136, "learning_rate": 1.0054315871281828e-06, "loss": 0.073, "step": 6411 }, { "epoch": 1.4589306029579068, "grad_norm": 2.1734889912318645, "learning_rate": 1.00536069120725e-06, "loss": 0.1034, "step": 6412 }, { "epoch": 1.4591581342434585, "grad_norm": 2.4603522129310105, "learning_rate": 1.0052897875124412e-06, "loss": 0.0854, "step": 6413 }, { "epoch": 1.4593856655290103, "grad_norm": 2.085924463649985, "learning_rate": 1.005218876045206e-06, "loss": 0.124, "step": 6414 }, { "epoch": 1.459613196814562, "grad_norm": 2.2584471880345847, "learning_rate": 1.0051479568069927e-06, "loss": 0.0859, "step": 6415 }, { "epoch": 1.4598407281001138, "grad_norm": 1.6411154403946306, "learning_rate": 1.0050770297992517e-06, "loss": 0.0827, "step": 6416 }, { "epoch": 1.4600682593856655, "grad_norm": 0.9421738537038823, "learning_rate": 1.0050060950234324e-06, "loss": 0.0385, "step": 6417 }, { "epoch": 1.4602957906712173, "grad_norm": 1.9520741759536568, "learning_rate": 1.0049351524809842e-06, "loss": 0.0855, "step": 6418 }, { "epoch": 1.460523321956769, "grad_norm": 1.1983158344001326, "learning_rate": 1.0048642021733576e-06, "loss": 0.0365, "step": 6419 }, { "epoch": 1.4607508532423208, "grad_norm": 2.441857013431579, "learning_rate": 1.0047932441020022e-06, "loss": 0.0925, "step": 6420 }, { "epoch": 1.4609783845278725, "grad_norm": 1.5665363909548544, "learning_rate": 1.0047222782683686e-06, "loss": 0.0977, "step": 6421 }, { "epoch": 1.4612059158134243, "grad_norm": 2.264590681075332, "learning_rate": 1.0046513046739069e-06, "loss": 0.148, "step": 6422 }, { "epoch": 1.461433447098976, "grad_norm": 1.6526202515649944, "learning_rate": 1.0045803233200679e-06, "loss": 0.041, "step": 6423 }, { "epoch": 1.4616609783845278, "grad_norm": 1.7626217119136827, "learning_rate": 1.0045093342083022e-06, "loss": 0.1285, "step": 6424 }, { "epoch": 1.4618885096700796, "grad_norm": 2.2562587860636873, "learning_rate": 1.0044383373400608e-06, "loss": 0.0864, "step": 6425 }, { "epoch": 1.4621160409556313, "grad_norm": 1.4449976996724352, "learning_rate": 1.0043673327167946e-06, "loss": 0.1135, "step": 6426 }, { "epoch": 1.462343572241183, "grad_norm": 1.2114903089701574, "learning_rate": 1.004296320339955e-06, "loss": 0.0497, "step": 6427 }, { "epoch": 1.4625711035267348, "grad_norm": 2.3353065793883965, "learning_rate": 1.0042253002109933e-06, "loss": 0.0995, "step": 6428 }, { "epoch": 1.4627986348122866, "grad_norm": 1.213988405070411, "learning_rate": 1.004154272331361e-06, "loss": 0.0522, "step": 6429 }, { "epoch": 1.4630261660978385, "grad_norm": 2.1531614083113406, "learning_rate": 1.0040832367025097e-06, "loss": 0.0997, "step": 6430 }, { "epoch": 1.4632536973833903, "grad_norm": 2.413926356303523, "learning_rate": 1.0040121933258912e-06, "loss": 0.0909, "step": 6431 }, { "epoch": 1.463481228668942, "grad_norm": 2.957734256860313, "learning_rate": 1.0039411422029578e-06, "loss": 0.1351, "step": 6432 }, { "epoch": 1.4637087599544938, "grad_norm": 1.5243086926860814, "learning_rate": 1.0038700833351612e-06, "loss": 0.0928, "step": 6433 }, { "epoch": 1.4639362912400455, "grad_norm": 1.2116195117085764, "learning_rate": 1.0037990167239542e-06, "loss": 0.0571, "step": 6434 }, { "epoch": 1.4641638225255973, "grad_norm": 2.615635797890528, "learning_rate": 1.0037279423707888e-06, "loss": 0.0696, "step": 6435 }, { "epoch": 1.464391353811149, "grad_norm": 1.7244140162748254, "learning_rate": 1.0036568602771179e-06, "loss": 0.189, "step": 6436 }, { "epoch": 1.4646188850967008, "grad_norm": 2.6958961651306486, "learning_rate": 1.0035857704443944e-06, "loss": 0.1115, "step": 6437 }, { "epoch": 1.4648464163822525, "grad_norm": 1.2190590501428018, "learning_rate": 1.0035146728740712e-06, "loss": 0.1424, "step": 6438 }, { "epoch": 1.4650739476678043, "grad_norm": 1.5054931397105094, "learning_rate": 1.0034435675676012e-06, "loss": 0.0908, "step": 6439 }, { "epoch": 1.465301478953356, "grad_norm": 1.6291323643009734, "learning_rate": 1.0033724545264378e-06, "loss": 0.089, "step": 6440 }, { "epoch": 1.4655290102389078, "grad_norm": 1.341085989404762, "learning_rate": 1.0033013337520342e-06, "loss": 0.0486, "step": 6441 }, { "epoch": 1.4657565415244596, "grad_norm": 1.011164152814645, "learning_rate": 1.0032302052458443e-06, "loss": 0.0499, "step": 6442 }, { "epoch": 1.4659840728100113, "grad_norm": 1.0948910983582214, "learning_rate": 1.0031590690093214e-06, "loss": 0.0425, "step": 6443 }, { "epoch": 1.466211604095563, "grad_norm": 1.7286333048940579, "learning_rate": 1.0030879250439203e-06, "loss": 0.0568, "step": 6444 }, { "epoch": 1.466439135381115, "grad_norm": 0.9290542248967091, "learning_rate": 1.003016773351094e-06, "loss": 0.0223, "step": 6445 }, { "epoch": 1.4666666666666668, "grad_norm": 2.5778455131993376, "learning_rate": 1.0029456139322973e-06, "loss": 0.0923, "step": 6446 }, { "epoch": 1.4668941979522185, "grad_norm": 2.2511687310056416, "learning_rate": 1.0028744467889845e-06, "loss": 0.0967, "step": 6447 }, { "epoch": 1.4671217292377703, "grad_norm": 2.725626168515591, "learning_rate": 1.0028032719226097e-06, "loss": 0.1895, "step": 6448 }, { "epoch": 1.467349260523322, "grad_norm": 2.179394448530092, "learning_rate": 1.0027320893346284e-06, "loss": 0.1575, "step": 6449 }, { "epoch": 1.4675767918088738, "grad_norm": 2.714923875843345, "learning_rate": 1.0026608990264946e-06, "loss": 0.1003, "step": 6450 }, { "epoch": 1.4678043230944255, "grad_norm": 2.2884422798139217, "learning_rate": 1.0025897009996639e-06, "loss": 0.1281, "step": 6451 }, { "epoch": 1.4680318543799773, "grad_norm": 2.13186315358863, "learning_rate": 1.0025184952555911e-06, "loss": 0.168, "step": 6452 }, { "epoch": 1.468259385665529, "grad_norm": 2.088602911180568, "learning_rate": 1.0024472817957318e-06, "loss": 0.0728, "step": 6453 }, { "epoch": 1.4684869169510808, "grad_norm": 2.0115528940586342, "learning_rate": 1.0023760606215412e-06, "loss": 0.1641, "step": 6454 }, { "epoch": 1.4687144482366326, "grad_norm": 1.5082622000114985, "learning_rate": 1.0023048317344752e-06, "loss": 0.0494, "step": 6455 }, { "epoch": 1.4689419795221843, "grad_norm": 2.0733909322591004, "learning_rate": 1.0022335951359894e-06, "loss": 0.1157, "step": 6456 }, { "epoch": 1.469169510807736, "grad_norm": 1.854647627348043, "learning_rate": 1.0021623508275397e-06, "loss": 0.1409, "step": 6457 }, { "epoch": 1.4693970420932878, "grad_norm": 2.3064994581234277, "learning_rate": 1.0020910988105822e-06, "loss": 0.0987, "step": 6458 }, { "epoch": 1.4696245733788396, "grad_norm": 1.529342248309303, "learning_rate": 1.0020198390865735e-06, "loss": 0.0995, "step": 6459 }, { "epoch": 1.4698521046643913, "grad_norm": 1.7759972116630178, "learning_rate": 1.0019485716569696e-06, "loss": 0.0773, "step": 6460 }, { "epoch": 1.470079635949943, "grad_norm": 1.5298887299228214, "learning_rate": 1.0018772965232272e-06, "loss": 0.0803, "step": 6461 }, { "epoch": 1.4703071672354948, "grad_norm": 1.7926988565702198, "learning_rate": 1.0018060136868033e-06, "loss": 0.1503, "step": 6462 }, { "epoch": 1.4705346985210466, "grad_norm": 1.95957874155669, "learning_rate": 1.0017347231491544e-06, "loss": 0.0749, "step": 6463 }, { "epoch": 1.4707622298065983, "grad_norm": 1.8565870188101044, "learning_rate": 1.0016634249117378e-06, "loss": 0.0903, "step": 6464 }, { "epoch": 1.47098976109215, "grad_norm": 2.3785869049832487, "learning_rate": 1.0015921189760105e-06, "loss": 0.1752, "step": 6465 }, { "epoch": 1.4712172923777018, "grad_norm": 1.08120840897355, "learning_rate": 1.00152080534343e-06, "loss": 0.0837, "step": 6466 }, { "epoch": 1.4714448236632536, "grad_norm": 1.2600015453121518, "learning_rate": 1.001449484015454e-06, "loss": 0.0938, "step": 6467 }, { "epoch": 1.4716723549488053, "grad_norm": 1.1675909506811741, "learning_rate": 1.0013781549935396e-06, "loss": 0.0802, "step": 6468 }, { "epoch": 1.4718998862343573, "grad_norm": 2.5320154156973995, "learning_rate": 1.0013068182791454e-06, "loss": 0.0838, "step": 6469 }, { "epoch": 1.472127417519909, "grad_norm": 1.2168783337222795, "learning_rate": 1.0012354738737288e-06, "loss": 0.1312, "step": 6470 }, { "epoch": 1.4723549488054608, "grad_norm": 2.040097512167998, "learning_rate": 1.0011641217787481e-06, "loss": 0.1133, "step": 6471 }, { "epoch": 1.4725824800910126, "grad_norm": 1.5937269991105336, "learning_rate": 1.001092761995662e-06, "loss": 0.1045, "step": 6472 }, { "epoch": 1.4728100113765643, "grad_norm": 1.6603935998287558, "learning_rate": 1.0010213945259282e-06, "loss": 0.0628, "step": 6473 }, { "epoch": 1.473037542662116, "grad_norm": 1.7229506631941387, "learning_rate": 1.000950019371006e-06, "loss": 0.0624, "step": 6474 }, { "epoch": 1.4732650739476678, "grad_norm": 1.2010677843671187, "learning_rate": 1.0008786365323539e-06, "loss": 0.0706, "step": 6475 }, { "epoch": 1.4734926052332196, "grad_norm": 1.8751575864533414, "learning_rate": 1.0008072460114308e-06, "loss": 0.0628, "step": 6476 }, { "epoch": 1.4737201365187713, "grad_norm": 1.73050953802294, "learning_rate": 1.0007358478096959e-06, "loss": 0.0981, "step": 6477 }, { "epoch": 1.473947667804323, "grad_norm": 1.5313330145032578, "learning_rate": 1.0006644419286084e-06, "loss": 0.0871, "step": 6478 }, { "epoch": 1.4741751990898748, "grad_norm": 1.7211307536072435, "learning_rate": 1.0005930283696277e-06, "loss": 0.0804, "step": 6479 }, { "epoch": 1.4744027303754266, "grad_norm": 1.041669030198631, "learning_rate": 1.0005216071342133e-06, "loss": 0.11, "step": 6480 }, { "epoch": 1.4746302616609783, "grad_norm": 1.9063568947913965, "learning_rate": 1.000450178223825e-06, "loss": 0.1378, "step": 6481 }, { "epoch": 1.47485779294653, "grad_norm": 1.4363869120455537, "learning_rate": 1.0003787416399226e-06, "loss": 0.1435, "step": 6482 }, { "epoch": 1.4750853242320818, "grad_norm": 1.656907400406917, "learning_rate": 1.0003072973839665e-06, "loss": 0.067, "step": 6483 }, { "epoch": 1.4753128555176338, "grad_norm": 1.9091492086095017, "learning_rate": 1.0002358454574163e-06, "loss": 0.0559, "step": 6484 }, { "epoch": 1.4755403868031856, "grad_norm": 1.933307503414605, "learning_rate": 1.0001643858617326e-06, "loss": 0.199, "step": 6485 }, { "epoch": 1.4757679180887373, "grad_norm": 1.524360431972506, "learning_rate": 1.0000929185983762e-06, "loss": 0.0514, "step": 6486 }, { "epoch": 1.475995449374289, "grad_norm": 2.558602268977592, "learning_rate": 1.0000214436688074e-06, "loss": 0.1992, "step": 6487 }, { "epoch": 1.4762229806598408, "grad_norm": 1.5057019130864449, "learning_rate": 9.99949961074487e-07, "loss": 0.1788, "step": 6488 }, { "epoch": 1.4764505119453926, "grad_norm": 1.9974779073044902, "learning_rate": 9.998784708168762e-07, "loss": 0.1041, "step": 6489 }, { "epoch": 1.4766780432309443, "grad_norm": 2.146999426549233, "learning_rate": 9.998069728974357e-07, "loss": 0.1536, "step": 6490 }, { "epoch": 1.476905574516496, "grad_norm": 1.9204261446003483, "learning_rate": 9.997354673176273e-07, "loss": 0.12, "step": 6491 }, { "epoch": 1.4771331058020478, "grad_norm": 1.7900450156383576, "learning_rate": 9.996639540789124e-07, "loss": 0.0709, "step": 6492 }, { "epoch": 1.4773606370875996, "grad_norm": 2.706054567060273, "learning_rate": 9.995924331827521e-07, "loss": 0.1134, "step": 6493 }, { "epoch": 1.4775881683731513, "grad_norm": 2.1430131676281143, "learning_rate": 9.99520904630609e-07, "loss": 0.1307, "step": 6494 }, { "epoch": 1.477815699658703, "grad_norm": 1.3831116860997594, "learning_rate": 9.994493684239443e-07, "loss": 0.1063, "step": 6495 }, { "epoch": 1.4780432309442548, "grad_norm": 0.8455695567282069, "learning_rate": 9.993778245642202e-07, "loss": 0.0408, "step": 6496 }, { "epoch": 1.4782707622298066, "grad_norm": 2.0481093026394936, "learning_rate": 9.99306273052899e-07, "loss": 0.0507, "step": 6497 }, { "epoch": 1.4784982935153583, "grad_norm": 2.701397993282172, "learning_rate": 9.99234713891443e-07, "loss": 0.0766, "step": 6498 }, { "epoch": 1.47872582480091, "grad_norm": 2.060007223620782, "learning_rate": 9.99163147081315e-07, "loss": 0.0827, "step": 6499 }, { "epoch": 1.4789533560864618, "grad_norm": 2.1178800053991895, "learning_rate": 9.990915726239774e-07, "loss": 0.0989, "step": 6500 }, { "epoch": 1.4791808873720136, "grad_norm": 1.2820131971427713, "learning_rate": 9.990199905208933e-07, "loss": 0.1165, "step": 6501 }, { "epoch": 1.4794084186575653, "grad_norm": 1.455912203082024, "learning_rate": 9.989484007735256e-07, "loss": 0.1711, "step": 6502 }, { "epoch": 1.479635949943117, "grad_norm": 1.3882286358374694, "learning_rate": 9.988768033833374e-07, "loss": 0.1038, "step": 6503 }, { "epoch": 1.4798634812286688, "grad_norm": 1.586658752493086, "learning_rate": 9.98805198351792e-07, "loss": 0.1503, "step": 6504 }, { "epoch": 1.4800910125142206, "grad_norm": 1.216647170795259, "learning_rate": 9.98733585680353e-07, "loss": 0.0537, "step": 6505 }, { "epoch": 1.4803185437997723, "grad_norm": 1.3218892161161693, "learning_rate": 9.986619653704838e-07, "loss": 0.0938, "step": 6506 }, { "epoch": 1.480546075085324, "grad_norm": 2.0695991159537237, "learning_rate": 9.985903374236487e-07, "loss": 0.0963, "step": 6507 }, { "epoch": 1.480773606370876, "grad_norm": 1.392307307265326, "learning_rate": 9.985187018413108e-07, "loss": 0.1009, "step": 6508 }, { "epoch": 1.4810011376564278, "grad_norm": 1.322014214217586, "learning_rate": 9.98447058624935e-07, "loss": 0.052, "step": 6509 }, { "epoch": 1.4812286689419796, "grad_norm": 1.5462590599779562, "learning_rate": 9.983754077759852e-07, "loss": 0.1301, "step": 6510 }, { "epoch": 1.4814562002275313, "grad_norm": 1.767717707012614, "learning_rate": 9.98303749295926e-07, "loss": 0.137, "step": 6511 }, { "epoch": 1.481683731513083, "grad_norm": 1.7186132236718707, "learning_rate": 9.982320831862217e-07, "loss": 0.0881, "step": 6512 }, { "epoch": 1.4819112627986348, "grad_norm": 1.5236204963454782, "learning_rate": 9.981604094483374e-07, "loss": 0.1413, "step": 6513 }, { "epoch": 1.4821387940841866, "grad_norm": 1.9188472431082706, "learning_rate": 9.980887280837377e-07, "loss": 0.2172, "step": 6514 }, { "epoch": 1.4823663253697383, "grad_norm": 2.4374555651002483, "learning_rate": 9.980170390938873e-07, "loss": 0.1176, "step": 6515 }, { "epoch": 1.48259385665529, "grad_norm": 2.1093125480487527, "learning_rate": 9.979453424802522e-07, "loss": 0.1106, "step": 6516 }, { "epoch": 1.4828213879408418, "grad_norm": 1.915584556202085, "learning_rate": 9.978736382442969e-07, "loss": 0.1678, "step": 6517 }, { "epoch": 1.4830489192263936, "grad_norm": 1.8641625340477204, "learning_rate": 9.978019263874875e-07, "loss": 0.0863, "step": 6518 }, { "epoch": 1.4832764505119453, "grad_norm": 2.7441229909928615, "learning_rate": 9.977302069112896e-07, "loss": 0.1262, "step": 6519 }, { "epoch": 1.483503981797497, "grad_norm": 1.5055670521242785, "learning_rate": 9.97658479817169e-07, "loss": 0.049, "step": 6520 }, { "epoch": 1.4837315130830488, "grad_norm": 2.122217127690761, "learning_rate": 9.975867451065913e-07, "loss": 0.0481, "step": 6521 }, { "epoch": 1.4839590443686006, "grad_norm": 2.4305735770475234, "learning_rate": 9.97515002781023e-07, "loss": 0.1932, "step": 6522 }, { "epoch": 1.4841865756541526, "grad_norm": 1.826505366433851, "learning_rate": 9.9744325284193e-07, "loss": 0.1239, "step": 6523 }, { "epoch": 1.4844141069397043, "grad_norm": 2.0963354537171797, "learning_rate": 9.973714952907792e-07, "loss": 0.1266, "step": 6524 }, { "epoch": 1.484641638225256, "grad_norm": 2.3608423699852055, "learning_rate": 9.97299730129037e-07, "loss": 0.1219, "step": 6525 }, { "epoch": 1.4848691695108078, "grad_norm": 2.1044457678017245, "learning_rate": 9.972279573581705e-07, "loss": 0.0771, "step": 6526 }, { "epoch": 1.4850967007963596, "grad_norm": 1.7779259303890518, "learning_rate": 9.97156176979646e-07, "loss": 0.0842, "step": 6527 }, { "epoch": 1.4853242320819113, "grad_norm": 1.6092438981897077, "learning_rate": 9.970843889949305e-07, "loss": 0.0797, "step": 6528 }, { "epoch": 1.485551763367463, "grad_norm": 1.866981974786664, "learning_rate": 9.97012593405492e-07, "loss": 0.1233, "step": 6529 }, { "epoch": 1.4857792946530148, "grad_norm": 2.8790996399351703, "learning_rate": 9.969407902127972e-07, "loss": 0.1261, "step": 6530 }, { "epoch": 1.4860068259385666, "grad_norm": 1.9010114215145653, "learning_rate": 9.968689794183137e-07, "loss": 0.0963, "step": 6531 }, { "epoch": 1.4862343572241183, "grad_norm": 1.4621814472728394, "learning_rate": 9.967971610235095e-07, "loss": 0.0952, "step": 6532 }, { "epoch": 1.48646188850967, "grad_norm": 2.057756155886733, "learning_rate": 9.967253350298522e-07, "loss": 0.0779, "step": 6533 }, { "epoch": 1.4866894197952218, "grad_norm": 2.397744809721191, "learning_rate": 9.966535014388098e-07, "loss": 0.0818, "step": 6534 }, { "epoch": 1.4869169510807736, "grad_norm": 1.6045481156972197, "learning_rate": 9.965816602518505e-07, "loss": 0.066, "step": 6535 }, { "epoch": 1.4871444823663253, "grad_norm": 1.3037900416564931, "learning_rate": 9.965098114704425e-07, "loss": 0.1013, "step": 6536 }, { "epoch": 1.487372013651877, "grad_norm": 1.9894750167479516, "learning_rate": 9.964379550960544e-07, "loss": 0.086, "step": 6537 }, { "epoch": 1.4875995449374289, "grad_norm": 1.3172380324912327, "learning_rate": 9.96366091130155e-07, "loss": 0.1158, "step": 6538 }, { "epoch": 1.4878270762229806, "grad_norm": 1.4623068919383868, "learning_rate": 9.962942195742125e-07, "loss": 0.1483, "step": 6539 }, { "epoch": 1.4880546075085324, "grad_norm": 2.1750563920153283, "learning_rate": 9.96222340429696e-07, "loss": 0.0826, "step": 6540 }, { "epoch": 1.488282138794084, "grad_norm": 1.2779440444083219, "learning_rate": 9.96150453698075e-07, "loss": 0.0834, "step": 6541 }, { "epoch": 1.4885096700796359, "grad_norm": 2.4346203961684267, "learning_rate": 9.960785593808187e-07, "loss": 0.0693, "step": 6542 }, { "epoch": 1.4887372013651876, "grad_norm": 2.4068679198331844, "learning_rate": 9.960066574793959e-07, "loss": 0.1503, "step": 6543 }, { "epoch": 1.4889647326507394, "grad_norm": 1.7234434466434458, "learning_rate": 9.959347479952764e-07, "loss": 0.0743, "step": 6544 }, { "epoch": 1.4891922639362911, "grad_norm": 1.992864343711084, "learning_rate": 9.958628309299303e-07, "loss": 0.1348, "step": 6545 }, { "epoch": 1.4894197952218429, "grad_norm": 1.6688641478519832, "learning_rate": 9.95790906284827e-07, "loss": 0.0975, "step": 6546 }, { "epoch": 1.4896473265073948, "grad_norm": 1.461032294107854, "learning_rate": 9.957189740614364e-07, "loss": 0.08, "step": 6547 }, { "epoch": 1.4898748577929466, "grad_norm": 1.6301476658725782, "learning_rate": 9.956470342612292e-07, "loss": 0.1803, "step": 6548 }, { "epoch": 1.4901023890784983, "grad_norm": 1.8707264579028058, "learning_rate": 9.955750868856753e-07, "loss": 0.1212, "step": 6549 }, { "epoch": 1.49032992036405, "grad_norm": 1.1641651322644342, "learning_rate": 9.955031319362455e-07, "loss": 0.092, "step": 6550 }, { "epoch": 1.4905574516496019, "grad_norm": 1.0019288135962998, "learning_rate": 9.954311694144101e-07, "loss": 0.0959, "step": 6551 }, { "epoch": 1.4907849829351536, "grad_norm": 2.052231836856024, "learning_rate": 9.9535919932164e-07, "loss": 0.0883, "step": 6552 }, { "epoch": 1.4910125142207054, "grad_norm": 2.915207302536033, "learning_rate": 9.952872216594062e-07, "loss": 0.1058, "step": 6553 }, { "epoch": 1.491240045506257, "grad_norm": 1.920148074541123, "learning_rate": 9.952152364291795e-07, "loss": 0.1474, "step": 6554 }, { "epoch": 1.4914675767918089, "grad_norm": 1.285894577133101, "learning_rate": 9.951432436324317e-07, "loss": 0.2073, "step": 6555 }, { "epoch": 1.4916951080773606, "grad_norm": 1.8218299718178723, "learning_rate": 9.950712432706338e-07, "loss": 0.0898, "step": 6556 }, { "epoch": 1.4919226393629124, "grad_norm": 1.4323762701705023, "learning_rate": 9.949992353452575e-07, "loss": 0.0552, "step": 6557 }, { "epoch": 1.4921501706484641, "grad_norm": 1.3545291611167616, "learning_rate": 9.949272198577741e-07, "loss": 0.1064, "step": 6558 }, { "epoch": 1.4923777019340159, "grad_norm": 1.807647397449778, "learning_rate": 9.948551968096562e-07, "loss": 0.1034, "step": 6559 }, { "epoch": 1.4926052332195676, "grad_norm": 1.4266173320216142, "learning_rate": 9.947831662023751e-07, "loss": 0.0439, "step": 6560 }, { "epoch": 1.4928327645051196, "grad_norm": 1.9762698521372128, "learning_rate": 9.947111280374036e-07, "loss": 0.1725, "step": 6561 }, { "epoch": 1.4930602957906713, "grad_norm": 1.851925457742424, "learning_rate": 9.946390823162136e-07, "loss": 0.2263, "step": 6562 }, { "epoch": 1.493287827076223, "grad_norm": 1.5693473014259545, "learning_rate": 9.945670290402778e-07, "loss": 0.0571, "step": 6563 }, { "epoch": 1.4935153583617748, "grad_norm": 2.2381860291552544, "learning_rate": 9.944949682110689e-07, "loss": 0.0704, "step": 6564 }, { "epoch": 1.4937428896473266, "grad_norm": 2.894165644581643, "learning_rate": 9.944228998300592e-07, "loss": 0.1759, "step": 6565 }, { "epoch": 1.4939704209328784, "grad_norm": 2.2783175420383595, "learning_rate": 9.943508238987223e-07, "loss": 0.1116, "step": 6566 }, { "epoch": 1.49419795221843, "grad_norm": 2.3625175390423623, "learning_rate": 9.942787404185307e-07, "loss": 0.0811, "step": 6567 }, { "epoch": 1.4944254835039819, "grad_norm": 1.3641102146894355, "learning_rate": 9.942066493909582e-07, "loss": 0.1175, "step": 6568 }, { "epoch": 1.4946530147895336, "grad_norm": 1.962191132038736, "learning_rate": 9.941345508174778e-07, "loss": 0.1533, "step": 6569 }, { "epoch": 1.4948805460750854, "grad_norm": 1.7938107718153122, "learning_rate": 9.940624446995633e-07, "loss": 0.0799, "step": 6570 }, { "epoch": 1.4951080773606371, "grad_norm": 2.0893704746422386, "learning_rate": 9.939903310386883e-07, "loss": 0.1693, "step": 6571 }, { "epoch": 1.4953356086461889, "grad_norm": 1.218836143766395, "learning_rate": 9.939182098363267e-07, "loss": 0.0514, "step": 6572 }, { "epoch": 1.4955631399317406, "grad_norm": 2.225528007582617, "learning_rate": 9.938460810939526e-07, "loss": 0.0524, "step": 6573 }, { "epoch": 1.4957906712172924, "grad_norm": 1.0703565767955194, "learning_rate": 9.9377394481304e-07, "loss": 0.0441, "step": 6574 }, { "epoch": 1.4960182025028441, "grad_norm": 0.9831353955217996, "learning_rate": 9.937018009950637e-07, "loss": 0.1142, "step": 6575 }, { "epoch": 1.4962457337883959, "grad_norm": 1.7656731882413514, "learning_rate": 9.936296496414974e-07, "loss": 0.1091, "step": 6576 }, { "epoch": 1.4964732650739476, "grad_norm": 1.6503005620914835, "learning_rate": 9.935574907538162e-07, "loss": 0.0796, "step": 6577 }, { "epoch": 1.4967007963594994, "grad_norm": 1.4721719805155578, "learning_rate": 9.93485324333495e-07, "loss": 0.1494, "step": 6578 }, { "epoch": 1.4969283276450511, "grad_norm": 1.587186305176549, "learning_rate": 9.934131503820086e-07, "loss": 0.0994, "step": 6579 }, { "epoch": 1.4971558589306029, "grad_norm": 2.077685939286771, "learning_rate": 9.933409689008323e-07, "loss": 0.1364, "step": 6580 }, { "epoch": 1.4973833902161546, "grad_norm": 1.705800226622494, "learning_rate": 9.932687798914408e-07, "loss": 0.0697, "step": 6581 }, { "epoch": 1.4976109215017064, "grad_norm": 0.8984678868158584, "learning_rate": 9.9319658335531e-07, "loss": 0.0444, "step": 6582 }, { "epoch": 1.4978384527872581, "grad_norm": 1.340663582582159, "learning_rate": 9.931243792939157e-07, "loss": 0.0691, "step": 6583 }, { "epoch": 1.4980659840728099, "grad_norm": 1.3634631218721727, "learning_rate": 9.93052167708733e-07, "loss": 0.0617, "step": 6584 }, { "epoch": 1.4982935153583616, "grad_norm": 2.2638220421161956, "learning_rate": 9.929799486012381e-07, "loss": 0.1215, "step": 6585 }, { "epoch": 1.4985210466439136, "grad_norm": 1.7511442172371627, "learning_rate": 9.92907721972907e-07, "loss": 0.0777, "step": 6586 }, { "epoch": 1.4987485779294654, "grad_norm": 1.5992381545154368, "learning_rate": 9.928354878252156e-07, "loss": 0.1163, "step": 6587 }, { "epoch": 1.4989761092150171, "grad_norm": 2.502483423033871, "learning_rate": 9.927632461596409e-07, "loss": 0.0938, "step": 6588 }, { "epoch": 1.4992036405005689, "grad_norm": 2.7902929599302206, "learning_rate": 9.926909969776588e-07, "loss": 0.1, "step": 6589 }, { "epoch": 1.4994311717861206, "grad_norm": 1.6546051994345607, "learning_rate": 9.926187402807461e-07, "loss": 0.0455, "step": 6590 }, { "epoch": 1.4996587030716724, "grad_norm": 1.4386855095650237, "learning_rate": 9.925464760703796e-07, "loss": 0.1305, "step": 6591 }, { "epoch": 1.4998862343572241, "grad_norm": 2.1550963420089855, "learning_rate": 9.924742043480361e-07, "loss": 0.1383, "step": 6592 }, { "epoch": 1.5001137656427759, "grad_norm": 1.6050123025797802, "learning_rate": 9.924019251151932e-07, "loss": 0.0692, "step": 6593 }, { "epoch": 1.5003412969283276, "grad_norm": 1.9173243071797523, "learning_rate": 9.923296383733274e-07, "loss": 0.1308, "step": 6594 }, { "epoch": 1.5005688282138794, "grad_norm": 1.7215501934103334, "learning_rate": 9.92257344123917e-07, "loss": 0.0748, "step": 6595 }, { "epoch": 1.5007963594994311, "grad_norm": 1.4572833583837446, "learning_rate": 9.921850423684387e-07, "loss": 0.1466, "step": 6596 }, { "epoch": 1.5010238907849829, "grad_norm": 1.6185551035449188, "learning_rate": 9.921127331083708e-07, "loss": 0.0662, "step": 6597 }, { "epoch": 1.5012514220705349, "grad_norm": 1.768377172897687, "learning_rate": 9.92040416345191e-07, "loss": 0.0475, "step": 6598 }, { "epoch": 1.5014789533560866, "grad_norm": 1.3879959304267988, "learning_rate": 9.91968092080377e-07, "loss": 0.1134, "step": 6599 }, { "epoch": 1.5017064846416384, "grad_norm": 3.2549540579687686, "learning_rate": 9.918957603154076e-07, "loss": 0.1008, "step": 6600 }, { "epoch": 1.5019340159271901, "grad_norm": 2.547758885220799, "learning_rate": 9.918234210517606e-07, "loss": 0.066, "step": 6601 }, { "epoch": 1.5021615472127419, "grad_norm": 1.542292781810862, "learning_rate": 9.917510742909147e-07, "loss": 0.1515, "step": 6602 }, { "epoch": 1.5023890784982936, "grad_norm": 1.9701921831117433, "learning_rate": 9.916787200343487e-07, "loss": 0.0722, "step": 6603 }, { "epoch": 1.5026166097838454, "grad_norm": 1.4978983128830725, "learning_rate": 9.91606358283541e-07, "loss": 0.0415, "step": 6604 }, { "epoch": 1.5028441410693971, "grad_norm": 1.5436341118379642, "learning_rate": 9.915339890399707e-07, "loss": 0.1133, "step": 6605 }, { "epoch": 1.5030716723549489, "grad_norm": 2.011492185812262, "learning_rate": 9.914616123051172e-07, "loss": 0.0556, "step": 6606 }, { "epoch": 1.5032992036405006, "grad_norm": 2.038059208259661, "learning_rate": 9.913892280804593e-07, "loss": 0.0631, "step": 6607 }, { "epoch": 1.5035267349260524, "grad_norm": 1.9867057182536725, "learning_rate": 9.913168363674768e-07, "loss": 0.1106, "step": 6608 }, { "epoch": 1.5037542662116041, "grad_norm": 2.195537896793738, "learning_rate": 9.912444371676489e-07, "loss": 0.0736, "step": 6609 }, { "epoch": 1.5039817974971559, "grad_norm": 1.523328848008561, "learning_rate": 9.911720304824555e-07, "loss": 0.0933, "step": 6610 }, { "epoch": 1.5042093287827076, "grad_norm": 2.3802529025066637, "learning_rate": 9.910996163133762e-07, "loss": 0.0734, "step": 6611 }, { "epoch": 1.5044368600682594, "grad_norm": 1.5938524758852861, "learning_rate": 9.910271946618913e-07, "loss": 0.0709, "step": 6612 }, { "epoch": 1.5046643913538111, "grad_norm": 2.3867229570238675, "learning_rate": 9.90954765529481e-07, "loss": 0.1531, "step": 6613 }, { "epoch": 1.5048919226393629, "grad_norm": 1.493728906940114, "learning_rate": 9.908823289176255e-07, "loss": 0.1798, "step": 6614 }, { "epoch": 1.5051194539249146, "grad_norm": 1.5985296880996096, "learning_rate": 9.90809884827805e-07, "loss": 0.1794, "step": 6615 }, { "epoch": 1.5053469852104664, "grad_norm": 1.570406258704178, "learning_rate": 9.907374332615007e-07, "loss": 0.0955, "step": 6616 }, { "epoch": 1.5055745164960181, "grad_norm": 1.9969551241681303, "learning_rate": 9.90664974220193e-07, "loss": 0.0821, "step": 6617 }, { "epoch": 1.50580204778157, "grad_norm": 1.7680267450626428, "learning_rate": 9.90592507705363e-07, "loss": 0.0656, "step": 6618 }, { "epoch": 1.5060295790671216, "grad_norm": 1.4735623654469852, "learning_rate": 9.905200337184915e-07, "loss": 0.1243, "step": 6619 }, { "epoch": 1.5062571103526734, "grad_norm": 2.3922194742332845, "learning_rate": 9.904475522610602e-07, "loss": 0.1074, "step": 6620 }, { "epoch": 1.5064846416382252, "grad_norm": 1.4767746353973168, "learning_rate": 9.9037506333455e-07, "loss": 0.1425, "step": 6621 }, { "epoch": 1.506712172923777, "grad_norm": 1.91538419707602, "learning_rate": 9.90302566940443e-07, "loss": 0.191, "step": 6622 }, { "epoch": 1.5069397042093287, "grad_norm": 1.2549638165273682, "learning_rate": 9.902300630802201e-07, "loss": 0.0748, "step": 6623 }, { "epoch": 1.5071672354948804, "grad_norm": 1.7074464037284505, "learning_rate": 9.901575517553636e-07, "loss": 0.0994, "step": 6624 }, { "epoch": 1.5073947667804322, "grad_norm": 1.9037982472501616, "learning_rate": 9.900850329673559e-07, "loss": 0.0547, "step": 6625 }, { "epoch": 1.507622298065984, "grad_norm": 2.0585060161102726, "learning_rate": 9.900125067176782e-07, "loss": 0.1257, "step": 6626 }, { "epoch": 1.5078498293515359, "grad_norm": 0.8030202615841675, "learning_rate": 9.899399730078138e-07, "loss": 0.0853, "step": 6627 }, { "epoch": 1.5080773606370876, "grad_norm": 1.6767887588871828, "learning_rate": 9.898674318392446e-07, "loss": 0.051, "step": 6628 }, { "epoch": 1.5083048919226394, "grad_norm": 2.054186286357012, "learning_rate": 9.897948832134532e-07, "loss": 0.0668, "step": 6629 }, { "epoch": 1.5085324232081911, "grad_norm": 1.5401946815877716, "learning_rate": 9.897223271319227e-07, "loss": 0.0701, "step": 6630 }, { "epoch": 1.508759954493743, "grad_norm": 1.2266849927770183, "learning_rate": 9.896497635961357e-07, "loss": 0.064, "step": 6631 }, { "epoch": 1.5089874857792946, "grad_norm": 1.893883359862433, "learning_rate": 9.895771926075753e-07, "loss": 0.1201, "step": 6632 }, { "epoch": 1.5092150170648464, "grad_norm": 2.2124782416693165, "learning_rate": 9.895046141677248e-07, "loss": 0.1076, "step": 6633 }, { "epoch": 1.5094425483503981, "grad_norm": 1.4193547479482487, "learning_rate": 9.894320282780675e-07, "loss": 0.0629, "step": 6634 }, { "epoch": 1.50967007963595, "grad_norm": 1.4318969910886052, "learning_rate": 9.89359434940087e-07, "loss": 0.072, "step": 6635 }, { "epoch": 1.5098976109215017, "grad_norm": 2.3903951577867364, "learning_rate": 9.89286834155267e-07, "loss": 0.1012, "step": 6636 }, { "epoch": 1.5101251422070536, "grad_norm": 1.183723374024596, "learning_rate": 9.89214225925091e-07, "loss": 0.0379, "step": 6637 }, { "epoch": 1.5103526734926054, "grad_norm": 1.5171515744161823, "learning_rate": 9.891416102510436e-07, "loss": 0.0696, "step": 6638 }, { "epoch": 1.5105802047781571, "grad_norm": 1.7171746933495065, "learning_rate": 9.890689871346084e-07, "loss": 0.1177, "step": 6639 }, { "epoch": 1.5108077360637089, "grad_norm": 1.7717611457403284, "learning_rate": 9.8899635657727e-07, "loss": 0.1159, "step": 6640 }, { "epoch": 1.5110352673492606, "grad_norm": 2.677419290783526, "learning_rate": 9.889237185805126e-07, "loss": 0.0774, "step": 6641 }, { "epoch": 1.5112627986348124, "grad_norm": 1.7854353991829401, "learning_rate": 9.88851073145821e-07, "loss": 0.1097, "step": 6642 }, { "epoch": 1.5114903299203641, "grad_norm": 1.6833529414717199, "learning_rate": 9.887784202746797e-07, "loss": 0.1238, "step": 6643 }, { "epoch": 1.511717861205916, "grad_norm": 1.532779466839382, "learning_rate": 9.887057599685735e-07, "loss": 0.1802, "step": 6644 }, { "epoch": 1.5119453924914676, "grad_norm": 2.045124714315863, "learning_rate": 9.88633092228988e-07, "loss": 0.1381, "step": 6645 }, { "epoch": 1.5121729237770194, "grad_norm": 1.2599289970561616, "learning_rate": 9.885604170574081e-07, "loss": 0.0993, "step": 6646 }, { "epoch": 1.5124004550625711, "grad_norm": 1.7609732931837845, "learning_rate": 9.884877344553189e-07, "loss": 0.0797, "step": 6647 }, { "epoch": 1.512627986348123, "grad_norm": 2.4222684044480105, "learning_rate": 9.884150444242063e-07, "loss": 0.1511, "step": 6648 }, { "epoch": 1.5128555176336747, "grad_norm": 2.7535858833714357, "learning_rate": 9.883423469655553e-07, "loss": 0.124, "step": 6649 }, { "epoch": 1.5130830489192264, "grad_norm": 2.1990285652154173, "learning_rate": 9.882696420808526e-07, "loss": 0.1464, "step": 6650 }, { "epoch": 1.5133105802047782, "grad_norm": 1.3786062317925045, "learning_rate": 9.881969297715836e-07, "loss": 0.1032, "step": 6651 }, { "epoch": 1.51353811149033, "grad_norm": 1.5534353780953807, "learning_rate": 9.881242100392346e-07, "loss": 0.0711, "step": 6652 }, { "epoch": 1.5137656427758817, "grad_norm": 1.3604382033229832, "learning_rate": 9.880514828852916e-07, "loss": 0.0569, "step": 6653 }, { "epoch": 1.5139931740614334, "grad_norm": 1.169340702023143, "learning_rate": 9.879787483112413e-07, "loss": 0.1239, "step": 6654 }, { "epoch": 1.5142207053469852, "grad_norm": 2.2308897011011575, "learning_rate": 9.879060063185702e-07, "loss": 0.1562, "step": 6655 }, { "epoch": 1.514448236632537, "grad_norm": 1.3587637173777, "learning_rate": 9.878332569087647e-07, "loss": 0.0329, "step": 6656 }, { "epoch": 1.5146757679180887, "grad_norm": 2.545733827960348, "learning_rate": 9.877605000833122e-07, "loss": 0.1105, "step": 6657 }, { "epoch": 1.5149032992036404, "grad_norm": 1.5953663605342474, "learning_rate": 9.876877358436996e-07, "loss": 0.057, "step": 6658 }, { "epoch": 1.5151308304891922, "grad_norm": 1.1323787710966986, "learning_rate": 9.876149641914135e-07, "loss": 0.0752, "step": 6659 }, { "epoch": 1.515358361774744, "grad_norm": 2.450063405723873, "learning_rate": 9.875421851279419e-07, "loss": 0.1737, "step": 6660 }, { "epoch": 1.5155858930602957, "grad_norm": 1.3511044892138, "learning_rate": 9.874693986547717e-07, "loss": 0.0854, "step": 6661 }, { "epoch": 1.5158134243458474, "grad_norm": 1.4537383245165247, "learning_rate": 9.87396604773391e-07, "loss": 0.0981, "step": 6662 }, { "epoch": 1.5160409556313992, "grad_norm": 2.150257842110415, "learning_rate": 9.873238034852875e-07, "loss": 0.054, "step": 6663 }, { "epoch": 1.516268486916951, "grad_norm": 1.6009149655756139, "learning_rate": 9.872509947919489e-07, "loss": 0.0934, "step": 6664 }, { "epoch": 1.5164960182025027, "grad_norm": 2.69299983979343, "learning_rate": 9.871781786948636e-07, "loss": 0.0956, "step": 6665 }, { "epoch": 1.5167235494880547, "grad_norm": 1.9231658149059578, "learning_rate": 9.871053551955194e-07, "loss": 0.0938, "step": 6666 }, { "epoch": 1.5169510807736064, "grad_norm": 1.5996264354941863, "learning_rate": 9.87032524295405e-07, "loss": 0.0851, "step": 6667 }, { "epoch": 1.5171786120591582, "grad_norm": 2.3602848121961135, "learning_rate": 9.869596859960087e-07, "loss": 0.0878, "step": 6668 }, { "epoch": 1.51740614334471, "grad_norm": 2.49783971066406, "learning_rate": 9.868868402988194e-07, "loss": 0.1241, "step": 6669 }, { "epoch": 1.5176336746302617, "grad_norm": 1.9002930155631617, "learning_rate": 9.86813987205326e-07, "loss": 0.1825, "step": 6670 }, { "epoch": 1.5178612059158134, "grad_norm": 2.175376286906289, "learning_rate": 9.867411267170171e-07, "loss": 0.237, "step": 6671 }, { "epoch": 1.5180887372013652, "grad_norm": 2.136172560112039, "learning_rate": 9.866682588353823e-07, "loss": 0.1025, "step": 6672 }, { "epoch": 1.518316268486917, "grad_norm": 2.2409744987674522, "learning_rate": 9.865953835619105e-07, "loss": 0.1267, "step": 6673 }, { "epoch": 1.5185437997724687, "grad_norm": 1.1677610227530477, "learning_rate": 9.865225008980913e-07, "loss": 0.077, "step": 6674 }, { "epoch": 1.5187713310580204, "grad_norm": 1.6686212093967283, "learning_rate": 9.864496108454142e-07, "loss": 0.0827, "step": 6675 }, { "epoch": 1.5189988623435724, "grad_norm": 1.4606352032280105, "learning_rate": 9.863767134053691e-07, "loss": 0.0657, "step": 6676 }, { "epoch": 1.5192263936291241, "grad_norm": 1.6472537173662813, "learning_rate": 9.86303808579446e-07, "loss": 0.1342, "step": 6677 }, { "epoch": 1.519453924914676, "grad_norm": 1.7887052016764249, "learning_rate": 9.862308963691344e-07, "loss": 0.1469, "step": 6678 }, { "epoch": 1.5196814562002277, "grad_norm": 2.0564231913518194, "learning_rate": 9.86157976775925e-07, "loss": 0.2687, "step": 6679 }, { "epoch": 1.5199089874857794, "grad_norm": 2.173256072420634, "learning_rate": 9.86085049801308e-07, "loss": 0.1331, "step": 6680 }, { "epoch": 1.5201365187713312, "grad_norm": 1.828606013658809, "learning_rate": 9.860121154467738e-07, "loss": 0.082, "step": 6681 }, { "epoch": 1.520364050056883, "grad_norm": 1.433527391061732, "learning_rate": 9.859391737138132e-07, "loss": 0.1203, "step": 6682 }, { "epoch": 1.5205915813424347, "grad_norm": 2.462623028096727, "learning_rate": 9.85866224603917e-07, "loss": 0.1017, "step": 6683 }, { "epoch": 1.5208191126279864, "grad_norm": 1.5185381292209394, "learning_rate": 9.85793268118576e-07, "loss": 0.1266, "step": 6684 }, { "epoch": 1.5210466439135382, "grad_norm": 1.68148981523328, "learning_rate": 9.857203042592813e-07, "loss": 0.0575, "step": 6685 }, { "epoch": 1.52127417519909, "grad_norm": 2.3064553659517126, "learning_rate": 9.856473330275243e-07, "loss": 0.0862, "step": 6686 }, { "epoch": 1.5215017064846417, "grad_norm": 1.9203644918552756, "learning_rate": 9.855743544247962e-07, "loss": 0.1983, "step": 6687 }, { "epoch": 1.5217292377701934, "grad_norm": 1.828129246833364, "learning_rate": 9.855013684525888e-07, "loss": 0.0802, "step": 6688 }, { "epoch": 1.5219567690557452, "grad_norm": 2.5899944461839497, "learning_rate": 9.854283751123935e-07, "loss": 0.0769, "step": 6689 }, { "epoch": 1.522184300341297, "grad_norm": 1.1795018537600235, "learning_rate": 9.853553744057023e-07, "loss": 0.0245, "step": 6690 }, { "epoch": 1.5224118316268487, "grad_norm": 1.8602779860567926, "learning_rate": 9.852823663340074e-07, "loss": 0.0683, "step": 6691 }, { "epoch": 1.5226393629124004, "grad_norm": 2.468331705268543, "learning_rate": 9.852093508988006e-07, "loss": 0.1311, "step": 6692 }, { "epoch": 1.5228668941979522, "grad_norm": 3.072633064169901, "learning_rate": 9.851363281015745e-07, "loss": 0.1717, "step": 6693 }, { "epoch": 1.523094425483504, "grad_norm": 1.7646833258471915, "learning_rate": 9.850632979438211e-07, "loss": 0.0604, "step": 6694 }, { "epoch": 1.5233219567690557, "grad_norm": 1.5840900489872247, "learning_rate": 9.849902604270335e-07, "loss": 0.1526, "step": 6695 }, { "epoch": 1.5235494880546074, "grad_norm": 1.9100983840195138, "learning_rate": 9.849172155527044e-07, "loss": 0.0927, "step": 6696 }, { "epoch": 1.5237770193401592, "grad_norm": 1.7968210113673053, "learning_rate": 9.848441633223266e-07, "loss": 0.1103, "step": 6697 }, { "epoch": 1.524004550625711, "grad_norm": 1.7268833657807332, "learning_rate": 9.847711037373928e-07, "loss": 0.1204, "step": 6698 }, { "epoch": 1.5242320819112627, "grad_norm": 1.1093468674675278, "learning_rate": 9.846980367993968e-07, "loss": 0.1065, "step": 6699 }, { "epoch": 1.5244596131968144, "grad_norm": 0.8658636255558, "learning_rate": 9.846249625098317e-07, "loss": 0.0542, "step": 6700 }, { "epoch": 1.5246871444823662, "grad_norm": 1.333652555504289, "learning_rate": 9.845518808701906e-07, "loss": 0.1258, "step": 6701 }, { "epoch": 1.524914675767918, "grad_norm": 1.910866305841753, "learning_rate": 9.84478791881968e-07, "loss": 0.1404, "step": 6702 }, { "epoch": 1.5251422070534697, "grad_norm": 1.6361267289033996, "learning_rate": 9.844056955466571e-07, "loss": 0.0944, "step": 6703 }, { "epoch": 1.5253697383390215, "grad_norm": 1.5309137515954483, "learning_rate": 9.84332591865752e-07, "loss": 0.0522, "step": 6704 }, { "epoch": 1.5255972696245734, "grad_norm": 3.572339727844091, "learning_rate": 9.842594808407467e-07, "loss": 0.1908, "step": 6705 }, { "epoch": 1.5258248009101252, "grad_norm": 3.4734172827801526, "learning_rate": 9.841863624731358e-07, "loss": 0.1023, "step": 6706 }, { "epoch": 1.526052332195677, "grad_norm": 1.7120902324796543, "learning_rate": 9.841132367644133e-07, "loss": 0.0958, "step": 6707 }, { "epoch": 1.5262798634812287, "grad_norm": 1.95257483276106, "learning_rate": 9.840401037160737e-07, "loss": 0.087, "step": 6708 }, { "epoch": 1.5265073947667804, "grad_norm": 1.839958674531052, "learning_rate": 9.839669633296122e-07, "loss": 0.1274, "step": 6709 }, { "epoch": 1.5267349260523322, "grad_norm": 1.1867164810093782, "learning_rate": 9.838938156065236e-07, "loss": 0.0737, "step": 6710 }, { "epoch": 1.526962457337884, "grad_norm": 1.6245966674135188, "learning_rate": 9.838206605483024e-07, "loss": 0.0657, "step": 6711 }, { "epoch": 1.5271899886234357, "grad_norm": 1.9555111968047711, "learning_rate": 9.83747498156444e-07, "loss": 0.0641, "step": 6712 }, { "epoch": 1.5274175199089874, "grad_norm": 2.2335184965178723, "learning_rate": 9.836743284324438e-07, "loss": 0.1571, "step": 6713 }, { "epoch": 1.5276450511945392, "grad_norm": 1.2440844987147237, "learning_rate": 9.836011513777975e-07, "loss": 0.0987, "step": 6714 }, { "epoch": 1.5278725824800912, "grad_norm": 1.774129851231337, "learning_rate": 9.835279669940002e-07, "loss": 0.1022, "step": 6715 }, { "epoch": 1.528100113765643, "grad_norm": 3.0157041807810834, "learning_rate": 9.834547752825477e-07, "loss": 0.1205, "step": 6716 }, { "epoch": 1.5283276450511947, "grad_norm": 1.426750353390932, "learning_rate": 9.833815762449364e-07, "loss": 0.0717, "step": 6717 }, { "epoch": 1.5285551763367464, "grad_norm": 2.4046750983405674, "learning_rate": 9.833083698826618e-07, "loss": 0.1465, "step": 6718 }, { "epoch": 1.5287827076222982, "grad_norm": 1.0940696422129481, "learning_rate": 9.832351561972204e-07, "loss": 0.0456, "step": 6719 }, { "epoch": 1.52901023890785, "grad_norm": 1.62018463307765, "learning_rate": 9.831619351901083e-07, "loss": 0.0318, "step": 6720 }, { "epoch": 1.5292377701934017, "grad_norm": 1.7579436507766677, "learning_rate": 9.830887068628223e-07, "loss": 0.074, "step": 6721 }, { "epoch": 1.5294653014789534, "grad_norm": 1.3534465665094793, "learning_rate": 9.830154712168591e-07, "loss": 0.0873, "step": 6722 }, { "epoch": 1.5296928327645052, "grad_norm": 2.061404616403052, "learning_rate": 9.829422282537152e-07, "loss": 0.072, "step": 6723 }, { "epoch": 1.529920364050057, "grad_norm": 1.5775828869406339, "learning_rate": 9.828689779748877e-07, "loss": 0.0909, "step": 6724 }, { "epoch": 1.5301478953356087, "grad_norm": 2.3455959707506664, "learning_rate": 9.827957203818737e-07, "loss": 0.0916, "step": 6725 }, { "epoch": 1.5303754266211604, "grad_norm": 2.5840806262089835, "learning_rate": 9.827224554761705e-07, "loss": 0.1354, "step": 6726 }, { "epoch": 1.5306029579067122, "grad_norm": 1.0556116471214028, "learning_rate": 9.826491832592754e-07, "loss": 0.1295, "step": 6727 }, { "epoch": 1.530830489192264, "grad_norm": 2.196821539587016, "learning_rate": 9.825759037326861e-07, "loss": 0.1352, "step": 6728 }, { "epoch": 1.5310580204778157, "grad_norm": 1.7033480648148973, "learning_rate": 9.825026168979001e-07, "loss": 0.1256, "step": 6729 }, { "epoch": 1.5312855517633674, "grad_norm": 1.314871959729546, "learning_rate": 9.824293227564154e-07, "loss": 0.1147, "step": 6730 }, { "epoch": 1.5315130830489192, "grad_norm": 2.295215182711722, "learning_rate": 9.8235602130973e-07, "loss": 0.087, "step": 6731 }, { "epoch": 1.531740614334471, "grad_norm": 1.7613060587120912, "learning_rate": 9.822827125593417e-07, "loss": 0.0816, "step": 6732 }, { "epoch": 1.5319681456200227, "grad_norm": 3.6189711422913757, "learning_rate": 9.822093965067492e-07, "loss": 0.217, "step": 6733 }, { "epoch": 1.5321956769055745, "grad_norm": 1.5407713472072504, "learning_rate": 9.821360731534512e-07, "loss": 0.049, "step": 6734 }, { "epoch": 1.5324232081911262, "grad_norm": 1.6479162072423494, "learning_rate": 9.820627425009455e-07, "loss": 0.0688, "step": 6735 }, { "epoch": 1.532650739476678, "grad_norm": 0.7948885817082926, "learning_rate": 9.819894045507315e-07, "loss": 0.0599, "step": 6736 }, { "epoch": 1.5328782707622297, "grad_norm": 1.1151560312631288, "learning_rate": 9.819160593043076e-07, "loss": 0.1409, "step": 6737 }, { "epoch": 1.5331058020477815, "grad_norm": 1.923769297389666, "learning_rate": 9.818427067631733e-07, "loss": 0.0986, "step": 6738 }, { "epoch": 1.5333333333333332, "grad_norm": 1.2519127952741742, "learning_rate": 9.817693469288276e-07, "loss": 0.1078, "step": 6739 }, { "epoch": 1.533560864618885, "grad_norm": 1.046024360494148, "learning_rate": 9.8169597980277e-07, "loss": 0.0381, "step": 6740 }, { "epoch": 1.5337883959044367, "grad_norm": 1.1028938926163987, "learning_rate": 9.816226053864996e-07, "loss": 0.1161, "step": 6741 }, { "epoch": 1.5340159271899885, "grad_norm": 1.7266193972711186, "learning_rate": 9.815492236815163e-07, "loss": 0.0788, "step": 6742 }, { "epoch": 1.5342434584755402, "grad_norm": 1.6081871572113473, "learning_rate": 9.8147583468932e-07, "loss": 0.1339, "step": 6743 }, { "epoch": 1.5344709897610922, "grad_norm": 1.402571830332417, "learning_rate": 9.814024384114102e-07, "loss": 0.0378, "step": 6744 }, { "epoch": 1.534698521046644, "grad_norm": 1.846203157196796, "learning_rate": 9.813290348492874e-07, "loss": 0.0693, "step": 6745 }, { "epoch": 1.5349260523321957, "grad_norm": 1.6339574311431553, "learning_rate": 9.812556240044518e-07, "loss": 0.0803, "step": 6746 }, { "epoch": 1.5351535836177475, "grad_norm": 1.3328670448791076, "learning_rate": 9.811822058784038e-07, "loss": 0.1181, "step": 6747 }, { "epoch": 1.5353811149032992, "grad_norm": 1.4665000621481399, "learning_rate": 9.811087804726436e-07, "loss": 0.1358, "step": 6748 }, { "epoch": 1.535608646188851, "grad_norm": 1.7442320100495479, "learning_rate": 9.810353477886722e-07, "loss": 0.0473, "step": 6749 }, { "epoch": 1.5358361774744027, "grad_norm": 1.8155220142705994, "learning_rate": 9.809619078279904e-07, "loss": 0.0756, "step": 6750 }, { "epoch": 1.5360637087599545, "grad_norm": 1.2458858709598846, "learning_rate": 9.80888460592099e-07, "loss": 0.1036, "step": 6751 }, { "epoch": 1.5362912400455062, "grad_norm": 1.4447722998575065, "learning_rate": 9.808150060824995e-07, "loss": 0.1606, "step": 6752 }, { "epoch": 1.5365187713310582, "grad_norm": 1.5230317243883573, "learning_rate": 9.807415443006926e-07, "loss": 0.092, "step": 6753 }, { "epoch": 1.53674630261661, "grad_norm": 1.6478604856615626, "learning_rate": 9.806680752481803e-07, "loss": 0.0621, "step": 6754 }, { "epoch": 1.5369738339021617, "grad_norm": 2.437908355394251, "learning_rate": 9.805945989264638e-07, "loss": 0.081, "step": 6755 }, { "epoch": 1.5372013651877134, "grad_norm": 1.266952056682482, "learning_rate": 9.805211153370448e-07, "loss": 0.0891, "step": 6756 }, { "epoch": 1.5374288964732652, "grad_norm": 2.1205351302476227, "learning_rate": 9.804476244814253e-07, "loss": 0.0786, "step": 6757 }, { "epoch": 1.537656427758817, "grad_norm": 1.3261225574966184, "learning_rate": 9.803741263611076e-07, "loss": 0.1225, "step": 6758 }, { "epoch": 1.5378839590443687, "grad_norm": 2.0857772110578683, "learning_rate": 9.803006209775936e-07, "loss": 0.1329, "step": 6759 }, { "epoch": 1.5381114903299204, "grad_norm": 1.6409653695306137, "learning_rate": 9.802271083323855e-07, "loss": 0.1046, "step": 6760 }, { "epoch": 1.5383390216154722, "grad_norm": 1.9140415782586275, "learning_rate": 9.801535884269859e-07, "loss": 0.0583, "step": 6761 }, { "epoch": 1.538566552901024, "grad_norm": 1.5742717342382722, "learning_rate": 9.800800612628971e-07, "loss": 0.1474, "step": 6762 }, { "epoch": 1.5387940841865757, "grad_norm": 2.587740921277969, "learning_rate": 9.800065268416226e-07, "loss": 0.1413, "step": 6763 }, { "epoch": 1.5390216154721275, "grad_norm": 1.9113702724685222, "learning_rate": 9.799329851646643e-07, "loss": 0.0734, "step": 6764 }, { "epoch": 1.5392491467576792, "grad_norm": 1.9971326212884126, "learning_rate": 9.798594362335265e-07, "loss": 0.0665, "step": 6765 }, { "epoch": 1.539476678043231, "grad_norm": 1.9851093003136897, "learning_rate": 9.797858800497112e-07, "loss": 0.1075, "step": 6766 }, { "epoch": 1.5397042093287827, "grad_norm": 2.0738730426091574, "learning_rate": 9.797123166147224e-07, "loss": 0.1353, "step": 6767 }, { "epoch": 1.5399317406143345, "grad_norm": 2.7095905938056988, "learning_rate": 9.796387459300635e-07, "loss": 0.0688, "step": 6768 }, { "epoch": 1.5401592718998862, "grad_norm": 1.239920336046195, "learning_rate": 9.795651679972382e-07, "loss": 0.1014, "step": 6769 }, { "epoch": 1.540386803185438, "grad_norm": 1.607016101863712, "learning_rate": 9.7949158281775e-07, "loss": 0.0505, "step": 6770 }, { "epoch": 1.5406143344709897, "grad_norm": 1.386117850600802, "learning_rate": 9.794179903931035e-07, "loss": 0.0687, "step": 6771 }, { "epoch": 1.5408418657565415, "grad_norm": 1.236749985995557, "learning_rate": 9.79344390724802e-07, "loss": 0.0327, "step": 6772 }, { "epoch": 1.5410693970420932, "grad_norm": 1.6797050069106503, "learning_rate": 9.7927078381435e-07, "loss": 0.0795, "step": 6773 }, { "epoch": 1.541296928327645, "grad_norm": 2.9730659229323093, "learning_rate": 9.791971696632523e-07, "loss": 0.0906, "step": 6774 }, { "epoch": 1.5415244596131967, "grad_norm": 1.8652340188388545, "learning_rate": 9.79123548273013e-07, "loss": 0.1489, "step": 6775 }, { "epoch": 1.5417519908987485, "grad_norm": 1.744502185028443, "learning_rate": 9.79049919645137e-07, "loss": 0.0638, "step": 6776 }, { "epoch": 1.5419795221843002, "grad_norm": 2.096956448280094, "learning_rate": 9.78976283781129e-07, "loss": 0.0925, "step": 6777 }, { "epoch": 1.542207053469852, "grad_norm": 1.9057593286282604, "learning_rate": 9.78902640682494e-07, "loss": 0.0763, "step": 6778 }, { "epoch": 1.5424345847554037, "grad_norm": 1.634924562850608, "learning_rate": 9.788289903507373e-07, "loss": 0.1582, "step": 6779 }, { "epoch": 1.5426621160409555, "grad_norm": 2.052440866489605, "learning_rate": 9.787553327873637e-07, "loss": 0.0615, "step": 6780 }, { "epoch": 1.5428896473265072, "grad_norm": 1.5557781540863105, "learning_rate": 9.786816679938794e-07, "loss": 0.1339, "step": 6781 }, { "epoch": 1.543117178612059, "grad_norm": 1.6106987587996306, "learning_rate": 9.786079959717892e-07, "loss": 0.0947, "step": 6782 }, { "epoch": 1.543344709897611, "grad_norm": 1.5775713242046356, "learning_rate": 9.785343167225994e-07, "loss": 0.0633, "step": 6783 }, { "epoch": 1.5435722411831627, "grad_norm": 1.6760823520678827, "learning_rate": 9.784606302478155e-07, "loss": 0.0872, "step": 6784 }, { "epoch": 1.5437997724687145, "grad_norm": 2.696297293597503, "learning_rate": 9.783869365489437e-07, "loss": 0.0909, "step": 6785 }, { "epoch": 1.5440273037542662, "grad_norm": 1.1664906375290016, "learning_rate": 9.783132356274901e-07, "loss": 0.042, "step": 6786 }, { "epoch": 1.544254835039818, "grad_norm": 1.9040250600939805, "learning_rate": 9.78239527484961e-07, "loss": 0.1065, "step": 6787 }, { "epoch": 1.5444823663253697, "grad_norm": 1.198846442121248, "learning_rate": 9.781658121228628e-07, "loss": 0.044, "step": 6788 }, { "epoch": 1.5447098976109215, "grad_norm": 1.245850403558456, "learning_rate": 9.780920895427025e-07, "loss": 0.0615, "step": 6789 }, { "epoch": 1.5449374288964732, "grad_norm": 1.8321354791933504, "learning_rate": 9.780183597459864e-07, "loss": 0.0969, "step": 6790 }, { "epoch": 1.545164960182025, "grad_norm": 2.222629959611143, "learning_rate": 9.779446227342216e-07, "loss": 0.2203, "step": 6791 }, { "epoch": 1.545392491467577, "grad_norm": 2.458814173048237, "learning_rate": 9.77870878508915e-07, "loss": 0.1028, "step": 6792 }, { "epoch": 1.5456200227531287, "grad_norm": 1.9091345644157072, "learning_rate": 9.77797127071574e-07, "loss": 0.0589, "step": 6793 }, { "epoch": 1.5458475540386805, "grad_norm": 1.789562373939949, "learning_rate": 9.777233684237056e-07, "loss": 0.0805, "step": 6794 }, { "epoch": 1.5460750853242322, "grad_norm": 1.7950526990429894, "learning_rate": 9.776496025668174e-07, "loss": 0.049, "step": 6795 }, { "epoch": 1.546302616609784, "grad_norm": 1.2634480571691555, "learning_rate": 9.775758295024177e-07, "loss": 0.0752, "step": 6796 }, { "epoch": 1.5465301478953357, "grad_norm": 1.7316859014052663, "learning_rate": 9.775020492320136e-07, "loss": 0.0714, "step": 6797 }, { "epoch": 1.5467576791808875, "grad_norm": 1.7667136366773373, "learning_rate": 9.774282617571129e-07, "loss": 0.1049, "step": 6798 }, { "epoch": 1.5469852104664392, "grad_norm": 1.8930266011594143, "learning_rate": 9.773544670792243e-07, "loss": 0.1935, "step": 6799 }, { "epoch": 1.547212741751991, "grad_norm": 1.7813371718575919, "learning_rate": 9.772806651998555e-07, "loss": 0.1241, "step": 6800 }, { "epoch": 1.5474402730375427, "grad_norm": 1.504169607795343, "learning_rate": 9.772068561205152e-07, "loss": 0.0828, "step": 6801 }, { "epoch": 1.5476678043230945, "grad_norm": 1.3029079334587395, "learning_rate": 9.771330398427118e-07, "loss": 0.1542, "step": 6802 }, { "epoch": 1.5478953356086462, "grad_norm": 1.4723702531747105, "learning_rate": 9.770592163679539e-07, "loss": 0.0748, "step": 6803 }, { "epoch": 1.548122866894198, "grad_norm": 1.5167655147301062, "learning_rate": 9.769853856977503e-07, "loss": 0.0596, "step": 6804 }, { "epoch": 1.5483503981797497, "grad_norm": 1.2483226690375377, "learning_rate": 9.769115478336102e-07, "loss": 0.0558, "step": 6805 }, { "epoch": 1.5485779294653015, "grad_norm": 2.610342632868789, "learning_rate": 9.768377027770427e-07, "loss": 0.2088, "step": 6806 }, { "epoch": 1.5488054607508532, "grad_norm": 0.948433081408228, "learning_rate": 9.767638505295566e-07, "loss": 0.0764, "step": 6807 }, { "epoch": 1.549032992036405, "grad_norm": 1.110321529246112, "learning_rate": 9.766899910926617e-07, "loss": 0.1255, "step": 6808 }, { "epoch": 1.5492605233219567, "grad_norm": 2.1808152587794467, "learning_rate": 9.766161244678675e-07, "loss": 0.1556, "step": 6809 }, { "epoch": 1.5494880546075085, "grad_norm": 1.9798598047343043, "learning_rate": 9.765422506566837e-07, "loss": 0.0551, "step": 6810 }, { "epoch": 1.5497155858930602, "grad_norm": 1.6743554420444267, "learning_rate": 9.7646836966062e-07, "loss": 0.1103, "step": 6811 }, { "epoch": 1.549943117178612, "grad_norm": 2.2413105664096924, "learning_rate": 9.763944814811866e-07, "loss": 0.1012, "step": 6812 }, { "epoch": 1.5501706484641637, "grad_norm": 1.447294551212359, "learning_rate": 9.763205861198935e-07, "loss": 0.0626, "step": 6813 }, { "epoch": 1.5503981797497155, "grad_norm": 1.1736663175641113, "learning_rate": 9.76246683578251e-07, "loss": 0.0856, "step": 6814 }, { "epoch": 1.5506257110352673, "grad_norm": 1.5996826005862175, "learning_rate": 9.761727738577698e-07, "loss": 0.0855, "step": 6815 }, { "epoch": 1.550853242320819, "grad_norm": 3.3557607688406166, "learning_rate": 9.760988569599602e-07, "loss": 0.2147, "step": 6816 }, { "epoch": 1.5510807736063708, "grad_norm": 2.009826047910445, "learning_rate": 9.760249328863328e-07, "loss": 0.1139, "step": 6817 }, { "epoch": 1.5513083048919225, "grad_norm": 2.5898350691364675, "learning_rate": 9.759510016383987e-07, "loss": 0.0993, "step": 6818 }, { "epoch": 1.5515358361774743, "grad_norm": 1.7623781439792425, "learning_rate": 9.758770632176688e-07, "loss": 0.1161, "step": 6819 }, { "epoch": 1.551763367463026, "grad_norm": 3.6161571986406833, "learning_rate": 9.758031176256543e-07, "loss": 0.1231, "step": 6820 }, { "epoch": 1.5519908987485778, "grad_norm": 3.205748656453346, "learning_rate": 9.757291648638666e-07, "loss": 0.1015, "step": 6821 }, { "epoch": 1.5522184300341297, "grad_norm": 1.9988303163461136, "learning_rate": 9.756552049338174e-07, "loss": 0.0921, "step": 6822 }, { "epoch": 1.5524459613196815, "grad_norm": 2.203100020710235, "learning_rate": 9.755812378370177e-07, "loss": 0.1434, "step": 6823 }, { "epoch": 1.5526734926052332, "grad_norm": 1.6940970795845784, "learning_rate": 9.755072635749795e-07, "loss": 0.1209, "step": 6824 }, { "epoch": 1.552901023890785, "grad_norm": 2.3652238714277196, "learning_rate": 9.754332821492148e-07, "loss": 0.0594, "step": 6825 }, { "epoch": 1.5531285551763367, "grad_norm": 1.330771904775176, "learning_rate": 9.753592935612358e-07, "loss": 0.0703, "step": 6826 }, { "epoch": 1.5533560864618885, "grad_norm": 1.3972072191448723, "learning_rate": 9.752852978125544e-07, "loss": 0.0647, "step": 6827 }, { "epoch": 1.5535836177474402, "grad_norm": 1.3360409284842407, "learning_rate": 9.75211294904683e-07, "loss": 0.112, "step": 6828 }, { "epoch": 1.553811149032992, "grad_norm": 2.137018392575463, "learning_rate": 9.75137284839134e-07, "loss": 0.129, "step": 6829 }, { "epoch": 1.5540386803185438, "grad_norm": 2.097333377582825, "learning_rate": 9.750632676174201e-07, "loss": 0.1122, "step": 6830 }, { "epoch": 1.5542662116040957, "grad_norm": 2.657275222687091, "learning_rate": 9.749892432410544e-07, "loss": 0.1439, "step": 6831 }, { "epoch": 1.5544937428896475, "grad_norm": 4.847015432196619, "learning_rate": 9.749152117115494e-07, "loss": 0.1597, "step": 6832 }, { "epoch": 1.5547212741751992, "grad_norm": 1.2808956168274448, "learning_rate": 9.748411730304184e-07, "loss": 0.0966, "step": 6833 }, { "epoch": 1.554948805460751, "grad_norm": 1.1006280066879472, "learning_rate": 9.747671271991746e-07, "loss": 0.0373, "step": 6834 }, { "epoch": 1.5551763367463027, "grad_norm": 1.5930242822758494, "learning_rate": 9.746930742193307e-07, "loss": 0.064, "step": 6835 }, { "epoch": 1.5554038680318545, "grad_norm": 1.6233957248439397, "learning_rate": 9.746190140924014e-07, "loss": 0.1068, "step": 6836 }, { "epoch": 1.5556313993174062, "grad_norm": 1.6523988807255852, "learning_rate": 9.745449468198997e-07, "loss": 0.0785, "step": 6837 }, { "epoch": 1.555858930602958, "grad_norm": 2.005971472506433, "learning_rate": 9.744708724033393e-07, "loss": 0.1289, "step": 6838 }, { "epoch": 1.5560864618885097, "grad_norm": 0.9656668110662248, "learning_rate": 9.743967908442343e-07, "loss": 0.0903, "step": 6839 }, { "epoch": 1.5563139931740615, "grad_norm": 1.4731049586322165, "learning_rate": 9.743227021440988e-07, "loss": 0.0798, "step": 6840 }, { "epoch": 1.5565415244596132, "grad_norm": 2.445080003949355, "learning_rate": 9.74248606304447e-07, "loss": 0.0935, "step": 6841 }, { "epoch": 1.556769055745165, "grad_norm": 1.7224040698601348, "learning_rate": 9.741745033267932e-07, "loss": 0.0714, "step": 6842 }, { "epoch": 1.5569965870307167, "grad_norm": 2.235660729949979, "learning_rate": 9.741003932126522e-07, "loss": 0.1476, "step": 6843 }, { "epoch": 1.5572241183162685, "grad_norm": 1.7270359857667794, "learning_rate": 9.740262759635386e-07, "loss": 0.1641, "step": 6844 }, { "epoch": 1.5574516496018203, "grad_norm": 1.5709024234225488, "learning_rate": 9.739521515809669e-07, "loss": 0.0562, "step": 6845 }, { "epoch": 1.557679180887372, "grad_norm": 1.6021103887440078, "learning_rate": 9.738780200664525e-07, "loss": 0.0635, "step": 6846 }, { "epoch": 1.5579067121729238, "grad_norm": 1.5543318427321744, "learning_rate": 9.738038814215102e-07, "loss": 0.0687, "step": 6847 }, { "epoch": 1.5581342434584755, "grad_norm": 1.9060796993827795, "learning_rate": 9.737297356476554e-07, "loss": 0.0939, "step": 6848 }, { "epoch": 1.5583617747440273, "grad_norm": 1.2877821231918085, "learning_rate": 9.736555827464034e-07, "loss": 0.1147, "step": 6849 }, { "epoch": 1.558589306029579, "grad_norm": 2.1612992390892454, "learning_rate": 9.7358142271927e-07, "loss": 0.0986, "step": 6850 }, { "epoch": 1.5588168373151308, "grad_norm": 2.139466326520036, "learning_rate": 9.735072555677705e-07, "loss": 0.079, "step": 6851 }, { "epoch": 1.5590443686006825, "grad_norm": 1.690342851607921, "learning_rate": 9.73433081293421e-07, "loss": 0.0702, "step": 6852 }, { "epoch": 1.5592718998862343, "grad_norm": 2.288411560849199, "learning_rate": 9.733588998977376e-07, "loss": 0.1057, "step": 6853 }, { "epoch": 1.559499431171786, "grad_norm": 1.4867185756863013, "learning_rate": 9.73284711382236e-07, "loss": 0.0917, "step": 6854 }, { "epoch": 1.5597269624573378, "grad_norm": 2.007357013936276, "learning_rate": 9.732105157484332e-07, "loss": 0.1453, "step": 6855 }, { "epoch": 1.5599544937428895, "grad_norm": 2.2567812237722786, "learning_rate": 9.731363129978447e-07, "loss": 0.0901, "step": 6856 }, { "epoch": 1.5601820250284413, "grad_norm": 2.386291564760578, "learning_rate": 9.730621031319878e-07, "loss": 0.1216, "step": 6857 }, { "epoch": 1.560409556313993, "grad_norm": 2.2353388504604723, "learning_rate": 9.729878861523788e-07, "loss": 0.0815, "step": 6858 }, { "epoch": 1.5606370875995448, "grad_norm": 2.077499479091852, "learning_rate": 9.729136620605347e-07, "loss": 0.0838, "step": 6859 }, { "epoch": 1.5608646188850968, "grad_norm": 2.2800936343714078, "learning_rate": 9.728394308579727e-07, "loss": 0.1203, "step": 6860 }, { "epoch": 1.5610921501706485, "grad_norm": 2.1406140087060828, "learning_rate": 9.727651925462098e-07, "loss": 0.1006, "step": 6861 }, { "epoch": 1.5613196814562003, "grad_norm": 1.5127165240822227, "learning_rate": 9.726909471267632e-07, "loss": 0.0694, "step": 6862 }, { "epoch": 1.561547212741752, "grad_norm": 1.6310699035842238, "learning_rate": 9.726166946011503e-07, "loss": 0.0844, "step": 6863 }, { "epoch": 1.5617747440273038, "grad_norm": 1.7691994331743215, "learning_rate": 9.72542434970889e-07, "loss": 0.1549, "step": 6864 }, { "epoch": 1.5620022753128555, "grad_norm": 1.8051651007893625, "learning_rate": 9.724681682374965e-07, "loss": 0.0917, "step": 6865 }, { "epoch": 1.5622298065984073, "grad_norm": 2.047807046008497, "learning_rate": 9.723938944024913e-07, "loss": 0.1174, "step": 6866 }, { "epoch": 1.562457337883959, "grad_norm": 1.6345050861116583, "learning_rate": 9.72319613467391e-07, "loss": 0.0487, "step": 6867 }, { "epoch": 1.5626848691695108, "grad_norm": 2.9889942861711254, "learning_rate": 9.722453254337139e-07, "loss": 0.2182, "step": 6868 }, { "epoch": 1.5629124004550625, "grad_norm": 2.086195248205776, "learning_rate": 9.721710303029783e-07, "loss": 0.0827, "step": 6869 }, { "epoch": 1.5631399317406145, "grad_norm": 1.7647791997831161, "learning_rate": 9.720967280767026e-07, "loss": 0.0589, "step": 6870 }, { "epoch": 1.5633674630261662, "grad_norm": 2.0543205301100684, "learning_rate": 9.720224187564057e-07, "loss": 0.0689, "step": 6871 }, { "epoch": 1.563594994311718, "grad_norm": 1.6868564624508946, "learning_rate": 9.719481023436059e-07, "loss": 0.0729, "step": 6872 }, { "epoch": 1.5638225255972698, "grad_norm": 1.51683971139059, "learning_rate": 9.718737788398223e-07, "loss": 0.0938, "step": 6873 }, { "epoch": 1.5640500568828215, "grad_norm": 2.696283500518205, "learning_rate": 9.71799448246574e-07, "loss": 0.2259, "step": 6874 }, { "epoch": 1.5642775881683733, "grad_norm": 1.3830068949348764, "learning_rate": 9.717251105653799e-07, "loss": 0.0391, "step": 6875 }, { "epoch": 1.564505119453925, "grad_norm": 2.369926309898731, "learning_rate": 9.716507657977597e-07, "loss": 0.1235, "step": 6876 }, { "epoch": 1.5647326507394768, "grad_norm": 2.801317707906041, "learning_rate": 9.715764139452327e-07, "loss": 0.1541, "step": 6877 }, { "epoch": 1.5649601820250285, "grad_norm": 1.8375888059655026, "learning_rate": 9.715020550093185e-07, "loss": 0.1213, "step": 6878 }, { "epoch": 1.5651877133105803, "grad_norm": 1.6745414210291196, "learning_rate": 9.71427688991537e-07, "loss": 0.0851, "step": 6879 }, { "epoch": 1.565415244596132, "grad_norm": 1.8212934551331674, "learning_rate": 9.713533158934079e-07, "loss": 0.0878, "step": 6880 }, { "epoch": 1.5656427758816838, "grad_norm": 1.6967874312270814, "learning_rate": 9.712789357164512e-07, "loss": 0.0521, "step": 6881 }, { "epoch": 1.5658703071672355, "grad_norm": 2.0607765520963945, "learning_rate": 9.712045484621874e-07, "loss": 0.1194, "step": 6882 }, { "epoch": 1.5660978384527873, "grad_norm": 1.5593897600597133, "learning_rate": 9.711301541321365e-07, "loss": 0.0679, "step": 6883 }, { "epoch": 1.566325369738339, "grad_norm": 1.7857655925492695, "learning_rate": 9.710557527278195e-07, "loss": 0.0687, "step": 6884 }, { "epoch": 1.5665529010238908, "grad_norm": 2.032305412937479, "learning_rate": 9.709813442507565e-07, "loss": 0.0935, "step": 6885 }, { "epoch": 1.5667804323094425, "grad_norm": 1.983196083400438, "learning_rate": 9.709069287024684e-07, "loss": 0.1605, "step": 6886 }, { "epoch": 1.5670079635949943, "grad_norm": 1.5093436712574515, "learning_rate": 9.708325060844761e-07, "loss": 0.0709, "step": 6887 }, { "epoch": 1.567235494880546, "grad_norm": 1.9807068159490688, "learning_rate": 9.707580763983008e-07, "loss": 0.0753, "step": 6888 }, { "epoch": 1.5674630261660978, "grad_norm": 1.5396665395951017, "learning_rate": 9.706836396454638e-07, "loss": 0.0645, "step": 6889 }, { "epoch": 1.5676905574516495, "grad_norm": 1.8268486418358298, "learning_rate": 9.70609195827486e-07, "loss": 0.0563, "step": 6890 }, { "epoch": 1.5679180887372013, "grad_norm": 1.643220539321596, "learning_rate": 9.705347449458896e-07, "loss": 0.0881, "step": 6891 }, { "epoch": 1.568145620022753, "grad_norm": 1.442144111123451, "learning_rate": 9.704602870021954e-07, "loss": 0.1025, "step": 6892 }, { "epoch": 1.5683731513083048, "grad_norm": 1.576138061818805, "learning_rate": 9.70385821997926e-07, "loss": 0.169, "step": 6893 }, { "epoch": 1.5686006825938565, "grad_norm": 2.5159254541918026, "learning_rate": 9.703113499346026e-07, "loss": 0.1124, "step": 6894 }, { "epoch": 1.5688282138794083, "grad_norm": 0.9136715615253127, "learning_rate": 9.70236870813748e-07, "loss": 0.0634, "step": 6895 }, { "epoch": 1.56905574516496, "grad_norm": 1.3668108620677517, "learning_rate": 9.701623846368836e-07, "loss": 0.0427, "step": 6896 }, { "epoch": 1.5692832764505118, "grad_norm": 1.6270016341076774, "learning_rate": 9.700878914055325e-07, "loss": 0.17, "step": 6897 }, { "epoch": 1.5695108077360636, "grad_norm": 2.350790731713196, "learning_rate": 9.700133911212168e-07, "loss": 0.1382, "step": 6898 }, { "epoch": 1.5697383390216155, "grad_norm": 2.0493315460254795, "learning_rate": 9.699388837854593e-07, "loss": 0.0823, "step": 6899 }, { "epoch": 1.5699658703071673, "grad_norm": 1.6522265372325529, "learning_rate": 9.698643693997827e-07, "loss": 0.1321, "step": 6900 }, { "epoch": 1.570193401592719, "grad_norm": 1.348507321966463, "learning_rate": 9.697898479657098e-07, "loss": 0.0652, "step": 6901 }, { "epoch": 1.5704209328782708, "grad_norm": 1.4026345515667455, "learning_rate": 9.697153194847641e-07, "loss": 0.15, "step": 6902 }, { "epoch": 1.5706484641638225, "grad_norm": 1.5516065262403587, "learning_rate": 9.696407839584684e-07, "loss": 0.1302, "step": 6903 }, { "epoch": 1.5708759954493743, "grad_norm": 1.736103408391572, "learning_rate": 9.695662413883466e-07, "loss": 0.0623, "step": 6904 }, { "epoch": 1.571103526734926, "grad_norm": 1.0606800077660063, "learning_rate": 9.694916917759218e-07, "loss": 0.0757, "step": 6905 }, { "epoch": 1.5713310580204778, "grad_norm": 1.8230964381380002, "learning_rate": 9.694171351227175e-07, "loss": 0.1306, "step": 6906 }, { "epoch": 1.5715585893060295, "grad_norm": 2.1893081391718052, "learning_rate": 9.693425714302577e-07, "loss": 0.066, "step": 6907 }, { "epoch": 1.5717861205915813, "grad_norm": 1.2965655169374837, "learning_rate": 9.692680007000663e-07, "loss": 0.03, "step": 6908 }, { "epoch": 1.5720136518771333, "grad_norm": 1.9214288656500598, "learning_rate": 9.691934229336677e-07, "loss": 0.058, "step": 6909 }, { "epoch": 1.572241183162685, "grad_norm": 2.0891285566817372, "learning_rate": 9.69118838132586e-07, "loss": 0.0738, "step": 6910 }, { "epoch": 1.5724687144482368, "grad_norm": 1.4112326396809503, "learning_rate": 9.69044246298345e-07, "loss": 0.0964, "step": 6911 }, { "epoch": 1.5726962457337885, "grad_norm": 1.1066871989030969, "learning_rate": 9.689696474324703e-07, "loss": 0.0537, "step": 6912 }, { "epoch": 1.5729237770193403, "grad_norm": 1.128146729687468, "learning_rate": 9.688950415364855e-07, "loss": 0.102, "step": 6913 }, { "epoch": 1.573151308304892, "grad_norm": 1.5871840992559776, "learning_rate": 9.68820428611916e-07, "loss": 0.2003, "step": 6914 }, { "epoch": 1.5733788395904438, "grad_norm": 2.315116795851502, "learning_rate": 9.687458086602866e-07, "loss": 0.1135, "step": 6915 }, { "epoch": 1.5736063708759955, "grad_norm": 1.214880530889042, "learning_rate": 9.686711816831226e-07, "loss": 0.0717, "step": 6916 }, { "epoch": 1.5738339021615473, "grad_norm": 1.8811606454809604, "learning_rate": 9.68596547681949e-07, "loss": 0.0666, "step": 6917 }, { "epoch": 1.574061433447099, "grad_norm": 1.4841201037319591, "learning_rate": 9.68521906658291e-07, "loss": 0.1282, "step": 6918 }, { "epoch": 1.5742889647326508, "grad_norm": 1.6912614659007157, "learning_rate": 9.684472586136745e-07, "loss": 0.0507, "step": 6919 }, { "epoch": 1.5745164960182025, "grad_norm": 1.100232031105278, "learning_rate": 9.68372603549625e-07, "loss": 0.0753, "step": 6920 }, { "epoch": 1.5747440273037543, "grad_norm": 2.0618384339064644, "learning_rate": 9.682979414676682e-07, "loss": 0.0743, "step": 6921 }, { "epoch": 1.574971558589306, "grad_norm": 3.092052885379181, "learning_rate": 9.682232723693305e-07, "loss": 0.0985, "step": 6922 }, { "epoch": 1.5751990898748578, "grad_norm": 1.26881499733035, "learning_rate": 9.681485962561377e-07, "loss": 0.0757, "step": 6923 }, { "epoch": 1.5754266211604095, "grad_norm": 1.2213566354259484, "learning_rate": 9.680739131296158e-07, "loss": 0.0995, "step": 6924 }, { "epoch": 1.5756541524459613, "grad_norm": 1.8835777410462962, "learning_rate": 9.679992229912914e-07, "loss": 0.1492, "step": 6925 }, { "epoch": 1.575881683731513, "grad_norm": 2.5455892384333185, "learning_rate": 9.67924525842691e-07, "loss": 0.1431, "step": 6926 }, { "epoch": 1.5761092150170648, "grad_norm": 1.7592564778289752, "learning_rate": 9.678498216853416e-07, "loss": 0.0809, "step": 6927 }, { "epoch": 1.5763367463026166, "grad_norm": 2.5212143965708287, "learning_rate": 9.677751105207696e-07, "loss": 0.0813, "step": 6928 }, { "epoch": 1.5765642775881683, "grad_norm": 1.6364687457943778, "learning_rate": 9.677003923505025e-07, "loss": 0.1237, "step": 6929 }, { "epoch": 1.57679180887372, "grad_norm": 1.9567829536702195, "learning_rate": 9.676256671760665e-07, "loss": 0.1214, "step": 6930 }, { "epoch": 1.5770193401592718, "grad_norm": 1.8218655104471277, "learning_rate": 9.675509349989896e-07, "loss": 0.108, "step": 6931 }, { "epoch": 1.5772468714448236, "grad_norm": 2.0740005049352344, "learning_rate": 9.674761958207986e-07, "loss": 0.0797, "step": 6932 }, { "epoch": 1.5774744027303753, "grad_norm": 2.0550059922871786, "learning_rate": 9.674014496430218e-07, "loss": 0.0698, "step": 6933 }, { "epoch": 1.577701934015927, "grad_norm": 1.636582063223626, "learning_rate": 9.673266964671862e-07, "loss": 0.0513, "step": 6934 }, { "epoch": 1.5779294653014788, "grad_norm": 3.082308543745888, "learning_rate": 9.6725193629482e-07, "loss": 0.0955, "step": 6935 }, { "epoch": 1.5781569965870306, "grad_norm": 1.7853501137033265, "learning_rate": 9.671771691274508e-07, "loss": 0.0976, "step": 6936 }, { "epoch": 1.5783845278725823, "grad_norm": 3.0256879779417534, "learning_rate": 9.671023949666073e-07, "loss": 0.0987, "step": 6937 }, { "epoch": 1.5786120591581343, "grad_norm": 1.3498446575744398, "learning_rate": 9.67027613813817e-07, "loss": 0.071, "step": 6938 }, { "epoch": 1.578839590443686, "grad_norm": 1.7436971564140509, "learning_rate": 9.66952825670609e-07, "loss": 0.1357, "step": 6939 }, { "epoch": 1.5790671217292378, "grad_norm": 1.5228285224595741, "learning_rate": 9.66878030538511e-07, "loss": 0.0502, "step": 6940 }, { "epoch": 1.5792946530147896, "grad_norm": 2.9682409454518948, "learning_rate": 9.668032284190529e-07, "loss": 0.0779, "step": 6941 }, { "epoch": 1.5795221843003413, "grad_norm": 1.6246373361607298, "learning_rate": 9.667284193137622e-07, "loss": 0.0805, "step": 6942 }, { "epoch": 1.579749715585893, "grad_norm": 1.3560913793817733, "learning_rate": 9.666536032241687e-07, "loss": 0.1614, "step": 6943 }, { "epoch": 1.5799772468714448, "grad_norm": 1.6031076552379593, "learning_rate": 9.66578780151801e-07, "loss": 0.1452, "step": 6944 }, { "epoch": 1.5802047781569966, "grad_norm": 2.14630884242415, "learning_rate": 9.66503950098189e-07, "loss": 0.0938, "step": 6945 }, { "epoch": 1.5804323094425483, "grad_norm": 2.300918036454428, "learning_rate": 9.664291130648616e-07, "loss": 0.1036, "step": 6946 }, { "epoch": 1.5806598407281, "grad_norm": 2.076711835486533, "learning_rate": 9.663542690533485e-07, "loss": 0.0978, "step": 6947 }, { "epoch": 1.580887372013652, "grad_norm": 1.9858408778781345, "learning_rate": 9.66279418065179e-07, "loss": 0.1235, "step": 6948 }, { "epoch": 1.5811149032992038, "grad_norm": 2.8134635569970556, "learning_rate": 9.662045601018834e-07, "loss": 0.0959, "step": 6949 }, { "epoch": 1.5813424345847555, "grad_norm": 2.412041671272594, "learning_rate": 9.661296951649914e-07, "loss": 0.1581, "step": 6950 }, { "epoch": 1.5815699658703073, "grad_norm": 2.212966835054143, "learning_rate": 9.660548232560331e-07, "loss": 0.1705, "step": 6951 }, { "epoch": 1.581797497155859, "grad_norm": 1.0167103004961158, "learning_rate": 9.659799443765392e-07, "loss": 0.1194, "step": 6952 }, { "epoch": 1.5820250284414108, "grad_norm": 1.5945837819374222, "learning_rate": 9.659050585280394e-07, "loss": 0.1693, "step": 6953 }, { "epoch": 1.5822525597269625, "grad_norm": 1.401656682392233, "learning_rate": 9.658301657120646e-07, "loss": 0.1292, "step": 6954 }, { "epoch": 1.5824800910125143, "grad_norm": 1.2110702975067411, "learning_rate": 9.657552659301455e-07, "loss": 0.0494, "step": 6955 }, { "epoch": 1.582707622298066, "grad_norm": 2.610040851944766, "learning_rate": 9.656803591838126e-07, "loss": 0.1847, "step": 6956 }, { "epoch": 1.5829351535836178, "grad_norm": 0.863503979710921, "learning_rate": 9.656054454745973e-07, "loss": 0.0815, "step": 6957 }, { "epoch": 1.5831626848691696, "grad_norm": 1.3367436245719442, "learning_rate": 9.655305248040302e-07, "loss": 0.1176, "step": 6958 }, { "epoch": 1.5833902161547213, "grad_norm": 1.8868194318678535, "learning_rate": 9.654555971736431e-07, "loss": 0.0984, "step": 6959 }, { "epoch": 1.583617747440273, "grad_norm": 1.4598869301223507, "learning_rate": 9.653806625849671e-07, "loss": 0.1053, "step": 6960 }, { "epoch": 1.5838452787258248, "grad_norm": 0.9993222033681556, "learning_rate": 9.653057210395338e-07, "loss": 0.0589, "step": 6961 }, { "epoch": 1.5840728100113766, "grad_norm": 1.4700139285725813, "learning_rate": 9.652307725388746e-07, "loss": 0.0704, "step": 6962 }, { "epoch": 1.5843003412969283, "grad_norm": 1.499868989401943, "learning_rate": 9.651558170845216e-07, "loss": 0.0495, "step": 6963 }, { "epoch": 1.58452787258248, "grad_norm": 2.7042028681340655, "learning_rate": 9.650808546780068e-07, "loss": 0.1304, "step": 6964 }, { "epoch": 1.5847554038680318, "grad_norm": 2.9874751883735424, "learning_rate": 9.65005885320862e-07, "loss": 0.0828, "step": 6965 }, { "epoch": 1.5849829351535836, "grad_norm": 1.655538505482436, "learning_rate": 9.649309090146194e-07, "loss": 0.0925, "step": 6966 }, { "epoch": 1.5852104664391353, "grad_norm": 2.0441507873994125, "learning_rate": 9.64855925760812e-07, "loss": 0.0516, "step": 6967 }, { "epoch": 1.585437997724687, "grad_norm": 1.1883886573945852, "learning_rate": 9.647809355609715e-07, "loss": 0.1587, "step": 6968 }, { "epoch": 1.5856655290102388, "grad_norm": 1.7093461262727119, "learning_rate": 9.647059384166314e-07, "loss": 0.0608, "step": 6969 }, { "epoch": 1.5858930602957906, "grad_norm": 1.8247176331808013, "learning_rate": 9.646309343293237e-07, "loss": 0.1467, "step": 6970 }, { "epoch": 1.5861205915813423, "grad_norm": 1.8193934335145596, "learning_rate": 9.645559233005819e-07, "loss": 0.0931, "step": 6971 }, { "epoch": 1.586348122866894, "grad_norm": 2.347813013209486, "learning_rate": 9.644809053319388e-07, "loss": 0.1074, "step": 6972 }, { "epoch": 1.5865756541524458, "grad_norm": 1.9276522554322204, "learning_rate": 9.644058804249276e-07, "loss": 0.1117, "step": 6973 }, { "epoch": 1.5868031854379976, "grad_norm": 1.1894784473133884, "learning_rate": 9.64330848581082e-07, "loss": 0.1122, "step": 6974 }, { "epoch": 1.5870307167235493, "grad_norm": 1.381690182100237, "learning_rate": 9.642558098019353e-07, "loss": 0.0875, "step": 6975 }, { "epoch": 1.587258248009101, "grad_norm": 1.379264433533777, "learning_rate": 9.641807640890212e-07, "loss": 0.0586, "step": 6976 }, { "epoch": 1.587485779294653, "grad_norm": 1.3421330124356945, "learning_rate": 9.64105711443873e-07, "loss": 0.0991, "step": 6977 }, { "epoch": 1.5877133105802048, "grad_norm": 1.517449553301864, "learning_rate": 9.640306518680257e-07, "loss": 0.0792, "step": 6978 }, { "epoch": 1.5879408418657566, "grad_norm": 1.4648923960371314, "learning_rate": 9.639555853630126e-07, "loss": 0.0689, "step": 6979 }, { "epoch": 1.5881683731513083, "grad_norm": 2.1326027621988772, "learning_rate": 9.63880511930368e-07, "loss": 0.0876, "step": 6980 }, { "epoch": 1.58839590443686, "grad_norm": 1.5553369120899478, "learning_rate": 9.638054315716264e-07, "loss": 0.0973, "step": 6981 }, { "epoch": 1.5886234357224118, "grad_norm": 1.440135727652256, "learning_rate": 9.637303442883223e-07, "loss": 0.0428, "step": 6982 }, { "epoch": 1.5888509670079636, "grad_norm": 1.756091503554684, "learning_rate": 9.636552500819903e-07, "loss": 0.0636, "step": 6983 }, { "epoch": 1.5890784982935153, "grad_norm": 1.0526721397744394, "learning_rate": 9.635801489541652e-07, "loss": 0.0631, "step": 6984 }, { "epoch": 1.589306029579067, "grad_norm": 1.9369758747288157, "learning_rate": 9.635050409063818e-07, "loss": 0.0821, "step": 6985 }, { "epoch": 1.5895335608646188, "grad_norm": 1.4734142882640557, "learning_rate": 9.634299259401756e-07, "loss": 0.1045, "step": 6986 }, { "epoch": 1.5897610921501708, "grad_norm": 1.7504083792088394, "learning_rate": 9.633548040570815e-07, "loss": 0.1305, "step": 6987 }, { "epoch": 1.5899886234357226, "grad_norm": 2.0842740380498563, "learning_rate": 9.632796752586345e-07, "loss": 0.2348, "step": 6988 }, { "epoch": 1.5902161547212743, "grad_norm": 1.5809150377120453, "learning_rate": 9.632045395463708e-07, "loss": 0.1034, "step": 6989 }, { "epoch": 1.590443686006826, "grad_norm": 2.2274737894742227, "learning_rate": 9.631293969218256e-07, "loss": 0.1022, "step": 6990 }, { "epoch": 1.5906712172923778, "grad_norm": 1.4521152592202264, "learning_rate": 9.63054247386535e-07, "loss": 0.1371, "step": 6991 }, { "epoch": 1.5908987485779296, "grad_norm": 2.0326995835147565, "learning_rate": 9.629790909420344e-07, "loss": 0.0669, "step": 6992 }, { "epoch": 1.5911262798634813, "grad_norm": 1.6732679871311429, "learning_rate": 9.629039275898603e-07, "loss": 0.0411, "step": 6993 }, { "epoch": 1.591353811149033, "grad_norm": 1.682539766220766, "learning_rate": 9.628287573315488e-07, "loss": 0.0717, "step": 6994 }, { "epoch": 1.5915813424345848, "grad_norm": 1.883899405259209, "learning_rate": 9.627535801686359e-07, "loss": 0.1088, "step": 6995 }, { "epoch": 1.5918088737201366, "grad_norm": 1.765171359290216, "learning_rate": 9.626783961026586e-07, "loss": 0.0915, "step": 6996 }, { "epoch": 1.5920364050056883, "grad_norm": 1.5215975735336589, "learning_rate": 9.626032051351534e-07, "loss": 0.135, "step": 6997 }, { "epoch": 1.59226393629124, "grad_norm": 1.3707195749768784, "learning_rate": 9.62528007267657e-07, "loss": 0.0686, "step": 6998 }, { "epoch": 1.5924914675767918, "grad_norm": 1.7331076088661026, "learning_rate": 9.62452802501706e-07, "loss": 0.0607, "step": 6999 }, { "epoch": 1.5927189988623436, "grad_norm": 1.7262671826564764, "learning_rate": 9.62377590838838e-07, "loss": 0.086, "step": 7000 }, { "epoch": 1.5929465301478953, "grad_norm": 2.108256338543314, "learning_rate": 9.623023722805898e-07, "loss": 0.2314, "step": 7001 }, { "epoch": 1.593174061433447, "grad_norm": 1.6648982855436203, "learning_rate": 9.62227146828499e-07, "loss": 0.1015, "step": 7002 }, { "epoch": 1.5934015927189988, "grad_norm": 1.764553597167271, "learning_rate": 9.621519144841028e-07, "loss": 0.1638, "step": 7003 }, { "epoch": 1.5936291240045506, "grad_norm": 1.8331089224965664, "learning_rate": 9.62076675248939e-07, "loss": 0.0737, "step": 7004 }, { "epoch": 1.5938566552901023, "grad_norm": 1.9442080447235466, "learning_rate": 9.620014291245452e-07, "loss": 0.1642, "step": 7005 }, { "epoch": 1.594084186575654, "grad_norm": 1.6777130725526501, "learning_rate": 9.619261761124592e-07, "loss": 0.0947, "step": 7006 }, { "epoch": 1.5943117178612058, "grad_norm": 2.4255255413124663, "learning_rate": 9.618509162142196e-07, "loss": 0.1277, "step": 7007 }, { "epoch": 1.5945392491467576, "grad_norm": 1.1516566125989556, "learning_rate": 9.61775649431364e-07, "loss": 0.0717, "step": 7008 }, { "epoch": 1.5947667804323093, "grad_norm": 1.6950557748553574, "learning_rate": 9.617003757654309e-07, "loss": 0.1676, "step": 7009 }, { "epoch": 1.594994311717861, "grad_norm": 1.4209377855156373, "learning_rate": 9.616250952179586e-07, "loss": 0.0595, "step": 7010 }, { "epoch": 1.5952218430034129, "grad_norm": 1.3998940240833262, "learning_rate": 9.615498077904865e-07, "loss": 0.0571, "step": 7011 }, { "epoch": 1.5954493742889646, "grad_norm": 2.4228825196425334, "learning_rate": 9.614745134845518e-07, "loss": 0.0836, "step": 7012 }, { "epoch": 1.5956769055745164, "grad_norm": 2.5226928466592855, "learning_rate": 9.613992123016948e-07, "loss": 0.0989, "step": 7013 }, { "epoch": 1.595904436860068, "grad_norm": 2.828698420545608, "learning_rate": 9.61323904243454e-07, "loss": 0.1082, "step": 7014 }, { "epoch": 1.5961319681456199, "grad_norm": 0.658379542494829, "learning_rate": 9.612485893113682e-07, "loss": 0.0617, "step": 7015 }, { "epoch": 1.5963594994311718, "grad_norm": 2.043515550440299, "learning_rate": 9.611732675069773e-07, "loss": 0.0592, "step": 7016 }, { "epoch": 1.5965870307167236, "grad_norm": 1.420713270979496, "learning_rate": 9.610979388318206e-07, "loss": 0.0522, "step": 7017 }, { "epoch": 1.5968145620022753, "grad_norm": 1.8286267570389563, "learning_rate": 9.610226032874374e-07, "loss": 0.0704, "step": 7018 }, { "epoch": 1.597042093287827, "grad_norm": 1.9593689633156057, "learning_rate": 9.609472608753676e-07, "loss": 0.0873, "step": 7019 }, { "epoch": 1.5972696245733788, "grad_norm": 3.025411157925881, "learning_rate": 9.60871911597151e-07, "loss": 0.1941, "step": 7020 }, { "epoch": 1.5974971558589306, "grad_norm": 1.1756630023838337, "learning_rate": 9.607965554543276e-07, "loss": 0.0605, "step": 7021 }, { "epoch": 1.5977246871444823, "grad_norm": 1.98439511132966, "learning_rate": 9.607211924484378e-07, "loss": 0.0863, "step": 7022 }, { "epoch": 1.597952218430034, "grad_norm": 1.4953595829239505, "learning_rate": 9.606458225810214e-07, "loss": 0.1295, "step": 7023 }, { "epoch": 1.5981797497155859, "grad_norm": 2.7182642433005006, "learning_rate": 9.605704458536193e-07, "loss": 0.1064, "step": 7024 }, { "epoch": 1.5984072810011376, "grad_norm": 2.3527859069877817, "learning_rate": 9.604950622677717e-07, "loss": 0.1657, "step": 7025 }, { "epoch": 1.5986348122866896, "grad_norm": 1.036550268362939, "learning_rate": 9.604196718250197e-07, "loss": 0.0853, "step": 7026 }, { "epoch": 1.5988623435722413, "grad_norm": 1.8246053867576546, "learning_rate": 9.603442745269036e-07, "loss": 0.0692, "step": 7027 }, { "epoch": 1.599089874857793, "grad_norm": 1.657164284314806, "learning_rate": 9.602688703749648e-07, "loss": 0.1082, "step": 7028 }, { "epoch": 1.5993174061433448, "grad_norm": 1.6135815088813417, "learning_rate": 9.601934593707444e-07, "loss": 0.1634, "step": 7029 }, { "epoch": 1.5995449374288966, "grad_norm": 2.5168553017718325, "learning_rate": 9.601180415157834e-07, "loss": 0.1249, "step": 7030 }, { "epoch": 1.5997724687144483, "grad_norm": 1.9332350220212315, "learning_rate": 9.600426168116237e-07, "loss": 0.0954, "step": 7031 }, { "epoch": 1.6, "grad_norm": 2.779016388521835, "learning_rate": 9.599671852598062e-07, "loss": 0.1163, "step": 7032 }, { "epoch": 1.6002275312855518, "grad_norm": 1.709991621608222, "learning_rate": 9.59891746861873e-07, "loss": 0.103, "step": 7033 }, { "epoch": 1.6004550625711036, "grad_norm": 1.601744659007227, "learning_rate": 9.598163016193656e-07, "loss": 0.128, "step": 7034 }, { "epoch": 1.6006825938566553, "grad_norm": 1.427848052455716, "learning_rate": 9.597408495338266e-07, "loss": 0.085, "step": 7035 }, { "epoch": 1.600910125142207, "grad_norm": 1.7114236276422017, "learning_rate": 9.596653906067974e-07, "loss": 0.0751, "step": 7036 }, { "epoch": 1.6011376564277588, "grad_norm": 1.099324963342583, "learning_rate": 9.595899248398209e-07, "loss": 0.0571, "step": 7037 }, { "epoch": 1.6013651877133106, "grad_norm": 1.138796067907748, "learning_rate": 9.595144522344386e-07, "loss": 0.1072, "step": 7038 }, { "epoch": 1.6015927189988624, "grad_norm": 1.25580073682027, "learning_rate": 9.594389727921937e-07, "loss": 0.1145, "step": 7039 }, { "epoch": 1.601820250284414, "grad_norm": 2.2093517995718455, "learning_rate": 9.593634865146286e-07, "loss": 0.0875, "step": 7040 }, { "epoch": 1.6020477815699659, "grad_norm": 1.1512438683598305, "learning_rate": 9.592879934032864e-07, "loss": 0.1016, "step": 7041 }, { "epoch": 1.6022753128555176, "grad_norm": 2.151102117346863, "learning_rate": 9.5921249345971e-07, "loss": 0.081, "step": 7042 }, { "epoch": 1.6025028441410694, "grad_norm": 1.8116958423155012, "learning_rate": 9.59136986685442e-07, "loss": 0.1139, "step": 7043 }, { "epoch": 1.6027303754266211, "grad_norm": 1.4728303591830172, "learning_rate": 9.59061473082026e-07, "loss": 0.0792, "step": 7044 }, { "epoch": 1.6029579067121729, "grad_norm": 1.348123202641275, "learning_rate": 9.589859526510053e-07, "loss": 0.1171, "step": 7045 }, { "epoch": 1.6031854379977246, "grad_norm": 1.652518244804472, "learning_rate": 9.589104253939233e-07, "loss": 0.0981, "step": 7046 }, { "epoch": 1.6034129692832764, "grad_norm": 1.6491842823027245, "learning_rate": 9.588348913123238e-07, "loss": 0.152, "step": 7047 }, { "epoch": 1.6036405005688281, "grad_norm": 2.3757077558379214, "learning_rate": 9.587593504077506e-07, "loss": 0.1021, "step": 7048 }, { "epoch": 1.6038680318543799, "grad_norm": 1.7408054303248572, "learning_rate": 9.586838026817475e-07, "loss": 0.0624, "step": 7049 }, { "epoch": 1.6040955631399316, "grad_norm": 1.4722586598022023, "learning_rate": 9.586082481358587e-07, "loss": 0.0811, "step": 7050 }, { "epoch": 1.6043230944254834, "grad_norm": 1.209641522630536, "learning_rate": 9.58532686771628e-07, "loss": 0.084, "step": 7051 }, { "epoch": 1.6045506257110351, "grad_norm": 1.5600339093769533, "learning_rate": 9.584571185906002e-07, "loss": 0.1278, "step": 7052 }, { "epoch": 1.6047781569965869, "grad_norm": 2.2242771765608502, "learning_rate": 9.583815435943195e-07, "loss": 0.1165, "step": 7053 }, { "epoch": 1.6050056882821386, "grad_norm": 2.2003878293393844, "learning_rate": 9.583059617843306e-07, "loss": 0.1519, "step": 7054 }, { "epoch": 1.6052332195676906, "grad_norm": 2.030595699092688, "learning_rate": 9.582303731621784e-07, "loss": 0.1063, "step": 7055 }, { "epoch": 1.6054607508532424, "grad_norm": 1.3464973730328764, "learning_rate": 9.581547777294076e-07, "loss": 0.0988, "step": 7056 }, { "epoch": 1.605688282138794, "grad_norm": 1.7661580546805442, "learning_rate": 9.580791754875631e-07, "loss": 0.0608, "step": 7057 }, { "epoch": 1.6059158134243459, "grad_norm": 1.7729834089831826, "learning_rate": 9.580035664381905e-07, "loss": 0.0704, "step": 7058 }, { "epoch": 1.6061433447098976, "grad_norm": 4.823012303438253, "learning_rate": 9.579279505828348e-07, "loss": 0.0881, "step": 7059 }, { "epoch": 1.6063708759954494, "grad_norm": 1.9764505036357716, "learning_rate": 9.578523279230414e-07, "loss": 0.1049, "step": 7060 }, { "epoch": 1.6065984072810011, "grad_norm": 2.0091897106409458, "learning_rate": 9.577766984603562e-07, "loss": 0.1441, "step": 7061 }, { "epoch": 1.6068259385665529, "grad_norm": 1.2132768642438971, "learning_rate": 9.577010621963249e-07, "loss": 0.0754, "step": 7062 }, { "epoch": 1.6070534698521046, "grad_norm": 1.9676241704559914, "learning_rate": 9.576254191324926e-07, "loss": 0.058, "step": 7063 }, { "epoch": 1.6072810011376564, "grad_norm": 1.2672826972829434, "learning_rate": 9.575497692704063e-07, "loss": 0.0846, "step": 7064 }, { "epoch": 1.6075085324232083, "grad_norm": 1.5216162690213744, "learning_rate": 9.574741126116118e-07, "loss": 0.0742, "step": 7065 }, { "epoch": 1.60773606370876, "grad_norm": 2.2158930745759986, "learning_rate": 9.573984491576554e-07, "loss": 0.081, "step": 7066 }, { "epoch": 1.6079635949943119, "grad_norm": 1.2776286929824625, "learning_rate": 9.573227789100833e-07, "loss": 0.05, "step": 7067 }, { "epoch": 1.6081911262798636, "grad_norm": 1.5841000629893869, "learning_rate": 9.572471018704422e-07, "loss": 0.0628, "step": 7068 }, { "epoch": 1.6084186575654154, "grad_norm": 1.5963700008271497, "learning_rate": 9.57171418040279e-07, "loss": 0.0905, "step": 7069 }, { "epoch": 1.608646188850967, "grad_norm": 1.3320973228563622, "learning_rate": 9.570957274211399e-07, "loss": 0.0436, "step": 7070 }, { "epoch": 1.6088737201365189, "grad_norm": 1.7565713941984569, "learning_rate": 9.570200300145727e-07, "loss": 0.1369, "step": 7071 }, { "epoch": 1.6091012514220706, "grad_norm": 1.4051062508002854, "learning_rate": 9.569443258221243e-07, "loss": 0.1311, "step": 7072 }, { "epoch": 1.6093287827076224, "grad_norm": 1.5003808219816102, "learning_rate": 9.568686148453412e-07, "loss": 0.0552, "step": 7073 }, { "epoch": 1.6095563139931741, "grad_norm": 1.136113330681752, "learning_rate": 9.56792897085772e-07, "loss": 0.1055, "step": 7074 }, { "epoch": 1.6097838452787259, "grad_norm": 1.6147782764940433, "learning_rate": 9.567171725449635e-07, "loss": 0.084, "step": 7075 }, { "epoch": 1.6100113765642776, "grad_norm": 1.4354474070417866, "learning_rate": 9.566414412244635e-07, "loss": 0.0394, "step": 7076 }, { "epoch": 1.6102389078498294, "grad_norm": 1.7446646500466492, "learning_rate": 9.565657031258196e-07, "loss": 0.0676, "step": 7077 }, { "epoch": 1.6104664391353811, "grad_norm": 2.4698974949806565, "learning_rate": 9.564899582505802e-07, "loss": 0.0857, "step": 7078 }, { "epoch": 1.6106939704209329, "grad_norm": 2.489527530624285, "learning_rate": 9.56414206600293e-07, "loss": 0.092, "step": 7079 }, { "epoch": 1.6109215017064846, "grad_norm": 1.563596833456884, "learning_rate": 9.563384481765064e-07, "loss": 0.0412, "step": 7080 }, { "epoch": 1.6111490329920364, "grad_norm": 2.5558659608398955, "learning_rate": 9.562626829807689e-07, "loss": 0.1769, "step": 7081 }, { "epoch": 1.6113765642775881, "grad_norm": 1.8816520646545773, "learning_rate": 9.561869110146288e-07, "loss": 0.127, "step": 7082 }, { "epoch": 1.6116040955631399, "grad_norm": 1.6643867089828204, "learning_rate": 9.561111322796346e-07, "loss": 0.0764, "step": 7083 }, { "epoch": 1.6118316268486916, "grad_norm": 1.5341770274431337, "learning_rate": 9.560353467773354e-07, "loss": 0.054, "step": 7084 }, { "epoch": 1.6120591581342434, "grad_norm": 2.2467041325463004, "learning_rate": 9.5595955450928e-07, "loss": 0.1787, "step": 7085 }, { "epoch": 1.6122866894197951, "grad_norm": 2.4601590783176164, "learning_rate": 9.558837554770173e-07, "loss": 0.0728, "step": 7086 }, { "epoch": 1.612514220705347, "grad_norm": 1.4520641015196234, "learning_rate": 9.558079496820965e-07, "loss": 0.093, "step": 7087 }, { "epoch": 1.6127417519908986, "grad_norm": 1.2574529969406754, "learning_rate": 9.557321371260675e-07, "loss": 0.1536, "step": 7088 }, { "epoch": 1.6129692832764504, "grad_norm": 4.775882995639071, "learning_rate": 9.55656317810479e-07, "loss": 0.0628, "step": 7089 }, { "epoch": 1.6131968145620021, "grad_norm": 1.2500051050487235, "learning_rate": 9.555804917368808e-07, "loss": 0.1272, "step": 7090 }, { "epoch": 1.613424345847554, "grad_norm": 1.3616233079512923, "learning_rate": 9.55504658906823e-07, "loss": 0.0395, "step": 7091 }, { "epoch": 1.6136518771331056, "grad_norm": 1.4428559066579976, "learning_rate": 9.554288193218552e-07, "loss": 0.0638, "step": 7092 }, { "epoch": 1.6138794084186574, "grad_norm": 2.157620490776601, "learning_rate": 9.553529729835275e-07, "loss": 0.1196, "step": 7093 }, { "epoch": 1.6141069397042094, "grad_norm": 1.4309452846628332, "learning_rate": 9.552771198933903e-07, "loss": 0.117, "step": 7094 }, { "epoch": 1.6143344709897611, "grad_norm": 1.3480658748775887, "learning_rate": 9.552012600529934e-07, "loss": 0.0449, "step": 7095 }, { "epoch": 1.6145620022753129, "grad_norm": 1.171421665597667, "learning_rate": 9.551253934638874e-07, "loss": 0.0657, "step": 7096 }, { "epoch": 1.6147895335608646, "grad_norm": 1.8892686669171361, "learning_rate": 9.550495201276231e-07, "loss": 0.0705, "step": 7097 }, { "epoch": 1.6150170648464164, "grad_norm": 1.3180111718701897, "learning_rate": 9.54973640045751e-07, "loss": 0.071, "step": 7098 }, { "epoch": 1.6152445961319681, "grad_norm": 2.238229572587678, "learning_rate": 9.54897753219822e-07, "loss": 0.1586, "step": 7099 }, { "epoch": 1.6154721274175199, "grad_norm": 3.299443853412907, "learning_rate": 9.548218596513871e-07, "loss": 0.1078, "step": 7100 }, { "epoch": 1.6156996587030716, "grad_norm": 1.5809997114750205, "learning_rate": 9.547459593419975e-07, "loss": 0.0908, "step": 7101 }, { "epoch": 1.6159271899886234, "grad_norm": 1.5987662948298578, "learning_rate": 9.546700522932042e-07, "loss": 0.0794, "step": 7102 }, { "epoch": 1.6161547212741754, "grad_norm": 1.6890028281579352, "learning_rate": 9.54594138506559e-07, "loss": 0.0551, "step": 7103 }, { "epoch": 1.6163822525597271, "grad_norm": 1.132133276504026, "learning_rate": 9.545182179836132e-07, "loss": 0.0503, "step": 7104 }, { "epoch": 1.6166097838452789, "grad_norm": 1.5304822279177308, "learning_rate": 9.544422907259185e-07, "loss": 0.0649, "step": 7105 }, { "epoch": 1.6168373151308306, "grad_norm": 2.002856930667923, "learning_rate": 9.543663567350267e-07, "loss": 0.1061, "step": 7106 }, { "epoch": 1.6170648464163824, "grad_norm": 1.2661716612665568, "learning_rate": 9.542904160124896e-07, "loss": 0.058, "step": 7107 }, { "epoch": 1.6172923777019341, "grad_norm": 1.294677148035939, "learning_rate": 9.542144685598598e-07, "loss": 0.1243, "step": 7108 }, { "epoch": 1.6175199089874859, "grad_norm": 1.537674166999775, "learning_rate": 9.54138514378689e-07, "loss": 0.1044, "step": 7109 }, { "epoch": 1.6177474402730376, "grad_norm": 1.1942736822351498, "learning_rate": 9.540625534705297e-07, "loss": 0.0983, "step": 7110 }, { "epoch": 1.6179749715585894, "grad_norm": 1.6860430625022544, "learning_rate": 9.539865858369347e-07, "loss": 0.074, "step": 7111 }, { "epoch": 1.6182025028441411, "grad_norm": 2.7741366323041934, "learning_rate": 9.539106114794564e-07, "loss": 0.1305, "step": 7112 }, { "epoch": 1.6184300341296929, "grad_norm": 1.5967947474659638, "learning_rate": 9.538346303996472e-07, "loss": 0.1135, "step": 7113 }, { "epoch": 1.6186575654152446, "grad_norm": 1.9548245891512768, "learning_rate": 9.537586425990604e-07, "loss": 0.0773, "step": 7114 }, { "epoch": 1.6188850967007964, "grad_norm": 2.2461488842288366, "learning_rate": 9.536826480792493e-07, "loss": 0.1846, "step": 7115 }, { "epoch": 1.6191126279863481, "grad_norm": 1.9145559325768369, "learning_rate": 9.53606646841767e-07, "loss": 0.0559, "step": 7116 }, { "epoch": 1.6193401592719, "grad_norm": 1.5353243471583073, "learning_rate": 9.535306388881663e-07, "loss": 0.1401, "step": 7117 }, { "epoch": 1.6195676905574516, "grad_norm": 1.2965207126861003, "learning_rate": 9.534546242200012e-07, "loss": 0.0991, "step": 7118 }, { "epoch": 1.6197952218430034, "grad_norm": 2.39739525863025, "learning_rate": 9.53378602838825e-07, "loss": 0.1789, "step": 7119 }, { "epoch": 1.6200227531285551, "grad_norm": 2.3161329141037346, "learning_rate": 9.533025747461916e-07, "loss": 0.1097, "step": 7120 }, { "epoch": 1.620250284414107, "grad_norm": 1.2769845925916703, "learning_rate": 9.532265399436549e-07, "loss": 0.0564, "step": 7121 }, { "epoch": 1.6204778156996587, "grad_norm": 1.5848254675436142, "learning_rate": 9.531504984327688e-07, "loss": 0.0901, "step": 7122 }, { "epoch": 1.6207053469852104, "grad_norm": 1.8035770243114464, "learning_rate": 9.530744502150874e-07, "loss": 0.089, "step": 7123 }, { "epoch": 1.6209328782707622, "grad_norm": 2.0544769768196316, "learning_rate": 9.529983952921652e-07, "loss": 0.0696, "step": 7124 }, { "epoch": 1.621160409556314, "grad_norm": 2.415000920373807, "learning_rate": 9.529223336655565e-07, "loss": 0.1083, "step": 7125 }, { "epoch": 1.6213879408418657, "grad_norm": 1.7883493546660625, "learning_rate": 9.528462653368158e-07, "loss": 0.0644, "step": 7126 }, { "epoch": 1.6216154721274174, "grad_norm": 0.9354416438272177, "learning_rate": 9.527701903074977e-07, "loss": 0.0451, "step": 7127 }, { "epoch": 1.6218430034129692, "grad_norm": 1.9495521336227608, "learning_rate": 9.526941085791574e-07, "loss": 0.0657, "step": 7128 }, { "epoch": 1.622070534698521, "grad_norm": 2.1414259183644795, "learning_rate": 9.526180201533497e-07, "loss": 0.1335, "step": 7129 }, { "epoch": 1.6222980659840727, "grad_norm": 2.081666693341058, "learning_rate": 9.525419250316295e-07, "loss": 0.1073, "step": 7130 }, { "epoch": 1.6225255972696244, "grad_norm": 1.298777524655821, "learning_rate": 9.524658232155524e-07, "loss": 0.0812, "step": 7131 }, { "epoch": 1.6227531285551762, "grad_norm": 2.266832318052867, "learning_rate": 9.523897147066735e-07, "loss": 0.132, "step": 7132 }, { "epoch": 1.6229806598407281, "grad_norm": 1.7503424667417475, "learning_rate": 9.523135995065483e-07, "loss": 0.1555, "step": 7133 }, { "epoch": 1.62320819112628, "grad_norm": 1.4805919827253016, "learning_rate": 9.522374776167328e-07, "loss": 0.0802, "step": 7134 }, { "epoch": 1.6234357224118316, "grad_norm": 1.3424048205748553, "learning_rate": 9.521613490387824e-07, "loss": 0.1015, "step": 7135 }, { "epoch": 1.6236632536973834, "grad_norm": 1.8135164351802546, "learning_rate": 9.520852137742534e-07, "loss": 0.0866, "step": 7136 }, { "epoch": 1.6238907849829352, "grad_norm": 2.0423720356000206, "learning_rate": 9.520090718247016e-07, "loss": 0.0555, "step": 7137 }, { "epoch": 1.624118316268487, "grad_norm": 2.1346845240346686, "learning_rate": 9.519329231916831e-07, "loss": 0.1197, "step": 7138 }, { "epoch": 1.6243458475540387, "grad_norm": 1.7824098626608995, "learning_rate": 9.518567678767546e-07, "loss": 0.0743, "step": 7139 }, { "epoch": 1.6245733788395904, "grad_norm": 0.9942649821530546, "learning_rate": 9.51780605881472e-07, "loss": 0.0776, "step": 7140 }, { "epoch": 1.6248009101251422, "grad_norm": 1.240154830498896, "learning_rate": 9.517044372073927e-07, "loss": 0.1216, "step": 7141 }, { "epoch": 1.6250284414106941, "grad_norm": 1.4726407561890795, "learning_rate": 9.516282618560727e-07, "loss": 0.0645, "step": 7142 }, { "epoch": 1.6252559726962459, "grad_norm": 1.4504003744629235, "learning_rate": 9.515520798290695e-07, "loss": 0.1242, "step": 7143 }, { "epoch": 1.6254835039817976, "grad_norm": 1.9699918214488714, "learning_rate": 9.514758911279398e-07, "loss": 0.1662, "step": 7144 }, { "epoch": 1.6257110352673494, "grad_norm": 2.0421004880361626, "learning_rate": 9.513996957542407e-07, "loss": 0.0782, "step": 7145 }, { "epoch": 1.6259385665529011, "grad_norm": 1.0829443140069515, "learning_rate": 9.513234937095296e-07, "loss": 0.0864, "step": 7146 }, { "epoch": 1.626166097838453, "grad_norm": 1.905465586127579, "learning_rate": 9.512472849953639e-07, "loss": 0.0566, "step": 7147 }, { "epoch": 1.6263936291240046, "grad_norm": 1.6659429452735535, "learning_rate": 9.511710696133012e-07, "loss": 0.1531, "step": 7148 }, { "epoch": 1.6266211604095564, "grad_norm": 1.128530637007164, "learning_rate": 9.510948475648993e-07, "loss": 0.0476, "step": 7149 }, { "epoch": 1.6268486916951082, "grad_norm": 1.7859175113663965, "learning_rate": 9.510186188517159e-07, "loss": 0.084, "step": 7150 }, { "epoch": 1.62707622298066, "grad_norm": 2.2211450363669547, "learning_rate": 9.50942383475309e-07, "loss": 0.1057, "step": 7151 }, { "epoch": 1.6273037542662117, "grad_norm": 1.7188290771119823, "learning_rate": 9.508661414372367e-07, "loss": 0.0628, "step": 7152 }, { "epoch": 1.6275312855517634, "grad_norm": 2.33047844943163, "learning_rate": 9.507898927390571e-07, "loss": 0.0941, "step": 7153 }, { "epoch": 1.6277588168373152, "grad_norm": 1.9604490750514882, "learning_rate": 9.507136373823289e-07, "loss": 0.0727, "step": 7154 }, { "epoch": 1.627986348122867, "grad_norm": 0.988973424139529, "learning_rate": 9.506373753686104e-07, "loss": 0.0795, "step": 7155 }, { "epoch": 1.6282138794084187, "grad_norm": 1.7604699945221596, "learning_rate": 9.505611066994605e-07, "loss": 0.1175, "step": 7156 }, { "epoch": 1.6284414106939704, "grad_norm": 2.014909081415411, "learning_rate": 9.504848313764375e-07, "loss": 0.0645, "step": 7157 }, { "epoch": 1.6286689419795222, "grad_norm": 1.2736611112774021, "learning_rate": 9.50408549401101e-07, "loss": 0.1051, "step": 7158 }, { "epoch": 1.628896473265074, "grad_norm": 1.2474664294183557, "learning_rate": 9.503322607750096e-07, "loss": 0.0771, "step": 7159 }, { "epoch": 1.6291240045506257, "grad_norm": 1.2307537903220425, "learning_rate": 9.502559654997226e-07, "loss": 0.0545, "step": 7160 }, { "epoch": 1.6293515358361774, "grad_norm": 1.6777703584025532, "learning_rate": 9.501796635767992e-07, "loss": 0.0956, "step": 7161 }, { "epoch": 1.6295790671217292, "grad_norm": 3.3940231093078364, "learning_rate": 9.501033550077993e-07, "loss": 0.0723, "step": 7162 }, { "epoch": 1.629806598407281, "grad_norm": 1.4027176613897487, "learning_rate": 9.500270397942819e-07, "loss": 0.1389, "step": 7163 }, { "epoch": 1.6300341296928327, "grad_norm": 1.4510463438526642, "learning_rate": 9.499507179378072e-07, "loss": 0.0988, "step": 7164 }, { "epoch": 1.6302616609783844, "grad_norm": 1.740850173674952, "learning_rate": 9.498743894399348e-07, "loss": 0.0695, "step": 7165 }, { "epoch": 1.6304891922639362, "grad_norm": 1.147692771315242, "learning_rate": 9.497980543022251e-07, "loss": 0.0267, "step": 7166 }, { "epoch": 1.630716723549488, "grad_norm": 1.3365492534347532, "learning_rate": 9.497217125262378e-07, "loss": 0.1274, "step": 7167 }, { "epoch": 1.6309442548350397, "grad_norm": 1.6871961606908568, "learning_rate": 9.496453641135337e-07, "loss": 0.1147, "step": 7168 }, { "epoch": 1.6311717861205914, "grad_norm": 2.1846968460485083, "learning_rate": 9.495690090656728e-07, "loss": 0.0436, "step": 7169 }, { "epoch": 1.6313993174061432, "grad_norm": 1.358235913812706, "learning_rate": 9.494926473842156e-07, "loss": 0.0722, "step": 7170 }, { "epoch": 1.631626848691695, "grad_norm": 1.120586198535551, "learning_rate": 9.494162790707232e-07, "loss": 0.0273, "step": 7171 }, { "epoch": 1.631854379977247, "grad_norm": 1.5332225602063558, "learning_rate": 9.493399041267559e-07, "loss": 0.043, "step": 7172 }, { "epoch": 1.6320819112627987, "grad_norm": 2.609985215164489, "learning_rate": 9.492635225538751e-07, "loss": 0.1224, "step": 7173 }, { "epoch": 1.6323094425483504, "grad_norm": 1.2149222886137578, "learning_rate": 9.491871343536418e-07, "loss": 0.0824, "step": 7174 }, { "epoch": 1.6325369738339022, "grad_norm": 2.0312203841057213, "learning_rate": 9.491107395276172e-07, "loss": 0.0969, "step": 7175 }, { "epoch": 1.632764505119454, "grad_norm": 2.3293223801623895, "learning_rate": 9.490343380773629e-07, "loss": 0.108, "step": 7176 }, { "epoch": 1.6329920364050057, "grad_norm": 2.367637061172044, "learning_rate": 9.489579300044396e-07, "loss": 0.0863, "step": 7177 }, { "epoch": 1.6332195676905574, "grad_norm": 1.5769317594673877, "learning_rate": 9.4888151531041e-07, "loss": 0.0798, "step": 7178 }, { "epoch": 1.6334470989761092, "grad_norm": 1.4455661912823792, "learning_rate": 9.488050939968351e-07, "loss": 0.1161, "step": 7179 }, { "epoch": 1.633674630261661, "grad_norm": 2.8524695247450853, "learning_rate": 9.487286660652773e-07, "loss": 0.077, "step": 7180 }, { "epoch": 1.633902161547213, "grad_norm": 2.7487698553006075, "learning_rate": 9.486522315172983e-07, "loss": 0.1453, "step": 7181 }, { "epoch": 1.6341296928327647, "grad_norm": 1.4587274045828376, "learning_rate": 9.485757903544606e-07, "loss": 0.0651, "step": 7182 }, { "epoch": 1.6343572241183164, "grad_norm": 2.04298506693863, "learning_rate": 9.484993425783262e-07, "loss": 0.0962, "step": 7183 }, { "epoch": 1.6345847554038682, "grad_norm": 1.7933342325194643, "learning_rate": 9.484228881904577e-07, "loss": 0.1191, "step": 7184 }, { "epoch": 1.63481228668942, "grad_norm": 2.2774414909954506, "learning_rate": 9.483464271924177e-07, "loss": 0.0589, "step": 7185 }, { "epoch": 1.6350398179749717, "grad_norm": 1.4174058926031983, "learning_rate": 9.482699595857689e-07, "loss": 0.0845, "step": 7186 }, { "epoch": 1.6352673492605234, "grad_norm": 1.2239168361023993, "learning_rate": 9.481934853720742e-07, "loss": 0.0601, "step": 7187 }, { "epoch": 1.6354948805460752, "grad_norm": 3.0956636955260706, "learning_rate": 9.481170045528968e-07, "loss": 0.1153, "step": 7188 }, { "epoch": 1.635722411831627, "grad_norm": 1.9187976135861455, "learning_rate": 9.480405171297992e-07, "loss": 0.0955, "step": 7189 }, { "epoch": 1.6359499431171787, "grad_norm": 1.4924440579295828, "learning_rate": 9.479640231043453e-07, "loss": 0.0999, "step": 7190 }, { "epoch": 1.6361774744027304, "grad_norm": 1.0007987751530008, "learning_rate": 9.47887522478098e-07, "loss": 0.0598, "step": 7191 }, { "epoch": 1.6364050056882822, "grad_norm": 2.3399915938395885, "learning_rate": 9.478110152526212e-07, "loss": 0.0647, "step": 7192 }, { "epoch": 1.636632536973834, "grad_norm": 1.305800819783954, "learning_rate": 9.477345014294787e-07, "loss": 0.0852, "step": 7193 }, { "epoch": 1.6368600682593857, "grad_norm": 1.6920843391163056, "learning_rate": 9.476579810102337e-07, "loss": 0.1114, "step": 7194 }, { "epoch": 1.6370875995449374, "grad_norm": 2.1814109460896094, "learning_rate": 9.475814539964506e-07, "loss": 0.1856, "step": 7195 }, { "epoch": 1.6373151308304892, "grad_norm": 1.7859494589363305, "learning_rate": 9.475049203896934e-07, "loss": 0.1159, "step": 7196 }, { "epoch": 1.637542662116041, "grad_norm": 1.8121948843839317, "learning_rate": 9.474283801915262e-07, "loss": 0.0515, "step": 7197 }, { "epoch": 1.6377701934015927, "grad_norm": 1.4848641666208762, "learning_rate": 9.473518334035134e-07, "loss": 0.0964, "step": 7198 }, { "epoch": 1.6379977246871444, "grad_norm": 2.035419836700452, "learning_rate": 9.472752800272194e-07, "loss": 0.0666, "step": 7199 }, { "epoch": 1.6382252559726962, "grad_norm": 1.5897835103223596, "learning_rate": 9.471987200642093e-07, "loss": 0.1136, "step": 7200 }, { "epoch": 1.638452787258248, "grad_norm": 1.1177849069853134, "learning_rate": 9.471221535160471e-07, "loss": 0.0528, "step": 7201 }, { "epoch": 1.6386803185437997, "grad_norm": 1.4149779560776885, "learning_rate": 9.470455803842982e-07, "loss": 0.0672, "step": 7202 }, { "epoch": 1.6389078498293514, "grad_norm": 2.1597142960821345, "learning_rate": 9.469690006705274e-07, "loss": 0.0728, "step": 7203 }, { "epoch": 1.6391353811149032, "grad_norm": 1.573868562708633, "learning_rate": 9.468924143762996e-07, "loss": 0.0609, "step": 7204 }, { "epoch": 1.639362912400455, "grad_norm": 1.3969158589202828, "learning_rate": 9.468158215031805e-07, "loss": 0.0595, "step": 7205 }, { "epoch": 1.6395904436860067, "grad_norm": 1.9871624130035885, "learning_rate": 9.467392220527358e-07, "loss": 0.1169, "step": 7206 }, { "epoch": 1.6398179749715585, "grad_norm": 1.6143675451430033, "learning_rate": 9.4666261602653e-07, "loss": 0.1121, "step": 7207 }, { "epoch": 1.6400455062571102, "grad_norm": 2.4786692040030887, "learning_rate": 9.465860034261298e-07, "loss": 0.062, "step": 7208 }, { "epoch": 1.640273037542662, "grad_norm": 1.651797912353839, "learning_rate": 9.465093842531007e-07, "loss": 0.0734, "step": 7209 }, { "epoch": 1.640500568828214, "grad_norm": 1.4933908025967075, "learning_rate": 9.464327585090084e-07, "loss": 0.0681, "step": 7210 }, { "epoch": 1.6407281001137657, "grad_norm": 2.46724683222329, "learning_rate": 9.463561261954192e-07, "loss": 0.129, "step": 7211 }, { "epoch": 1.6409556313993174, "grad_norm": 1.4709863716034994, "learning_rate": 9.462794873138995e-07, "loss": 0.0914, "step": 7212 }, { "epoch": 1.6411831626848692, "grad_norm": 3.2781700122611643, "learning_rate": 9.462028418660155e-07, "loss": 0.154, "step": 7213 }, { "epoch": 1.641410693970421, "grad_norm": 1.2959743830109027, "learning_rate": 9.461261898533333e-07, "loss": 0.0627, "step": 7214 }, { "epoch": 1.6416382252559727, "grad_norm": 1.9860376233888792, "learning_rate": 9.460495312774203e-07, "loss": 0.169, "step": 7215 }, { "epoch": 1.6418657565415244, "grad_norm": 2.818835837267242, "learning_rate": 9.459728661398427e-07, "loss": 0.0847, "step": 7216 }, { "epoch": 1.6420932878270762, "grad_norm": 1.389674709889107, "learning_rate": 9.458961944421674e-07, "loss": 0.085, "step": 7217 }, { "epoch": 1.642320819112628, "grad_norm": 1.876929962467865, "learning_rate": 9.458195161859618e-07, "loss": 0.0596, "step": 7218 }, { "epoch": 1.6425483503981797, "grad_norm": 1.786147919520637, "learning_rate": 9.457428313727927e-07, "loss": 0.089, "step": 7219 }, { "epoch": 1.6427758816837317, "grad_norm": 1.3401964491024454, "learning_rate": 9.456661400042278e-07, "loss": 0.1182, "step": 7220 }, { "epoch": 1.6430034129692834, "grad_norm": 1.5281082687017675, "learning_rate": 9.455894420818339e-07, "loss": 0.1498, "step": 7221 }, { "epoch": 1.6432309442548352, "grad_norm": 2.235395462768349, "learning_rate": 9.455127376071791e-07, "loss": 0.1527, "step": 7222 }, { "epoch": 1.643458475540387, "grad_norm": 1.6316478922952569, "learning_rate": 9.454360265818309e-07, "loss": 0.0635, "step": 7223 }, { "epoch": 1.6436860068259387, "grad_norm": 2.0316717048016084, "learning_rate": 9.453593090073571e-07, "loss": 0.1008, "step": 7224 }, { "epoch": 1.6439135381114904, "grad_norm": 1.8222797124219188, "learning_rate": 9.45282584885326e-07, "loss": 0.1174, "step": 7225 }, { "epoch": 1.6441410693970422, "grad_norm": 1.4162521237098797, "learning_rate": 9.452058542173054e-07, "loss": 0.0531, "step": 7226 }, { "epoch": 1.644368600682594, "grad_norm": 1.4806445425094952, "learning_rate": 9.451291170048632e-07, "loss": 0.0976, "step": 7227 }, { "epoch": 1.6445961319681457, "grad_norm": 2.04356178536637, "learning_rate": 9.450523732495684e-07, "loss": 0.1338, "step": 7228 }, { "epoch": 1.6448236632536974, "grad_norm": 1.592647023562932, "learning_rate": 9.449756229529893e-07, "loss": 0.0617, "step": 7229 }, { "epoch": 1.6450511945392492, "grad_norm": 1.2827950381449074, "learning_rate": 9.448988661166944e-07, "loss": 0.0932, "step": 7230 }, { "epoch": 1.645278725824801, "grad_norm": 1.9362527509370229, "learning_rate": 9.448221027422525e-07, "loss": 0.088, "step": 7231 }, { "epoch": 1.6455062571103527, "grad_norm": 2.150147523402948, "learning_rate": 9.447453328312325e-07, "loss": 0.0864, "step": 7232 }, { "epoch": 1.6457337883959045, "grad_norm": 1.436369267098461, "learning_rate": 9.446685563852037e-07, "loss": 0.1491, "step": 7233 }, { "epoch": 1.6459613196814562, "grad_norm": 2.431568244659582, "learning_rate": 9.445917734057349e-07, "loss": 0.1234, "step": 7234 }, { "epoch": 1.646188850967008, "grad_norm": 2.398693177911805, "learning_rate": 9.445149838943955e-07, "loss": 0.117, "step": 7235 }, { "epoch": 1.6464163822525597, "grad_norm": 1.3252800678869499, "learning_rate": 9.44438187852755e-07, "loss": 0.0532, "step": 7236 }, { "epoch": 1.6466439135381115, "grad_norm": 1.6130505488960083, "learning_rate": 9.443613852823832e-07, "loss": 0.0498, "step": 7237 }, { "epoch": 1.6468714448236632, "grad_norm": 1.5445975785219161, "learning_rate": 9.442845761848493e-07, "loss": 0.0905, "step": 7238 }, { "epoch": 1.647098976109215, "grad_norm": 1.6214589044623604, "learning_rate": 9.442077605617236e-07, "loss": 0.1482, "step": 7239 }, { "epoch": 1.6473265073947667, "grad_norm": 2.5934304521178926, "learning_rate": 9.441309384145758e-07, "loss": 0.1356, "step": 7240 }, { "epoch": 1.6475540386803185, "grad_norm": 2.1821198275071145, "learning_rate": 9.440541097449759e-07, "loss": 0.1202, "step": 7241 }, { "epoch": 1.6477815699658702, "grad_norm": 1.3783319298593066, "learning_rate": 9.439772745544945e-07, "loss": 0.0809, "step": 7242 }, { "epoch": 1.648009101251422, "grad_norm": 1.9235495347033278, "learning_rate": 9.439004328447019e-07, "loss": 0.0793, "step": 7243 }, { "epoch": 1.6482366325369737, "grad_norm": 1.5695626343986404, "learning_rate": 9.438235846171684e-07, "loss": 0.073, "step": 7244 }, { "epoch": 1.6484641638225255, "grad_norm": 2.3576206439809275, "learning_rate": 9.437467298734646e-07, "loss": 0.1557, "step": 7245 }, { "epoch": 1.6486916951080772, "grad_norm": 1.4663932891903617, "learning_rate": 9.436698686151616e-07, "loss": 0.1639, "step": 7246 }, { "epoch": 1.648919226393629, "grad_norm": 1.2759564481247696, "learning_rate": 9.435930008438299e-07, "loss": 0.0874, "step": 7247 }, { "epoch": 1.6491467576791807, "grad_norm": 3.0126912789420293, "learning_rate": 9.435161265610407e-07, "loss": 0.1002, "step": 7248 }, { "epoch": 1.6493742889647327, "grad_norm": 3.268767822166679, "learning_rate": 9.434392457683653e-07, "loss": 0.0983, "step": 7249 }, { "epoch": 1.6496018202502845, "grad_norm": 1.6367805701152656, "learning_rate": 9.433623584673751e-07, "loss": 0.0473, "step": 7250 }, { "epoch": 1.6498293515358362, "grad_norm": 1.7890854849904998, "learning_rate": 9.432854646596412e-07, "loss": 0.0534, "step": 7251 }, { "epoch": 1.650056882821388, "grad_norm": 1.3726332380580277, "learning_rate": 9.432085643467352e-07, "loss": 0.1425, "step": 7252 }, { "epoch": 1.6502844141069397, "grad_norm": 1.9235963258967004, "learning_rate": 9.43131657530229e-07, "loss": 0.1119, "step": 7253 }, { "epoch": 1.6505119453924915, "grad_norm": 2.962111856390726, "learning_rate": 9.430547442116944e-07, "loss": 0.0984, "step": 7254 }, { "epoch": 1.6507394766780432, "grad_norm": 2.1556391413872777, "learning_rate": 9.429778243927031e-07, "loss": 0.1026, "step": 7255 }, { "epoch": 1.650967007963595, "grad_norm": 2.341832429516187, "learning_rate": 9.429008980748279e-07, "loss": 0.1612, "step": 7256 }, { "epoch": 1.6511945392491467, "grad_norm": 2.6524619584140723, "learning_rate": 9.428239652596402e-07, "loss": 0.1038, "step": 7257 }, { "epoch": 1.6514220705346985, "grad_norm": 1.8037046803671937, "learning_rate": 9.427470259487127e-07, "loss": 0.0648, "step": 7258 }, { "epoch": 1.6516496018202504, "grad_norm": 1.3920666963111505, "learning_rate": 9.426700801436181e-07, "loss": 0.0887, "step": 7259 }, { "epoch": 1.6518771331058022, "grad_norm": 2.6323345473885693, "learning_rate": 9.425931278459287e-07, "loss": 0.1154, "step": 7260 }, { "epoch": 1.652104664391354, "grad_norm": 1.235526720771683, "learning_rate": 9.425161690572174e-07, "loss": 0.0673, "step": 7261 }, { "epoch": 1.6523321956769057, "grad_norm": 1.2599138951236994, "learning_rate": 9.42439203779057e-07, "loss": 0.0922, "step": 7262 }, { "epoch": 1.6525597269624575, "grad_norm": 2.8618213081398993, "learning_rate": 9.42362232013021e-07, "loss": 0.1017, "step": 7263 }, { "epoch": 1.6527872582480092, "grad_norm": 1.351501595228745, "learning_rate": 9.422852537606819e-07, "loss": 0.0715, "step": 7264 }, { "epoch": 1.653014789533561, "grad_norm": 1.3482567491714648, "learning_rate": 9.422082690236134e-07, "loss": 0.0969, "step": 7265 }, { "epoch": 1.6532423208191127, "grad_norm": 1.6556273596729216, "learning_rate": 9.421312778033889e-07, "loss": 0.0983, "step": 7266 }, { "epoch": 1.6534698521046645, "grad_norm": 2.480720701978015, "learning_rate": 9.420542801015817e-07, "loss": 0.0726, "step": 7267 }, { "epoch": 1.6536973833902162, "grad_norm": 1.360633062993953, "learning_rate": 9.419772759197656e-07, "loss": 0.0536, "step": 7268 }, { "epoch": 1.653924914675768, "grad_norm": 2.1757990592385634, "learning_rate": 9.41900265259515e-07, "loss": 0.0717, "step": 7269 }, { "epoch": 1.6541524459613197, "grad_norm": 2.2391168303019064, "learning_rate": 9.418232481224029e-07, "loss": 0.0974, "step": 7270 }, { "epoch": 1.6543799772468715, "grad_norm": 1.371405310607541, "learning_rate": 9.417462245100038e-07, "loss": 0.1253, "step": 7271 }, { "epoch": 1.6546075085324232, "grad_norm": 2.1960817643284862, "learning_rate": 9.416691944238922e-07, "loss": 0.1268, "step": 7272 }, { "epoch": 1.654835039817975, "grad_norm": 1.6102464810078585, "learning_rate": 9.415921578656422e-07, "loss": 0.077, "step": 7273 }, { "epoch": 1.6550625711035267, "grad_norm": 1.7189966445140032, "learning_rate": 9.415151148368282e-07, "loss": 0.0674, "step": 7274 }, { "epoch": 1.6552901023890785, "grad_norm": 2.5040526861285923, "learning_rate": 9.41438065339025e-07, "loss": 0.0667, "step": 7275 }, { "epoch": 1.6555176336746302, "grad_norm": 1.5933575864388763, "learning_rate": 9.413610093738072e-07, "loss": 0.2265, "step": 7276 }, { "epoch": 1.655745164960182, "grad_norm": 2.086475507868105, "learning_rate": 9.412839469427498e-07, "loss": 0.0996, "step": 7277 }, { "epoch": 1.6559726962457337, "grad_norm": 1.5557715465268633, "learning_rate": 9.412068780474277e-07, "loss": 0.0458, "step": 7278 }, { "epoch": 1.6562002275312855, "grad_norm": 1.6837503079650977, "learning_rate": 9.411298026894161e-07, "loss": 0.1504, "step": 7279 }, { "epoch": 1.6564277588168372, "grad_norm": 2.6116115808263762, "learning_rate": 9.410527208702905e-07, "loss": 0.1527, "step": 7280 }, { "epoch": 1.656655290102389, "grad_norm": 2.393680145786846, "learning_rate": 9.409756325916259e-07, "loss": 0.0633, "step": 7281 }, { "epoch": 1.6568828213879407, "grad_norm": 1.5217677015722841, "learning_rate": 9.408985378549981e-07, "loss": 0.0723, "step": 7282 }, { "epoch": 1.6571103526734925, "grad_norm": 1.637542506575522, "learning_rate": 9.408214366619829e-07, "loss": 0.0583, "step": 7283 }, { "epoch": 1.6573378839590442, "grad_norm": 3.660853802611821, "learning_rate": 9.407443290141557e-07, "loss": 0.0881, "step": 7284 }, { "epoch": 1.657565415244596, "grad_norm": 1.9416812297062425, "learning_rate": 9.406672149130928e-07, "loss": 0.1396, "step": 7285 }, { "epoch": 1.6577929465301477, "grad_norm": 1.4345822033154445, "learning_rate": 9.405900943603702e-07, "loss": 0.053, "step": 7286 }, { "epoch": 1.6580204778156995, "grad_norm": 2.4200211765437682, "learning_rate": 9.405129673575639e-07, "loss": 0.1002, "step": 7287 }, { "epoch": 1.6582480091012515, "grad_norm": 1.3927335425180136, "learning_rate": 9.404358339062505e-07, "loss": 0.1223, "step": 7288 }, { "epoch": 1.6584755403868032, "grad_norm": 1.4509667046510046, "learning_rate": 9.403586940080063e-07, "loss": 0.0495, "step": 7289 }, { "epoch": 1.658703071672355, "grad_norm": 1.3336177413806338, "learning_rate": 9.40281547664408e-07, "loss": 0.078, "step": 7290 }, { "epoch": 1.6589306029579067, "grad_norm": 1.5111731661957328, "learning_rate": 9.402043948770321e-07, "loss": 0.1164, "step": 7291 }, { "epoch": 1.6591581342434585, "grad_norm": 1.614767504803358, "learning_rate": 9.401272356474557e-07, "loss": 0.0956, "step": 7292 }, { "epoch": 1.6593856655290102, "grad_norm": 1.9721878434789486, "learning_rate": 9.400500699772558e-07, "loss": 0.1389, "step": 7293 }, { "epoch": 1.659613196814562, "grad_norm": 2.342810678020302, "learning_rate": 9.399728978680094e-07, "loss": 0.0946, "step": 7294 }, { "epoch": 1.6598407281001137, "grad_norm": 2.2416455071278922, "learning_rate": 9.39895719321294e-07, "loss": 0.1539, "step": 7295 }, { "epoch": 1.6600682593856655, "grad_norm": 1.5789748727066666, "learning_rate": 9.398185343386866e-07, "loss": 0.0489, "step": 7296 }, { "epoch": 1.6602957906712172, "grad_norm": 1.6124613457863823, "learning_rate": 9.39741342921765e-07, "loss": 0.1324, "step": 7297 }, { "epoch": 1.6605233219567692, "grad_norm": 2.159172767256166, "learning_rate": 9.396641450721067e-07, "loss": 0.0725, "step": 7298 }, { "epoch": 1.660750853242321, "grad_norm": 2.003015363340268, "learning_rate": 9.395869407912897e-07, "loss": 0.1397, "step": 7299 }, { "epoch": 1.6609783845278727, "grad_norm": 1.3324354785637096, "learning_rate": 9.395097300808916e-07, "loss": 0.0889, "step": 7300 }, { "epoch": 1.6612059158134245, "grad_norm": 3.478146164044341, "learning_rate": 9.394325129424906e-07, "loss": 0.1451, "step": 7301 }, { "epoch": 1.6614334470989762, "grad_norm": 2.0254463245059036, "learning_rate": 9.39355289377665e-07, "loss": 0.1064, "step": 7302 }, { "epoch": 1.661660978384528, "grad_norm": 1.6595497039564182, "learning_rate": 9.392780593879932e-07, "loss": 0.0654, "step": 7303 }, { "epoch": 1.6618885096700797, "grad_norm": 2.7480531876747705, "learning_rate": 9.392008229750533e-07, "loss": 0.1202, "step": 7304 }, { "epoch": 1.6621160409556315, "grad_norm": 1.2471523479013982, "learning_rate": 9.391235801404236e-07, "loss": 0.0977, "step": 7305 }, { "epoch": 1.6623435722411832, "grad_norm": 2.0642567756103642, "learning_rate": 9.390463308856837e-07, "loss": 0.1131, "step": 7306 }, { "epoch": 1.662571103526735, "grad_norm": 1.2924016185822214, "learning_rate": 9.389690752124118e-07, "loss": 0.0654, "step": 7307 }, { "epoch": 1.6627986348122867, "grad_norm": 0.8691007635102995, "learning_rate": 9.388918131221869e-07, "loss": 0.0454, "step": 7308 }, { "epoch": 1.6630261660978385, "grad_norm": 1.2818332030580666, "learning_rate": 9.388145446165884e-07, "loss": 0.056, "step": 7309 }, { "epoch": 1.6632536973833902, "grad_norm": 2.6473266198798937, "learning_rate": 9.387372696971952e-07, "loss": 0.0676, "step": 7310 }, { "epoch": 1.663481228668942, "grad_norm": 1.721467410153477, "learning_rate": 9.386599883655869e-07, "loss": 0.0943, "step": 7311 }, { "epoch": 1.6637087599544937, "grad_norm": 1.6056959376364102, "learning_rate": 9.385827006233426e-07, "loss": 0.0786, "step": 7312 }, { "epoch": 1.6639362912400455, "grad_norm": 1.756717640480748, "learning_rate": 9.385054064720425e-07, "loss": 0.0915, "step": 7313 }, { "epoch": 1.6641638225255972, "grad_norm": 2.2466102544864404, "learning_rate": 9.38428105913266e-07, "loss": 0.0912, "step": 7314 }, { "epoch": 1.664391353811149, "grad_norm": 1.5791598649367002, "learning_rate": 9.38350798948593e-07, "loss": 0.1095, "step": 7315 }, { "epoch": 1.6646188850967008, "grad_norm": 1.3715623153388081, "learning_rate": 9.382734855796036e-07, "loss": 0.0893, "step": 7316 }, { "epoch": 1.6648464163822525, "grad_norm": 2.0142585633039354, "learning_rate": 9.381961658078778e-07, "loss": 0.1221, "step": 7317 }, { "epoch": 1.6650739476678043, "grad_norm": 1.5505453307871044, "learning_rate": 9.381188396349958e-07, "loss": 0.0655, "step": 7318 }, { "epoch": 1.665301478953356, "grad_norm": 2.0244620285601327, "learning_rate": 9.380415070625384e-07, "loss": 0.0513, "step": 7319 }, { "epoch": 1.6655290102389078, "grad_norm": 1.7938433474249436, "learning_rate": 9.379641680920859e-07, "loss": 0.1331, "step": 7320 }, { "epoch": 1.6657565415244595, "grad_norm": 1.3409299619748931, "learning_rate": 9.378868227252188e-07, "loss": 0.0918, "step": 7321 }, { "epoch": 1.6659840728100113, "grad_norm": 1.4700377964483387, "learning_rate": 9.378094709635183e-07, "loss": 0.0877, "step": 7322 }, { "epoch": 1.666211604095563, "grad_norm": 2.2568506457344193, "learning_rate": 9.377321128085651e-07, "loss": 0.0944, "step": 7323 }, { "epoch": 1.6664391353811148, "grad_norm": 0.9908159172763481, "learning_rate": 9.3765474826194e-07, "loss": 0.0691, "step": 7324 }, { "epoch": 1.6666666666666665, "grad_norm": 1.2584129227145304, "learning_rate": 9.375773773252248e-07, "loss": 0.0852, "step": 7325 }, { "epoch": 1.6668941979522183, "grad_norm": 2.003696311286243, "learning_rate": 9.375000000000001e-07, "loss": 0.0563, "step": 7326 }, { "epoch": 1.6671217292377702, "grad_norm": 3.3423358537743937, "learning_rate": 9.374226162878478e-07, "loss": 0.1533, "step": 7327 }, { "epoch": 1.667349260523322, "grad_norm": 1.9621823491467638, "learning_rate": 9.373452261903495e-07, "loss": 0.0976, "step": 7328 }, { "epoch": 1.6675767918088737, "grad_norm": 1.4020914530896604, "learning_rate": 9.372678297090867e-07, "loss": 0.0445, "step": 7329 }, { "epoch": 1.6678043230944255, "grad_norm": 2.0898847741309274, "learning_rate": 9.371904268456415e-07, "loss": 0.1187, "step": 7330 }, { "epoch": 1.6680318543799773, "grad_norm": 0.8770107550240693, "learning_rate": 9.371130176015958e-07, "loss": 0.0985, "step": 7331 }, { "epoch": 1.668259385665529, "grad_norm": 1.859585957876411, "learning_rate": 9.370356019785315e-07, "loss": 0.1819, "step": 7332 }, { "epoch": 1.6684869169510808, "grad_norm": 1.7703722436235858, "learning_rate": 9.369581799780308e-07, "loss": 0.0685, "step": 7333 }, { "epoch": 1.6687144482366325, "grad_norm": 1.4134381222208843, "learning_rate": 9.368807516016764e-07, "loss": 0.0582, "step": 7334 }, { "epoch": 1.6689419795221843, "grad_norm": 1.1064915462918847, "learning_rate": 9.368033168510506e-07, "loss": 0.051, "step": 7335 }, { "epoch": 1.669169510807736, "grad_norm": 1.2109521599260762, "learning_rate": 9.36725875727736e-07, "loss": 0.1537, "step": 7336 }, { "epoch": 1.669397042093288, "grad_norm": 2.3473344419643385, "learning_rate": 9.366484282333155e-07, "loss": 0.0998, "step": 7337 }, { "epoch": 1.6696245733788397, "grad_norm": 2.038359008920555, "learning_rate": 9.365709743693718e-07, "loss": 0.064, "step": 7338 }, { "epoch": 1.6698521046643915, "grad_norm": 2.3072660154667686, "learning_rate": 9.364935141374881e-07, "loss": 0.12, "step": 7339 }, { "epoch": 1.6700796359499432, "grad_norm": 1.2461166565952482, "learning_rate": 9.364160475392472e-07, "loss": 0.0792, "step": 7340 }, { "epoch": 1.670307167235495, "grad_norm": 1.682321979793571, "learning_rate": 9.363385745762329e-07, "loss": 0.0823, "step": 7341 }, { "epoch": 1.6705346985210467, "grad_norm": 1.9310846458137203, "learning_rate": 9.362610952500281e-07, "loss": 0.0765, "step": 7342 }, { "epoch": 1.6707622298065985, "grad_norm": 1.2871026073090324, "learning_rate": 9.361836095622167e-07, "loss": 0.095, "step": 7343 }, { "epoch": 1.6709897610921502, "grad_norm": 1.8987569542659957, "learning_rate": 9.361061175143823e-07, "loss": 0.1196, "step": 7344 }, { "epoch": 1.671217292377702, "grad_norm": 1.515884398351274, "learning_rate": 9.360286191081085e-07, "loss": 0.1374, "step": 7345 }, { "epoch": 1.6714448236632538, "grad_norm": 1.7492424501998036, "learning_rate": 9.359511143449794e-07, "loss": 0.0774, "step": 7346 }, { "epoch": 1.6716723549488055, "grad_norm": 0.9697929852387172, "learning_rate": 9.358736032265788e-07, "loss": 0.0666, "step": 7347 }, { "epoch": 1.6718998862343573, "grad_norm": 2.5780166734661876, "learning_rate": 9.357960857544912e-07, "loss": 0.1432, "step": 7348 }, { "epoch": 1.672127417519909, "grad_norm": 2.0838034914374783, "learning_rate": 9.357185619303009e-07, "loss": 0.0782, "step": 7349 }, { "epoch": 1.6723549488054608, "grad_norm": 1.3919641574753976, "learning_rate": 9.356410317555922e-07, "loss": 0.1299, "step": 7350 }, { "epoch": 1.6725824800910125, "grad_norm": 1.9809306658771628, "learning_rate": 9.355634952319498e-07, "loss": 0.0992, "step": 7351 }, { "epoch": 1.6728100113765643, "grad_norm": 2.3969878568860263, "learning_rate": 9.354859523609583e-07, "loss": 0.1136, "step": 7352 }, { "epoch": 1.673037542662116, "grad_norm": 1.1943562344041239, "learning_rate": 9.354084031442027e-07, "loss": 0.0872, "step": 7353 }, { "epoch": 1.6732650739476678, "grad_norm": 1.3707784823503284, "learning_rate": 9.353308475832676e-07, "loss": 0.0509, "step": 7354 }, { "epoch": 1.6734926052332195, "grad_norm": 1.239340089454578, "learning_rate": 9.352532856797382e-07, "loss": 0.0434, "step": 7355 }, { "epoch": 1.6737201365187713, "grad_norm": 2.0296765650090456, "learning_rate": 9.351757174352e-07, "loss": 0.0672, "step": 7356 }, { "epoch": 1.673947667804323, "grad_norm": 1.404142150592023, "learning_rate": 9.350981428512383e-07, "loss": 0.042, "step": 7357 }, { "epoch": 1.6741751990898748, "grad_norm": 1.7723519707770459, "learning_rate": 9.350205619294383e-07, "loss": 0.1413, "step": 7358 }, { "epoch": 1.6744027303754265, "grad_norm": 1.969222231883204, "learning_rate": 9.349429746713859e-07, "loss": 0.0994, "step": 7359 }, { "epoch": 1.6746302616609783, "grad_norm": 2.2660444367482264, "learning_rate": 9.348653810786667e-07, "loss": 0.1153, "step": 7360 }, { "epoch": 1.67485779294653, "grad_norm": 1.3311999975188635, "learning_rate": 9.347877811528666e-07, "loss": 0.1224, "step": 7361 }, { "epoch": 1.6750853242320818, "grad_norm": 1.9003716373130064, "learning_rate": 9.347101748955715e-07, "loss": 0.1709, "step": 7362 }, { "epoch": 1.6753128555176335, "grad_norm": 2.57799394374302, "learning_rate": 9.346325623083679e-07, "loss": 0.0849, "step": 7363 }, { "epoch": 1.6755403868031853, "grad_norm": 1.3887719065547302, "learning_rate": 9.345549433928416e-07, "loss": 0.1361, "step": 7364 }, { "epoch": 1.675767918088737, "grad_norm": 1.6924992160904613, "learning_rate": 9.344773181505793e-07, "loss": 0.1183, "step": 7365 }, { "epoch": 1.675995449374289, "grad_norm": 1.8383687373119448, "learning_rate": 9.343996865831673e-07, "loss": 0.1812, "step": 7366 }, { "epoch": 1.6762229806598408, "grad_norm": 1.8831689141554542, "learning_rate": 9.343220486921924e-07, "loss": 0.0607, "step": 7367 }, { "epoch": 1.6764505119453925, "grad_norm": 1.4982669170761194, "learning_rate": 9.342444044792412e-07, "loss": 0.0613, "step": 7368 }, { "epoch": 1.6766780432309443, "grad_norm": 1.3553553248957957, "learning_rate": 9.341667539459006e-07, "loss": 0.0816, "step": 7369 }, { "epoch": 1.676905574516496, "grad_norm": 2.062071006752431, "learning_rate": 9.340890970937583e-07, "loss": 0.0679, "step": 7370 }, { "epoch": 1.6771331058020478, "grad_norm": 1.6215383369901835, "learning_rate": 9.340114339244006e-07, "loss": 0.0387, "step": 7371 }, { "epoch": 1.6773606370875995, "grad_norm": 1.8044914050783736, "learning_rate": 9.33933764439415e-07, "loss": 0.0636, "step": 7372 }, { "epoch": 1.6775881683731513, "grad_norm": 1.1985423875980186, "learning_rate": 9.338560886403891e-07, "loss": 0.1181, "step": 7373 }, { "epoch": 1.677815699658703, "grad_norm": 1.8611780061417367, "learning_rate": 9.337784065289104e-07, "loss": 0.1201, "step": 7374 }, { "epoch": 1.6780432309442548, "grad_norm": 2.48197193723222, "learning_rate": 9.337007181065667e-07, "loss": 0.214, "step": 7375 }, { "epoch": 1.6782707622298068, "grad_norm": 2.226764853413825, "learning_rate": 9.336230233749456e-07, "loss": 0.0789, "step": 7376 }, { "epoch": 1.6784982935153585, "grad_norm": 1.878909636526424, "learning_rate": 9.335453223356351e-07, "loss": 0.0894, "step": 7377 }, { "epoch": 1.6787258248009103, "grad_norm": 1.7374008228680462, "learning_rate": 9.334676149902233e-07, "loss": 0.1085, "step": 7378 }, { "epoch": 1.678953356086462, "grad_norm": 1.3233416996257326, "learning_rate": 9.333899013402984e-07, "loss": 0.0782, "step": 7379 }, { "epoch": 1.6791808873720138, "grad_norm": 2.4254202349146428, "learning_rate": 9.333121813874486e-07, "loss": 0.0991, "step": 7380 }, { "epoch": 1.6794084186575655, "grad_norm": 1.8306572114812147, "learning_rate": 9.332344551332626e-07, "loss": 0.1109, "step": 7381 }, { "epoch": 1.6796359499431173, "grad_norm": 1.6849794506796347, "learning_rate": 9.331567225793288e-07, "loss": 0.0925, "step": 7382 }, { "epoch": 1.679863481228669, "grad_norm": 1.789407849888052, "learning_rate": 9.330789837272359e-07, "loss": 0.0914, "step": 7383 }, { "epoch": 1.6800910125142208, "grad_norm": 1.9047852958348788, "learning_rate": 9.330012385785729e-07, "loss": 0.1122, "step": 7384 }, { "epoch": 1.6803185437997725, "grad_norm": 1.4475225500771003, "learning_rate": 9.329234871349285e-07, "loss": 0.0829, "step": 7385 }, { "epoch": 1.6805460750853243, "grad_norm": 2.9593046800656366, "learning_rate": 9.328457293978921e-07, "loss": 0.0867, "step": 7386 }, { "epoch": 1.680773606370876, "grad_norm": 1.496333388049632, "learning_rate": 9.327679653690527e-07, "loss": 0.0758, "step": 7387 }, { "epoch": 1.6810011376564278, "grad_norm": 1.9637310115501778, "learning_rate": 9.326901950499997e-07, "loss": 0.148, "step": 7388 }, { "epoch": 1.6812286689419795, "grad_norm": 1.7570532890404065, "learning_rate": 9.326124184423228e-07, "loss": 0.0996, "step": 7389 }, { "epoch": 1.6814562002275313, "grad_norm": 1.562562274864593, "learning_rate": 9.325346355476114e-07, "loss": 0.0752, "step": 7390 }, { "epoch": 1.681683731513083, "grad_norm": 1.146072588137425, "learning_rate": 9.324568463674552e-07, "loss": 0.0484, "step": 7391 }, { "epoch": 1.6819112627986348, "grad_norm": 2.2430769759037266, "learning_rate": 9.323790509034441e-07, "loss": 0.0917, "step": 7392 }, { "epoch": 1.6821387940841865, "grad_norm": 2.3561795405892627, "learning_rate": 9.323012491571682e-07, "loss": 0.1339, "step": 7393 }, { "epoch": 1.6823663253697383, "grad_norm": 1.8302136240158402, "learning_rate": 9.322234411302175e-07, "loss": 0.0976, "step": 7394 }, { "epoch": 1.68259385665529, "grad_norm": 1.6372820312897203, "learning_rate": 9.321456268241824e-07, "loss": 0.0698, "step": 7395 }, { "epoch": 1.6828213879408418, "grad_norm": 1.9643540747530492, "learning_rate": 9.320678062406531e-07, "loss": 0.066, "step": 7396 }, { "epoch": 1.6830489192263935, "grad_norm": 1.838968404769983, "learning_rate": 9.319899793812204e-07, "loss": 0.0824, "step": 7397 }, { "epoch": 1.6832764505119453, "grad_norm": 2.5218651261530125, "learning_rate": 9.319121462474745e-07, "loss": 0.1007, "step": 7398 }, { "epoch": 1.683503981797497, "grad_norm": 1.2388968231459465, "learning_rate": 9.318343068410065e-07, "loss": 0.1034, "step": 7399 }, { "epoch": 1.6837315130830488, "grad_norm": 1.336541589569829, "learning_rate": 9.317564611634072e-07, "loss": 0.0576, "step": 7400 }, { "epoch": 1.6839590443686006, "grad_norm": 2.107630718393057, "learning_rate": 9.316786092162678e-07, "loss": 0.0993, "step": 7401 }, { "epoch": 1.6841865756541523, "grad_norm": 1.766479652983476, "learning_rate": 9.31600751001179e-07, "loss": 0.2175, "step": 7402 }, { "epoch": 1.684414106939704, "grad_norm": 2.4056863315167263, "learning_rate": 9.315228865197326e-07, "loss": 0.1928, "step": 7403 }, { "epoch": 1.6846416382252558, "grad_norm": 3.2428738679985614, "learning_rate": 9.314450157735197e-07, "loss": 0.1178, "step": 7404 }, { "epoch": 1.6848691695108078, "grad_norm": 1.678043329452889, "learning_rate": 9.313671387641318e-07, "loss": 0.142, "step": 7405 }, { "epoch": 1.6850967007963595, "grad_norm": 2.58564835703974, "learning_rate": 9.312892554931608e-07, "loss": 0.1441, "step": 7406 }, { "epoch": 1.6853242320819113, "grad_norm": 1.355706143918413, "learning_rate": 9.312113659621984e-07, "loss": 0.156, "step": 7407 }, { "epoch": 1.685551763367463, "grad_norm": 1.9664685627593343, "learning_rate": 9.311334701728364e-07, "loss": 0.0824, "step": 7408 }, { "epoch": 1.6857792946530148, "grad_norm": 1.3219939112430976, "learning_rate": 9.310555681266668e-07, "loss": 0.0582, "step": 7409 }, { "epoch": 1.6860068259385665, "grad_norm": 2.142310689486401, "learning_rate": 9.30977659825282e-07, "loss": 0.0998, "step": 7410 }, { "epoch": 1.6862343572241183, "grad_norm": 1.3473183389737833, "learning_rate": 9.308997452702743e-07, "loss": 0.069, "step": 7411 }, { "epoch": 1.68646188850967, "grad_norm": 1.3004701099778404, "learning_rate": 9.308218244632358e-07, "loss": 0.0812, "step": 7412 }, { "epoch": 1.6866894197952218, "grad_norm": 1.8468339197188928, "learning_rate": 9.307438974057595e-07, "loss": 0.0723, "step": 7413 }, { "epoch": 1.6869169510807736, "grad_norm": 1.5261190981362804, "learning_rate": 9.306659640994381e-07, "loss": 0.0696, "step": 7414 }, { "epoch": 1.6871444823663255, "grad_norm": 1.1719461672581963, "learning_rate": 9.305880245458637e-07, "loss": 0.1161, "step": 7415 }, { "epoch": 1.6873720136518773, "grad_norm": 1.7890461179460164, "learning_rate": 9.305100787466301e-07, "loss": 0.1343, "step": 7416 }, { "epoch": 1.687599544937429, "grad_norm": 3.307144150549738, "learning_rate": 9.304321267033298e-07, "loss": 0.1053, "step": 7417 }, { "epoch": 1.6878270762229808, "grad_norm": 1.4387304873939588, "learning_rate": 9.303541684175563e-07, "loss": 0.0766, "step": 7418 }, { "epoch": 1.6880546075085325, "grad_norm": 1.5682681746734135, "learning_rate": 9.302762038909028e-07, "loss": 0.1224, "step": 7419 }, { "epoch": 1.6882821387940843, "grad_norm": 1.7379756393731887, "learning_rate": 9.301982331249629e-07, "loss": 0.0902, "step": 7420 }, { "epoch": 1.688509670079636, "grad_norm": 1.788536899485545, "learning_rate": 9.301202561213298e-07, "loss": 0.0872, "step": 7421 }, { "epoch": 1.6887372013651878, "grad_norm": 1.4193679489333435, "learning_rate": 9.300422728815976e-07, "loss": 0.0646, "step": 7422 }, { "epoch": 1.6889647326507395, "grad_norm": 1.4684247824398127, "learning_rate": 9.2996428340736e-07, "loss": 0.0653, "step": 7423 }, { "epoch": 1.6891922639362913, "grad_norm": 1.1675186706745337, "learning_rate": 9.29886287700211e-07, "loss": 0.0772, "step": 7424 }, { "epoch": 1.689419795221843, "grad_norm": 1.2408519690279203, "learning_rate": 9.298082857617446e-07, "loss": 0.0949, "step": 7425 }, { "epoch": 1.6896473265073948, "grad_norm": 1.6086428034451206, "learning_rate": 9.297302775935552e-07, "loss": 0.0828, "step": 7426 }, { "epoch": 1.6898748577929465, "grad_norm": 1.879258209189917, "learning_rate": 9.296522631972368e-07, "loss": 0.1485, "step": 7427 }, { "epoch": 1.6901023890784983, "grad_norm": 1.1973586410272825, "learning_rate": 9.295742425743842e-07, "loss": 0.0956, "step": 7428 }, { "epoch": 1.69032992036405, "grad_norm": 1.4457121713578762, "learning_rate": 9.294962157265918e-07, "loss": 0.0554, "step": 7429 }, { "epoch": 1.6905574516496018, "grad_norm": 1.4104396381813507, "learning_rate": 9.294181826554547e-07, "loss": 0.0562, "step": 7430 }, { "epoch": 1.6907849829351536, "grad_norm": 0.9262969737219219, "learning_rate": 9.29340143362567e-07, "loss": 0.0855, "step": 7431 }, { "epoch": 1.6910125142207053, "grad_norm": 1.453364971460863, "learning_rate": 9.292620978495246e-07, "loss": 0.0564, "step": 7432 }, { "epoch": 1.691240045506257, "grad_norm": 1.551913385880135, "learning_rate": 9.291840461179219e-07, "loss": 0.1061, "step": 7433 }, { "epoch": 1.6914675767918088, "grad_norm": 1.5546023867739276, "learning_rate": 9.291059881693544e-07, "loss": 0.0446, "step": 7434 }, { "epoch": 1.6916951080773606, "grad_norm": 3.377966899747525, "learning_rate": 9.290279240054176e-07, "loss": 0.1998, "step": 7435 }, { "epoch": 1.6919226393629123, "grad_norm": 1.7385496523204151, "learning_rate": 9.289498536277066e-07, "loss": 0.1234, "step": 7436 }, { "epoch": 1.692150170648464, "grad_norm": 2.114012284187751, "learning_rate": 9.288717770378172e-07, "loss": 0.0982, "step": 7437 }, { "epoch": 1.6923777019340158, "grad_norm": 1.351731477417337, "learning_rate": 9.287936942373454e-07, "loss": 0.0787, "step": 7438 }, { "epoch": 1.6926052332195676, "grad_norm": 1.8739219968172143, "learning_rate": 9.287156052278869e-07, "loss": 0.0679, "step": 7439 }, { "epoch": 1.6928327645051193, "grad_norm": 1.5864485237863282, "learning_rate": 9.286375100110376e-07, "loss": 0.0902, "step": 7440 }, { "epoch": 1.693060295790671, "grad_norm": 1.0252685039976748, "learning_rate": 9.285594085883937e-07, "loss": 0.0306, "step": 7441 }, { "epoch": 1.6932878270762228, "grad_norm": 1.921676451564313, "learning_rate": 9.284813009615512e-07, "loss": 0.1212, "step": 7442 }, { "epoch": 1.6935153583617746, "grad_norm": 1.2815356918274832, "learning_rate": 9.28403187132107e-07, "loss": 0.1666, "step": 7443 }, { "epoch": 1.6937428896473266, "grad_norm": 1.5595077639365074, "learning_rate": 9.28325067101657e-07, "loss": 0.1326, "step": 7444 }, { "epoch": 1.6939704209328783, "grad_norm": 1.0162405891241884, "learning_rate": 9.282469408717984e-07, "loss": 0.0482, "step": 7445 }, { "epoch": 1.69419795221843, "grad_norm": 1.3773979926189792, "learning_rate": 9.281688084441277e-07, "loss": 0.1231, "step": 7446 }, { "epoch": 1.6944254835039818, "grad_norm": 1.104581808250732, "learning_rate": 9.280906698202417e-07, "loss": 0.1338, "step": 7447 }, { "epoch": 1.6946530147895336, "grad_norm": 2.2721944255174846, "learning_rate": 9.280125250017375e-07, "loss": 0.1168, "step": 7448 }, { "epoch": 1.6948805460750853, "grad_norm": 1.942154363545402, "learning_rate": 9.279343739902122e-07, "loss": 0.0765, "step": 7449 }, { "epoch": 1.695108077360637, "grad_norm": 2.4606208794929048, "learning_rate": 9.278562167872632e-07, "loss": 0.0913, "step": 7450 }, { "epoch": 1.6953356086461888, "grad_norm": 2.3818147882797263, "learning_rate": 9.277780533944879e-07, "loss": 0.0863, "step": 7451 }, { "epoch": 1.6955631399317406, "grad_norm": 1.3677925854977118, "learning_rate": 9.276998838134834e-07, "loss": 0.0586, "step": 7452 }, { "epoch": 1.6957906712172923, "grad_norm": 1.8230493817169902, "learning_rate": 9.276217080458478e-07, "loss": 0.0808, "step": 7453 }, { "epoch": 1.6960182025028443, "grad_norm": 1.8503778421300845, "learning_rate": 9.275435260931786e-07, "loss": 0.0816, "step": 7454 }, { "epoch": 1.696245733788396, "grad_norm": 2.000832926112369, "learning_rate": 9.274653379570739e-07, "loss": 0.169, "step": 7455 }, { "epoch": 1.6964732650739478, "grad_norm": 1.0146915198809943, "learning_rate": 9.273871436391315e-07, "loss": 0.0714, "step": 7456 }, { "epoch": 1.6967007963594996, "grad_norm": 1.2304016971464222, "learning_rate": 9.273089431409499e-07, "loss": 0.0421, "step": 7457 }, { "epoch": 1.6969283276450513, "grad_norm": 2.047942404694497, "learning_rate": 9.272307364641271e-07, "loss": 0.0755, "step": 7458 }, { "epoch": 1.697155858930603, "grad_norm": 2.6383743586012156, "learning_rate": 9.271525236102614e-07, "loss": 0.107, "step": 7459 }, { "epoch": 1.6973833902161548, "grad_norm": 2.7056667767669076, "learning_rate": 9.270743045809516e-07, "loss": 0.0826, "step": 7460 }, { "epoch": 1.6976109215017066, "grad_norm": 1.7563129007136633, "learning_rate": 9.269960793777963e-07, "loss": 0.0915, "step": 7461 }, { "epoch": 1.6978384527872583, "grad_norm": 1.956944676427302, "learning_rate": 9.26917848002394e-07, "loss": 0.0903, "step": 7462 }, { "epoch": 1.69806598407281, "grad_norm": 2.4161778851437843, "learning_rate": 9.26839610456344e-07, "loss": 0.0969, "step": 7463 }, { "epoch": 1.6982935153583618, "grad_norm": 2.068452668994588, "learning_rate": 9.267613667412453e-07, "loss": 0.0607, "step": 7464 }, { "epoch": 1.6985210466439136, "grad_norm": 2.0151967606843457, "learning_rate": 9.266831168586968e-07, "loss": 0.0696, "step": 7465 }, { "epoch": 1.6987485779294653, "grad_norm": 1.1856067501750585, "learning_rate": 9.266048608102978e-07, "loss": 0.0574, "step": 7466 }, { "epoch": 1.698976109215017, "grad_norm": 2.688891725537148, "learning_rate": 9.265265985976478e-07, "loss": 0.0888, "step": 7467 }, { "epoch": 1.6992036405005688, "grad_norm": 1.4125321029456797, "learning_rate": 9.264483302223464e-07, "loss": 0.1077, "step": 7468 }, { "epoch": 1.6994311717861206, "grad_norm": 1.9819253790117193, "learning_rate": 9.263700556859931e-07, "loss": 0.1409, "step": 7469 }, { "epoch": 1.6996587030716723, "grad_norm": 1.4363926897897454, "learning_rate": 9.26291774990188e-07, "loss": 0.0711, "step": 7470 }, { "epoch": 1.699886234357224, "grad_norm": 0.8563946587398912, "learning_rate": 9.262134881365307e-07, "loss": 0.0416, "step": 7471 }, { "epoch": 1.7001137656427758, "grad_norm": 2.4762136004144315, "learning_rate": 9.261351951266211e-07, "loss": 0.0967, "step": 7472 }, { "epoch": 1.7003412969283276, "grad_norm": 2.4079001396424813, "learning_rate": 9.2605689596206e-07, "loss": 0.1825, "step": 7473 }, { "epoch": 1.7005688282138793, "grad_norm": 1.8068877491506772, "learning_rate": 9.259785906444473e-07, "loss": 0.197, "step": 7474 }, { "epoch": 1.700796359499431, "grad_norm": 1.5866555515774667, "learning_rate": 9.259002791753832e-07, "loss": 0.087, "step": 7475 }, { "epoch": 1.7010238907849828, "grad_norm": 1.6481904048653275, "learning_rate": 9.258219615564684e-07, "loss": 0.1001, "step": 7476 }, { "epoch": 1.7012514220705346, "grad_norm": 1.2942037721759625, "learning_rate": 9.25743637789304e-07, "loss": 0.0349, "step": 7477 }, { "epoch": 1.7014789533560863, "grad_norm": 1.4716549549075366, "learning_rate": 9.256653078754903e-07, "loss": 0.0827, "step": 7478 }, { "epoch": 1.701706484641638, "grad_norm": 2.6593429187871367, "learning_rate": 9.255869718166281e-07, "loss": 0.2166, "step": 7479 }, { "epoch": 1.7019340159271898, "grad_norm": 1.9648955811870432, "learning_rate": 9.255086296143189e-07, "loss": 0.1262, "step": 7480 }, { "epoch": 1.7021615472127416, "grad_norm": 1.7478426250374688, "learning_rate": 9.254302812701636e-07, "loss": 0.0644, "step": 7481 }, { "epoch": 1.7023890784982934, "grad_norm": 2.987380058350133, "learning_rate": 9.253519267857637e-07, "loss": 0.1216, "step": 7482 }, { "epoch": 1.7026166097838453, "grad_norm": 2.3484831882762935, "learning_rate": 9.252735661627204e-07, "loss": 0.0812, "step": 7483 }, { "epoch": 1.702844141069397, "grad_norm": 2.1512910669945664, "learning_rate": 9.251951994026353e-07, "loss": 0.1304, "step": 7484 }, { "epoch": 1.7030716723549488, "grad_norm": 1.528584307846986, "learning_rate": 9.251168265071101e-07, "loss": 0.1446, "step": 7485 }, { "epoch": 1.7032992036405006, "grad_norm": 1.523165761907841, "learning_rate": 9.250384474777465e-07, "loss": 0.1226, "step": 7486 }, { "epoch": 1.7035267349260523, "grad_norm": 1.0735981813202926, "learning_rate": 9.249600623161467e-07, "loss": 0.0632, "step": 7487 }, { "epoch": 1.703754266211604, "grad_norm": 1.7693977564744923, "learning_rate": 9.248816710239125e-07, "loss": 0.1213, "step": 7488 }, { "epoch": 1.7039817974971558, "grad_norm": 1.5156085599699003, "learning_rate": 9.248032736026463e-07, "loss": 0.0664, "step": 7489 }, { "epoch": 1.7042093287827076, "grad_norm": 1.8444395238269065, "learning_rate": 9.247248700539502e-07, "loss": 0.0733, "step": 7490 }, { "epoch": 1.7044368600682593, "grad_norm": 2.282489530965188, "learning_rate": 9.246464603794266e-07, "loss": 0.1731, "step": 7491 }, { "epoch": 1.7046643913538113, "grad_norm": 1.1135323424692893, "learning_rate": 9.245680445806782e-07, "loss": 0.0622, "step": 7492 }, { "epoch": 1.704891922639363, "grad_norm": 1.5015562370177542, "learning_rate": 9.244896226593074e-07, "loss": 0.0755, "step": 7493 }, { "epoch": 1.7051194539249148, "grad_norm": 2.380047859800059, "learning_rate": 9.244111946169173e-07, "loss": 0.1285, "step": 7494 }, { "epoch": 1.7053469852104666, "grad_norm": 1.4197630339019536, "learning_rate": 9.243327604551109e-07, "loss": 0.0716, "step": 7495 }, { "epoch": 1.7055745164960183, "grad_norm": 1.8467214333007922, "learning_rate": 9.242543201754908e-07, "loss": 0.1151, "step": 7496 }, { "epoch": 1.70580204778157, "grad_norm": 2.3732110067695302, "learning_rate": 9.241758737796608e-07, "loss": 0.0923, "step": 7497 }, { "epoch": 1.7060295790671218, "grad_norm": 1.7194839267259672, "learning_rate": 9.240974212692235e-07, "loss": 0.0953, "step": 7498 }, { "epoch": 1.7062571103526736, "grad_norm": 0.8677031615537418, "learning_rate": 9.240189626457828e-07, "loss": 0.0689, "step": 7499 }, { "epoch": 1.7064846416382253, "grad_norm": 1.7933546109365042, "learning_rate": 9.239404979109422e-07, "loss": 0.0692, "step": 7500 }, { "epoch": 1.706712172923777, "grad_norm": 1.5534447964205367, "learning_rate": 9.238620270663053e-07, "loss": 0.0825, "step": 7501 }, { "epoch": 1.7069397042093288, "grad_norm": 1.981780715242399, "learning_rate": 9.237835501134759e-07, "loss": 0.1216, "step": 7502 }, { "epoch": 1.7071672354948806, "grad_norm": 1.95930269394389, "learning_rate": 9.237050670540579e-07, "loss": 0.0715, "step": 7503 }, { "epoch": 1.7073947667804323, "grad_norm": 1.7374402364968724, "learning_rate": 9.236265778896554e-07, "loss": 0.0453, "step": 7504 }, { "epoch": 1.707622298065984, "grad_norm": 1.7191098911921983, "learning_rate": 9.235480826218726e-07, "loss": 0.117, "step": 7505 }, { "epoch": 1.7078498293515358, "grad_norm": 1.3374036497567463, "learning_rate": 9.234695812523137e-07, "loss": 0.0876, "step": 7506 }, { "epoch": 1.7080773606370876, "grad_norm": 1.700497077975526, "learning_rate": 9.233910737825831e-07, "loss": 0.1571, "step": 7507 }, { "epoch": 1.7083048919226393, "grad_norm": 2.4095573392593144, "learning_rate": 9.233125602142856e-07, "loss": 0.0849, "step": 7508 }, { "epoch": 1.708532423208191, "grad_norm": 1.8893150630704287, "learning_rate": 9.232340405490256e-07, "loss": 0.0968, "step": 7509 }, { "epoch": 1.7087599544937428, "grad_norm": 1.8421406492720631, "learning_rate": 9.23155514788408e-07, "loss": 0.0758, "step": 7510 }, { "epoch": 1.7089874857792946, "grad_norm": 2.521534449539196, "learning_rate": 9.230769829340378e-07, "loss": 0.1002, "step": 7511 }, { "epoch": 1.7092150170648464, "grad_norm": 1.896914834149343, "learning_rate": 9.229984449875199e-07, "loss": 0.095, "step": 7512 }, { "epoch": 1.709442548350398, "grad_norm": 1.7323554177641725, "learning_rate": 9.229199009504594e-07, "loss": 0.0693, "step": 7513 }, { "epoch": 1.7096700796359499, "grad_norm": 1.5400684620058087, "learning_rate": 9.228413508244621e-07, "loss": 0.0503, "step": 7514 }, { "epoch": 1.7098976109215016, "grad_norm": 1.4653262523601092, "learning_rate": 9.227627946111328e-07, "loss": 0.1178, "step": 7515 }, { "epoch": 1.7101251422070534, "grad_norm": 2.5594902657863297, "learning_rate": 9.226842323120773e-07, "loss": 0.0907, "step": 7516 }, { "epoch": 1.7103526734926051, "grad_norm": 3.080947181053405, "learning_rate": 9.226056639289013e-07, "loss": 0.1352, "step": 7517 }, { "epoch": 1.7105802047781569, "grad_norm": 1.8768778604470615, "learning_rate": 9.225270894632107e-07, "loss": 0.0519, "step": 7518 }, { "epoch": 1.7108077360637086, "grad_norm": 1.2966109435086275, "learning_rate": 9.224485089166111e-07, "loss": 0.0787, "step": 7519 }, { "epoch": 1.7110352673492604, "grad_norm": 1.4658236968406182, "learning_rate": 9.223699222907088e-07, "loss": 0.0926, "step": 7520 }, { "epoch": 1.7112627986348121, "grad_norm": 2.9174819569607338, "learning_rate": 9.222913295871101e-07, "loss": 0.0919, "step": 7521 }, { "epoch": 1.711490329920364, "grad_norm": 1.2475180743268615, "learning_rate": 9.22212730807421e-07, "loss": 0.1199, "step": 7522 }, { "epoch": 1.7117178612059158, "grad_norm": 1.9562927246156157, "learning_rate": 9.221341259532476e-07, "loss": 0.1351, "step": 7523 }, { "epoch": 1.7119453924914676, "grad_norm": 1.34154787076131, "learning_rate": 9.220555150261973e-07, "loss": 0.0829, "step": 7524 }, { "epoch": 1.7121729237770194, "grad_norm": 2.239735065898415, "learning_rate": 9.219768980278762e-07, "loss": 0.152, "step": 7525 }, { "epoch": 1.712400455062571, "grad_norm": 1.5791759662482927, "learning_rate": 9.218982749598911e-07, "loss": 0.1274, "step": 7526 }, { "epoch": 1.7126279863481229, "grad_norm": 1.413781534828615, "learning_rate": 9.21819645823849e-07, "loss": 0.0514, "step": 7527 }, { "epoch": 1.7128555176336746, "grad_norm": 1.702854036720936, "learning_rate": 9.21741010621357e-07, "loss": 0.0738, "step": 7528 }, { "epoch": 1.7130830489192264, "grad_norm": 2.74163381590818, "learning_rate": 9.216623693540222e-07, "loss": 0.1665, "step": 7529 }, { "epoch": 1.713310580204778, "grad_norm": 0.9798309074213785, "learning_rate": 9.215837220234518e-07, "loss": 0.0385, "step": 7530 }, { "epoch": 1.71353811149033, "grad_norm": 3.196701294240453, "learning_rate": 9.215050686312534e-07, "loss": 0.1205, "step": 7531 }, { "epoch": 1.7137656427758818, "grad_norm": 1.7687344142577266, "learning_rate": 9.214264091790343e-07, "loss": 0.0513, "step": 7532 }, { "epoch": 1.7139931740614336, "grad_norm": 2.5907793564802146, "learning_rate": 9.213477436684025e-07, "loss": 0.1354, "step": 7533 }, { "epoch": 1.7142207053469853, "grad_norm": 1.6937574383193232, "learning_rate": 9.212690721009654e-07, "loss": 0.1259, "step": 7534 }, { "epoch": 1.714448236632537, "grad_norm": 1.3495979297592327, "learning_rate": 9.21190394478331e-07, "loss": 0.128, "step": 7535 }, { "epoch": 1.7146757679180888, "grad_norm": 2.476516605961478, "learning_rate": 9.211117108021074e-07, "loss": 0.1085, "step": 7536 }, { "epoch": 1.7149032992036406, "grad_norm": 1.9707282922873293, "learning_rate": 9.210330210739029e-07, "loss": 0.0851, "step": 7537 }, { "epoch": 1.7151308304891923, "grad_norm": 1.4845383213085293, "learning_rate": 9.209543252953254e-07, "loss": 0.0607, "step": 7538 }, { "epoch": 1.715358361774744, "grad_norm": 2.0048158522616553, "learning_rate": 9.208756234679836e-07, "loss": 0.0919, "step": 7539 }, { "epoch": 1.7155858930602959, "grad_norm": 1.4218673743727959, "learning_rate": 9.20796915593486e-07, "loss": 0.1921, "step": 7540 }, { "epoch": 1.7158134243458476, "grad_norm": 1.3584412932163983, "learning_rate": 9.20718201673441e-07, "loss": 0.1622, "step": 7541 }, { "epoch": 1.7160409556313994, "grad_norm": 1.0608020578801372, "learning_rate": 9.206394817094577e-07, "loss": 0.0509, "step": 7542 }, { "epoch": 1.716268486916951, "grad_norm": 1.6070514150158068, "learning_rate": 9.205607557031446e-07, "loss": 0.0854, "step": 7543 }, { "epoch": 1.7164960182025029, "grad_norm": 1.254528380272989, "learning_rate": 9.204820236561111e-07, "loss": 0.0653, "step": 7544 }, { "epoch": 1.7167235494880546, "grad_norm": 1.5303193243898203, "learning_rate": 9.204032855699663e-07, "loss": 0.0626, "step": 7545 }, { "epoch": 1.7169510807736064, "grad_norm": 2.479100939226992, "learning_rate": 9.203245414463192e-07, "loss": 0.092, "step": 7546 }, { "epoch": 1.7171786120591581, "grad_norm": 1.9985993200466214, "learning_rate": 9.202457912867795e-07, "loss": 0.0841, "step": 7547 }, { "epoch": 1.7174061433447099, "grad_norm": 1.846036514822781, "learning_rate": 9.201670350929564e-07, "loss": 0.1315, "step": 7548 }, { "epoch": 1.7176336746302616, "grad_norm": 2.0790408771999718, "learning_rate": 9.200882728664598e-07, "loss": 0.0576, "step": 7549 }, { "epoch": 1.7178612059158134, "grad_norm": 1.7949883449506443, "learning_rate": 9.20009504608899e-07, "loss": 0.0988, "step": 7550 }, { "epoch": 1.7180887372013651, "grad_norm": 1.5998800056946103, "learning_rate": 9.199307303218844e-07, "loss": 0.0432, "step": 7551 }, { "epoch": 1.7183162684869169, "grad_norm": 1.7524050987728221, "learning_rate": 9.198519500070261e-07, "loss": 0.0697, "step": 7552 }, { "epoch": 1.7185437997724686, "grad_norm": 1.2703856925744228, "learning_rate": 9.197731636659335e-07, "loss": 0.0854, "step": 7553 }, { "epoch": 1.7187713310580204, "grad_norm": 1.8783737724172556, "learning_rate": 9.196943713002177e-07, "loss": 0.1068, "step": 7554 }, { "epoch": 1.7189988623435721, "grad_norm": 1.0936436524766402, "learning_rate": 9.196155729114883e-07, "loss": 0.0411, "step": 7555 }, { "epoch": 1.7192263936291239, "grad_norm": 2.0836304634461746, "learning_rate": 9.195367685013564e-07, "loss": 0.0702, "step": 7556 }, { "epoch": 1.7194539249146756, "grad_norm": 1.6019233848518821, "learning_rate": 9.19457958071432e-07, "loss": 0.1408, "step": 7557 }, { "epoch": 1.7196814562002274, "grad_norm": 1.4860268396568896, "learning_rate": 9.193791416233266e-07, "loss": 0.0727, "step": 7558 }, { "epoch": 1.7199089874857791, "grad_norm": 1.5916056892498054, "learning_rate": 9.193003191586507e-07, "loss": 0.124, "step": 7559 }, { "epoch": 1.7201365187713311, "grad_norm": 1.489766796980958, "learning_rate": 9.192214906790149e-07, "loss": 0.0671, "step": 7560 }, { "epoch": 1.7203640500568829, "grad_norm": 2.0567448731874647, "learning_rate": 9.191426561860308e-07, "loss": 0.1081, "step": 7561 }, { "epoch": 1.7205915813424346, "grad_norm": 1.281462849688222, "learning_rate": 9.190638156813097e-07, "loss": 0.0748, "step": 7562 }, { "epoch": 1.7208191126279864, "grad_norm": 2.0271807019330947, "learning_rate": 9.189849691664626e-07, "loss": 0.0908, "step": 7563 }, { "epoch": 1.7210466439135381, "grad_norm": 1.500561474240595, "learning_rate": 9.189061166431012e-07, "loss": 0.1661, "step": 7564 }, { "epoch": 1.7212741751990899, "grad_norm": 1.2723138560334935, "learning_rate": 9.188272581128372e-07, "loss": 0.0645, "step": 7565 }, { "epoch": 1.7215017064846416, "grad_norm": 1.9257600489647957, "learning_rate": 9.187483935772818e-07, "loss": 0.1276, "step": 7566 }, { "epoch": 1.7217292377701934, "grad_norm": 2.0566117894728513, "learning_rate": 9.186695230380474e-07, "loss": 0.0831, "step": 7567 }, { "epoch": 1.7219567690557451, "grad_norm": 1.5340647500171707, "learning_rate": 9.185906464967459e-07, "loss": 0.0623, "step": 7568 }, { "epoch": 1.7221843003412969, "grad_norm": 2.1190698924957134, "learning_rate": 9.185117639549891e-07, "loss": 0.1153, "step": 7569 }, { "epoch": 1.7224118316268489, "grad_norm": 1.7292016066869216, "learning_rate": 9.184328754143893e-07, "loss": 0.0518, "step": 7570 }, { "epoch": 1.7226393629124006, "grad_norm": 1.1964881114058432, "learning_rate": 9.183539808765591e-07, "loss": 0.0673, "step": 7571 }, { "epoch": 1.7228668941979524, "grad_norm": 1.1565136922837351, "learning_rate": 9.182750803431109e-07, "loss": 0.0851, "step": 7572 }, { "epoch": 1.723094425483504, "grad_norm": 1.8740841010678184, "learning_rate": 9.181961738156568e-07, "loss": 0.1463, "step": 7573 }, { "epoch": 1.7233219567690559, "grad_norm": 2.1491852106368943, "learning_rate": 9.181172612958101e-07, "loss": 0.0839, "step": 7574 }, { "epoch": 1.7235494880546076, "grad_norm": 1.8165011826984763, "learning_rate": 9.180383427851834e-07, "loss": 0.1487, "step": 7575 }, { "epoch": 1.7237770193401594, "grad_norm": 1.9293967119537, "learning_rate": 9.179594182853898e-07, "loss": 0.0593, "step": 7576 }, { "epoch": 1.7240045506257111, "grad_norm": 1.697160909654997, "learning_rate": 9.17880487798042e-07, "loss": 0.1196, "step": 7577 }, { "epoch": 1.7242320819112629, "grad_norm": 1.4096567598186796, "learning_rate": 9.178015513247534e-07, "loss": 0.0752, "step": 7578 }, { "epoch": 1.7244596131968146, "grad_norm": 1.8342504525775487, "learning_rate": 9.177226088671375e-07, "loss": 0.0839, "step": 7579 }, { "epoch": 1.7246871444823664, "grad_norm": 1.5778205964395018, "learning_rate": 9.176436604268073e-07, "loss": 0.0839, "step": 7580 }, { "epoch": 1.7249146757679181, "grad_norm": 1.1085482431210012, "learning_rate": 9.175647060053767e-07, "loss": 0.0974, "step": 7581 }, { "epoch": 1.7251422070534699, "grad_norm": 1.8092570257001175, "learning_rate": 9.174857456044595e-07, "loss": 0.0599, "step": 7582 }, { "epoch": 1.7253697383390216, "grad_norm": 1.0296866950786054, "learning_rate": 9.174067792256693e-07, "loss": 0.0395, "step": 7583 }, { "epoch": 1.7255972696245734, "grad_norm": 1.216069418979427, "learning_rate": 9.1732780687062e-07, "loss": 0.0317, "step": 7584 }, { "epoch": 1.7258248009101251, "grad_norm": 1.7995583471787395, "learning_rate": 9.172488285409256e-07, "loss": 0.0624, "step": 7585 }, { "epoch": 1.7260523321956769, "grad_norm": 2.18914611889024, "learning_rate": 9.171698442382005e-07, "loss": 0.1457, "step": 7586 }, { "epoch": 1.7262798634812286, "grad_norm": 2.1762080793122105, "learning_rate": 9.170908539640587e-07, "loss": 0.0768, "step": 7587 }, { "epoch": 1.7265073947667804, "grad_norm": 2.1161315095336413, "learning_rate": 9.170118577201149e-07, "loss": 0.1035, "step": 7588 }, { "epoch": 1.7267349260523321, "grad_norm": 1.20294784797812, "learning_rate": 9.169328555079836e-07, "loss": 0.0765, "step": 7589 }, { "epoch": 1.726962457337884, "grad_norm": 1.3058798502727609, "learning_rate": 9.168538473292793e-07, "loss": 0.0459, "step": 7590 }, { "epoch": 1.7271899886234356, "grad_norm": 2.3609082153012855, "learning_rate": 9.167748331856169e-07, "loss": 0.1301, "step": 7591 }, { "epoch": 1.7274175199089874, "grad_norm": 1.5176580933563264, "learning_rate": 9.166958130786113e-07, "loss": 0.0417, "step": 7592 }, { "epoch": 1.7276450511945391, "grad_norm": 2.861613410192663, "learning_rate": 9.166167870098773e-07, "loss": 0.1977, "step": 7593 }, { "epoch": 1.727872582480091, "grad_norm": 1.6362586317221, "learning_rate": 9.165377549810305e-07, "loss": 0.0739, "step": 7594 }, { "epoch": 1.7281001137656427, "grad_norm": 1.3870134636338667, "learning_rate": 9.164587169936858e-07, "loss": 0.0564, "step": 7595 }, { "epoch": 1.7283276450511944, "grad_norm": 1.77736625821223, "learning_rate": 9.163796730494587e-07, "loss": 0.0844, "step": 7596 }, { "epoch": 1.7285551763367462, "grad_norm": 1.5654068200172355, "learning_rate": 9.163006231499647e-07, "loss": 0.0794, "step": 7597 }, { "epoch": 1.728782707622298, "grad_norm": 1.6324365372014396, "learning_rate": 9.162215672968194e-07, "loss": 0.0715, "step": 7598 }, { "epoch": 1.7290102389078499, "grad_norm": 1.775487028133511, "learning_rate": 9.161425054916388e-07, "loss": 0.0984, "step": 7599 }, { "epoch": 1.7292377701934016, "grad_norm": 2.596742115531444, "learning_rate": 9.160634377360383e-07, "loss": 0.0943, "step": 7600 }, { "epoch": 1.7294653014789534, "grad_norm": 1.2799569142806528, "learning_rate": 9.159843640316345e-07, "loss": 0.1087, "step": 7601 }, { "epoch": 1.7296928327645051, "grad_norm": 1.3882731828318262, "learning_rate": 9.159052843800431e-07, "loss": 0.0969, "step": 7602 }, { "epoch": 1.729920364050057, "grad_norm": 1.7312412523501368, "learning_rate": 9.158261987828804e-07, "loss": 0.054, "step": 7603 }, { "epoch": 1.7301478953356086, "grad_norm": 1.5319719763944186, "learning_rate": 9.157471072417629e-07, "loss": 0.1203, "step": 7604 }, { "epoch": 1.7303754266211604, "grad_norm": 1.5104710044764469, "learning_rate": 9.156680097583071e-07, "loss": 0.0367, "step": 7605 }, { "epoch": 1.7306029579067121, "grad_norm": 1.3247706686865452, "learning_rate": 9.155889063341293e-07, "loss": 0.0843, "step": 7606 }, { "epoch": 1.730830489192264, "grad_norm": 1.2796887606358964, "learning_rate": 9.155097969708464e-07, "loss": 0.1399, "step": 7607 }, { "epoch": 1.7310580204778157, "grad_norm": 1.3776077953235888, "learning_rate": 9.154306816700755e-07, "loss": 0.1421, "step": 7608 }, { "epoch": 1.7312855517633676, "grad_norm": 2.131102346008641, "learning_rate": 9.153515604334334e-07, "loss": 0.0511, "step": 7609 }, { "epoch": 1.7315130830489194, "grad_norm": 1.1597126843073358, "learning_rate": 9.152724332625369e-07, "loss": 0.0577, "step": 7610 }, { "epoch": 1.7317406143344711, "grad_norm": 1.8881270454348769, "learning_rate": 9.151933001590035e-07, "loss": 0.0532, "step": 7611 }, { "epoch": 1.7319681456200229, "grad_norm": 1.383856972416893, "learning_rate": 9.151141611244507e-07, "loss": 0.1214, "step": 7612 }, { "epoch": 1.7321956769055746, "grad_norm": 1.4899115457796293, "learning_rate": 9.150350161604957e-07, "loss": 0.135, "step": 7613 }, { "epoch": 1.7324232081911264, "grad_norm": 2.132261288734918, "learning_rate": 9.149558652687561e-07, "loss": 0.1575, "step": 7614 }, { "epoch": 1.7326507394766781, "grad_norm": 1.6673093075964234, "learning_rate": 9.148767084508497e-07, "loss": 0.0531, "step": 7615 }, { "epoch": 1.7328782707622299, "grad_norm": 1.894698040329308, "learning_rate": 9.147975457083943e-07, "loss": 0.062, "step": 7616 }, { "epoch": 1.7331058020477816, "grad_norm": 1.6934329179996055, "learning_rate": 9.147183770430076e-07, "loss": 0.0803, "step": 7617 }, { "epoch": 1.7333333333333334, "grad_norm": 2.055058833041149, "learning_rate": 9.146392024563081e-07, "loss": 0.1421, "step": 7618 }, { "epoch": 1.7335608646188851, "grad_norm": 1.2194880106920962, "learning_rate": 9.145600219499137e-07, "loss": 0.0652, "step": 7619 }, { "epoch": 1.733788395904437, "grad_norm": 3.787036965512966, "learning_rate": 9.144808355254426e-07, "loss": 0.1356, "step": 7620 }, { "epoch": 1.7340159271899886, "grad_norm": 1.7698576527312129, "learning_rate": 9.144016431845136e-07, "loss": 0.0539, "step": 7621 }, { "epoch": 1.7342434584755404, "grad_norm": 1.656528845656429, "learning_rate": 9.143224449287449e-07, "loss": 0.076, "step": 7622 }, { "epoch": 1.7344709897610922, "grad_norm": 2.206917546512681, "learning_rate": 9.142432407597552e-07, "loss": 0.1051, "step": 7623 }, { "epoch": 1.734698521046644, "grad_norm": 3.318813955989774, "learning_rate": 9.141640306791635e-07, "loss": 0.062, "step": 7624 }, { "epoch": 1.7349260523321957, "grad_norm": 1.19670148986426, "learning_rate": 9.140848146885888e-07, "loss": 0.1542, "step": 7625 }, { "epoch": 1.7351535836177474, "grad_norm": 1.6504370070610002, "learning_rate": 9.140055927896497e-07, "loss": 0.0701, "step": 7626 }, { "epoch": 1.7353811149032992, "grad_norm": 2.215620986397485, "learning_rate": 9.139263649839654e-07, "loss": 0.1286, "step": 7627 }, { "epoch": 1.735608646188851, "grad_norm": 1.6530715852937048, "learning_rate": 9.138471312731558e-07, "loss": 0.0639, "step": 7628 }, { "epoch": 1.7358361774744027, "grad_norm": 1.199037402415222, "learning_rate": 9.137678916588395e-07, "loss": 0.0884, "step": 7629 }, { "epoch": 1.7360637087599544, "grad_norm": 2.332708041188332, "learning_rate": 9.136886461426363e-07, "loss": 0.1075, "step": 7630 }, { "epoch": 1.7362912400455062, "grad_norm": 1.9520991750590064, "learning_rate": 9.136093947261659e-07, "loss": 0.2023, "step": 7631 }, { "epoch": 1.736518771331058, "grad_norm": 3.361016751047298, "learning_rate": 9.135301374110482e-07, "loss": 0.1957, "step": 7632 }, { "epoch": 1.7367463026166097, "grad_norm": 2.0303541910091347, "learning_rate": 9.134508741989028e-07, "loss": 0.0697, "step": 7633 }, { "epoch": 1.7369738339021614, "grad_norm": 2.0687313535754317, "learning_rate": 9.133716050913499e-07, "loss": 0.0699, "step": 7634 }, { "epoch": 1.7372013651877132, "grad_norm": 1.8929361088138872, "learning_rate": 9.132923300900096e-07, "loss": 0.087, "step": 7635 }, { "epoch": 1.737428896473265, "grad_norm": 1.82190683609358, "learning_rate": 9.132130491965019e-07, "loss": 0.0963, "step": 7636 }, { "epoch": 1.7376564277588167, "grad_norm": 1.784844620846255, "learning_rate": 9.131337624124473e-07, "loss": 0.1103, "step": 7637 }, { "epoch": 1.7378839590443687, "grad_norm": 1.6832982433867738, "learning_rate": 9.130544697394662e-07, "loss": 0.0598, "step": 7638 }, { "epoch": 1.7381114903299204, "grad_norm": 1.6649544579559832, "learning_rate": 9.129751711791796e-07, "loss": 0.0494, "step": 7639 }, { "epoch": 1.7383390216154722, "grad_norm": 1.910777998489734, "learning_rate": 9.128958667332076e-07, "loss": 0.0807, "step": 7640 }, { "epoch": 1.738566552901024, "grad_norm": 2.7566149417687336, "learning_rate": 9.128165564031715e-07, "loss": 0.1526, "step": 7641 }, { "epoch": 1.7387940841865757, "grad_norm": 1.9829806241230596, "learning_rate": 9.127372401906919e-07, "loss": 0.0676, "step": 7642 }, { "epoch": 1.7390216154721274, "grad_norm": 1.2411223705529266, "learning_rate": 9.126579180973904e-07, "loss": 0.1025, "step": 7643 }, { "epoch": 1.7392491467576792, "grad_norm": 1.9871055456257984, "learning_rate": 9.125785901248875e-07, "loss": 0.0926, "step": 7644 }, { "epoch": 1.739476678043231, "grad_norm": 1.3381728325764048, "learning_rate": 9.124992562748051e-07, "loss": 0.1002, "step": 7645 }, { "epoch": 1.7397042093287827, "grad_norm": 2.028450569544906, "learning_rate": 9.124199165487646e-07, "loss": 0.1095, "step": 7646 }, { "epoch": 1.7399317406143344, "grad_norm": 1.5375677076815077, "learning_rate": 9.12340570948387e-07, "loss": 0.1787, "step": 7647 }, { "epoch": 1.7401592718998864, "grad_norm": 1.3000928333153594, "learning_rate": 9.122612194752947e-07, "loss": 0.0964, "step": 7648 }, { "epoch": 1.7403868031854381, "grad_norm": 2.4207627845936783, "learning_rate": 9.12181862131109e-07, "loss": 0.0617, "step": 7649 }, { "epoch": 1.74061433447099, "grad_norm": 1.9625432494712964, "learning_rate": 9.121024989174521e-07, "loss": 0.0534, "step": 7650 }, { "epoch": 1.7408418657565417, "grad_norm": 1.671691037993077, "learning_rate": 9.120231298359458e-07, "loss": 0.0661, "step": 7651 }, { "epoch": 1.7410693970420934, "grad_norm": 1.62650810819884, "learning_rate": 9.119437548882125e-07, "loss": 0.1592, "step": 7652 }, { "epoch": 1.7412969283276452, "grad_norm": 2.0886290403537813, "learning_rate": 9.118643740758744e-07, "loss": 0.0891, "step": 7653 }, { "epoch": 1.741524459613197, "grad_norm": 1.0202798082642592, "learning_rate": 9.117849874005537e-07, "loss": 0.0609, "step": 7654 }, { "epoch": 1.7417519908987487, "grad_norm": 1.1087960114356423, "learning_rate": 9.117055948638731e-07, "loss": 0.1328, "step": 7655 }, { "epoch": 1.7419795221843004, "grad_norm": 2.337569118992264, "learning_rate": 9.116261964674553e-07, "loss": 0.1774, "step": 7656 }, { "epoch": 1.7422070534698522, "grad_norm": 0.833425953569515, "learning_rate": 9.115467922129229e-07, "loss": 0.0644, "step": 7657 }, { "epoch": 1.742434584755404, "grad_norm": 1.8459128416919384, "learning_rate": 9.114673821018987e-07, "loss": 0.1487, "step": 7658 }, { "epoch": 1.7426621160409557, "grad_norm": 3.0180621961746463, "learning_rate": 9.113879661360063e-07, "loss": 0.0997, "step": 7659 }, { "epoch": 1.7428896473265074, "grad_norm": 2.365356291433293, "learning_rate": 9.11308544316868e-07, "loss": 0.0899, "step": 7660 }, { "epoch": 1.7431171786120592, "grad_norm": 0.9947468958069099, "learning_rate": 9.112291166461076e-07, "loss": 0.0648, "step": 7661 }, { "epoch": 1.743344709897611, "grad_norm": 2.0342009053766645, "learning_rate": 9.111496831253481e-07, "loss": 0.1004, "step": 7662 }, { "epoch": 1.7435722411831627, "grad_norm": 1.9020869768225852, "learning_rate": 9.110702437562132e-07, "loss": 0.0512, "step": 7663 }, { "epoch": 1.7437997724687144, "grad_norm": 3.361205986166927, "learning_rate": 9.109907985403265e-07, "loss": 0.0713, "step": 7664 }, { "epoch": 1.7440273037542662, "grad_norm": 2.2716223567748863, "learning_rate": 9.109113474793116e-07, "loss": 0.1792, "step": 7665 }, { "epoch": 1.744254835039818, "grad_norm": 1.4167288377703477, "learning_rate": 9.108318905747924e-07, "loss": 0.041, "step": 7666 }, { "epoch": 1.7444823663253697, "grad_norm": 1.5931326292015675, "learning_rate": 9.107524278283928e-07, "loss": 0.0769, "step": 7667 }, { "epoch": 1.7447098976109214, "grad_norm": 2.1749947008693757, "learning_rate": 9.106729592417368e-07, "loss": 0.0683, "step": 7668 }, { "epoch": 1.7449374288964732, "grad_norm": 2.4611060336156023, "learning_rate": 9.105934848164488e-07, "loss": 0.0866, "step": 7669 }, { "epoch": 1.745164960182025, "grad_norm": 1.4394833957099606, "learning_rate": 9.105140045541532e-07, "loss": 0.128, "step": 7670 }, { "epoch": 1.7453924914675767, "grad_norm": 1.2704249019967093, "learning_rate": 9.10434518456474e-07, "loss": 0.0899, "step": 7671 }, { "epoch": 1.7456200227531284, "grad_norm": 1.8382274357405695, "learning_rate": 9.10355026525036e-07, "loss": 0.0707, "step": 7672 }, { "epoch": 1.7458475540386802, "grad_norm": 1.6516294862787348, "learning_rate": 9.102755287614639e-07, "loss": 0.0628, "step": 7673 }, { "epoch": 1.746075085324232, "grad_norm": 1.4687435497605381, "learning_rate": 9.101960251673825e-07, "loss": 0.0736, "step": 7674 }, { "epoch": 1.7463026166097837, "grad_norm": 2.19983896423635, "learning_rate": 9.101165157444166e-07, "loss": 0.0943, "step": 7675 }, { "epoch": 1.7465301478953354, "grad_norm": 0.9785091789738013, "learning_rate": 9.100370004941912e-07, "loss": 0.0403, "step": 7676 }, { "epoch": 1.7467576791808874, "grad_norm": 1.5092521481168648, "learning_rate": 9.099574794183317e-07, "loss": 0.0863, "step": 7677 }, { "epoch": 1.7469852104664392, "grad_norm": 1.543615972124766, "learning_rate": 9.098779525184631e-07, "loss": 0.0558, "step": 7678 }, { "epoch": 1.747212741751991, "grad_norm": 1.8708202868409067, "learning_rate": 9.097984197962109e-07, "loss": 0.0766, "step": 7679 }, { "epoch": 1.7474402730375427, "grad_norm": 2.326961905782801, "learning_rate": 9.097188812532006e-07, "loss": 0.1163, "step": 7680 }, { "epoch": 1.7476678043230944, "grad_norm": 1.221197109664306, "learning_rate": 9.096393368910578e-07, "loss": 0.051, "step": 7681 }, { "epoch": 1.7478953356086462, "grad_norm": 2.481725149573211, "learning_rate": 9.095597867114082e-07, "loss": 0.0864, "step": 7682 }, { "epoch": 1.748122866894198, "grad_norm": 1.5652542104869682, "learning_rate": 9.094802307158777e-07, "loss": 0.1056, "step": 7683 }, { "epoch": 1.7483503981797497, "grad_norm": 1.9462963975521113, "learning_rate": 9.094006689060924e-07, "loss": 0.1019, "step": 7684 }, { "epoch": 1.7485779294653014, "grad_norm": 1.5304924444892434, "learning_rate": 9.093211012836782e-07, "loss": 0.0718, "step": 7685 }, { "epoch": 1.7488054607508532, "grad_norm": 1.2722942476418655, "learning_rate": 9.092415278502614e-07, "loss": 0.1876, "step": 7686 }, { "epoch": 1.7490329920364052, "grad_norm": 1.619476736827669, "learning_rate": 9.091619486074684e-07, "loss": 0.0692, "step": 7687 }, { "epoch": 1.749260523321957, "grad_norm": 1.4809674215317568, "learning_rate": 9.090823635569254e-07, "loss": 0.1654, "step": 7688 }, { "epoch": 1.7494880546075087, "grad_norm": 1.9547753713840321, "learning_rate": 9.090027727002594e-07, "loss": 0.0753, "step": 7689 }, { "epoch": 1.7497155858930604, "grad_norm": 1.205130127056155, "learning_rate": 9.089231760390968e-07, "loss": 0.0785, "step": 7690 }, { "epoch": 1.7499431171786122, "grad_norm": 1.8649502044332449, "learning_rate": 9.088435735750643e-07, "loss": 0.1367, "step": 7691 }, { "epoch": 1.750170648464164, "grad_norm": 1.9758421731913736, "learning_rate": 9.087639653097892e-07, "loss": 0.1531, "step": 7692 }, { "epoch": 1.7503981797497157, "grad_norm": 2.0250963128586807, "learning_rate": 9.086843512448983e-07, "loss": 0.1064, "step": 7693 }, { "epoch": 1.7506257110352674, "grad_norm": 1.0986110331885017, "learning_rate": 9.086047313820186e-07, "loss": 0.0717, "step": 7694 }, { "epoch": 1.7508532423208192, "grad_norm": 1.575315504512503, "learning_rate": 9.085251057227777e-07, "loss": 0.1608, "step": 7695 }, { "epoch": 1.751080773606371, "grad_norm": 1.8499617034128235, "learning_rate": 9.08445474268803e-07, "loss": 0.0711, "step": 7696 }, { "epoch": 1.7513083048919227, "grad_norm": 1.302390760544348, "learning_rate": 9.083658370217219e-07, "loss": 0.1105, "step": 7697 }, { "epoch": 1.7515358361774744, "grad_norm": 1.5550174209700023, "learning_rate": 9.082861939831619e-07, "loss": 0.174, "step": 7698 }, { "epoch": 1.7517633674630262, "grad_norm": 1.9470548649485333, "learning_rate": 9.08206545154751e-07, "loss": 0.1269, "step": 7699 }, { "epoch": 1.751990898748578, "grad_norm": 1.4523391683511102, "learning_rate": 9.08126890538117e-07, "loss": 0.0811, "step": 7700 }, { "epoch": 1.7522184300341297, "grad_norm": 1.6124064298453191, "learning_rate": 9.080472301348878e-07, "loss": 0.0569, "step": 7701 }, { "epoch": 1.7524459613196814, "grad_norm": 1.051165491939012, "learning_rate": 9.079675639466918e-07, "loss": 0.0766, "step": 7702 }, { "epoch": 1.7526734926052332, "grad_norm": 1.2298198234925792, "learning_rate": 9.078878919751569e-07, "loss": 0.1004, "step": 7703 }, { "epoch": 1.752901023890785, "grad_norm": 1.1952869279452107, "learning_rate": 9.078082142219114e-07, "loss": 0.1095, "step": 7704 }, { "epoch": 1.7531285551763367, "grad_norm": 1.6588372426049813, "learning_rate": 9.077285306885842e-07, "loss": 0.0828, "step": 7705 }, { "epoch": 1.7533560864618885, "grad_norm": 1.8863766574519831, "learning_rate": 9.076488413768035e-07, "loss": 0.085, "step": 7706 }, { "epoch": 1.7535836177474402, "grad_norm": 1.868998557716498, "learning_rate": 9.075691462881981e-07, "loss": 0.1284, "step": 7707 }, { "epoch": 1.753811149032992, "grad_norm": 2.575061219358169, "learning_rate": 9.074894454243968e-07, "loss": 0.0961, "step": 7708 }, { "epoch": 1.7540386803185437, "grad_norm": 2.5451709842460453, "learning_rate": 9.074097387870289e-07, "loss": 0.1331, "step": 7709 }, { "epoch": 1.7542662116040955, "grad_norm": 3.0446870928807233, "learning_rate": 9.07330026377723e-07, "loss": 0.0673, "step": 7710 }, { "epoch": 1.7544937428896472, "grad_norm": 1.6906044696739255, "learning_rate": 9.072503081981081e-07, "loss": 0.1958, "step": 7711 }, { "epoch": 1.754721274175199, "grad_norm": 2.123375136911464, "learning_rate": 9.07170584249814e-07, "loss": 0.0578, "step": 7712 }, { "epoch": 1.7549488054607507, "grad_norm": 1.7145675039121357, "learning_rate": 9.070908545344702e-07, "loss": 0.1375, "step": 7713 }, { "epoch": 1.7551763367463025, "grad_norm": 1.8471429747593309, "learning_rate": 9.070111190537057e-07, "loss": 0.178, "step": 7714 }, { "epoch": 1.7554038680318542, "grad_norm": 1.1550274210194234, "learning_rate": 9.069313778091504e-07, "loss": 0.1264, "step": 7715 }, { "epoch": 1.7556313993174062, "grad_norm": 2.2122684459057758, "learning_rate": 9.068516308024343e-07, "loss": 0.1094, "step": 7716 }, { "epoch": 1.755858930602958, "grad_norm": 2.0615514422002703, "learning_rate": 9.067718780351867e-07, "loss": 0.1321, "step": 7717 }, { "epoch": 1.7560864618885097, "grad_norm": 1.2306617245131113, "learning_rate": 9.066921195090383e-07, "loss": 0.0888, "step": 7718 }, { "epoch": 1.7563139931740614, "grad_norm": 1.9009024264638439, "learning_rate": 9.066123552256187e-07, "loss": 0.1886, "step": 7719 }, { "epoch": 1.7565415244596132, "grad_norm": 2.0336857043554883, "learning_rate": 9.065325851865583e-07, "loss": 0.0602, "step": 7720 }, { "epoch": 1.756769055745165, "grad_norm": 1.716514487313978, "learning_rate": 9.064528093934874e-07, "loss": 0.045, "step": 7721 }, { "epoch": 1.7569965870307167, "grad_norm": 1.4130334728309684, "learning_rate": 9.063730278480368e-07, "loss": 0.0688, "step": 7722 }, { "epoch": 1.7572241183162685, "grad_norm": 1.7148947194848858, "learning_rate": 9.062932405518365e-07, "loss": 0.067, "step": 7723 }, { "epoch": 1.7574516496018202, "grad_norm": 1.1702296450916967, "learning_rate": 9.062134475065176e-07, "loss": 0.1344, "step": 7724 }, { "epoch": 1.757679180887372, "grad_norm": 2.1482513621808828, "learning_rate": 9.06133648713711e-07, "loss": 0.1423, "step": 7725 }, { "epoch": 1.757906712172924, "grad_norm": 2.284486576088291, "learning_rate": 9.060538441750475e-07, "loss": 0.1495, "step": 7726 }, { "epoch": 1.7581342434584757, "grad_norm": 2.191973827420181, "learning_rate": 9.05974033892158e-07, "loss": 0.0847, "step": 7727 }, { "epoch": 1.7583617747440274, "grad_norm": 1.8911279223088515, "learning_rate": 9.058942178666738e-07, "loss": 0.1093, "step": 7728 }, { "epoch": 1.7585893060295792, "grad_norm": 1.607789254154104, "learning_rate": 9.058143961002263e-07, "loss": 0.0697, "step": 7729 }, { "epoch": 1.758816837315131, "grad_norm": 2.531179289862926, "learning_rate": 9.05734568594447e-07, "loss": 0.1071, "step": 7730 }, { "epoch": 1.7590443686006827, "grad_norm": 1.761839393673622, "learning_rate": 9.05654735350967e-07, "loss": 0.0738, "step": 7731 }, { "epoch": 1.7592718998862344, "grad_norm": 1.9432446627659616, "learning_rate": 9.055748963714183e-07, "loss": 0.0888, "step": 7732 }, { "epoch": 1.7594994311717862, "grad_norm": 2.477380296176341, "learning_rate": 9.054950516574327e-07, "loss": 0.1222, "step": 7733 }, { "epoch": 1.759726962457338, "grad_norm": 2.1426177085938822, "learning_rate": 9.054152012106417e-07, "loss": 0.1153, "step": 7734 }, { "epoch": 1.7599544937428897, "grad_norm": 1.7579957725375353, "learning_rate": 9.053353450326777e-07, "loss": 0.1293, "step": 7735 }, { "epoch": 1.7601820250284415, "grad_norm": 1.7277103983923738, "learning_rate": 9.052554831251725e-07, "loss": 0.142, "step": 7736 }, { "epoch": 1.7604095563139932, "grad_norm": 2.3482069939578434, "learning_rate": 9.051756154897587e-07, "loss": 0.0941, "step": 7737 }, { "epoch": 1.760637087599545, "grad_norm": 2.2017522392952644, "learning_rate": 9.050957421280683e-07, "loss": 0.0769, "step": 7738 }, { "epoch": 1.7608646188850967, "grad_norm": 1.2463989976132068, "learning_rate": 9.05015863041734e-07, "loss": 0.0541, "step": 7739 }, { "epoch": 1.7610921501706485, "grad_norm": 2.1184900749319957, "learning_rate": 9.049359782323881e-07, "loss": 0.1823, "step": 7740 }, { "epoch": 1.7613196814562002, "grad_norm": 2.343042918418259, "learning_rate": 9.048560877016637e-07, "loss": 0.1149, "step": 7741 }, { "epoch": 1.761547212741752, "grad_norm": 2.4441312268439574, "learning_rate": 9.047761914511933e-07, "loss": 0.0749, "step": 7742 }, { "epoch": 1.7617747440273037, "grad_norm": 1.8406444859552782, "learning_rate": 9.0469628948261e-07, "loss": 0.0456, "step": 7743 }, { "epoch": 1.7620022753128555, "grad_norm": 2.528726454922943, "learning_rate": 9.046163817975466e-07, "loss": 0.0819, "step": 7744 }, { "epoch": 1.7622298065984072, "grad_norm": 2.233801008165811, "learning_rate": 9.045364683976366e-07, "loss": 0.0814, "step": 7745 }, { "epoch": 1.762457337883959, "grad_norm": 1.386317934062558, "learning_rate": 9.044565492845131e-07, "loss": 0.1425, "step": 7746 }, { "epoch": 1.7626848691695107, "grad_norm": 2.3550014134454798, "learning_rate": 9.043766244598096e-07, "loss": 0.2095, "step": 7747 }, { "epoch": 1.7629124004550625, "grad_norm": 1.7521921266846439, "learning_rate": 9.042966939251595e-07, "loss": 0.1088, "step": 7748 }, { "epoch": 1.7631399317406142, "grad_norm": 1.5105909426282509, "learning_rate": 9.042167576821964e-07, "loss": 0.0839, "step": 7749 }, { "epoch": 1.763367463026166, "grad_norm": 1.346098449702512, "learning_rate": 9.041368157325543e-07, "loss": 0.1096, "step": 7750 }, { "epoch": 1.7635949943117177, "grad_norm": 2.230968633211587, "learning_rate": 9.040568680778668e-07, "loss": 0.1296, "step": 7751 }, { "epoch": 1.7638225255972695, "grad_norm": 2.008287198164812, "learning_rate": 9.03976914719768e-07, "loss": 0.1533, "step": 7752 }, { "epoch": 1.7640500568828212, "grad_norm": 2.0286358209472213, "learning_rate": 9.03896955659892e-07, "loss": 0.1248, "step": 7753 }, { "epoch": 1.764277588168373, "grad_norm": 1.4410538366212453, "learning_rate": 9.03816990899873e-07, "loss": 0.1253, "step": 7754 }, { "epoch": 1.764505119453925, "grad_norm": 2.3003285393100126, "learning_rate": 9.037370204413452e-07, "loss": 0.1108, "step": 7755 }, { "epoch": 1.7647326507394767, "grad_norm": 1.8094108627109409, "learning_rate": 9.036570442859433e-07, "loss": 0.0957, "step": 7756 }, { "epoch": 1.7649601820250285, "grad_norm": 1.8456575539371738, "learning_rate": 9.035770624353018e-07, "loss": 0.1355, "step": 7757 }, { "epoch": 1.7651877133105802, "grad_norm": 2.063327024936909, "learning_rate": 9.034970748910552e-07, "loss": 0.1718, "step": 7758 }, { "epoch": 1.765415244596132, "grad_norm": 1.4051691223639915, "learning_rate": 9.034170816548387e-07, "loss": 0.0486, "step": 7759 }, { "epoch": 1.7656427758816837, "grad_norm": 1.7755553859713349, "learning_rate": 9.033370827282868e-07, "loss": 0.1381, "step": 7760 }, { "epoch": 1.7658703071672355, "grad_norm": 2.3606055646893367, "learning_rate": 9.032570781130346e-07, "loss": 0.0858, "step": 7761 }, { "epoch": 1.7660978384527872, "grad_norm": 1.577299531099599, "learning_rate": 9.031770678107174e-07, "loss": 0.1253, "step": 7762 }, { "epoch": 1.766325369738339, "grad_norm": 1.582327546524722, "learning_rate": 9.030970518229704e-07, "loss": 0.1605, "step": 7763 }, { "epoch": 1.7665529010238907, "grad_norm": 2.0474199246300224, "learning_rate": 9.030170301514289e-07, "loss": 0.0884, "step": 7764 }, { "epoch": 1.7667804323094427, "grad_norm": 2.0688842173234083, "learning_rate": 9.029370027977284e-07, "loss": 0.0616, "step": 7765 }, { "epoch": 1.7670079635949945, "grad_norm": 1.700530417390062, "learning_rate": 9.028569697635047e-07, "loss": 0.1148, "step": 7766 }, { "epoch": 1.7672354948805462, "grad_norm": 1.2065680171495423, "learning_rate": 9.027769310503935e-07, "loss": 0.0862, "step": 7767 }, { "epoch": 1.767463026166098, "grad_norm": 2.763378345600242, "learning_rate": 9.026968866600304e-07, "loss": 0.0837, "step": 7768 }, { "epoch": 1.7676905574516497, "grad_norm": 1.5979987877916608, "learning_rate": 9.026168365940516e-07, "loss": 0.1767, "step": 7769 }, { "epoch": 1.7679180887372015, "grad_norm": 2.679596696381199, "learning_rate": 9.02536780854093e-07, "loss": 0.0819, "step": 7770 }, { "epoch": 1.7681456200227532, "grad_norm": 1.8230684306888139, "learning_rate": 9.024567194417911e-07, "loss": 0.0637, "step": 7771 }, { "epoch": 1.768373151308305, "grad_norm": 1.9631162966685984, "learning_rate": 9.023766523587817e-07, "loss": 0.0615, "step": 7772 }, { "epoch": 1.7686006825938567, "grad_norm": 3.0762963295111554, "learning_rate": 9.022965796067016e-07, "loss": 0.0891, "step": 7773 }, { "epoch": 1.7688282138794085, "grad_norm": 1.5944641746686694, "learning_rate": 9.022165011871873e-07, "loss": 0.0994, "step": 7774 }, { "epoch": 1.7690557451649602, "grad_norm": 1.4729360968816696, "learning_rate": 9.021364171018754e-07, "loss": 0.1397, "step": 7775 }, { "epoch": 1.769283276450512, "grad_norm": 1.6493723281696528, "learning_rate": 9.020563273524027e-07, "loss": 0.0886, "step": 7776 }, { "epoch": 1.7695108077360637, "grad_norm": 1.9381157892220884, "learning_rate": 9.019762319404061e-07, "loss": 0.1045, "step": 7777 }, { "epoch": 1.7697383390216155, "grad_norm": 1.5331658549824572, "learning_rate": 9.018961308675225e-07, "loss": 0.097, "step": 7778 }, { "epoch": 1.7699658703071672, "grad_norm": 1.7149391038670783, "learning_rate": 9.018160241353893e-07, "loss": 0.0906, "step": 7779 }, { "epoch": 1.770193401592719, "grad_norm": 1.8887009132890562, "learning_rate": 9.017359117456434e-07, "loss": 0.1742, "step": 7780 }, { "epoch": 1.7704209328782707, "grad_norm": 2.5088063317551983, "learning_rate": 9.016557936999221e-07, "loss": 0.0995, "step": 7781 }, { "epoch": 1.7706484641638225, "grad_norm": 1.088562934787203, "learning_rate": 9.015756699998632e-07, "loss": 0.1064, "step": 7782 }, { "epoch": 1.7708759954493742, "grad_norm": 1.7457599624243116, "learning_rate": 9.014955406471041e-07, "loss": 0.0916, "step": 7783 }, { "epoch": 1.771103526734926, "grad_norm": 0.990592771673969, "learning_rate": 9.014154056432828e-07, "loss": 0.0636, "step": 7784 }, { "epoch": 1.7713310580204777, "grad_norm": 1.5216263430612997, "learning_rate": 9.013352649900365e-07, "loss": 0.0734, "step": 7785 }, { "epoch": 1.7715585893060295, "grad_norm": 1.6761526514507648, "learning_rate": 9.012551186890037e-07, "loss": 0.1128, "step": 7786 }, { "epoch": 1.7717861205915812, "grad_norm": 2.406746693881849, "learning_rate": 9.011749667418221e-07, "loss": 0.108, "step": 7787 }, { "epoch": 1.772013651877133, "grad_norm": 2.088337573823908, "learning_rate": 9.010948091501298e-07, "loss": 0.104, "step": 7788 }, { "epoch": 1.7722411831626848, "grad_norm": 1.2096744129268449, "learning_rate": 9.010146459155654e-07, "loss": 0.0491, "step": 7789 }, { "epoch": 1.7724687144482365, "grad_norm": 2.1724696253694398, "learning_rate": 9.009344770397671e-07, "loss": 0.0906, "step": 7790 }, { "epoch": 1.7726962457337883, "grad_norm": 2.720624059626655, "learning_rate": 9.008543025243735e-07, "loss": 0.1334, "step": 7791 }, { "epoch": 1.77292377701934, "grad_norm": 1.501000526881006, "learning_rate": 9.007741223710232e-07, "loss": 0.0542, "step": 7792 }, { "epoch": 1.7731513083048918, "grad_norm": 1.8774662298947806, "learning_rate": 9.006939365813549e-07, "loss": 0.1842, "step": 7793 }, { "epoch": 1.7733788395904437, "grad_norm": 2.182592030310091, "learning_rate": 9.006137451570074e-07, "loss": 0.0597, "step": 7794 }, { "epoch": 1.7736063708759955, "grad_norm": 2.253307611268069, "learning_rate": 9.005335480996196e-07, "loss": 0.1503, "step": 7795 }, { "epoch": 1.7738339021615472, "grad_norm": 1.3152085878713347, "learning_rate": 9.004533454108308e-07, "loss": 0.0859, "step": 7796 }, { "epoch": 1.774061433447099, "grad_norm": 2.3123934197708262, "learning_rate": 9.0037313709228e-07, "loss": 0.0783, "step": 7797 }, { "epoch": 1.7742889647326507, "grad_norm": 1.257329892595704, "learning_rate": 9.002929231456067e-07, "loss": 0.1411, "step": 7798 }, { "epoch": 1.7745164960182025, "grad_norm": 1.3867909744443634, "learning_rate": 9.002127035724502e-07, "loss": 0.1619, "step": 7799 }, { "epoch": 1.7747440273037542, "grad_norm": 1.7551261198500565, "learning_rate": 9.001324783744501e-07, "loss": 0.1414, "step": 7800 }, { "epoch": 1.774971558589306, "grad_norm": 2.17055077900065, "learning_rate": 9.000522475532461e-07, "loss": 0.1175, "step": 7801 }, { "epoch": 1.7751990898748577, "grad_norm": 2.261645285178795, "learning_rate": 8.999720111104776e-07, "loss": 0.0675, "step": 7802 }, { "epoch": 1.7754266211604095, "grad_norm": 1.8841771418200437, "learning_rate": 8.99891769047785e-07, "loss": 0.067, "step": 7803 }, { "epoch": 1.7756541524459615, "grad_norm": 2.0197317634528975, "learning_rate": 8.998115213668082e-07, "loss": 0.0977, "step": 7804 }, { "epoch": 1.7758816837315132, "grad_norm": 2.0191667764223644, "learning_rate": 8.997312680691869e-07, "loss": 0.0602, "step": 7805 }, { "epoch": 1.776109215017065, "grad_norm": 1.3290974204701784, "learning_rate": 8.996510091565618e-07, "loss": 0.0557, "step": 7806 }, { "epoch": 1.7763367463026167, "grad_norm": 2.1763864519173968, "learning_rate": 8.99570744630573e-07, "loss": 0.0754, "step": 7807 }, { "epoch": 1.7765642775881685, "grad_norm": 1.4389036394707273, "learning_rate": 8.99490474492861e-07, "loss": 0.0845, "step": 7808 }, { "epoch": 1.7767918088737202, "grad_norm": 1.9050671159870316, "learning_rate": 8.994101987450665e-07, "loss": 0.0838, "step": 7809 }, { "epoch": 1.777019340159272, "grad_norm": 1.0723806211617144, "learning_rate": 8.993299173888302e-07, "loss": 0.06, "step": 7810 }, { "epoch": 1.7772468714448237, "grad_norm": 1.9747451991197136, "learning_rate": 8.992496304257926e-07, "loss": 0.0992, "step": 7811 }, { "epoch": 1.7774744027303755, "grad_norm": 1.7176621767274736, "learning_rate": 8.99169337857595e-07, "loss": 0.0905, "step": 7812 }, { "epoch": 1.7777019340159272, "grad_norm": 1.0753173089734889, "learning_rate": 8.990890396858781e-07, "loss": 0.033, "step": 7813 }, { "epoch": 1.777929465301479, "grad_norm": 1.7152862439179297, "learning_rate": 8.990087359122832e-07, "loss": 0.1307, "step": 7814 }, { "epoch": 1.7781569965870307, "grad_norm": 1.95552270558147, "learning_rate": 8.989284265384515e-07, "loss": 0.1575, "step": 7815 }, { "epoch": 1.7783845278725825, "grad_norm": 1.8848727548966473, "learning_rate": 8.988481115660247e-07, "loss": 0.0718, "step": 7816 }, { "epoch": 1.7786120591581343, "grad_norm": 1.3525083622427532, "learning_rate": 8.987677909966439e-07, "loss": 0.1044, "step": 7817 }, { "epoch": 1.778839590443686, "grad_norm": 2.2268446862759537, "learning_rate": 8.986874648319507e-07, "loss": 0.0853, "step": 7818 }, { "epoch": 1.7790671217292378, "grad_norm": 2.8297802361696567, "learning_rate": 8.986071330735872e-07, "loss": 0.0838, "step": 7819 }, { "epoch": 1.7792946530147895, "grad_norm": 2.612831240136297, "learning_rate": 8.985267957231947e-07, "loss": 0.1672, "step": 7820 }, { "epoch": 1.7795221843003413, "grad_norm": 1.1944218658112027, "learning_rate": 8.984464527824157e-07, "loss": 0.0988, "step": 7821 }, { "epoch": 1.779749715585893, "grad_norm": 1.9194825227954213, "learning_rate": 8.983661042528917e-07, "loss": 0.0917, "step": 7822 }, { "epoch": 1.7799772468714448, "grad_norm": 2.19105148559906, "learning_rate": 8.982857501362655e-07, "loss": 0.0869, "step": 7823 }, { "epoch": 1.7802047781569965, "grad_norm": 1.6695883304194064, "learning_rate": 8.982053904341789e-07, "loss": 0.0941, "step": 7824 }, { "epoch": 1.7804323094425483, "grad_norm": 1.5502404185987204, "learning_rate": 8.981250251482746e-07, "loss": 0.0646, "step": 7825 }, { "epoch": 1.7806598407281, "grad_norm": 2.256945452811573, "learning_rate": 8.980446542801947e-07, "loss": 0.0921, "step": 7826 }, { "epoch": 1.7808873720136518, "grad_norm": 1.4129414140934224, "learning_rate": 8.979642778315824e-07, "loss": 0.094, "step": 7827 }, { "epoch": 1.7811149032992035, "grad_norm": 1.2346827183794675, "learning_rate": 8.9788389580408e-07, "loss": 0.0791, "step": 7828 }, { "epoch": 1.7813424345847553, "grad_norm": 2.0776026778754493, "learning_rate": 8.978035081993307e-07, "loss": 0.1026, "step": 7829 }, { "epoch": 1.781569965870307, "grad_norm": 1.648641965139214, "learning_rate": 8.977231150189772e-07, "loss": 0.0896, "step": 7830 }, { "epoch": 1.7817974971558588, "grad_norm": 1.7606901431693627, "learning_rate": 8.976427162646628e-07, "loss": 0.1071, "step": 7831 }, { "epoch": 1.7820250284414105, "grad_norm": 1.0437463347997784, "learning_rate": 8.975623119380304e-07, "loss": 0.0864, "step": 7832 }, { "epoch": 1.7822525597269625, "grad_norm": 1.4175356073598935, "learning_rate": 8.974819020407237e-07, "loss": 0.0533, "step": 7833 }, { "epoch": 1.7824800910125143, "grad_norm": 1.7466844691413699, "learning_rate": 8.974014865743859e-07, "loss": 0.0661, "step": 7834 }, { "epoch": 1.782707622298066, "grad_norm": 1.7236750667746046, "learning_rate": 8.973210655406605e-07, "loss": 0.1125, "step": 7835 }, { "epoch": 1.7829351535836178, "grad_norm": 1.6443849825820265, "learning_rate": 8.972406389411915e-07, "loss": 0.1002, "step": 7836 }, { "epoch": 1.7831626848691695, "grad_norm": 1.6149835042504506, "learning_rate": 8.971602067776222e-07, "loss": 0.0743, "step": 7837 }, { "epoch": 1.7833902161547213, "grad_norm": 1.2815799918515915, "learning_rate": 8.970797690515967e-07, "loss": 0.0743, "step": 7838 }, { "epoch": 1.783617747440273, "grad_norm": 1.9971918347069777, "learning_rate": 8.969993257647591e-07, "loss": 0.0738, "step": 7839 }, { "epoch": 1.7838452787258248, "grad_norm": 2.003342173446247, "learning_rate": 8.969188769187534e-07, "loss": 0.114, "step": 7840 }, { "epoch": 1.7840728100113765, "grad_norm": 4.793773386642825, "learning_rate": 8.96838422515224e-07, "loss": 0.0991, "step": 7841 }, { "epoch": 1.7843003412969285, "grad_norm": 2.2809709648649803, "learning_rate": 8.967579625558148e-07, "loss": 0.0549, "step": 7842 }, { "epoch": 1.7845278725824802, "grad_norm": 1.7635893669409994, "learning_rate": 8.966774970421708e-07, "loss": 0.1327, "step": 7843 }, { "epoch": 1.784755403868032, "grad_norm": 2.394614637747171, "learning_rate": 8.965970259759363e-07, "loss": 0.1148, "step": 7844 }, { "epoch": 1.7849829351535837, "grad_norm": 1.930436802586443, "learning_rate": 8.965165493587557e-07, "loss": 0.0819, "step": 7845 }, { "epoch": 1.7852104664391355, "grad_norm": 1.4801002678171222, "learning_rate": 8.964360671922743e-07, "loss": 0.1262, "step": 7846 }, { "epoch": 1.7854379977246873, "grad_norm": 1.7059687688784455, "learning_rate": 8.963555794781369e-07, "loss": 0.0745, "step": 7847 }, { "epoch": 1.785665529010239, "grad_norm": 1.5711566617884094, "learning_rate": 8.962750862179883e-07, "loss": 0.1109, "step": 7848 }, { "epoch": 1.7858930602957908, "grad_norm": 1.706377672983305, "learning_rate": 8.961945874134738e-07, "loss": 0.0554, "step": 7849 }, { "epoch": 1.7861205915813425, "grad_norm": 2.039101020748713, "learning_rate": 8.961140830662386e-07, "loss": 0.131, "step": 7850 }, { "epoch": 1.7863481228668943, "grad_norm": 1.59291616283066, "learning_rate": 8.960335731779281e-07, "loss": 0.0962, "step": 7851 }, { "epoch": 1.786575654152446, "grad_norm": 2.174229002035782, "learning_rate": 8.959530577501875e-07, "loss": 0.1294, "step": 7852 }, { "epoch": 1.7868031854379978, "grad_norm": 1.2599519473724012, "learning_rate": 8.958725367846628e-07, "loss": 0.0621, "step": 7853 }, { "epoch": 1.7870307167235495, "grad_norm": 2.5449372656277425, "learning_rate": 8.957920102829997e-07, "loss": 0.0728, "step": 7854 }, { "epoch": 1.7872582480091013, "grad_norm": 2.952238728396471, "learning_rate": 8.957114782468436e-07, "loss": 0.1664, "step": 7855 }, { "epoch": 1.787485779294653, "grad_norm": 1.5952903547857578, "learning_rate": 8.956309406778407e-07, "loss": 0.0595, "step": 7856 }, { "epoch": 1.7877133105802048, "grad_norm": 1.7272271436452793, "learning_rate": 8.955503975776371e-07, "loss": 0.0651, "step": 7857 }, { "epoch": 1.7879408418657565, "grad_norm": 1.8654300815282825, "learning_rate": 8.954698489478788e-07, "loss": 0.0871, "step": 7858 }, { "epoch": 1.7881683731513083, "grad_norm": 1.318743783757732, "learning_rate": 8.953892947902121e-07, "loss": 0.079, "step": 7859 }, { "epoch": 1.78839590443686, "grad_norm": 2.0132943524891567, "learning_rate": 8.953087351062835e-07, "loss": 0.0739, "step": 7860 }, { "epoch": 1.7886234357224118, "grad_norm": 2.289347746053406, "learning_rate": 8.952281698977394e-07, "loss": 0.1019, "step": 7861 }, { "epoch": 1.7888509670079635, "grad_norm": 1.7022126119195158, "learning_rate": 8.951475991662263e-07, "loss": 0.1015, "step": 7862 }, { "epoch": 1.7890784982935153, "grad_norm": 2.650537981470732, "learning_rate": 8.950670229133912e-07, "loss": 0.1347, "step": 7863 }, { "epoch": 1.789306029579067, "grad_norm": 5.940113863866019, "learning_rate": 8.949864411408807e-07, "loss": 0.2453, "step": 7864 }, { "epoch": 1.7895335608646188, "grad_norm": 1.1622668271144168, "learning_rate": 8.949058538503416e-07, "loss": 0.0779, "step": 7865 }, { "epoch": 1.7897610921501705, "grad_norm": 0.9652518854804722, "learning_rate": 8.948252610434213e-07, "loss": 0.0557, "step": 7866 }, { "epoch": 1.7899886234357223, "grad_norm": 2.246060745645692, "learning_rate": 8.947446627217669e-07, "loss": 0.0764, "step": 7867 }, { "epoch": 1.790216154721274, "grad_norm": 1.3878867152062166, "learning_rate": 8.946640588870254e-07, "loss": 0.1098, "step": 7868 }, { "epoch": 1.7904436860068258, "grad_norm": 1.5529127906610505, "learning_rate": 8.945834495408447e-07, "loss": 0.0889, "step": 7869 }, { "epoch": 1.7906712172923775, "grad_norm": 1.2415713797537098, "learning_rate": 8.945028346848718e-07, "loss": 0.1012, "step": 7870 }, { "epoch": 1.7908987485779293, "grad_norm": 1.7786519501363127, "learning_rate": 8.944222143207545e-07, "loss": 0.099, "step": 7871 }, { "epoch": 1.7911262798634813, "grad_norm": 1.4685412189747176, "learning_rate": 8.943415884501407e-07, "loss": 0.0719, "step": 7872 }, { "epoch": 1.791353811149033, "grad_norm": 1.8654576674097252, "learning_rate": 8.942609570746781e-07, "loss": 0.1275, "step": 7873 }, { "epoch": 1.7915813424345848, "grad_norm": 1.591036046248092, "learning_rate": 8.941803201960146e-07, "loss": 0.0888, "step": 7874 }, { "epoch": 1.7918088737201365, "grad_norm": 1.6317920806584023, "learning_rate": 8.940996778157983e-07, "loss": 0.1372, "step": 7875 }, { "epoch": 1.7920364050056883, "grad_norm": 1.6429130913792436, "learning_rate": 8.940190299356774e-07, "loss": 0.128, "step": 7876 }, { "epoch": 1.79226393629124, "grad_norm": 2.1205434237500875, "learning_rate": 8.939383765573004e-07, "loss": 0.1155, "step": 7877 }, { "epoch": 1.7924914675767918, "grad_norm": 1.0015403399612886, "learning_rate": 8.938577176823154e-07, "loss": 0.062, "step": 7878 }, { "epoch": 1.7927189988623435, "grad_norm": 1.5861665019909275, "learning_rate": 8.93777053312371e-07, "loss": 0.0942, "step": 7879 }, { "epoch": 1.7929465301478953, "grad_norm": 2.0196755272152225, "learning_rate": 8.936963834491161e-07, "loss": 0.0852, "step": 7880 }, { "epoch": 1.7931740614334473, "grad_norm": 1.5719758145617422, "learning_rate": 8.93615708094199e-07, "loss": 0.0635, "step": 7881 }, { "epoch": 1.793401592718999, "grad_norm": 2.7461205577430485, "learning_rate": 8.935350272492687e-07, "loss": 0.1522, "step": 7882 }, { "epoch": 1.7936291240045508, "grad_norm": 1.5693797211524632, "learning_rate": 8.934543409159743e-07, "loss": 0.0636, "step": 7883 }, { "epoch": 1.7938566552901025, "grad_norm": 1.2564947826522308, "learning_rate": 8.933736490959649e-07, "loss": 0.0618, "step": 7884 }, { "epoch": 1.7940841865756543, "grad_norm": 1.1241818055947628, "learning_rate": 8.932929517908896e-07, "loss": 0.0482, "step": 7885 }, { "epoch": 1.794311717861206, "grad_norm": 1.4114770467917908, "learning_rate": 8.932122490023977e-07, "loss": 0.096, "step": 7886 }, { "epoch": 1.7945392491467578, "grad_norm": 2.1205223878064157, "learning_rate": 8.931315407321387e-07, "loss": 0.117, "step": 7887 }, { "epoch": 1.7947667804323095, "grad_norm": 1.5491352131872387, "learning_rate": 8.93050826981762e-07, "loss": 0.1023, "step": 7888 }, { "epoch": 1.7949943117178613, "grad_norm": 1.2732550601865582, "learning_rate": 8.929701077529173e-07, "loss": 0.0614, "step": 7889 }, { "epoch": 1.795221843003413, "grad_norm": 2.7047939410685093, "learning_rate": 8.928893830472544e-07, "loss": 0.0976, "step": 7890 }, { "epoch": 1.7954493742889648, "grad_norm": 1.5563267855594476, "learning_rate": 8.92808652866423e-07, "loss": 0.0975, "step": 7891 }, { "epoch": 1.7956769055745165, "grad_norm": 1.3613707002326496, "learning_rate": 8.927279172120734e-07, "loss": 0.0786, "step": 7892 }, { "epoch": 1.7959044368600683, "grad_norm": 2.8711013984874154, "learning_rate": 8.926471760858554e-07, "loss": 0.1526, "step": 7893 }, { "epoch": 1.79613196814562, "grad_norm": 1.5532830349646967, "learning_rate": 8.925664294894193e-07, "loss": 0.1193, "step": 7894 }, { "epoch": 1.7963594994311718, "grad_norm": 1.5363693463316634, "learning_rate": 8.924856774244154e-07, "loss": 0.124, "step": 7895 }, { "epoch": 1.7965870307167235, "grad_norm": 1.4901127201141373, "learning_rate": 8.92404919892494e-07, "loss": 0.0796, "step": 7896 }, { "epoch": 1.7968145620022753, "grad_norm": 1.433497031131219, "learning_rate": 8.923241568953061e-07, "loss": 0.13, "step": 7897 }, { "epoch": 1.797042093287827, "grad_norm": 1.9449335414827038, "learning_rate": 8.922433884345018e-07, "loss": 0.0619, "step": 7898 }, { "epoch": 1.7972696245733788, "grad_norm": 1.275474603473048, "learning_rate": 8.921626145117321e-07, "loss": 0.1032, "step": 7899 }, { "epoch": 1.7974971558589306, "grad_norm": 2.121545544925719, "learning_rate": 8.920818351286479e-07, "loss": 0.1586, "step": 7900 }, { "epoch": 1.7977246871444823, "grad_norm": 2.4854523522884606, "learning_rate": 8.920010502868999e-07, "loss": 0.1011, "step": 7901 }, { "epoch": 1.797952218430034, "grad_norm": 1.8256396602848413, "learning_rate": 8.919202599881395e-07, "loss": 0.0729, "step": 7902 }, { "epoch": 1.7981797497155858, "grad_norm": 2.1225175531463947, "learning_rate": 8.918394642340179e-07, "loss": 0.0888, "step": 7903 }, { "epoch": 1.7984072810011376, "grad_norm": 1.1052579331476957, "learning_rate": 8.917586630261864e-07, "loss": 0.052, "step": 7904 }, { "epoch": 1.7986348122866893, "grad_norm": 2.6200628722142207, "learning_rate": 8.916778563662963e-07, "loss": 0.1277, "step": 7905 }, { "epoch": 1.798862343572241, "grad_norm": 1.9287472476517964, "learning_rate": 8.915970442559993e-07, "loss": 0.0981, "step": 7906 }, { "epoch": 1.7990898748577928, "grad_norm": 1.189291751694118, "learning_rate": 8.915162266969469e-07, "loss": 0.0909, "step": 7907 }, { "epoch": 1.7993174061433446, "grad_norm": 1.618233610564045, "learning_rate": 8.91435403690791e-07, "loss": 0.0687, "step": 7908 }, { "epoch": 1.7995449374288963, "grad_norm": 2.8755569609856084, "learning_rate": 8.913545752391832e-07, "loss": 0.0838, "step": 7909 }, { "epoch": 1.799772468714448, "grad_norm": 3.768457228350488, "learning_rate": 8.912737413437758e-07, "loss": 0.0996, "step": 7910 }, { "epoch": 1.8, "grad_norm": 1.2436444240934987, "learning_rate": 8.91192902006221e-07, "loss": 0.0592, "step": 7911 }, { "epoch": 1.8002275312855518, "grad_norm": 1.3041810375702834, "learning_rate": 8.911120572281705e-07, "loss": 0.0754, "step": 7912 }, { "epoch": 1.8004550625711035, "grad_norm": 1.6467738523949054, "learning_rate": 8.91031207011277e-07, "loss": 0.073, "step": 7913 }, { "epoch": 1.8006825938566553, "grad_norm": 1.8420486910028369, "learning_rate": 8.90950351357193e-07, "loss": 0.054, "step": 7914 }, { "epoch": 1.800910125142207, "grad_norm": 1.916477915303434, "learning_rate": 8.908694902675706e-07, "loss": 0.1207, "step": 7915 }, { "epoch": 1.8011376564277588, "grad_norm": 1.0329327989450314, "learning_rate": 8.907886237440627e-07, "loss": 0.0659, "step": 7916 }, { "epoch": 1.8013651877133106, "grad_norm": 2.55890915319135, "learning_rate": 8.907077517883225e-07, "loss": 0.1255, "step": 7917 }, { "epoch": 1.8015927189988623, "grad_norm": 1.0708527594861654, "learning_rate": 8.906268744020022e-07, "loss": 0.1292, "step": 7918 }, { "epoch": 1.801820250284414, "grad_norm": 1.768678572289282, "learning_rate": 8.905459915867551e-07, "loss": 0.1178, "step": 7919 }, { "epoch": 1.802047781569966, "grad_norm": 1.7701801384501021, "learning_rate": 8.904651033442342e-07, "loss": 0.0865, "step": 7920 }, { "epoch": 1.8022753128555178, "grad_norm": 1.209791177338225, "learning_rate": 8.903842096760929e-07, "loss": 0.13, "step": 7921 }, { "epoch": 1.8025028441410695, "grad_norm": 1.9309745239711726, "learning_rate": 8.903033105839842e-07, "loss": 0.1219, "step": 7922 }, { "epoch": 1.8027303754266213, "grad_norm": 1.791549375354808, "learning_rate": 8.902224060695619e-07, "loss": 0.1401, "step": 7923 }, { "epoch": 1.802957906712173, "grad_norm": 2.2309786352695107, "learning_rate": 8.901414961344792e-07, "loss": 0.1285, "step": 7924 }, { "epoch": 1.8031854379977248, "grad_norm": 1.5413508883349476, "learning_rate": 8.900605807803901e-07, "loss": 0.0976, "step": 7925 }, { "epoch": 1.8034129692832765, "grad_norm": 2.2230731549669596, "learning_rate": 8.89979660008948e-07, "loss": 0.1132, "step": 7926 }, { "epoch": 1.8036405005688283, "grad_norm": 1.6901971188862963, "learning_rate": 8.898987338218069e-07, "loss": 0.161, "step": 7927 }, { "epoch": 1.80386803185438, "grad_norm": 1.562612297701134, "learning_rate": 8.89817802220621e-07, "loss": 0.0824, "step": 7928 }, { "epoch": 1.8040955631399318, "grad_norm": 1.886126267735738, "learning_rate": 8.89736865207044e-07, "loss": 0.1123, "step": 7929 }, { "epoch": 1.8043230944254836, "grad_norm": 1.2601843398994992, "learning_rate": 8.896559227827305e-07, "loss": 0.0966, "step": 7930 }, { "epoch": 1.8045506257110353, "grad_norm": 1.6232214146708215, "learning_rate": 8.895749749493346e-07, "loss": 0.0818, "step": 7931 }, { "epoch": 1.804778156996587, "grad_norm": 1.398048235791363, "learning_rate": 8.894940217085106e-07, "loss": 0.048, "step": 7932 }, { "epoch": 1.8050056882821388, "grad_norm": 1.5243613828322313, "learning_rate": 8.894130630619133e-07, "loss": 0.0731, "step": 7933 }, { "epoch": 1.8052332195676906, "grad_norm": 1.1676191057862337, "learning_rate": 8.893320990111972e-07, "loss": 0.0715, "step": 7934 }, { "epoch": 1.8054607508532423, "grad_norm": 1.5462430254914166, "learning_rate": 8.892511295580172e-07, "loss": 0.1072, "step": 7935 }, { "epoch": 1.805688282138794, "grad_norm": 1.9019514172502328, "learning_rate": 8.891701547040281e-07, "loss": 0.1753, "step": 7936 }, { "epoch": 1.8059158134243458, "grad_norm": 1.9237291621109978, "learning_rate": 8.890891744508847e-07, "loss": 0.0644, "step": 7937 }, { "epoch": 1.8061433447098976, "grad_norm": 2.5070157979610697, "learning_rate": 8.890081888002424e-07, "loss": 0.1119, "step": 7938 }, { "epoch": 1.8063708759954493, "grad_norm": 1.9063518086360378, "learning_rate": 8.88927197753756e-07, "loss": 0.069, "step": 7939 }, { "epoch": 1.806598407281001, "grad_norm": 1.2918093421228658, "learning_rate": 8.888462013130811e-07, "loss": 0.0747, "step": 7940 }, { "epoch": 1.8068259385665528, "grad_norm": 1.445252862411054, "learning_rate": 8.887651994798732e-07, "loss": 0.1199, "step": 7941 }, { "epoch": 1.8070534698521046, "grad_norm": 2.3338068209044995, "learning_rate": 8.886841922557876e-07, "loss": 0.0807, "step": 7942 }, { "epoch": 1.8072810011376563, "grad_norm": 2.4976203732381865, "learning_rate": 8.8860317964248e-07, "loss": 0.0824, "step": 7943 }, { "epoch": 1.807508532423208, "grad_norm": 1.4425565983008646, "learning_rate": 8.885221616416063e-07, "loss": 0.1075, "step": 7944 }, { "epoch": 1.8077360637087598, "grad_norm": 1.3669743032172474, "learning_rate": 8.884411382548221e-07, "loss": 0.0789, "step": 7945 }, { "epoch": 1.8079635949943116, "grad_norm": 2.624662078998394, "learning_rate": 8.883601094837835e-07, "loss": 0.1702, "step": 7946 }, { "epoch": 1.8081911262798633, "grad_norm": 2.4385008822736336, "learning_rate": 8.882790753301465e-07, "loss": 0.1041, "step": 7947 }, { "epoch": 1.808418657565415, "grad_norm": 1.8857003325098818, "learning_rate": 8.881980357955676e-07, "loss": 0.0705, "step": 7948 }, { "epoch": 1.808646188850967, "grad_norm": 2.2324107717841972, "learning_rate": 8.881169908817028e-07, "loss": 0.1169, "step": 7949 }, { "epoch": 1.8088737201365188, "grad_norm": 1.98085870686822, "learning_rate": 8.880359405902085e-07, "loss": 0.0931, "step": 7950 }, { "epoch": 1.8091012514220706, "grad_norm": 1.7089610091975043, "learning_rate": 8.879548849227413e-07, "loss": 0.0488, "step": 7951 }, { "epoch": 1.8093287827076223, "grad_norm": 2.205016197511881, "learning_rate": 8.87873823880958e-07, "loss": 0.1264, "step": 7952 }, { "epoch": 1.809556313993174, "grad_norm": 2.425531264655183, "learning_rate": 8.877927574665149e-07, "loss": 0.0676, "step": 7953 }, { "epoch": 1.8097838452787258, "grad_norm": 1.9803598255840258, "learning_rate": 8.877116856810693e-07, "loss": 0.1153, "step": 7954 }, { "epoch": 1.8100113765642776, "grad_norm": 1.49947563040702, "learning_rate": 8.876306085262781e-07, "loss": 0.0548, "step": 7955 }, { "epoch": 1.8102389078498293, "grad_norm": 2.0261667389538323, "learning_rate": 8.875495260037979e-07, "loss": 0.1146, "step": 7956 }, { "epoch": 1.810466439135381, "grad_norm": 2.1813142108063803, "learning_rate": 8.874684381152865e-07, "loss": 0.0813, "step": 7957 }, { "epoch": 1.8106939704209328, "grad_norm": 1.4894661526116098, "learning_rate": 8.873873448624008e-07, "loss": 0.071, "step": 7958 }, { "epoch": 1.8109215017064848, "grad_norm": 1.9943222444261415, "learning_rate": 8.873062462467983e-07, "loss": 0.0804, "step": 7959 }, { "epoch": 1.8111490329920366, "grad_norm": 1.5808149503128233, "learning_rate": 8.872251422701366e-07, "loss": 0.066, "step": 7960 }, { "epoch": 1.8113765642775883, "grad_norm": 1.6329075256511885, "learning_rate": 8.871440329340733e-07, "loss": 0.0919, "step": 7961 }, { "epoch": 1.81160409556314, "grad_norm": 1.374945952782231, "learning_rate": 8.870629182402659e-07, "loss": 0.1439, "step": 7962 }, { "epoch": 1.8118316268486918, "grad_norm": 1.819629299014224, "learning_rate": 8.869817981903725e-07, "loss": 0.1289, "step": 7963 }, { "epoch": 1.8120591581342436, "grad_norm": 1.9037894605610242, "learning_rate": 8.869006727860508e-07, "loss": 0.1254, "step": 7964 }, { "epoch": 1.8122866894197953, "grad_norm": 2.6305291415119134, "learning_rate": 8.868195420289591e-07, "loss": 0.0888, "step": 7965 }, { "epoch": 1.812514220705347, "grad_norm": 1.2718606094342204, "learning_rate": 8.867384059207554e-07, "loss": 0.0732, "step": 7966 }, { "epoch": 1.8127417519908988, "grad_norm": 1.7677749741199136, "learning_rate": 8.86657264463098e-07, "loss": 0.0579, "step": 7967 }, { "epoch": 1.8129692832764506, "grad_norm": 2.6051676714289633, "learning_rate": 8.865761176576457e-07, "loss": 0.1816, "step": 7968 }, { "epoch": 1.8131968145620023, "grad_norm": 2.5904520494446706, "learning_rate": 8.864949655060562e-07, "loss": 0.068, "step": 7969 }, { "epoch": 1.813424345847554, "grad_norm": 2.402814540167061, "learning_rate": 8.864138080099886e-07, "loss": 0.1039, "step": 7970 }, { "epoch": 1.8136518771331058, "grad_norm": 1.413902540697436, "learning_rate": 8.863326451711015e-07, "loss": 0.0425, "step": 7971 }, { "epoch": 1.8138794084186576, "grad_norm": 1.9231350711585855, "learning_rate": 8.862514769910538e-07, "loss": 0.0771, "step": 7972 }, { "epoch": 1.8141069397042093, "grad_norm": 1.4378931294990538, "learning_rate": 8.861703034715042e-07, "loss": 0.0451, "step": 7973 }, { "epoch": 1.814334470989761, "grad_norm": 1.5674244634740826, "learning_rate": 8.860891246141123e-07, "loss": 0.068, "step": 7974 }, { "epoch": 1.8145620022753128, "grad_norm": 1.8947060156558089, "learning_rate": 8.860079404205367e-07, "loss": 0.06, "step": 7975 }, { "epoch": 1.8147895335608646, "grad_norm": 1.404481266144173, "learning_rate": 8.859267508924366e-07, "loss": 0.0756, "step": 7976 }, { "epoch": 1.8150170648464163, "grad_norm": 2.189011388774002, "learning_rate": 8.858455560314718e-07, "loss": 0.0879, "step": 7977 }, { "epoch": 1.815244596131968, "grad_norm": 1.5985167291388305, "learning_rate": 8.857643558393015e-07, "loss": 0.0925, "step": 7978 }, { "epoch": 1.8154721274175198, "grad_norm": 1.6872714253032772, "learning_rate": 8.856831503175852e-07, "loss": 0.1196, "step": 7979 }, { "epoch": 1.8156996587030716, "grad_norm": 2.412942583341033, "learning_rate": 8.856019394679828e-07, "loss": 0.088, "step": 7980 }, { "epoch": 1.8159271899886233, "grad_norm": 2.059832124378441, "learning_rate": 8.855207232921541e-07, "loss": 0.1585, "step": 7981 }, { "epoch": 1.816154721274175, "grad_norm": 1.8426611964824577, "learning_rate": 8.854395017917588e-07, "loss": 0.1427, "step": 7982 }, { "epoch": 1.8163822525597269, "grad_norm": 1.3887679776448114, "learning_rate": 8.853582749684571e-07, "loss": 0.0922, "step": 7983 }, { "epoch": 1.8166097838452786, "grad_norm": 2.2636010215981512, "learning_rate": 8.852770428239091e-07, "loss": 0.0895, "step": 7984 }, { "epoch": 1.8168373151308304, "grad_norm": 1.921207164884614, "learning_rate": 8.851958053597751e-07, "loss": 0.117, "step": 7985 }, { "epoch": 1.817064846416382, "grad_norm": 1.586916399892704, "learning_rate": 8.851145625777153e-07, "loss": 0.0935, "step": 7986 }, { "epoch": 1.8172923777019339, "grad_norm": 1.35753517250393, "learning_rate": 8.850333144793903e-07, "loss": 0.0597, "step": 7987 }, { "epoch": 1.8175199089874858, "grad_norm": 1.5035937327144726, "learning_rate": 8.849520610664605e-07, "loss": 0.0486, "step": 7988 }, { "epoch": 1.8177474402730376, "grad_norm": 1.9056294709428654, "learning_rate": 8.848708023405866e-07, "loss": 0.1184, "step": 7989 }, { "epoch": 1.8179749715585893, "grad_norm": 2.2830969435991455, "learning_rate": 8.847895383034294e-07, "loss": 0.0697, "step": 7990 }, { "epoch": 1.818202502844141, "grad_norm": 1.758115942769437, "learning_rate": 8.847082689566499e-07, "loss": 0.0525, "step": 7991 }, { "epoch": 1.8184300341296928, "grad_norm": 2.331603106136869, "learning_rate": 8.846269943019091e-07, "loss": 0.0888, "step": 7992 }, { "epoch": 1.8186575654152446, "grad_norm": 1.5640397677378846, "learning_rate": 8.84545714340868e-07, "loss": 0.1167, "step": 7993 }, { "epoch": 1.8188850967007963, "grad_norm": 2.090933926102918, "learning_rate": 8.844644290751877e-07, "loss": 0.1279, "step": 7994 }, { "epoch": 1.819112627986348, "grad_norm": 1.8004666049038967, "learning_rate": 8.843831385065298e-07, "loss": 0.0983, "step": 7995 }, { "epoch": 1.8193401592718998, "grad_norm": 1.6621509552129172, "learning_rate": 8.843018426365555e-07, "loss": 0.1476, "step": 7996 }, { "epoch": 1.8195676905574516, "grad_norm": 1.3210004176459789, "learning_rate": 8.842205414669264e-07, "loss": 0.0324, "step": 7997 }, { "epoch": 1.8197952218430036, "grad_norm": 1.7428824299417647, "learning_rate": 8.841392349993041e-07, "loss": 0.0792, "step": 7998 }, { "epoch": 1.8200227531285553, "grad_norm": 0.9945653597861828, "learning_rate": 8.840579232353506e-07, "loss": 0.0595, "step": 7999 }, { "epoch": 1.820250284414107, "grad_norm": 1.518949966756168, "learning_rate": 8.839766061767277e-07, "loss": 0.1414, "step": 8000 }, { "epoch": 1.8204778156996588, "grad_norm": 1.2545089380132042, "learning_rate": 8.838952838250971e-07, "loss": 0.0652, "step": 8001 }, { "epoch": 1.8207053469852106, "grad_norm": 1.682902077980011, "learning_rate": 8.83813956182121e-07, "loss": 0.116, "step": 8002 }, { "epoch": 1.8209328782707623, "grad_norm": 1.7098678771979696, "learning_rate": 8.837326232494616e-07, "loss": 0.0622, "step": 8003 }, { "epoch": 1.821160409556314, "grad_norm": 2.246667661732211, "learning_rate": 8.836512850287812e-07, "loss": 0.0899, "step": 8004 }, { "epoch": 1.8213879408418658, "grad_norm": 2.090778727283746, "learning_rate": 8.835699415217425e-07, "loss": 0.1351, "step": 8005 }, { "epoch": 1.8216154721274176, "grad_norm": 1.9646684357539137, "learning_rate": 8.834885927300075e-07, "loss": 0.0804, "step": 8006 }, { "epoch": 1.8218430034129693, "grad_norm": 2.5359712036374513, "learning_rate": 8.834072386552392e-07, "loss": 0.1094, "step": 8007 }, { "epoch": 1.822070534698521, "grad_norm": 1.79114517493048, "learning_rate": 8.833258792991001e-07, "loss": 0.1098, "step": 8008 }, { "epoch": 1.8222980659840728, "grad_norm": 2.1882338427516426, "learning_rate": 8.832445146632531e-07, "loss": 0.0914, "step": 8009 }, { "epoch": 1.8225255972696246, "grad_norm": 1.6716849762117383, "learning_rate": 8.831631447493612e-07, "loss": 0.137, "step": 8010 }, { "epoch": 1.8227531285551763, "grad_norm": 1.9983528698245776, "learning_rate": 8.830817695590875e-07, "loss": 0.119, "step": 8011 }, { "epoch": 1.822980659840728, "grad_norm": 1.1983879720021489, "learning_rate": 8.830003890940953e-07, "loss": 0.045, "step": 8012 }, { "epoch": 1.8232081911262799, "grad_norm": 2.504799737236628, "learning_rate": 8.829190033560473e-07, "loss": 0.0951, "step": 8013 }, { "epoch": 1.8234357224118316, "grad_norm": 2.326452595122347, "learning_rate": 8.828376123466072e-07, "loss": 0.0797, "step": 8014 }, { "epoch": 1.8236632536973834, "grad_norm": 1.3881241943485159, "learning_rate": 8.827562160674387e-07, "loss": 0.0842, "step": 8015 }, { "epoch": 1.823890784982935, "grad_norm": 1.8278684346169267, "learning_rate": 8.826748145202052e-07, "loss": 0.1022, "step": 8016 }, { "epoch": 1.8241183162684869, "grad_norm": 1.147248074416315, "learning_rate": 8.8259340770657e-07, "loss": 0.0953, "step": 8017 }, { "epoch": 1.8243458475540386, "grad_norm": 2.0274129863643586, "learning_rate": 8.82511995628198e-07, "loss": 0.1525, "step": 8018 }, { "epoch": 1.8245733788395904, "grad_norm": 1.6139035468352776, "learning_rate": 8.824305782867521e-07, "loss": 0.0783, "step": 8019 }, { "epoch": 1.8248009101251421, "grad_norm": 1.6086703437171743, "learning_rate": 8.823491556838964e-07, "loss": 0.121, "step": 8020 }, { "epoch": 1.8250284414106939, "grad_norm": 2.12903072394807, "learning_rate": 8.822677278212955e-07, "loss": 0.0896, "step": 8021 }, { "epoch": 1.8252559726962456, "grad_norm": 1.955779110294756, "learning_rate": 8.821862947006134e-07, "loss": 0.0823, "step": 8022 }, { "epoch": 1.8254835039817974, "grad_norm": 2.025028737283517, "learning_rate": 8.821048563235143e-07, "loss": 0.1314, "step": 8023 }, { "epoch": 1.8257110352673491, "grad_norm": 1.3606475345993916, "learning_rate": 8.820234126916631e-07, "loss": 0.0718, "step": 8024 }, { "epoch": 1.8259385665529009, "grad_norm": 1.588216088081773, "learning_rate": 8.81941963806724e-07, "loss": 0.125, "step": 8025 }, { "epoch": 1.8261660978384526, "grad_norm": 1.006748908249921, "learning_rate": 8.818605096703614e-07, "loss": 0.0426, "step": 8026 }, { "epoch": 1.8263936291240046, "grad_norm": 2.261848902386564, "learning_rate": 8.817790502842406e-07, "loss": 0.0784, "step": 8027 }, { "epoch": 1.8266211604095564, "grad_norm": 1.3516735433864935, "learning_rate": 8.816975856500264e-07, "loss": 0.1547, "step": 8028 }, { "epoch": 1.826848691695108, "grad_norm": 1.9302802634188536, "learning_rate": 8.816161157693837e-07, "loss": 0.1135, "step": 8029 }, { "epoch": 1.8270762229806599, "grad_norm": 2.551159549795896, "learning_rate": 8.815346406439773e-07, "loss": 0.1004, "step": 8030 }, { "epoch": 1.8273037542662116, "grad_norm": 3.3378461756304882, "learning_rate": 8.814531602754728e-07, "loss": 0.0821, "step": 8031 }, { "epoch": 1.8275312855517634, "grad_norm": 1.7083657595140067, "learning_rate": 8.813716746655354e-07, "loss": 0.0905, "step": 8032 }, { "epoch": 1.8277588168373151, "grad_norm": 1.9588276728965104, "learning_rate": 8.812901838158304e-07, "loss": 0.1355, "step": 8033 }, { "epoch": 1.8279863481228669, "grad_norm": 1.266204130784061, "learning_rate": 8.812086877280234e-07, "loss": 0.0404, "step": 8034 }, { "epoch": 1.8282138794084186, "grad_norm": 2.178652141034485, "learning_rate": 8.811271864037802e-07, "loss": 0.0816, "step": 8035 }, { "epoch": 1.8284414106939704, "grad_norm": 1.7132494402020864, "learning_rate": 8.810456798447664e-07, "loss": 0.0716, "step": 8036 }, { "epoch": 1.8286689419795223, "grad_norm": 1.8796367243061582, "learning_rate": 8.809641680526477e-07, "loss": 0.1086, "step": 8037 }, { "epoch": 1.828896473265074, "grad_norm": 2.8230478404980235, "learning_rate": 8.808826510290904e-07, "loss": 0.0811, "step": 8038 }, { "epoch": 1.8291240045506258, "grad_norm": 1.4886710711072335, "learning_rate": 8.808011287757601e-07, "loss": 0.0494, "step": 8039 }, { "epoch": 1.8293515358361776, "grad_norm": 1.4082184039834797, "learning_rate": 8.807196012943231e-07, "loss": 0.0808, "step": 8040 }, { "epoch": 1.8295790671217294, "grad_norm": 1.8820887098503434, "learning_rate": 8.80638068586446e-07, "loss": 0.0786, "step": 8041 }, { "epoch": 1.829806598407281, "grad_norm": 1.791583784109299, "learning_rate": 8.805565306537949e-07, "loss": 0.1724, "step": 8042 }, { "epoch": 1.8300341296928329, "grad_norm": 2.0567022001992714, "learning_rate": 8.804749874980364e-07, "loss": 0.1747, "step": 8043 }, { "epoch": 1.8302616609783846, "grad_norm": 1.5407941639122054, "learning_rate": 8.80393439120837e-07, "loss": 0.0866, "step": 8044 }, { "epoch": 1.8304891922639364, "grad_norm": 3.2108287944549856, "learning_rate": 8.803118855238635e-07, "loss": 0.139, "step": 8045 }, { "epoch": 1.8307167235494881, "grad_norm": 1.571261699714713, "learning_rate": 8.802303267087825e-07, "loss": 0.1423, "step": 8046 }, { "epoch": 1.8309442548350399, "grad_norm": 2.204022760838511, "learning_rate": 8.801487626772611e-07, "loss": 0.0613, "step": 8047 }, { "epoch": 1.8311717861205916, "grad_norm": 1.4603157962210571, "learning_rate": 8.800671934309663e-07, "loss": 0.1155, "step": 8048 }, { "epoch": 1.8313993174061434, "grad_norm": 2.439806649731172, "learning_rate": 8.799856189715653e-07, "loss": 0.0698, "step": 8049 }, { "epoch": 1.8316268486916951, "grad_norm": 3.8365577402784767, "learning_rate": 8.79904039300725e-07, "loss": 0.095, "step": 8050 }, { "epoch": 1.8318543799772469, "grad_norm": 2.871749940142669, "learning_rate": 8.798224544201132e-07, "loss": 0.0761, "step": 8051 }, { "epoch": 1.8320819112627986, "grad_norm": 3.2286627056288624, "learning_rate": 8.79740864331397e-07, "loss": 0.1399, "step": 8052 }, { "epoch": 1.8323094425483504, "grad_norm": 1.04856166067485, "learning_rate": 8.796592690362439e-07, "loss": 0.0368, "step": 8053 }, { "epoch": 1.8325369738339021, "grad_norm": 1.2119852843619046, "learning_rate": 8.795776685363219e-07, "loss": 0.0695, "step": 8054 }, { "epoch": 1.8327645051194539, "grad_norm": 1.6382039063021019, "learning_rate": 8.794960628332986e-07, "loss": 0.0906, "step": 8055 }, { "epoch": 1.8329920364050056, "grad_norm": 1.2671269042796347, "learning_rate": 8.794144519288419e-07, "loss": 0.0773, "step": 8056 }, { "epoch": 1.8332195676905574, "grad_norm": 1.8428690789141955, "learning_rate": 8.793328358246198e-07, "loss": 0.2364, "step": 8057 }, { "epoch": 1.8334470989761091, "grad_norm": 1.4422639160578108, "learning_rate": 8.792512145223002e-07, "loss": 0.0852, "step": 8058 }, { "epoch": 1.8336746302616609, "grad_norm": 1.0749626632834002, "learning_rate": 8.791695880235515e-07, "loss": 0.0813, "step": 8059 }, { "epoch": 1.8339021615472126, "grad_norm": 1.8125034413887882, "learning_rate": 8.790879563300416e-07, "loss": 0.2156, "step": 8060 }, { "epoch": 1.8341296928327644, "grad_norm": 1.5235577300099732, "learning_rate": 8.790063194434397e-07, "loss": 0.0545, "step": 8061 }, { "epoch": 1.8343572241183161, "grad_norm": 2.187927437051437, "learning_rate": 8.789246773654136e-07, "loss": 0.1461, "step": 8062 }, { "epoch": 1.834584755403868, "grad_norm": 1.2562342000446287, "learning_rate": 8.788430300976321e-07, "loss": 0.0845, "step": 8063 }, { "epoch": 1.8348122866894196, "grad_norm": 2.5351377175024266, "learning_rate": 8.787613776417639e-07, "loss": 0.1025, "step": 8064 }, { "epoch": 1.8350398179749714, "grad_norm": 1.6372928328601253, "learning_rate": 8.78679719999478e-07, "loss": 0.0626, "step": 8065 }, { "epoch": 1.8352673492605234, "grad_norm": 1.9498306238851026, "learning_rate": 8.785980571724433e-07, "loss": 0.0868, "step": 8066 }, { "epoch": 1.8354948805460751, "grad_norm": 1.1706345924977686, "learning_rate": 8.785163891623284e-07, "loss": 0.0475, "step": 8067 }, { "epoch": 1.8357224118316269, "grad_norm": 1.5229949811289203, "learning_rate": 8.784347159708033e-07, "loss": 0.0921, "step": 8068 }, { "epoch": 1.8359499431171786, "grad_norm": 2.0738291750855704, "learning_rate": 8.783530375995364e-07, "loss": 0.0811, "step": 8069 }, { "epoch": 1.8361774744027304, "grad_norm": 2.767989298322564, "learning_rate": 8.782713540501975e-07, "loss": 0.0767, "step": 8070 }, { "epoch": 1.8364050056882821, "grad_norm": 1.8211505086205266, "learning_rate": 8.78189665324456e-07, "loss": 0.1038, "step": 8071 }, { "epoch": 1.8366325369738339, "grad_norm": 1.1072562267315125, "learning_rate": 8.781079714239812e-07, "loss": 0.1055, "step": 8072 }, { "epoch": 1.8368600682593856, "grad_norm": 2.080814912516335, "learning_rate": 8.780262723504434e-07, "loss": 0.1409, "step": 8073 }, { "epoch": 1.8370875995449374, "grad_norm": 2.72947192873807, "learning_rate": 8.779445681055115e-07, "loss": 0.0946, "step": 8074 }, { "epoch": 1.8373151308304891, "grad_norm": 1.5289441377360764, "learning_rate": 8.778628586908563e-07, "loss": 0.1339, "step": 8075 }, { "epoch": 1.8375426621160411, "grad_norm": 1.8774735399981626, "learning_rate": 8.777811441081475e-07, "loss": 0.1135, "step": 8076 }, { "epoch": 1.8377701934015929, "grad_norm": 1.7574623462253978, "learning_rate": 8.776994243590545e-07, "loss": 0.0762, "step": 8077 }, { "epoch": 1.8379977246871446, "grad_norm": 1.6150293370965112, "learning_rate": 8.776176994452485e-07, "loss": 0.0845, "step": 8078 }, { "epoch": 1.8382252559726964, "grad_norm": 2.5156319072151168, "learning_rate": 8.775359693683991e-07, "loss": 0.1245, "step": 8079 }, { "epoch": 1.8384527872582481, "grad_norm": 1.4177266066688612, "learning_rate": 8.77454234130177e-07, "loss": 0.1527, "step": 8080 }, { "epoch": 1.8386803185437999, "grad_norm": 1.528124365633614, "learning_rate": 8.773724937322531e-07, "loss": 0.1792, "step": 8081 }, { "epoch": 1.8389078498293516, "grad_norm": 1.6384604180830433, "learning_rate": 8.772907481762973e-07, "loss": 0.1178, "step": 8082 }, { "epoch": 1.8391353811149034, "grad_norm": 1.4534282416732556, "learning_rate": 8.772089974639806e-07, "loss": 0.1404, "step": 8083 }, { "epoch": 1.8393629124004551, "grad_norm": 2.137834308161238, "learning_rate": 8.77127241596974e-07, "loss": 0.0846, "step": 8084 }, { "epoch": 1.8395904436860069, "grad_norm": 1.3227030583321073, "learning_rate": 8.770454805769482e-07, "loss": 0.0343, "step": 8085 }, { "epoch": 1.8398179749715586, "grad_norm": 2.437576573768376, "learning_rate": 8.769637144055745e-07, "loss": 0.1037, "step": 8086 }, { "epoch": 1.8400455062571104, "grad_norm": 2.089350321464631, "learning_rate": 8.768819430845241e-07, "loss": 0.0672, "step": 8087 }, { "epoch": 1.8402730375426621, "grad_norm": 1.185033433378712, "learning_rate": 8.768001666154678e-07, "loss": 0.1027, "step": 8088 }, { "epoch": 1.840500568828214, "grad_norm": 1.5535944254783345, "learning_rate": 8.767183850000774e-07, "loss": 0.0863, "step": 8089 }, { "epoch": 1.8407281001137656, "grad_norm": 1.9827049694014909, "learning_rate": 8.76636598240024e-07, "loss": 0.057, "step": 8090 }, { "epoch": 1.8409556313993174, "grad_norm": 1.883304759862909, "learning_rate": 8.765548063369795e-07, "loss": 0.1045, "step": 8091 }, { "epoch": 1.8411831626848691, "grad_norm": 2.435086253809783, "learning_rate": 8.764730092926156e-07, "loss": 0.14, "step": 8092 }, { "epoch": 1.841410693970421, "grad_norm": 2.169725599820099, "learning_rate": 8.763912071086039e-07, "loss": 0.0668, "step": 8093 }, { "epoch": 1.8416382252559726, "grad_norm": 3.0719306843376613, "learning_rate": 8.763093997866162e-07, "loss": 0.1174, "step": 8094 }, { "epoch": 1.8418657565415244, "grad_norm": 2.123605048024908, "learning_rate": 8.762275873283248e-07, "loss": 0.0827, "step": 8095 }, { "epoch": 1.8420932878270762, "grad_norm": 1.2168845416302847, "learning_rate": 8.761457697354015e-07, "loss": 0.0356, "step": 8096 }, { "epoch": 1.842320819112628, "grad_norm": 1.9241216472684115, "learning_rate": 8.760639470095185e-07, "loss": 0.0668, "step": 8097 }, { "epoch": 1.8425483503981797, "grad_norm": 1.9828801227804087, "learning_rate": 8.759821191523483e-07, "loss": 0.1372, "step": 8098 }, { "epoch": 1.8427758816837314, "grad_norm": 1.3275853472693522, "learning_rate": 8.759002861655633e-07, "loss": 0.1272, "step": 8099 }, { "epoch": 1.8430034129692832, "grad_norm": 3.054475774822718, "learning_rate": 8.75818448050836e-07, "loss": 0.0964, "step": 8100 }, { "epoch": 1.843230944254835, "grad_norm": 2.6397589735313844, "learning_rate": 8.757366048098389e-07, "loss": 0.1831, "step": 8101 }, { "epoch": 1.8434584755403867, "grad_norm": 2.8524811972808144, "learning_rate": 8.756547564442449e-07, "loss": 0.0668, "step": 8102 }, { "epoch": 1.8436860068259384, "grad_norm": 1.8411565631383815, "learning_rate": 8.755729029557266e-07, "loss": 0.0599, "step": 8103 }, { "epoch": 1.8439135381114902, "grad_norm": 1.7505721085259205, "learning_rate": 8.75491044345957e-07, "loss": 0.1314, "step": 8104 }, { "epoch": 1.8441410693970421, "grad_norm": 1.427770337506339, "learning_rate": 8.754091806166092e-07, "loss": 0.0457, "step": 8105 }, { "epoch": 1.844368600682594, "grad_norm": 1.062378521807983, "learning_rate": 8.753273117693567e-07, "loss": 0.1159, "step": 8106 }, { "epoch": 1.8445961319681456, "grad_norm": 2.1411890853403746, "learning_rate": 8.752454378058721e-07, "loss": 0.1575, "step": 8107 }, { "epoch": 1.8448236632536974, "grad_norm": 1.9368703837820778, "learning_rate": 8.751635587278291e-07, "loss": 0.1211, "step": 8108 }, { "epoch": 1.8450511945392492, "grad_norm": 1.4777317556999319, "learning_rate": 8.750816745369012e-07, "loss": 0.1091, "step": 8109 }, { "epoch": 1.845278725824801, "grad_norm": 2.2453409816350494, "learning_rate": 8.749997852347619e-07, "loss": 0.133, "step": 8110 }, { "epoch": 1.8455062571103527, "grad_norm": 2.5939468446279057, "learning_rate": 8.749178908230845e-07, "loss": 0.0728, "step": 8111 }, { "epoch": 1.8457337883959044, "grad_norm": 2.175685537074212, "learning_rate": 8.748359913035436e-07, "loss": 0.1728, "step": 8112 }, { "epoch": 1.8459613196814562, "grad_norm": 1.0204058719803912, "learning_rate": 8.747540866778124e-07, "loss": 0.0245, "step": 8113 }, { "epoch": 1.846188850967008, "grad_norm": 1.629186033060192, "learning_rate": 8.74672176947565e-07, "loss": 0.0789, "step": 8114 }, { "epoch": 1.8464163822525599, "grad_norm": 1.7530913750858943, "learning_rate": 8.745902621144755e-07, "loss": 0.0708, "step": 8115 }, { "epoch": 1.8466439135381116, "grad_norm": 2.02757935650334, "learning_rate": 8.745083421802183e-07, "loss": 0.0856, "step": 8116 }, { "epoch": 1.8468714448236634, "grad_norm": 1.527710679169608, "learning_rate": 8.744264171464673e-07, "loss": 0.0879, "step": 8117 }, { "epoch": 1.8470989761092151, "grad_norm": 1.3835096798236566, "learning_rate": 8.743444870148974e-07, "loss": 0.0809, "step": 8118 }, { "epoch": 1.847326507394767, "grad_norm": 1.3878336575616186, "learning_rate": 8.742625517871828e-07, "loss": 0.1416, "step": 8119 }, { "epoch": 1.8475540386803186, "grad_norm": 1.5434914988676236, "learning_rate": 8.74180611464998e-07, "loss": 0.0582, "step": 8120 }, { "epoch": 1.8477815699658704, "grad_norm": 1.6401975680980627, "learning_rate": 8.74098666050018e-07, "loss": 0.0453, "step": 8121 }, { "epoch": 1.8480091012514221, "grad_norm": 1.1470988287444377, "learning_rate": 8.740167155439173e-07, "loss": 0.0505, "step": 8122 }, { "epoch": 1.848236632536974, "grad_norm": 1.2615553854964674, "learning_rate": 8.739347599483712e-07, "loss": 0.0671, "step": 8123 }, { "epoch": 1.8484641638225257, "grad_norm": 1.751915429159614, "learning_rate": 8.738527992650542e-07, "loss": 0.1325, "step": 8124 }, { "epoch": 1.8486916951080774, "grad_norm": 1.7203620218025986, "learning_rate": 8.737708334956421e-07, "loss": 0.0897, "step": 8125 }, { "epoch": 1.8489192263936292, "grad_norm": 2.006545073891573, "learning_rate": 8.736888626418097e-07, "loss": 0.0572, "step": 8126 }, { "epoch": 1.849146757679181, "grad_norm": 1.7854996069190847, "learning_rate": 8.736068867052322e-07, "loss": 0.1387, "step": 8127 }, { "epoch": 1.8493742889647327, "grad_norm": 1.5870892229388842, "learning_rate": 8.735249056875852e-07, "loss": 0.1861, "step": 8128 }, { "epoch": 1.8496018202502844, "grad_norm": 2.1956369428640157, "learning_rate": 8.734429195905446e-07, "loss": 0.0882, "step": 8129 }, { "epoch": 1.8498293515358362, "grad_norm": 2.278245974868355, "learning_rate": 8.733609284157855e-07, "loss": 0.1547, "step": 8130 }, { "epoch": 1.850056882821388, "grad_norm": 2.010801737068543, "learning_rate": 8.73278932164984e-07, "loss": 0.066, "step": 8131 }, { "epoch": 1.8502844141069397, "grad_norm": 2.251868297942089, "learning_rate": 8.731969308398158e-07, "loss": 0.0541, "step": 8132 }, { "epoch": 1.8505119453924914, "grad_norm": 0.9874246845573058, "learning_rate": 8.731149244419568e-07, "loss": 0.0544, "step": 8133 }, { "epoch": 1.8507394766780432, "grad_norm": 2.054501652787974, "learning_rate": 8.73032912973083e-07, "loss": 0.1195, "step": 8134 }, { "epoch": 1.850967007963595, "grad_norm": 1.407276108240153, "learning_rate": 8.72950896434871e-07, "loss": 0.0751, "step": 8135 }, { "epoch": 1.8511945392491467, "grad_norm": 1.3362315246063559, "learning_rate": 8.728688748289966e-07, "loss": 0.1127, "step": 8136 }, { "epoch": 1.8514220705346984, "grad_norm": 2.4612186065451125, "learning_rate": 8.727868481571365e-07, "loss": 0.2134, "step": 8137 }, { "epoch": 1.8516496018202502, "grad_norm": 2.1682087913507306, "learning_rate": 8.72704816420967e-07, "loss": 0.1247, "step": 8138 }, { "epoch": 1.851877133105802, "grad_norm": 1.7255568864832886, "learning_rate": 8.726227796221646e-07, "loss": 0.0508, "step": 8139 }, { "epoch": 1.8521046643913537, "grad_norm": 2.407003427099894, "learning_rate": 8.725407377624062e-07, "loss": 0.0689, "step": 8140 }, { "epoch": 1.8523321956769054, "grad_norm": 2.082513378607011, "learning_rate": 8.724586908433682e-07, "loss": 0.0903, "step": 8141 }, { "epoch": 1.8525597269624572, "grad_norm": 1.359523606105818, "learning_rate": 8.72376638866728e-07, "loss": 0.056, "step": 8142 }, { "epoch": 1.852787258248009, "grad_norm": 1.5305548882571467, "learning_rate": 8.722945818341624e-07, "loss": 0.1265, "step": 8143 }, { "epoch": 1.853014789533561, "grad_norm": 1.5834807080288225, "learning_rate": 8.722125197473483e-07, "loss": 0.0803, "step": 8144 }, { "epoch": 1.8532423208191127, "grad_norm": 1.2918790662143784, "learning_rate": 8.721304526079631e-07, "loss": 0.094, "step": 8145 }, { "epoch": 1.8534698521046644, "grad_norm": 1.8366297023243192, "learning_rate": 8.72048380417684e-07, "loss": 0.124, "step": 8146 }, { "epoch": 1.8536973833902162, "grad_norm": 1.2472104565569448, "learning_rate": 8.719663031781884e-07, "loss": 0.0875, "step": 8147 }, { "epoch": 1.853924914675768, "grad_norm": 1.9034596129118144, "learning_rate": 8.71884220891154e-07, "loss": 0.1596, "step": 8148 }, { "epoch": 1.8541524459613197, "grad_norm": 2.470990486257438, "learning_rate": 8.718021335582583e-07, "loss": 0.1083, "step": 8149 }, { "epoch": 1.8543799772468714, "grad_norm": 0.8588122126598416, "learning_rate": 8.71720041181179e-07, "loss": 0.0813, "step": 8150 }, { "epoch": 1.8546075085324232, "grad_norm": 1.3830193645898874, "learning_rate": 8.716379437615937e-07, "loss": 0.068, "step": 8151 }, { "epoch": 1.854835039817975, "grad_norm": 4.054893512600613, "learning_rate": 8.715558413011807e-07, "loss": 0.0993, "step": 8152 }, { "epoch": 1.8550625711035267, "grad_norm": 1.9216151652590312, "learning_rate": 8.714737338016178e-07, "loss": 0.1017, "step": 8153 }, { "epoch": 1.8552901023890787, "grad_norm": 1.797804120202323, "learning_rate": 8.713916212645832e-07, "loss": 0.0759, "step": 8154 }, { "epoch": 1.8555176336746304, "grad_norm": 1.9435258221897045, "learning_rate": 8.713095036917551e-07, "loss": 0.0612, "step": 8155 }, { "epoch": 1.8557451649601822, "grad_norm": 1.961110798616376, "learning_rate": 8.712273810848118e-07, "loss": 0.1092, "step": 8156 }, { "epoch": 1.855972696245734, "grad_norm": 1.7601247902120083, "learning_rate": 8.711452534454318e-07, "loss": 0.1661, "step": 8157 }, { "epoch": 1.8562002275312857, "grad_norm": 2.2464868593263554, "learning_rate": 8.710631207752936e-07, "loss": 0.0679, "step": 8158 }, { "epoch": 1.8564277588168374, "grad_norm": 3.337665078096119, "learning_rate": 8.709809830760759e-07, "loss": 0.1932, "step": 8159 }, { "epoch": 1.8566552901023892, "grad_norm": 1.5630086426802838, "learning_rate": 8.708988403494572e-07, "loss": 0.0462, "step": 8160 }, { "epoch": 1.856882821387941, "grad_norm": 1.7733085293738995, "learning_rate": 8.708166925971168e-07, "loss": 0.0939, "step": 8161 }, { "epoch": 1.8571103526734927, "grad_norm": 1.979119367608335, "learning_rate": 8.707345398207332e-07, "loss": 0.0723, "step": 8162 }, { "epoch": 1.8573378839590444, "grad_norm": 1.5396517304164394, "learning_rate": 8.706523820219858e-07, "loss": 0.1391, "step": 8163 }, { "epoch": 1.8575654152445962, "grad_norm": 2.3463502557091895, "learning_rate": 8.705702192025537e-07, "loss": 0.1303, "step": 8164 }, { "epoch": 1.857792946530148, "grad_norm": 2.063524500898958, "learning_rate": 8.704880513641158e-07, "loss": 0.0778, "step": 8165 }, { "epoch": 1.8580204778156997, "grad_norm": 2.4892268289881105, "learning_rate": 8.704058785083519e-07, "loss": 0.1097, "step": 8166 }, { "epoch": 1.8582480091012514, "grad_norm": 1.6821843849689957, "learning_rate": 8.703237006369411e-07, "loss": 0.0654, "step": 8167 }, { "epoch": 1.8584755403868032, "grad_norm": 1.779146938217964, "learning_rate": 8.702415177515633e-07, "loss": 0.1096, "step": 8168 }, { "epoch": 1.858703071672355, "grad_norm": 1.8893828203527572, "learning_rate": 8.70159329853898e-07, "loss": 0.0759, "step": 8169 }, { "epoch": 1.8589306029579067, "grad_norm": 1.1992146238336574, "learning_rate": 8.700771369456249e-07, "loss": 0.096, "step": 8170 }, { "epoch": 1.8591581342434584, "grad_norm": 2.788348084768204, "learning_rate": 8.69994939028424e-07, "loss": 0.1728, "step": 8171 }, { "epoch": 1.8593856655290102, "grad_norm": 3.6376334733716953, "learning_rate": 8.699127361039753e-07, "loss": 0.1694, "step": 8172 }, { "epoch": 1.859613196814562, "grad_norm": 2.2945377327150513, "learning_rate": 8.698305281739589e-07, "loss": 0.0817, "step": 8173 }, { "epoch": 1.8598407281001137, "grad_norm": 1.486317017024812, "learning_rate": 8.697483152400546e-07, "loss": 0.0676, "step": 8174 }, { "epoch": 1.8600682593856654, "grad_norm": 2.280025037072071, "learning_rate": 8.696660973039432e-07, "loss": 0.1797, "step": 8175 }, { "epoch": 1.8602957906712172, "grad_norm": 1.5402396104562603, "learning_rate": 8.695838743673048e-07, "loss": 0.13, "step": 8176 }, { "epoch": 1.860523321956769, "grad_norm": 1.6290630688565892, "learning_rate": 8.695016464318199e-07, "loss": 0.1265, "step": 8177 }, { "epoch": 1.8607508532423207, "grad_norm": 1.8565183527521674, "learning_rate": 8.69419413499169e-07, "loss": 0.0746, "step": 8178 }, { "epoch": 1.8609783845278725, "grad_norm": 1.3125831060654516, "learning_rate": 8.693371755710332e-07, "loss": 0.035, "step": 8179 }, { "epoch": 1.8612059158134242, "grad_norm": 1.540824128915698, "learning_rate": 8.692549326490929e-07, "loss": 0.0591, "step": 8180 }, { "epoch": 1.861433447098976, "grad_norm": 3.5123038339194967, "learning_rate": 8.69172684735029e-07, "loss": 0.1065, "step": 8181 }, { "epoch": 1.8616609783845277, "grad_norm": 1.3882683308527415, "learning_rate": 8.690904318305228e-07, "loss": 0.0453, "step": 8182 }, { "epoch": 1.8618885096700797, "grad_norm": 1.3620991320658384, "learning_rate": 8.690081739372553e-07, "loss": 0.076, "step": 8183 }, { "epoch": 1.8621160409556314, "grad_norm": 1.5799180062922773, "learning_rate": 8.689259110569072e-07, "loss": 0.0779, "step": 8184 }, { "epoch": 1.8623435722411832, "grad_norm": 1.2709498182853995, "learning_rate": 8.688436431911604e-07, "loss": 0.0463, "step": 8185 }, { "epoch": 1.862571103526735, "grad_norm": 2.3365313574511855, "learning_rate": 8.687613703416962e-07, "loss": 0.1141, "step": 8186 }, { "epoch": 1.8627986348122867, "grad_norm": 1.5747577544042832, "learning_rate": 8.686790925101959e-07, "loss": 0.0706, "step": 8187 }, { "epoch": 1.8630261660978384, "grad_norm": 1.2396408494465165, "learning_rate": 8.685968096983413e-07, "loss": 0.1196, "step": 8188 }, { "epoch": 1.8632536973833902, "grad_norm": 1.8487311158961117, "learning_rate": 8.685145219078141e-07, "loss": 0.1178, "step": 8189 }, { "epoch": 1.863481228668942, "grad_norm": 1.4459826163690286, "learning_rate": 8.684322291402959e-07, "loss": 0.1308, "step": 8190 }, { "epoch": 1.8637087599544937, "grad_norm": 2.1654731303154646, "learning_rate": 8.683499313974687e-07, "loss": 0.1005, "step": 8191 }, { "epoch": 1.8639362912400455, "grad_norm": 1.5116070135857027, "learning_rate": 8.682676286810147e-07, "loss": 0.0352, "step": 8192 }, { "epoch": 1.8641638225255974, "grad_norm": 1.2489503106020365, "learning_rate": 8.68185320992616e-07, "loss": 0.0629, "step": 8193 }, { "epoch": 1.8643913538111492, "grad_norm": 1.2554800050773864, "learning_rate": 8.681030083339545e-07, "loss": 0.0884, "step": 8194 }, { "epoch": 1.864618885096701, "grad_norm": 1.6767342107089667, "learning_rate": 8.680206907067129e-07, "loss": 0.0727, "step": 8195 }, { "epoch": 1.8648464163822527, "grad_norm": 2.3704922722420436, "learning_rate": 8.679383681125735e-07, "loss": 0.0883, "step": 8196 }, { "epoch": 1.8650739476678044, "grad_norm": 1.7957176242096409, "learning_rate": 8.678560405532186e-07, "loss": 0.0678, "step": 8197 }, { "epoch": 1.8653014789533562, "grad_norm": 1.5453285496923461, "learning_rate": 8.67773708030331e-07, "loss": 0.0763, "step": 8198 }, { "epoch": 1.865529010238908, "grad_norm": 1.7530154720544147, "learning_rate": 8.676913705455935e-07, "loss": 0.119, "step": 8199 }, { "epoch": 1.8657565415244597, "grad_norm": 1.7747425306638418, "learning_rate": 8.676090281006889e-07, "loss": 0.0616, "step": 8200 }, { "epoch": 1.8659840728100114, "grad_norm": 1.4450167421454763, "learning_rate": 8.675266806972999e-07, "loss": 0.0934, "step": 8201 }, { "epoch": 1.8662116040955632, "grad_norm": 1.4191089951936058, "learning_rate": 8.674443283371099e-07, "loss": 0.1293, "step": 8202 }, { "epoch": 1.866439135381115, "grad_norm": 2.2180210581811153, "learning_rate": 8.673619710218018e-07, "loss": 0.1191, "step": 8203 }, { "epoch": 1.8666666666666667, "grad_norm": 0.9959512273970105, "learning_rate": 8.672796087530589e-07, "loss": 0.0607, "step": 8204 }, { "epoch": 1.8668941979522184, "grad_norm": 1.4140679911460456, "learning_rate": 8.671972415325644e-07, "loss": 0.0895, "step": 8205 }, { "epoch": 1.8671217292377702, "grad_norm": 2.4634709849571124, "learning_rate": 8.671148693620019e-07, "loss": 0.1481, "step": 8206 }, { "epoch": 1.867349260523322, "grad_norm": 1.3604140438233554, "learning_rate": 8.67032492243055e-07, "loss": 0.0791, "step": 8207 }, { "epoch": 1.8675767918088737, "grad_norm": 1.4725557827579294, "learning_rate": 8.66950110177407e-07, "loss": 0.054, "step": 8208 }, { "epoch": 1.8678043230944255, "grad_norm": 0.9421659920369739, "learning_rate": 8.668677231667422e-07, "loss": 0.0192, "step": 8209 }, { "epoch": 1.8680318543799772, "grad_norm": 2.439861863056658, "learning_rate": 8.667853312127439e-07, "loss": 0.0727, "step": 8210 }, { "epoch": 1.868259385665529, "grad_norm": 1.1163814864932045, "learning_rate": 8.66702934317096e-07, "loss": 0.0754, "step": 8211 }, { "epoch": 1.8684869169510807, "grad_norm": 1.4707594486588582, "learning_rate": 8.66620532481483e-07, "loss": 0.1072, "step": 8212 }, { "epoch": 1.8687144482366325, "grad_norm": 1.195295377767991, "learning_rate": 8.66538125707589e-07, "loss": 0.067, "step": 8213 }, { "epoch": 1.8689419795221842, "grad_norm": 1.0068905553577698, "learning_rate": 8.664557139970978e-07, "loss": 0.0577, "step": 8214 }, { "epoch": 1.869169510807736, "grad_norm": 1.745424099718077, "learning_rate": 8.663732973516942e-07, "loss": 0.1158, "step": 8215 }, { "epoch": 1.8693970420932877, "grad_norm": 1.9544779077531358, "learning_rate": 8.662908757730623e-07, "loss": 0.1273, "step": 8216 }, { "epoch": 1.8696245733788395, "grad_norm": 1.9233550606345866, "learning_rate": 8.66208449262887e-07, "loss": 0.0765, "step": 8217 }, { "epoch": 1.8698521046643912, "grad_norm": 1.7145468545538933, "learning_rate": 8.661260178228524e-07, "loss": 0.0641, "step": 8218 }, { "epoch": 1.870079635949943, "grad_norm": 1.6994600505497042, "learning_rate": 8.660435814546439e-07, "loss": 0.1338, "step": 8219 }, { "epoch": 1.8703071672354947, "grad_norm": 1.7977587549342686, "learning_rate": 8.65961140159946e-07, "loss": 0.0887, "step": 8220 }, { "epoch": 1.8705346985210465, "grad_norm": 1.6649953525826708, "learning_rate": 8.658786939404435e-07, "loss": 0.0825, "step": 8221 }, { "epoch": 1.8707622298065985, "grad_norm": 2.091920476011299, "learning_rate": 8.657962427978219e-07, "loss": 0.0728, "step": 8222 }, { "epoch": 1.8709897610921502, "grad_norm": 2.332037808044974, "learning_rate": 8.657137867337659e-07, "loss": 0.1176, "step": 8223 }, { "epoch": 1.871217292377702, "grad_norm": 1.4201121364278315, "learning_rate": 8.65631325749961e-07, "loss": 0.0411, "step": 8224 }, { "epoch": 1.8714448236632537, "grad_norm": 1.586031908405511, "learning_rate": 8.655488598480925e-07, "loss": 0.1204, "step": 8225 }, { "epoch": 1.8716723549488055, "grad_norm": 2.1514924491921326, "learning_rate": 8.65466389029846e-07, "loss": 0.1011, "step": 8226 }, { "epoch": 1.8718998862343572, "grad_norm": 2.2594262919793526, "learning_rate": 8.65383913296907e-07, "loss": 0.0681, "step": 8227 }, { "epoch": 1.872127417519909, "grad_norm": 1.2654000276886403, "learning_rate": 8.653014326509605e-07, "loss": 0.0517, "step": 8228 }, { "epoch": 1.8723549488054607, "grad_norm": 1.3899124560393632, "learning_rate": 8.652189470936932e-07, "loss": 0.1193, "step": 8229 }, { "epoch": 1.8725824800910125, "grad_norm": 3.2563307631193505, "learning_rate": 8.651364566267906e-07, "loss": 0.1597, "step": 8230 }, { "epoch": 1.8728100113765644, "grad_norm": 2.6756362088820844, "learning_rate": 8.650539612519385e-07, "loss": 0.1332, "step": 8231 }, { "epoch": 1.8730375426621162, "grad_norm": 1.3476498608915706, "learning_rate": 8.64971460970823e-07, "loss": 0.0375, "step": 8232 }, { "epoch": 1.873265073947668, "grad_norm": 1.9234146586811676, "learning_rate": 8.648889557851306e-07, "loss": 0.1022, "step": 8233 }, { "epoch": 1.8734926052332197, "grad_norm": 2.189947855250575, "learning_rate": 8.64806445696547e-07, "loss": 0.0671, "step": 8234 }, { "epoch": 1.8737201365187715, "grad_norm": 1.9894848543680852, "learning_rate": 8.647239307067588e-07, "loss": 0.1531, "step": 8235 }, { "epoch": 1.8739476678043232, "grad_norm": 2.011337725001851, "learning_rate": 8.646414108174527e-07, "loss": 0.0964, "step": 8236 }, { "epoch": 1.874175199089875, "grad_norm": 2.5605691507555073, "learning_rate": 8.64558886030315e-07, "loss": 0.0987, "step": 8237 }, { "epoch": 1.8744027303754267, "grad_norm": 2.7815286281993874, "learning_rate": 8.644763563470324e-07, "loss": 0.1361, "step": 8238 }, { "epoch": 1.8746302616609785, "grad_norm": 3.559819049652831, "learning_rate": 8.643938217692916e-07, "loss": 0.1083, "step": 8239 }, { "epoch": 1.8748577929465302, "grad_norm": 2.0734532160282946, "learning_rate": 8.643112822987795e-07, "loss": 0.1501, "step": 8240 }, { "epoch": 1.875085324232082, "grad_norm": 1.796695049035969, "learning_rate": 8.642287379371831e-07, "loss": 0.0804, "step": 8241 }, { "epoch": 1.8753128555176337, "grad_norm": 2.0981891551441327, "learning_rate": 8.641461886861893e-07, "loss": 0.1276, "step": 8242 }, { "epoch": 1.8755403868031855, "grad_norm": 2.030917202859018, "learning_rate": 8.640636345474857e-07, "loss": 0.1723, "step": 8243 }, { "epoch": 1.8757679180887372, "grad_norm": 2.676745078762152, "learning_rate": 8.639810755227591e-07, "loss": 0.1073, "step": 8244 }, { "epoch": 1.875995449374289, "grad_norm": 3.0043687715907628, "learning_rate": 8.638985116136968e-07, "loss": 0.1064, "step": 8245 }, { "epoch": 1.8762229806598407, "grad_norm": 1.7613446237818156, "learning_rate": 8.638159428219866e-07, "loss": 0.0647, "step": 8246 }, { "epoch": 1.8764505119453925, "grad_norm": 2.2085611676970736, "learning_rate": 8.637333691493159e-07, "loss": 0.102, "step": 8247 }, { "epoch": 1.8766780432309442, "grad_norm": 1.9702990168659118, "learning_rate": 8.636507905973722e-07, "loss": 0.0993, "step": 8248 }, { "epoch": 1.876905574516496, "grad_norm": 1.7498654561923823, "learning_rate": 8.635682071678437e-07, "loss": 0.1733, "step": 8249 }, { "epoch": 1.8771331058020477, "grad_norm": 1.830647843822453, "learning_rate": 8.634856188624177e-07, "loss": 0.1777, "step": 8250 }, { "epoch": 1.8773606370875995, "grad_norm": 1.8382522826065273, "learning_rate": 8.634030256827825e-07, "loss": 0.0899, "step": 8251 }, { "epoch": 1.8775881683731512, "grad_norm": 2.634208451273832, "learning_rate": 8.633204276306261e-07, "loss": 0.0802, "step": 8252 }, { "epoch": 1.877815699658703, "grad_norm": 1.8398339448793228, "learning_rate": 8.632378247076366e-07, "loss": 0.1009, "step": 8253 }, { "epoch": 1.8780432309442547, "grad_norm": 1.6217919155242353, "learning_rate": 8.631552169155023e-07, "loss": 0.0379, "step": 8254 }, { "epoch": 1.8782707622298065, "grad_norm": 1.3190326043007634, "learning_rate": 8.630726042559115e-07, "loss": 0.0573, "step": 8255 }, { "epoch": 1.8784982935153582, "grad_norm": 1.8760347738026566, "learning_rate": 8.629899867305526e-07, "loss": 0.0821, "step": 8256 }, { "epoch": 1.87872582480091, "grad_norm": 1.5065217532584043, "learning_rate": 8.629073643411145e-07, "loss": 0.1825, "step": 8257 }, { "epoch": 1.8789533560864617, "grad_norm": 1.3124918608997533, "learning_rate": 8.628247370892853e-07, "loss": 0.0848, "step": 8258 }, { "epoch": 1.8791808873720135, "grad_norm": 1.8729970019792939, "learning_rate": 8.627421049767541e-07, "loss": 0.1306, "step": 8259 }, { "epoch": 1.8794084186575652, "grad_norm": 1.212882147308011, "learning_rate": 8.626594680052097e-07, "loss": 0.0804, "step": 8260 }, { "epoch": 1.8796359499431172, "grad_norm": 3.189924617267235, "learning_rate": 8.62576826176341e-07, "loss": 0.0968, "step": 8261 }, { "epoch": 1.879863481228669, "grad_norm": 1.395245332753133, "learning_rate": 8.62494179491837e-07, "loss": 0.1108, "step": 8262 }, { "epoch": 1.8800910125142207, "grad_norm": 3.442166694172348, "learning_rate": 8.624115279533872e-07, "loss": 0.1112, "step": 8263 }, { "epoch": 1.8803185437997725, "grad_norm": 1.4344145011506018, "learning_rate": 8.623288715626804e-07, "loss": 0.062, "step": 8264 }, { "epoch": 1.8805460750853242, "grad_norm": 2.1182252172781135, "learning_rate": 8.62246210321406e-07, "loss": 0.1575, "step": 8265 }, { "epoch": 1.880773606370876, "grad_norm": 2.061685485576038, "learning_rate": 8.621635442312537e-07, "loss": 0.0696, "step": 8266 }, { "epoch": 1.8810011376564277, "grad_norm": 2.3448425692841015, "learning_rate": 8.620808732939129e-07, "loss": 0.0739, "step": 8267 }, { "epoch": 1.8812286689419795, "grad_norm": 2.4492682818191076, "learning_rate": 8.619981975110731e-07, "loss": 0.1094, "step": 8268 }, { "epoch": 1.8814562002275312, "grad_norm": 1.4997812182923556, "learning_rate": 8.619155168844243e-07, "loss": 0.125, "step": 8269 }, { "epoch": 1.8816837315130832, "grad_norm": 1.8390918655122415, "learning_rate": 8.618328314156564e-07, "loss": 0.0683, "step": 8270 }, { "epoch": 1.881911262798635, "grad_norm": 2.11266005662525, "learning_rate": 8.617501411064588e-07, "loss": 0.0787, "step": 8271 }, { "epoch": 1.8821387940841867, "grad_norm": 1.5676917774588235, "learning_rate": 8.616674459585221e-07, "loss": 0.1448, "step": 8272 }, { "epoch": 1.8823663253697385, "grad_norm": 1.7133435613588885, "learning_rate": 8.615847459735363e-07, "loss": 0.0943, "step": 8273 }, { "epoch": 1.8825938566552902, "grad_norm": 1.7691964747757152, "learning_rate": 8.615020411531915e-07, "loss": 0.0799, "step": 8274 }, { "epoch": 1.882821387940842, "grad_norm": 1.2155475313958286, "learning_rate": 8.61419331499178e-07, "loss": 0.09, "step": 8275 }, { "epoch": 1.8830489192263937, "grad_norm": 1.9364963071364532, "learning_rate": 8.613366170131867e-07, "loss": 0.1157, "step": 8276 }, { "epoch": 1.8832764505119455, "grad_norm": 1.3310492341567115, "learning_rate": 8.612538976969074e-07, "loss": 0.0838, "step": 8277 }, { "epoch": 1.8835039817974972, "grad_norm": 1.3381461293338204, "learning_rate": 8.611711735520312e-07, "loss": 0.0681, "step": 8278 }, { "epoch": 1.883731513083049, "grad_norm": 1.46469419190077, "learning_rate": 8.610884445802488e-07, "loss": 0.166, "step": 8279 }, { "epoch": 1.8839590443686007, "grad_norm": 1.4746271910348123, "learning_rate": 8.610057107832509e-07, "loss": 0.1038, "step": 8280 }, { "epoch": 1.8841865756541525, "grad_norm": 1.8227506034405914, "learning_rate": 8.609229721627287e-07, "loss": 0.1171, "step": 8281 }, { "epoch": 1.8844141069397042, "grad_norm": 1.2338686150597393, "learning_rate": 8.608402287203728e-07, "loss": 0.0647, "step": 8282 }, { "epoch": 1.884641638225256, "grad_norm": 1.5525017423068463, "learning_rate": 8.607574804578747e-07, "loss": 0.0886, "step": 8283 }, { "epoch": 1.8848691695108077, "grad_norm": 1.619647694383806, "learning_rate": 8.606747273769253e-07, "loss": 0.0578, "step": 8284 }, { "epoch": 1.8850967007963595, "grad_norm": 2.2442457789709587, "learning_rate": 8.605919694792161e-07, "loss": 0.1067, "step": 8285 }, { "epoch": 1.8853242320819112, "grad_norm": 2.0046325858807466, "learning_rate": 8.605092067664386e-07, "loss": 0.1072, "step": 8286 }, { "epoch": 1.885551763367463, "grad_norm": 1.7278131172869977, "learning_rate": 8.604264392402842e-07, "loss": 0.1248, "step": 8287 }, { "epoch": 1.8857792946530147, "grad_norm": 2.0397944094622704, "learning_rate": 8.603436669024446e-07, "loss": 0.1081, "step": 8288 }, { "epoch": 1.8860068259385665, "grad_norm": 1.6612267301198451, "learning_rate": 8.602608897546115e-07, "loss": 0.0954, "step": 8289 }, { "epoch": 1.8862343572241183, "grad_norm": 1.3468640416438875, "learning_rate": 8.601781077984767e-07, "loss": 0.0784, "step": 8290 }, { "epoch": 1.88646188850967, "grad_norm": 1.505824481848141, "learning_rate": 8.600953210357319e-07, "loss": 0.0931, "step": 8291 }, { "epoch": 1.8866894197952218, "grad_norm": 2.2113765085413775, "learning_rate": 8.600125294680692e-07, "loss": 0.1949, "step": 8292 }, { "epoch": 1.8869169510807735, "grad_norm": 1.8238650189070063, "learning_rate": 8.59929733097181e-07, "loss": 0.074, "step": 8293 }, { "epoch": 1.8871444823663253, "grad_norm": 1.9762933225786676, "learning_rate": 8.598469319247593e-07, "loss": 0.1055, "step": 8294 }, { "epoch": 1.887372013651877, "grad_norm": 2.8785714836056293, "learning_rate": 8.597641259524965e-07, "loss": 0.1279, "step": 8295 }, { "epoch": 1.8875995449374288, "grad_norm": 1.5172357183887974, "learning_rate": 8.596813151820849e-07, "loss": 0.0631, "step": 8296 }, { "epoch": 1.8878270762229805, "grad_norm": 2.014306640694779, "learning_rate": 8.595984996152168e-07, "loss": 0.0833, "step": 8297 }, { "epoch": 1.8880546075085323, "grad_norm": 1.1602096020075132, "learning_rate": 8.595156792535852e-07, "loss": 0.1066, "step": 8298 }, { "epoch": 1.8882821387940842, "grad_norm": 3.568651228615464, "learning_rate": 8.594328540988825e-07, "loss": 0.126, "step": 8299 }, { "epoch": 1.888509670079636, "grad_norm": 2.0351080172870013, "learning_rate": 8.593500241528016e-07, "loss": 0.0598, "step": 8300 }, { "epoch": 1.8887372013651877, "grad_norm": 1.5173767046842237, "learning_rate": 8.592671894170356e-07, "loss": 0.1236, "step": 8301 }, { "epoch": 1.8889647326507395, "grad_norm": 2.776342054790528, "learning_rate": 8.59184349893277e-07, "loss": 0.1044, "step": 8302 }, { "epoch": 1.8891922639362912, "grad_norm": 1.6990791967005991, "learning_rate": 8.591015055832195e-07, "loss": 0.0793, "step": 8303 }, { "epoch": 1.889419795221843, "grad_norm": 2.23139948985411, "learning_rate": 8.590186564885557e-07, "loss": 0.1156, "step": 8304 }, { "epoch": 1.8896473265073948, "grad_norm": 1.9638463611645929, "learning_rate": 8.589358026109792e-07, "loss": 0.1422, "step": 8305 }, { "epoch": 1.8898748577929465, "grad_norm": 2.2979610951844545, "learning_rate": 8.588529439521834e-07, "loss": 0.0898, "step": 8306 }, { "epoch": 1.8901023890784983, "grad_norm": 1.839046186571141, "learning_rate": 8.587700805138617e-07, "loss": 0.0831, "step": 8307 }, { "epoch": 1.89032992036405, "grad_norm": 2.7089181775515137, "learning_rate": 8.586872122977076e-07, "loss": 0.2059, "step": 8308 }, { "epoch": 1.890557451649602, "grad_norm": 1.688883265529711, "learning_rate": 8.58604339305415e-07, "loss": 0.1291, "step": 8309 }, { "epoch": 1.8907849829351537, "grad_norm": 1.460898448096465, "learning_rate": 8.585214615386773e-07, "loss": 0.0999, "step": 8310 }, { "epoch": 1.8910125142207055, "grad_norm": 1.2953094489885983, "learning_rate": 8.584385789991887e-07, "loss": 0.0776, "step": 8311 }, { "epoch": 1.8912400455062572, "grad_norm": 2.2075985131237137, "learning_rate": 8.583556916886432e-07, "loss": 0.0963, "step": 8312 }, { "epoch": 1.891467576791809, "grad_norm": 1.7744142385673198, "learning_rate": 8.582727996087345e-07, "loss": 0.0495, "step": 8313 }, { "epoch": 1.8916951080773607, "grad_norm": 1.6762850317162457, "learning_rate": 8.581899027611574e-07, "loss": 0.0882, "step": 8314 }, { "epoch": 1.8919226393629125, "grad_norm": 2.3529920868513408, "learning_rate": 8.581070011476053e-07, "loss": 0.0679, "step": 8315 }, { "epoch": 1.8921501706484642, "grad_norm": 2.100364972939074, "learning_rate": 8.580240947697732e-07, "loss": 0.0748, "step": 8316 }, { "epoch": 1.892377701934016, "grad_norm": 1.9244535455022826, "learning_rate": 8.579411836293555e-07, "loss": 0.081, "step": 8317 }, { "epoch": 1.8926052332195678, "grad_norm": 1.2639776790010304, "learning_rate": 8.578582677280464e-07, "loss": 0.1304, "step": 8318 }, { "epoch": 1.8928327645051195, "grad_norm": 1.710773752904337, "learning_rate": 8.577753470675408e-07, "loss": 0.0803, "step": 8319 }, { "epoch": 1.8930602957906713, "grad_norm": 2.111304301029889, "learning_rate": 8.576924216495336e-07, "loss": 0.1025, "step": 8320 }, { "epoch": 1.893287827076223, "grad_norm": 1.3924061339613205, "learning_rate": 8.576094914757194e-07, "loss": 0.1797, "step": 8321 }, { "epoch": 1.8935153583617748, "grad_norm": 1.9426094569182117, "learning_rate": 8.575265565477931e-07, "loss": 0.0829, "step": 8322 }, { "epoch": 1.8937428896473265, "grad_norm": 2.0189174721635945, "learning_rate": 8.574436168674498e-07, "loss": 0.102, "step": 8323 }, { "epoch": 1.8939704209328783, "grad_norm": 1.5399383265730866, "learning_rate": 8.573606724363848e-07, "loss": 0.1369, "step": 8324 }, { "epoch": 1.89419795221843, "grad_norm": 1.7288081091530902, "learning_rate": 8.572777232562929e-07, "loss": 0.0725, "step": 8325 }, { "epoch": 1.8944254835039818, "grad_norm": 1.730717895188744, "learning_rate": 8.571947693288702e-07, "loss": 0.1152, "step": 8326 }, { "epoch": 1.8946530147895335, "grad_norm": 1.6527431481293537, "learning_rate": 8.571118106558114e-07, "loss": 0.1009, "step": 8327 }, { "epoch": 1.8948805460750853, "grad_norm": 0.9789189145952566, "learning_rate": 8.570288472388122e-07, "loss": 0.0415, "step": 8328 }, { "epoch": 1.895108077360637, "grad_norm": 2.4637644623031347, "learning_rate": 8.569458790795685e-07, "loss": 0.2276, "step": 8329 }, { "epoch": 1.8953356086461888, "grad_norm": 1.626466255646907, "learning_rate": 8.568629061797757e-07, "loss": 0.0409, "step": 8330 }, { "epoch": 1.8955631399317405, "grad_norm": 2.6103787346739282, "learning_rate": 8.567799285411298e-07, "loss": 0.1717, "step": 8331 }, { "epoch": 1.8957906712172923, "grad_norm": 1.7252251553086313, "learning_rate": 8.566969461653266e-07, "loss": 0.1094, "step": 8332 }, { "epoch": 1.896018202502844, "grad_norm": 1.20125019285821, "learning_rate": 8.566139590540622e-07, "loss": 0.0526, "step": 8333 }, { "epoch": 1.8962457337883958, "grad_norm": 1.4917481767592597, "learning_rate": 8.565309672090328e-07, "loss": 0.1327, "step": 8334 }, { "epoch": 1.8964732650739475, "grad_norm": 1.8357499566231195, "learning_rate": 8.564479706319339e-07, "loss": 0.0629, "step": 8335 }, { "epoch": 1.8967007963594993, "grad_norm": 1.1409227083946447, "learning_rate": 8.563649693244629e-07, "loss": 0.0815, "step": 8336 }, { "epoch": 1.896928327645051, "grad_norm": 2.0492970568832356, "learning_rate": 8.562819632883155e-07, "loss": 0.0546, "step": 8337 }, { "epoch": 1.897155858930603, "grad_norm": 1.1938915913631698, "learning_rate": 8.561989525251883e-07, "loss": 0.1295, "step": 8338 }, { "epoch": 1.8973833902161548, "grad_norm": 1.7629521515316806, "learning_rate": 8.56115937036778e-07, "loss": 0.1166, "step": 8339 }, { "epoch": 1.8976109215017065, "grad_norm": 2.7816434755427077, "learning_rate": 8.560329168247812e-07, "loss": 0.1039, "step": 8340 }, { "epoch": 1.8978384527872583, "grad_norm": 1.2398296197062553, "learning_rate": 8.559498918908948e-07, "loss": 0.0941, "step": 8341 }, { "epoch": 1.89806598407281, "grad_norm": 2.056526288023806, "learning_rate": 8.558668622368154e-07, "loss": 0.0783, "step": 8342 }, { "epoch": 1.8982935153583618, "grad_norm": 1.439365910121674, "learning_rate": 8.557838278642401e-07, "loss": 0.1154, "step": 8343 }, { "epoch": 1.8985210466439135, "grad_norm": 1.7162223115697652, "learning_rate": 8.557007887748661e-07, "loss": 0.0905, "step": 8344 }, { "epoch": 1.8987485779294653, "grad_norm": 1.9842439019793214, "learning_rate": 8.556177449703906e-07, "loss": 0.1486, "step": 8345 }, { "epoch": 1.898976109215017, "grad_norm": 1.755920878322768, "learning_rate": 8.555346964525107e-07, "loss": 0.1004, "step": 8346 }, { "epoch": 1.8992036405005688, "grad_norm": 1.7757725128098303, "learning_rate": 8.554516432229238e-07, "loss": 0.0709, "step": 8347 }, { "epoch": 1.8994311717861208, "grad_norm": 1.2197949009977749, "learning_rate": 8.553685852833274e-07, "loss": 0.044, "step": 8348 }, { "epoch": 1.8996587030716725, "grad_norm": 1.6457838920903318, "learning_rate": 8.552855226354187e-07, "loss": 0.1477, "step": 8349 }, { "epoch": 1.8998862343572243, "grad_norm": 1.6996799716523001, "learning_rate": 8.55202455280896e-07, "loss": 0.0671, "step": 8350 }, { "epoch": 1.900113765642776, "grad_norm": 2.286339837192545, "learning_rate": 8.551193832214567e-07, "loss": 0.0783, "step": 8351 }, { "epoch": 1.9003412969283278, "grad_norm": 1.9501381206018091, "learning_rate": 8.550363064587985e-07, "loss": 0.0886, "step": 8352 }, { "epoch": 1.9005688282138795, "grad_norm": 2.3841156996218578, "learning_rate": 8.549532249946197e-07, "loss": 0.1172, "step": 8353 }, { "epoch": 1.9007963594994313, "grad_norm": 1.5973692879748291, "learning_rate": 8.548701388306179e-07, "loss": 0.0549, "step": 8354 }, { "epoch": 1.901023890784983, "grad_norm": 1.727772482080952, "learning_rate": 8.547870479684916e-07, "loss": 0.2124, "step": 8355 }, { "epoch": 1.9012514220705348, "grad_norm": 3.0525620283617956, "learning_rate": 8.547039524099387e-07, "loss": 0.0762, "step": 8356 }, { "epoch": 1.9014789533560865, "grad_norm": 1.7064844536754875, "learning_rate": 8.546208521566578e-07, "loss": 0.0973, "step": 8357 }, { "epoch": 1.9017064846416383, "grad_norm": 1.78821288482924, "learning_rate": 8.545377472103474e-07, "loss": 0.073, "step": 8358 }, { "epoch": 1.90193401592719, "grad_norm": 2.822565788897465, "learning_rate": 8.544546375727055e-07, "loss": 0.1166, "step": 8359 }, { "epoch": 1.9021615472127418, "grad_norm": 2.033612171758109, "learning_rate": 8.543715232454311e-07, "loss": 0.1454, "step": 8360 }, { "epoch": 1.9023890784982935, "grad_norm": 1.6519078958373479, "learning_rate": 8.54288404230223e-07, "loss": 0.1722, "step": 8361 }, { "epoch": 1.9026166097838453, "grad_norm": 1.6349924643849787, "learning_rate": 8.542052805287797e-07, "loss": 0.1117, "step": 8362 }, { "epoch": 1.902844141069397, "grad_norm": 1.7230199638939674, "learning_rate": 8.541221521428003e-07, "loss": 0.0933, "step": 8363 }, { "epoch": 1.9030716723549488, "grad_norm": 2.128008748714492, "learning_rate": 8.540390190739839e-07, "loss": 0.1449, "step": 8364 }, { "epoch": 1.9032992036405005, "grad_norm": 2.520208975230359, "learning_rate": 8.539558813240291e-07, "loss": 0.0579, "step": 8365 }, { "epoch": 1.9035267349260523, "grad_norm": 1.8327988679527187, "learning_rate": 8.538727388946356e-07, "loss": 0.0816, "step": 8366 }, { "epoch": 1.903754266211604, "grad_norm": 1.0131312440967444, "learning_rate": 8.537895917875023e-07, "loss": 0.081, "step": 8367 }, { "epoch": 1.9039817974971558, "grad_norm": 2.525664618164771, "learning_rate": 8.537064400043289e-07, "loss": 0.1376, "step": 8368 }, { "epoch": 1.9042093287827075, "grad_norm": 2.0270036303350527, "learning_rate": 8.536232835468145e-07, "loss": 0.1755, "step": 8369 }, { "epoch": 1.9044368600682593, "grad_norm": 1.5781167079311442, "learning_rate": 8.535401224166593e-07, "loss": 0.0664, "step": 8370 }, { "epoch": 1.904664391353811, "grad_norm": 1.7575018943937446, "learning_rate": 8.534569566155623e-07, "loss": 0.112, "step": 8371 }, { "epoch": 1.9048919226393628, "grad_norm": 1.0596850759981111, "learning_rate": 8.533737861452235e-07, "loss": 0.1052, "step": 8372 }, { "epoch": 1.9051194539249146, "grad_norm": 1.0803329294529274, "learning_rate": 8.532906110073427e-07, "loss": 0.054, "step": 8373 }, { "epoch": 1.9053469852104663, "grad_norm": 1.4269226543498574, "learning_rate": 8.5320743120362e-07, "loss": 0.0906, "step": 8374 }, { "epoch": 1.905574516496018, "grad_norm": 1.8968299723220357, "learning_rate": 8.531242467357555e-07, "loss": 0.1619, "step": 8375 }, { "epoch": 1.9058020477815698, "grad_norm": 1.7596520806000042, "learning_rate": 8.530410576054489e-07, "loss": 0.0861, "step": 8376 }, { "epoch": 1.9060295790671218, "grad_norm": 1.7964392119029702, "learning_rate": 8.52957863814401e-07, "loss": 0.0651, "step": 8377 }, { "epoch": 1.9062571103526735, "grad_norm": 2.288788767440748, "learning_rate": 8.528746653643116e-07, "loss": 0.0977, "step": 8378 }, { "epoch": 1.9064846416382253, "grad_norm": 1.9994178831489846, "learning_rate": 8.527914622568814e-07, "loss": 0.1369, "step": 8379 }, { "epoch": 1.906712172923777, "grad_norm": 1.260689905620287, "learning_rate": 8.527082544938111e-07, "loss": 0.0359, "step": 8380 }, { "epoch": 1.9069397042093288, "grad_norm": 1.7946739368224318, "learning_rate": 8.526250420768009e-07, "loss": 0.0514, "step": 8381 }, { "epoch": 1.9071672354948805, "grad_norm": 1.423877072534363, "learning_rate": 8.525418250075518e-07, "loss": 0.1304, "step": 8382 }, { "epoch": 1.9073947667804323, "grad_norm": 1.4892533610710477, "learning_rate": 8.524586032877645e-07, "loss": 0.1109, "step": 8383 }, { "epoch": 1.907622298065984, "grad_norm": 1.8983697284600995, "learning_rate": 8.523753769191399e-07, "loss": 0.0886, "step": 8384 }, { "epoch": 1.9078498293515358, "grad_norm": 1.7364272953887896, "learning_rate": 8.522921459033791e-07, "loss": 0.0632, "step": 8385 }, { "epoch": 1.9080773606370875, "grad_norm": 1.984748396350035, "learning_rate": 8.522089102421829e-07, "loss": 0.117, "step": 8386 }, { "epoch": 1.9083048919226395, "grad_norm": 1.5206341771574357, "learning_rate": 8.52125669937253e-07, "loss": 0.0739, "step": 8387 }, { "epoch": 1.9085324232081913, "grad_norm": 1.4813839884119313, "learning_rate": 8.5204242499029e-07, "loss": 0.1032, "step": 8388 }, { "epoch": 1.908759954493743, "grad_norm": 2.8830784784360004, "learning_rate": 8.51959175402996e-07, "loss": 0.195, "step": 8389 }, { "epoch": 1.9089874857792948, "grad_norm": 1.3322631129748252, "learning_rate": 8.518759211770719e-07, "loss": 0.1141, "step": 8390 }, { "epoch": 1.9092150170648465, "grad_norm": 2.383725391131221, "learning_rate": 8.517926623142196e-07, "loss": 0.078, "step": 8391 }, { "epoch": 1.9094425483503983, "grad_norm": 3.5156731006712048, "learning_rate": 8.517093988161404e-07, "loss": 0.1994, "step": 8392 }, { "epoch": 1.90967007963595, "grad_norm": 1.9960590154522282, "learning_rate": 8.516261306845365e-07, "loss": 0.0986, "step": 8393 }, { "epoch": 1.9098976109215018, "grad_norm": 1.4147014281353942, "learning_rate": 8.515428579211095e-07, "loss": 0.0571, "step": 8394 }, { "epoch": 1.9101251422070535, "grad_norm": 1.3103127236232746, "learning_rate": 8.514595805275614e-07, "loss": 0.0536, "step": 8395 }, { "epoch": 1.9103526734926053, "grad_norm": 1.1912098943499887, "learning_rate": 8.513762985055942e-07, "loss": 0.1332, "step": 8396 }, { "epoch": 1.910580204778157, "grad_norm": 2.0496364865781658, "learning_rate": 8.5129301185691e-07, "loss": 0.0697, "step": 8397 }, { "epoch": 1.9108077360637088, "grad_norm": 0.9461185817769251, "learning_rate": 8.512097205832111e-07, "loss": 0.0478, "step": 8398 }, { "epoch": 1.9110352673492605, "grad_norm": 1.7546955788450125, "learning_rate": 8.511264246861997e-07, "loss": 0.0874, "step": 8399 }, { "epoch": 1.9112627986348123, "grad_norm": 2.3319144466335544, "learning_rate": 8.510431241675784e-07, "loss": 0.1562, "step": 8400 }, { "epoch": 1.911490329920364, "grad_norm": 2.4271876266800483, "learning_rate": 8.509598190290497e-07, "loss": 0.101, "step": 8401 }, { "epoch": 1.9117178612059158, "grad_norm": 1.8427885222115377, "learning_rate": 8.508765092723159e-07, "loss": 0.0702, "step": 8402 }, { "epoch": 1.9119453924914676, "grad_norm": 1.5707748325621465, "learning_rate": 8.507931948990801e-07, "loss": 0.1112, "step": 8403 }, { "epoch": 1.9121729237770193, "grad_norm": 1.0774601536928494, "learning_rate": 8.507098759110449e-07, "loss": 0.0883, "step": 8404 }, { "epoch": 1.912400455062571, "grad_norm": 2.134705751837509, "learning_rate": 8.506265523099133e-07, "loss": 0.1139, "step": 8405 }, { "epoch": 1.9126279863481228, "grad_norm": 1.9067676708528498, "learning_rate": 8.50543224097388e-07, "loss": 0.0732, "step": 8406 }, { "epoch": 1.9128555176336746, "grad_norm": 0.9727668156299778, "learning_rate": 8.504598912751722e-07, "loss": 0.0331, "step": 8407 }, { "epoch": 1.9130830489192263, "grad_norm": 2.350308805559537, "learning_rate": 8.503765538449695e-07, "loss": 0.1554, "step": 8408 }, { "epoch": 1.913310580204778, "grad_norm": 1.4015398689059053, "learning_rate": 8.502932118084825e-07, "loss": 0.0634, "step": 8409 }, { "epoch": 1.9135381114903298, "grad_norm": 1.8963424951873085, "learning_rate": 8.502098651674148e-07, "loss": 0.1863, "step": 8410 }, { "epoch": 1.9137656427758816, "grad_norm": 1.3398651655669072, "learning_rate": 8.501265139234702e-07, "loss": 0.0709, "step": 8411 }, { "epoch": 1.9139931740614333, "grad_norm": 2.2695420225526757, "learning_rate": 8.500431580783518e-07, "loss": 0.0719, "step": 8412 }, { "epoch": 1.914220705346985, "grad_norm": 0.9721125468019496, "learning_rate": 8.499597976337632e-07, "loss": 0.0514, "step": 8413 }, { "epoch": 1.9144482366325368, "grad_norm": 2.1322236230579223, "learning_rate": 8.498764325914087e-07, "loss": 0.1084, "step": 8414 }, { "epoch": 1.9146757679180886, "grad_norm": 2.35934344607237, "learning_rate": 8.497930629529916e-07, "loss": 0.0763, "step": 8415 }, { "epoch": 1.9149032992036406, "grad_norm": 1.4798203631279319, "learning_rate": 8.497096887202158e-07, "loss": 0.0588, "step": 8416 }, { "epoch": 1.9151308304891923, "grad_norm": 1.6849690708017626, "learning_rate": 8.496263098947857e-07, "loss": 0.1134, "step": 8417 }, { "epoch": 1.915358361774744, "grad_norm": 1.9929702536635638, "learning_rate": 8.495429264784051e-07, "loss": 0.126, "step": 8418 }, { "epoch": 1.9155858930602958, "grad_norm": 2.242324964257082, "learning_rate": 8.494595384727783e-07, "loss": 0.1326, "step": 8419 }, { "epoch": 1.9158134243458476, "grad_norm": 1.199433028857461, "learning_rate": 8.493761458796098e-07, "loss": 0.1425, "step": 8420 }, { "epoch": 1.9160409556313993, "grad_norm": 1.5634859845022955, "learning_rate": 8.492927487006037e-07, "loss": 0.064, "step": 8421 }, { "epoch": 1.916268486916951, "grad_norm": 2.247087348394463, "learning_rate": 8.492093469374646e-07, "loss": 0.0961, "step": 8422 }, { "epoch": 1.9164960182025028, "grad_norm": 1.6277470161077132, "learning_rate": 8.491259405918969e-07, "loss": 0.0823, "step": 8423 }, { "epoch": 1.9167235494880546, "grad_norm": 1.258836642443553, "learning_rate": 8.490425296656057e-07, "loss": 0.073, "step": 8424 }, { "epoch": 1.9169510807736063, "grad_norm": 0.9943141297680885, "learning_rate": 8.489591141602954e-07, "loss": 0.0679, "step": 8425 }, { "epoch": 1.9171786120591583, "grad_norm": 1.5514946605389608, "learning_rate": 8.488756940776709e-07, "loss": 0.0557, "step": 8426 }, { "epoch": 1.91740614334471, "grad_norm": 3.338604708256682, "learning_rate": 8.487922694194374e-07, "loss": 0.2102, "step": 8427 }, { "epoch": 1.9176336746302618, "grad_norm": 1.792222041676389, "learning_rate": 8.487088401872997e-07, "loss": 0.1272, "step": 8428 }, { "epoch": 1.9178612059158135, "grad_norm": 1.8876701986161428, "learning_rate": 8.486254063829628e-07, "loss": 0.0605, "step": 8429 }, { "epoch": 1.9180887372013653, "grad_norm": 2.2659142728739736, "learning_rate": 8.485419680081324e-07, "loss": 0.1049, "step": 8430 }, { "epoch": 1.918316268486917, "grad_norm": 1.9475951010122616, "learning_rate": 8.484585250645134e-07, "loss": 0.1072, "step": 8431 }, { "epoch": 1.9185437997724688, "grad_norm": 1.8729195424533605, "learning_rate": 8.483750775538116e-07, "loss": 0.0868, "step": 8432 }, { "epoch": 1.9187713310580206, "grad_norm": 1.2718749744993825, "learning_rate": 8.482916254777321e-07, "loss": 0.0838, "step": 8433 }, { "epoch": 1.9189988623435723, "grad_norm": 3.179250927334045, "learning_rate": 8.482081688379809e-07, "loss": 0.1408, "step": 8434 }, { "epoch": 1.919226393629124, "grad_norm": 0.9912202600373338, "learning_rate": 8.481247076362633e-07, "loss": 0.1012, "step": 8435 }, { "epoch": 1.9194539249146758, "grad_norm": 2.1647864863915554, "learning_rate": 8.480412418742855e-07, "loss": 0.1023, "step": 8436 }, { "epoch": 1.9196814562002276, "grad_norm": 1.2022709576605757, "learning_rate": 8.479577715537531e-07, "loss": 0.0424, "step": 8437 }, { "epoch": 1.9199089874857793, "grad_norm": 2.668259844445578, "learning_rate": 8.478742966763721e-07, "loss": 0.0619, "step": 8438 }, { "epoch": 1.920136518771331, "grad_norm": 1.3071073149878354, "learning_rate": 8.477908172438488e-07, "loss": 0.1575, "step": 8439 }, { "epoch": 1.9203640500568828, "grad_norm": 1.8530288632841168, "learning_rate": 8.477073332578892e-07, "loss": 0.0684, "step": 8440 }, { "epoch": 1.9205915813424346, "grad_norm": 1.232503648261482, "learning_rate": 8.476238447201995e-07, "loss": 0.0728, "step": 8441 }, { "epoch": 1.9208191126279863, "grad_norm": 2.497572414815773, "learning_rate": 8.475403516324863e-07, "loss": 0.0822, "step": 8442 }, { "epoch": 1.921046643913538, "grad_norm": 1.9091586596017254, "learning_rate": 8.474568539964556e-07, "loss": 0.1432, "step": 8443 }, { "epoch": 1.9212741751990898, "grad_norm": 1.7922258981875185, "learning_rate": 8.473733518138145e-07, "loss": 0.0858, "step": 8444 }, { "epoch": 1.9215017064846416, "grad_norm": 1.6463268320552324, "learning_rate": 8.472898450862691e-07, "loss": 0.0483, "step": 8445 }, { "epoch": 1.9217292377701933, "grad_norm": 2.5348026255674987, "learning_rate": 8.472063338155265e-07, "loss": 0.1395, "step": 8446 }, { "epoch": 1.921956769055745, "grad_norm": 1.736325113713475, "learning_rate": 8.471228180032934e-07, "loss": 0.0768, "step": 8447 }, { "epoch": 1.9221843003412968, "grad_norm": 2.1050598157100544, "learning_rate": 8.470392976512768e-07, "loss": 0.1608, "step": 8448 }, { "epoch": 1.9224118316268486, "grad_norm": 1.6835896578771998, "learning_rate": 8.469557727611833e-07, "loss": 0.1586, "step": 8449 }, { "epoch": 1.9226393629124003, "grad_norm": 1.9175114617751532, "learning_rate": 8.468722433347204e-07, "loss": 0.1146, "step": 8450 }, { "epoch": 1.922866894197952, "grad_norm": 3.4114979818235165, "learning_rate": 8.467887093735953e-07, "loss": 0.0854, "step": 8451 }, { "epoch": 1.9230944254835038, "grad_norm": 1.9034177264033771, "learning_rate": 8.467051708795152e-07, "loss": 0.0824, "step": 8452 }, { "epoch": 1.9233219567690556, "grad_norm": 1.8754113697450077, "learning_rate": 8.466216278541874e-07, "loss": 0.0891, "step": 8453 }, { "epoch": 1.9235494880546073, "grad_norm": 2.15705770201668, "learning_rate": 8.465380802993193e-07, "loss": 0.0708, "step": 8454 }, { "epoch": 1.9237770193401593, "grad_norm": 1.809327681436963, "learning_rate": 8.464545282166187e-07, "loss": 0.0702, "step": 8455 }, { "epoch": 1.924004550625711, "grad_norm": 1.0962609004853898, "learning_rate": 8.463709716077929e-07, "loss": 0.1039, "step": 8456 }, { "epoch": 1.9242320819112628, "grad_norm": 1.5206808018082445, "learning_rate": 8.4628741047455e-07, "loss": 0.0496, "step": 8457 }, { "epoch": 1.9244596131968146, "grad_norm": 3.0651136015171705, "learning_rate": 8.462038448185977e-07, "loss": 0.1708, "step": 8458 }, { "epoch": 1.9246871444823663, "grad_norm": 1.0623543651630516, "learning_rate": 8.461202746416442e-07, "loss": 0.0663, "step": 8459 }, { "epoch": 1.924914675767918, "grad_norm": 1.665256204666068, "learning_rate": 8.460366999453968e-07, "loss": 0.0957, "step": 8460 }, { "epoch": 1.9251422070534698, "grad_norm": 1.7811657834442234, "learning_rate": 8.459531207315644e-07, "loss": 0.0912, "step": 8461 }, { "epoch": 1.9253697383390216, "grad_norm": 2.319950791748146, "learning_rate": 8.458695370018546e-07, "loss": 0.0839, "step": 8462 }, { "epoch": 1.9255972696245733, "grad_norm": 2.2330640736837437, "learning_rate": 8.457859487579762e-07, "loss": 0.1652, "step": 8463 }, { "epoch": 1.925824800910125, "grad_norm": 2.522709981070815, "learning_rate": 8.457023560016371e-07, "loss": 0.1151, "step": 8464 }, { "epoch": 1.926052332195677, "grad_norm": 1.8640678015129717, "learning_rate": 8.456187587345463e-07, "loss": 0.0459, "step": 8465 }, { "epoch": 1.9262798634812288, "grad_norm": 2.50091870403617, "learning_rate": 8.455351569584119e-07, "loss": 0.0564, "step": 8466 }, { "epoch": 1.9265073947667806, "grad_norm": 2.5819766855842956, "learning_rate": 8.454515506749431e-07, "loss": 0.083, "step": 8467 }, { "epoch": 1.9267349260523323, "grad_norm": 1.6796689480749127, "learning_rate": 8.453679398858481e-07, "loss": 0.0531, "step": 8468 }, { "epoch": 1.926962457337884, "grad_norm": 1.677839119494002, "learning_rate": 8.452843245928359e-07, "loss": 0.0613, "step": 8469 }, { "epoch": 1.9271899886234358, "grad_norm": 1.8973529305250347, "learning_rate": 8.452007047976155e-07, "loss": 0.078, "step": 8470 }, { "epoch": 1.9274175199089876, "grad_norm": 2.3986730572979265, "learning_rate": 8.451170805018964e-07, "loss": 0.0893, "step": 8471 }, { "epoch": 1.9276450511945393, "grad_norm": 1.3191923575313789, "learning_rate": 8.45033451707387e-07, "loss": 0.0711, "step": 8472 }, { "epoch": 1.927872582480091, "grad_norm": 2.919992540617028, "learning_rate": 8.449498184157968e-07, "loss": 0.087, "step": 8473 }, { "epoch": 1.9281001137656428, "grad_norm": 2.1736715695739366, "learning_rate": 8.448661806288352e-07, "loss": 0.1122, "step": 8474 }, { "epoch": 1.9283276450511946, "grad_norm": 1.7246493796250064, "learning_rate": 8.447825383482116e-07, "loss": 0.1058, "step": 8475 }, { "epoch": 1.9285551763367463, "grad_norm": 1.9251531278033478, "learning_rate": 8.446988915756353e-07, "loss": 0.0836, "step": 8476 }, { "epoch": 1.928782707622298, "grad_norm": 1.4968384792196872, "learning_rate": 8.446152403128161e-07, "loss": 0.0998, "step": 8477 }, { "epoch": 1.9290102389078498, "grad_norm": 1.3499858437839942, "learning_rate": 8.445315845614636e-07, "loss": 0.1275, "step": 8478 }, { "epoch": 1.9292377701934016, "grad_norm": 1.971157495492609, "learning_rate": 8.444479243232875e-07, "loss": 0.0787, "step": 8479 }, { "epoch": 1.9294653014789533, "grad_norm": 1.2783668702913251, "learning_rate": 8.443642595999977e-07, "loss": 0.0503, "step": 8480 }, { "epoch": 1.929692832764505, "grad_norm": 2.068704739883542, "learning_rate": 8.442805903933041e-07, "loss": 0.0918, "step": 8481 }, { "epoch": 1.9299203640500568, "grad_norm": 1.4539311831790112, "learning_rate": 8.441969167049171e-07, "loss": 0.0937, "step": 8482 }, { "epoch": 1.9301478953356086, "grad_norm": 2.423333261668445, "learning_rate": 8.441132385365462e-07, "loss": 0.0632, "step": 8483 }, { "epoch": 1.9303754266211604, "grad_norm": 1.816600565728022, "learning_rate": 8.440295558899024e-07, "loss": 0.0642, "step": 8484 }, { "epoch": 1.930602957906712, "grad_norm": 1.9843915366698888, "learning_rate": 8.439458687666954e-07, "loss": 0.0949, "step": 8485 }, { "epoch": 1.9308304891922639, "grad_norm": 1.8704383168228935, "learning_rate": 8.438621771686358e-07, "loss": 0.1104, "step": 8486 }, { "epoch": 1.9310580204778156, "grad_norm": 2.089140082871353, "learning_rate": 8.437784810974343e-07, "loss": 0.1113, "step": 8487 }, { "epoch": 1.9312855517633674, "grad_norm": 1.5628222585539076, "learning_rate": 8.43694780554801e-07, "loss": 0.0848, "step": 8488 }, { "epoch": 1.931513083048919, "grad_norm": 1.731060896808239, "learning_rate": 8.436110755424472e-07, "loss": 0.0731, "step": 8489 }, { "epoch": 1.9317406143344709, "grad_norm": 1.846587230420918, "learning_rate": 8.435273660620833e-07, "loss": 0.059, "step": 8490 }, { "epoch": 1.9319681456200226, "grad_norm": 1.896878928250611, "learning_rate": 8.434436521154202e-07, "loss": 0.0574, "step": 8491 }, { "epoch": 1.9321956769055744, "grad_norm": 1.3544988049376627, "learning_rate": 8.43359933704169e-07, "loss": 0.0535, "step": 8492 }, { "epoch": 1.9324232081911261, "grad_norm": 1.740141894716064, "learning_rate": 8.432762108300404e-07, "loss": 0.0891, "step": 8493 }, { "epoch": 1.932650739476678, "grad_norm": 0.9830501187953077, "learning_rate": 8.431924834947461e-07, "loss": 0.1051, "step": 8494 }, { "epoch": 1.9328782707622298, "grad_norm": 2.4380986003963083, "learning_rate": 8.431087516999969e-07, "loss": 0.1076, "step": 8495 }, { "epoch": 1.9331058020477816, "grad_norm": 1.4384427780874542, "learning_rate": 8.430250154475044e-07, "loss": 0.1277, "step": 8496 }, { "epoch": 1.9333333333333333, "grad_norm": 2.3763608180998066, "learning_rate": 8.429412747389798e-07, "loss": 0.1468, "step": 8497 }, { "epoch": 1.933560864618885, "grad_norm": 2.7126993024948796, "learning_rate": 8.428575295761346e-07, "loss": 0.0923, "step": 8498 }, { "epoch": 1.9337883959044369, "grad_norm": 2.1813475835249982, "learning_rate": 8.427737799606806e-07, "loss": 0.0782, "step": 8499 }, { "epoch": 1.9340159271899886, "grad_norm": 2.6373903019450813, "learning_rate": 8.426900258943292e-07, "loss": 0.0889, "step": 8500 }, { "epoch": 1.9342434584755404, "grad_norm": 1.4708943806049117, "learning_rate": 8.426062673787926e-07, "loss": 0.0483, "step": 8501 }, { "epoch": 1.934470989761092, "grad_norm": 1.1381297165096618, "learning_rate": 8.425225044157823e-07, "loss": 0.0428, "step": 8502 }, { "epoch": 1.9346985210466439, "grad_norm": 1.7545261529169938, "learning_rate": 8.424387370070102e-07, "loss": 0.1039, "step": 8503 }, { "epoch": 1.9349260523321958, "grad_norm": 1.4167186825796818, "learning_rate": 8.423549651541889e-07, "loss": 0.1195, "step": 8504 }, { "epoch": 1.9351535836177476, "grad_norm": 1.5331925096031622, "learning_rate": 8.4227118885903e-07, "loss": 0.0939, "step": 8505 }, { "epoch": 1.9353811149032993, "grad_norm": 1.3675225762983716, "learning_rate": 8.421874081232459e-07, "loss": 0.1108, "step": 8506 }, { "epoch": 1.935608646188851, "grad_norm": 2.024739386960217, "learning_rate": 8.421036229485489e-07, "loss": 0.0882, "step": 8507 }, { "epoch": 1.9358361774744028, "grad_norm": 1.9821151920385132, "learning_rate": 8.420198333366513e-07, "loss": 0.0854, "step": 8508 }, { "epoch": 1.9360637087599546, "grad_norm": 1.0951514220743803, "learning_rate": 8.419360392892663e-07, "loss": 0.0594, "step": 8509 }, { "epoch": 1.9362912400455063, "grad_norm": 2.2169726519990904, "learning_rate": 8.418522408081054e-07, "loss": 0.1, "step": 8510 }, { "epoch": 1.936518771331058, "grad_norm": 2.1220228348519505, "learning_rate": 8.417684378948822e-07, "loss": 0.0438, "step": 8511 }, { "epoch": 1.9367463026166098, "grad_norm": 0.8841115425271152, "learning_rate": 8.416846305513089e-07, "loss": 0.0205, "step": 8512 }, { "epoch": 1.9369738339021616, "grad_norm": 1.0874199804171312, "learning_rate": 8.416008187790986e-07, "loss": 0.0487, "step": 8513 }, { "epoch": 1.9372013651877134, "grad_norm": 1.6181231078970064, "learning_rate": 8.415170025799644e-07, "loss": 0.1301, "step": 8514 }, { "epoch": 1.937428896473265, "grad_norm": 1.1611858386666436, "learning_rate": 8.414331819556193e-07, "loss": 0.0285, "step": 8515 }, { "epoch": 1.9376564277588169, "grad_norm": 1.2191047345391055, "learning_rate": 8.41349356907776e-07, "loss": 0.0758, "step": 8516 }, { "epoch": 1.9378839590443686, "grad_norm": 1.8008450035039854, "learning_rate": 8.412655274381481e-07, "loss": 0.0816, "step": 8517 }, { "epoch": 1.9381114903299204, "grad_norm": 2.549335874160235, "learning_rate": 8.411816935484491e-07, "loss": 0.1047, "step": 8518 }, { "epoch": 1.9383390216154721, "grad_norm": 1.863164888167891, "learning_rate": 8.410978552403923e-07, "loss": 0.1242, "step": 8519 }, { "epoch": 1.9385665529010239, "grad_norm": 1.9511545134715371, "learning_rate": 8.410140125156907e-07, "loss": 0.0776, "step": 8520 }, { "epoch": 1.9387940841865756, "grad_norm": 1.4616590047656322, "learning_rate": 8.409301653760587e-07, "loss": 0.0534, "step": 8521 }, { "epoch": 1.9390216154721274, "grad_norm": 1.5355929638697312, "learning_rate": 8.408463138232094e-07, "loss": 0.0876, "step": 8522 }, { "epoch": 1.9392491467576791, "grad_norm": 1.4389045256078594, "learning_rate": 8.407624578588566e-07, "loss": 0.0846, "step": 8523 }, { "epoch": 1.9394766780432309, "grad_norm": 2.931059465647058, "learning_rate": 8.406785974847145e-07, "loss": 0.1378, "step": 8524 }, { "epoch": 1.9397042093287826, "grad_norm": 1.4601601928244312, "learning_rate": 8.405947327024968e-07, "loss": 0.0566, "step": 8525 }, { "epoch": 1.9399317406143344, "grad_norm": 2.200201035887927, "learning_rate": 8.405108635139178e-07, "loss": 0.0842, "step": 8526 }, { "epoch": 1.9401592718998861, "grad_norm": 2.0423363286701677, "learning_rate": 8.404269899206911e-07, "loss": 0.0823, "step": 8527 }, { "epoch": 1.9403868031854379, "grad_norm": 1.8410916810358255, "learning_rate": 8.403431119245316e-07, "loss": 0.1669, "step": 8528 }, { "epoch": 1.9406143344709896, "grad_norm": 1.8658327967514299, "learning_rate": 8.40259229527153e-07, "loss": 0.112, "step": 8529 }, { "epoch": 1.9408418657565414, "grad_norm": 1.646429046748771, "learning_rate": 8.4017534273027e-07, "loss": 0.0939, "step": 8530 }, { "epoch": 1.9410693970420931, "grad_norm": 1.9699312522483354, "learning_rate": 8.400914515355972e-07, "loss": 0.0713, "step": 8531 }, { "epoch": 1.9412969283276449, "grad_norm": 3.035616092105102, "learning_rate": 8.40007555944849e-07, "loss": 0.1617, "step": 8532 }, { "epoch": 1.9415244596131969, "grad_norm": 1.1459056528449258, "learning_rate": 8.399236559597403e-07, "loss": 0.0407, "step": 8533 }, { "epoch": 1.9417519908987486, "grad_norm": 1.6129092079250968, "learning_rate": 8.398397515819855e-07, "loss": 0.0355, "step": 8534 }, { "epoch": 1.9419795221843004, "grad_norm": 1.6025860195926533, "learning_rate": 8.397558428132995e-07, "loss": 0.0769, "step": 8535 }, { "epoch": 1.9422070534698521, "grad_norm": 1.7837472740584903, "learning_rate": 8.396719296553976e-07, "loss": 0.0558, "step": 8536 }, { "epoch": 1.9424345847554039, "grad_norm": 2.8544033907639945, "learning_rate": 8.395880121099944e-07, "loss": 0.2276, "step": 8537 }, { "epoch": 1.9426621160409556, "grad_norm": 2.226030766319897, "learning_rate": 8.395040901788056e-07, "loss": 0.163, "step": 8538 }, { "epoch": 1.9428896473265074, "grad_norm": 2.4601269585424066, "learning_rate": 8.394201638635458e-07, "loss": 0.1241, "step": 8539 }, { "epoch": 1.9431171786120591, "grad_norm": 1.6894697055252035, "learning_rate": 8.393362331659305e-07, "loss": 0.1729, "step": 8540 }, { "epoch": 1.9433447098976109, "grad_norm": 1.089462098183616, "learning_rate": 8.392522980876752e-07, "loss": 0.0458, "step": 8541 }, { "epoch": 1.9435722411831626, "grad_norm": 1.4212217862469188, "learning_rate": 8.391683586304954e-07, "loss": 0.0483, "step": 8542 }, { "epoch": 1.9437997724687146, "grad_norm": 2.0508094063197895, "learning_rate": 8.390844147961064e-07, "loss": 0.0837, "step": 8543 }, { "epoch": 1.9440273037542664, "grad_norm": 2.6999732469953663, "learning_rate": 8.39000466586224e-07, "loss": 0.0963, "step": 8544 }, { "epoch": 1.944254835039818, "grad_norm": 1.4407700122396472, "learning_rate": 8.389165140025642e-07, "loss": 0.167, "step": 8545 }, { "epoch": 1.9444823663253699, "grad_norm": 2.4557818509444247, "learning_rate": 8.388325570468425e-07, "loss": 0.0731, "step": 8546 }, { "epoch": 1.9447098976109216, "grad_norm": 1.354782697339982, "learning_rate": 8.38748595720775e-07, "loss": 0.1182, "step": 8547 }, { "epoch": 1.9449374288964734, "grad_norm": 0.7537613438324625, "learning_rate": 8.386646300260777e-07, "loss": 0.0589, "step": 8548 }, { "epoch": 1.9451649601820251, "grad_norm": 1.4623107199177439, "learning_rate": 8.385806599644666e-07, "loss": 0.0587, "step": 8549 }, { "epoch": 1.9453924914675769, "grad_norm": 1.892414732824123, "learning_rate": 8.384966855376579e-07, "loss": 0.0533, "step": 8550 }, { "epoch": 1.9456200227531286, "grad_norm": 2.19221426746065, "learning_rate": 8.384127067473681e-07, "loss": 0.0642, "step": 8551 }, { "epoch": 1.9458475540386804, "grad_norm": 1.5719041762146422, "learning_rate": 8.383287235953133e-07, "loss": 0.0261, "step": 8552 }, { "epoch": 1.9460750853242321, "grad_norm": 2.376314018674937, "learning_rate": 8.382447360832102e-07, "loss": 0.2128, "step": 8553 }, { "epoch": 1.9463026166097839, "grad_norm": 2.631107798034157, "learning_rate": 8.381607442127753e-07, "loss": 0.0754, "step": 8554 }, { "epoch": 1.9465301478953356, "grad_norm": 1.8455788499407606, "learning_rate": 8.38076747985725e-07, "loss": 0.1417, "step": 8555 }, { "epoch": 1.9467576791808874, "grad_norm": 0.9453191903679632, "learning_rate": 8.379927474037762e-07, "loss": 0.1107, "step": 8556 }, { "epoch": 1.9469852104664391, "grad_norm": 2.383443887327382, "learning_rate": 8.379087424686458e-07, "loss": 0.1473, "step": 8557 }, { "epoch": 1.9472127417519909, "grad_norm": 2.248092514465891, "learning_rate": 8.378247331820504e-07, "loss": 0.114, "step": 8558 }, { "epoch": 1.9474402730375426, "grad_norm": 1.7609556991713609, "learning_rate": 8.377407195457077e-07, "loss": 0.1203, "step": 8559 }, { "epoch": 1.9476678043230944, "grad_norm": 2.468550221729246, "learning_rate": 8.376567015613339e-07, "loss": 0.0881, "step": 8560 }, { "epoch": 1.9478953356086461, "grad_norm": 1.0006009709287198, "learning_rate": 8.375726792306467e-07, "loss": 0.0568, "step": 8561 }, { "epoch": 1.948122866894198, "grad_norm": 1.5722493388504248, "learning_rate": 8.374886525553632e-07, "loss": 0.0684, "step": 8562 }, { "epoch": 1.9483503981797496, "grad_norm": 1.7833042384805906, "learning_rate": 8.374046215372011e-07, "loss": 0.0784, "step": 8563 }, { "epoch": 1.9485779294653014, "grad_norm": 2.2718997282755162, "learning_rate": 8.37320586177877e-07, "loss": 0.1032, "step": 8564 }, { "epoch": 1.9488054607508531, "grad_norm": 2.021110083073223, "learning_rate": 8.372365464791094e-07, "loss": 0.1369, "step": 8565 }, { "epoch": 1.949032992036405, "grad_norm": 1.893991429842655, "learning_rate": 8.371525024426153e-07, "loss": 0.1258, "step": 8566 }, { "epoch": 1.9492605233219567, "grad_norm": 1.8229655764102353, "learning_rate": 8.370684540701126e-07, "loss": 0.1047, "step": 8567 }, { "epoch": 1.9494880546075084, "grad_norm": 1.9936427878273355, "learning_rate": 8.369844013633191e-07, "loss": 0.1409, "step": 8568 }, { "epoch": 1.9497155858930602, "grad_norm": 1.2484127425000624, "learning_rate": 8.369003443239528e-07, "loss": 0.0634, "step": 8569 }, { "epoch": 1.949943117178612, "grad_norm": 2.4458940117366152, "learning_rate": 8.368162829537313e-07, "loss": 0.0893, "step": 8570 }, { "epoch": 1.9501706484641637, "grad_norm": 1.3735546349913483, "learning_rate": 8.367322172543729e-07, "loss": 0.0971, "step": 8571 }, { "epoch": 1.9503981797497156, "grad_norm": 1.0857680420382734, "learning_rate": 8.366481472275961e-07, "loss": 0.0363, "step": 8572 }, { "epoch": 1.9506257110352674, "grad_norm": 2.9551954213167075, "learning_rate": 8.365640728751184e-07, "loss": 0.1919, "step": 8573 }, { "epoch": 1.9508532423208191, "grad_norm": 1.8609193821716674, "learning_rate": 8.364799941986587e-07, "loss": 0.0689, "step": 8574 }, { "epoch": 1.9510807736063709, "grad_norm": 2.45267786058061, "learning_rate": 8.363959111999352e-07, "loss": 0.1152, "step": 8575 }, { "epoch": 1.9513083048919226, "grad_norm": 1.9628407395323646, "learning_rate": 8.363118238806663e-07, "loss": 0.0626, "step": 8576 }, { "epoch": 1.9515358361774744, "grad_norm": 1.8859253724260385, "learning_rate": 8.362277322425709e-07, "loss": 0.0701, "step": 8577 }, { "epoch": 1.9517633674630261, "grad_norm": 1.8757889787827413, "learning_rate": 8.361436362873676e-07, "loss": 0.1373, "step": 8578 }, { "epoch": 1.951990898748578, "grad_norm": 1.6478100869469023, "learning_rate": 8.360595360167748e-07, "loss": 0.1116, "step": 8579 }, { "epoch": 1.9522184300341296, "grad_norm": 2.7164270881373214, "learning_rate": 8.359754314325117e-07, "loss": 0.2087, "step": 8580 }, { "epoch": 1.9524459613196816, "grad_norm": 1.6867400681793747, "learning_rate": 8.358913225362972e-07, "loss": 0.0968, "step": 8581 }, { "epoch": 1.9526734926052334, "grad_norm": 1.6519218488263627, "learning_rate": 8.358072093298503e-07, "loss": 0.0727, "step": 8582 }, { "epoch": 1.9529010238907851, "grad_norm": 1.833600292382146, "learning_rate": 8.357230918148901e-07, "loss": 0.1168, "step": 8583 }, { "epoch": 1.9531285551763369, "grad_norm": 1.3802649701297778, "learning_rate": 8.356389699931359e-07, "loss": 0.1326, "step": 8584 }, { "epoch": 1.9533560864618886, "grad_norm": 2.21485988798864, "learning_rate": 8.35554843866307e-07, "loss": 0.0858, "step": 8585 }, { "epoch": 1.9535836177474404, "grad_norm": 2.0919450611286816, "learning_rate": 8.354707134361224e-07, "loss": 0.1045, "step": 8586 }, { "epoch": 1.9538111490329921, "grad_norm": 2.8589127146761113, "learning_rate": 8.35386578704302e-07, "loss": 0.0908, "step": 8587 }, { "epoch": 1.9540386803185439, "grad_norm": 1.4441818031879883, "learning_rate": 8.353024396725653e-07, "loss": 0.0924, "step": 8588 }, { "epoch": 1.9542662116040956, "grad_norm": 2.2692155542041657, "learning_rate": 8.352182963426317e-07, "loss": 0.1268, "step": 8589 }, { "epoch": 1.9544937428896474, "grad_norm": 1.7174063991169783, "learning_rate": 8.351341487162214e-07, "loss": 0.1364, "step": 8590 }, { "epoch": 1.9547212741751991, "grad_norm": 2.47929661092712, "learning_rate": 8.350499967950538e-07, "loss": 0.183, "step": 8591 }, { "epoch": 1.954948805460751, "grad_norm": 0.9115396121552741, "learning_rate": 8.349658405808489e-07, "loss": 0.047, "step": 8592 }, { "epoch": 1.9551763367463026, "grad_norm": 1.6855778243414008, "learning_rate": 8.348816800753269e-07, "loss": 0.0586, "step": 8593 }, { "epoch": 1.9554038680318544, "grad_norm": 1.410626305008107, "learning_rate": 8.347975152802075e-07, "loss": 0.0732, "step": 8594 }, { "epoch": 1.9556313993174061, "grad_norm": 2.3121651096996807, "learning_rate": 8.347133461972112e-07, "loss": 0.1034, "step": 8595 }, { "epoch": 1.955858930602958, "grad_norm": 2.1842489314084768, "learning_rate": 8.346291728280584e-07, "loss": 0.1254, "step": 8596 }, { "epoch": 1.9560864618885097, "grad_norm": 1.2125108398522104, "learning_rate": 8.345449951744692e-07, "loss": 0.0821, "step": 8597 }, { "epoch": 1.9563139931740614, "grad_norm": 2.32704489508761, "learning_rate": 8.344608132381637e-07, "loss": 0.0857, "step": 8598 }, { "epoch": 1.9565415244596132, "grad_norm": 1.476103066454572, "learning_rate": 8.343766270208631e-07, "loss": 0.0973, "step": 8599 }, { "epoch": 1.956769055745165, "grad_norm": 2.1631455138407243, "learning_rate": 8.342924365242878e-07, "loss": 0.0974, "step": 8600 }, { "epoch": 1.9569965870307167, "grad_norm": 1.4889235511311651, "learning_rate": 8.342082417501579e-07, "loss": 0.0426, "step": 8601 }, { "epoch": 1.9572241183162684, "grad_norm": 1.3914138563247866, "learning_rate": 8.341240427001951e-07, "loss": 0.0676, "step": 8602 }, { "epoch": 1.9574516496018202, "grad_norm": 2.746354690345646, "learning_rate": 8.340398393761199e-07, "loss": 0.096, "step": 8603 }, { "epoch": 1.957679180887372, "grad_norm": 1.7950527438701973, "learning_rate": 8.339556317796529e-07, "loss": 0.0526, "step": 8604 }, { "epoch": 1.9579067121729237, "grad_norm": 1.4717345104063482, "learning_rate": 8.338714199125157e-07, "loss": 0.0565, "step": 8605 }, { "epoch": 1.9581342434584754, "grad_norm": 1.9075516581715017, "learning_rate": 8.337872037764292e-07, "loss": 0.0599, "step": 8606 }, { "epoch": 1.9583617747440272, "grad_norm": 2.264996914994366, "learning_rate": 8.337029833731145e-07, "loss": 0.0812, "step": 8607 }, { "epoch": 1.958589306029579, "grad_norm": 1.3912222017896145, "learning_rate": 8.336187587042932e-07, "loss": 0.042, "step": 8608 }, { "epoch": 1.9588168373151307, "grad_norm": 1.2403469619206942, "learning_rate": 8.335345297716863e-07, "loss": 0.0808, "step": 8609 }, { "epoch": 1.9590443686006824, "grad_norm": 1.7039712007849193, "learning_rate": 8.334502965770158e-07, "loss": 0.102, "step": 8610 }, { "epoch": 1.9592718998862344, "grad_norm": 2.188588003821983, "learning_rate": 8.33366059122003e-07, "loss": 0.0744, "step": 8611 }, { "epoch": 1.9594994311717862, "grad_norm": 1.5541202044831102, "learning_rate": 8.332818174083694e-07, "loss": 0.0423, "step": 8612 }, { "epoch": 1.959726962457338, "grad_norm": 2.35657294378755, "learning_rate": 8.331975714378369e-07, "loss": 0.0667, "step": 8613 }, { "epoch": 1.9599544937428897, "grad_norm": 2.9190505646170095, "learning_rate": 8.331133212121273e-07, "loss": 0.1226, "step": 8614 }, { "epoch": 1.9601820250284414, "grad_norm": 1.074796870429527, "learning_rate": 8.330290667329627e-07, "loss": 0.0824, "step": 8615 }, { "epoch": 1.9604095563139932, "grad_norm": 1.6689032764316707, "learning_rate": 8.329448080020651e-07, "loss": 0.15, "step": 8616 }, { "epoch": 1.960637087599545, "grad_norm": 1.3409500924708422, "learning_rate": 8.32860545021156e-07, "loss": 0.0867, "step": 8617 }, { "epoch": 1.9608646188850967, "grad_norm": 3.7050291604272387, "learning_rate": 8.327762777919585e-07, "loss": 0.0839, "step": 8618 }, { "epoch": 1.9610921501706484, "grad_norm": 0.8218866668948319, "learning_rate": 8.326920063161942e-07, "loss": 0.0467, "step": 8619 }, { "epoch": 1.9613196814562004, "grad_norm": 1.0653092132142425, "learning_rate": 8.326077305955857e-07, "loss": 0.0754, "step": 8620 }, { "epoch": 1.9615472127417521, "grad_norm": 1.8128981252159282, "learning_rate": 8.325234506318553e-07, "loss": 0.0821, "step": 8621 }, { "epoch": 1.961774744027304, "grad_norm": 1.6899341568403192, "learning_rate": 8.324391664267257e-07, "loss": 0.0888, "step": 8622 }, { "epoch": 1.9620022753128556, "grad_norm": 2.3338714314112, "learning_rate": 8.323548779819194e-07, "loss": 0.0576, "step": 8623 }, { "epoch": 1.9622298065984074, "grad_norm": 1.4170197276530796, "learning_rate": 8.322705852991592e-07, "loss": 0.1294, "step": 8624 }, { "epoch": 1.9624573378839592, "grad_norm": 1.6082562993048413, "learning_rate": 8.321862883801677e-07, "loss": 0.0561, "step": 8625 }, { "epoch": 1.962684869169511, "grad_norm": 2.067958132163914, "learning_rate": 8.321019872266682e-07, "loss": 0.1137, "step": 8626 }, { "epoch": 1.9629124004550627, "grad_norm": 1.5830819168127874, "learning_rate": 8.320176818403831e-07, "loss": 0.0839, "step": 8627 }, { "epoch": 1.9631399317406144, "grad_norm": 1.9808658987468761, "learning_rate": 8.319333722230359e-07, "loss": 0.1352, "step": 8628 }, { "epoch": 1.9633674630261662, "grad_norm": 1.2815991943265168, "learning_rate": 8.318490583763494e-07, "loss": 0.0971, "step": 8629 }, { "epoch": 1.963594994311718, "grad_norm": 1.9747071861845462, "learning_rate": 8.317647403020472e-07, "loss": 0.229, "step": 8630 }, { "epoch": 1.9638225255972697, "grad_norm": 1.3506976246768094, "learning_rate": 8.316804180018522e-07, "loss": 0.0412, "step": 8631 }, { "epoch": 1.9640500568828214, "grad_norm": 1.4891462190541658, "learning_rate": 8.315960914774878e-07, "loss": 0.0689, "step": 8632 }, { "epoch": 1.9642775881683732, "grad_norm": 1.6604876475552375, "learning_rate": 8.31511760730678e-07, "loss": 0.1113, "step": 8633 }, { "epoch": 1.964505119453925, "grad_norm": 1.7505201034146314, "learning_rate": 8.314274257631458e-07, "loss": 0.0753, "step": 8634 }, { "epoch": 1.9647326507394767, "grad_norm": 2.223169669274098, "learning_rate": 8.313430865766154e-07, "loss": 0.0839, "step": 8635 }, { "epoch": 1.9649601820250284, "grad_norm": 2.581921503540097, "learning_rate": 8.312587431728098e-07, "loss": 0.071, "step": 8636 }, { "epoch": 1.9651877133105802, "grad_norm": 1.866436868374168, "learning_rate": 8.311743955534533e-07, "loss": 0.1104, "step": 8637 }, { "epoch": 1.965415244596132, "grad_norm": 1.222648355666239, "learning_rate": 8.310900437202699e-07, "loss": 0.0565, "step": 8638 }, { "epoch": 1.9656427758816837, "grad_norm": 1.990385424763967, "learning_rate": 8.310056876749833e-07, "loss": 0.1203, "step": 8639 }, { "epoch": 1.9658703071672354, "grad_norm": 1.5209309282517698, "learning_rate": 8.309213274193179e-07, "loss": 0.138, "step": 8640 }, { "epoch": 1.9660978384527872, "grad_norm": 2.370651074382666, "learning_rate": 8.308369629549976e-07, "loss": 0.0579, "step": 8641 }, { "epoch": 1.966325369738339, "grad_norm": 1.6617922953603113, "learning_rate": 8.307525942837468e-07, "loss": 0.1192, "step": 8642 }, { "epoch": 1.9665529010238907, "grad_norm": 2.0286903030901375, "learning_rate": 8.306682214072897e-07, "loss": 0.2664, "step": 8643 }, { "epoch": 1.9667804323094424, "grad_norm": 1.3145498331251388, "learning_rate": 8.305838443273508e-07, "loss": 0.1039, "step": 8644 }, { "epoch": 1.9670079635949942, "grad_norm": 2.292862361880169, "learning_rate": 8.304994630456546e-07, "loss": 0.1654, "step": 8645 }, { "epoch": 1.967235494880546, "grad_norm": 2.1230943172193157, "learning_rate": 8.304150775639258e-07, "loss": 0.0782, "step": 8646 }, { "epoch": 1.9674630261660977, "grad_norm": 2.0206529007810183, "learning_rate": 8.303306878838892e-07, "loss": 0.1018, "step": 8647 }, { "epoch": 1.9676905574516494, "grad_norm": 1.0586918202520623, "learning_rate": 8.302462940072691e-07, "loss": 0.0425, "step": 8648 }, { "epoch": 1.9679180887372012, "grad_norm": 1.7279422628295729, "learning_rate": 8.301618959357908e-07, "loss": 0.1064, "step": 8649 }, { "epoch": 1.9681456200227532, "grad_norm": 1.864109156077631, "learning_rate": 8.300774936711792e-07, "loss": 0.1265, "step": 8650 }, { "epoch": 1.968373151308305, "grad_norm": 11.346775621475524, "learning_rate": 8.299930872151589e-07, "loss": 0.1063, "step": 8651 }, { "epoch": 1.9686006825938567, "grad_norm": 4.708817423035009, "learning_rate": 8.299086765694554e-07, "loss": 0.1209, "step": 8652 }, { "epoch": 1.9688282138794084, "grad_norm": 1.6750224470810209, "learning_rate": 8.298242617357939e-07, "loss": 0.0439, "step": 8653 }, { "epoch": 1.9690557451649602, "grad_norm": 1.7314343639955407, "learning_rate": 8.297398427158996e-07, "loss": 0.1048, "step": 8654 }, { "epoch": 1.969283276450512, "grad_norm": 2.9630009476977377, "learning_rate": 8.296554195114979e-07, "loss": 0.1042, "step": 8655 }, { "epoch": 1.9695108077360637, "grad_norm": 1.8404252877953569, "learning_rate": 8.295709921243143e-07, "loss": 0.1182, "step": 8656 }, { "epoch": 1.9697383390216154, "grad_norm": 1.2324941962130138, "learning_rate": 8.294865605560743e-07, "loss": 0.0501, "step": 8657 }, { "epoch": 1.9699658703071672, "grad_norm": 1.4366065854180936, "learning_rate": 8.294021248085032e-07, "loss": 0.082, "step": 8658 }, { "epoch": 1.9701934015927192, "grad_norm": 1.478003504693769, "learning_rate": 8.293176848833273e-07, "loss": 0.1013, "step": 8659 }, { "epoch": 1.970420932878271, "grad_norm": 1.5789170948783189, "learning_rate": 8.292332407822721e-07, "loss": 0.0883, "step": 8660 }, { "epoch": 1.9706484641638227, "grad_norm": 1.7038465627576111, "learning_rate": 8.291487925070634e-07, "loss": 0.0388, "step": 8661 }, { "epoch": 1.9708759954493744, "grad_norm": 1.6944943552866523, "learning_rate": 8.290643400594273e-07, "loss": 0.0665, "step": 8662 }, { "epoch": 1.9711035267349262, "grad_norm": 1.631425973583011, "learning_rate": 8.289798834410898e-07, "loss": 0.0694, "step": 8663 }, { "epoch": 1.971331058020478, "grad_norm": 0.9533971656815422, "learning_rate": 8.288954226537768e-07, "loss": 0.0392, "step": 8664 }, { "epoch": 1.9715585893060297, "grad_norm": 1.5917535714667914, "learning_rate": 8.288109576992151e-07, "loss": 0.1289, "step": 8665 }, { "epoch": 1.9717861205915814, "grad_norm": 1.809822780479361, "learning_rate": 8.287264885791308e-07, "loss": 0.0719, "step": 8666 }, { "epoch": 1.9720136518771332, "grad_norm": 1.578200781080261, "learning_rate": 8.286420152952499e-07, "loss": 0.0866, "step": 8667 }, { "epoch": 1.972241183162685, "grad_norm": 2.8878884989148244, "learning_rate": 8.285575378492992e-07, "loss": 0.1145, "step": 8668 }, { "epoch": 1.9724687144482367, "grad_norm": 1.5615581436014474, "learning_rate": 8.284730562430053e-07, "loss": 0.1209, "step": 8669 }, { "epoch": 1.9726962457337884, "grad_norm": 1.3844281684842732, "learning_rate": 8.283885704780947e-07, "loss": 0.0842, "step": 8670 }, { "epoch": 1.9729237770193402, "grad_norm": 2.387948296712115, "learning_rate": 8.283040805562942e-07, "loss": 0.0832, "step": 8671 }, { "epoch": 1.973151308304892, "grad_norm": 1.7102712047888355, "learning_rate": 8.282195864793307e-07, "loss": 0.1447, "step": 8672 }, { "epoch": 1.9733788395904437, "grad_norm": 1.4547768267132164, "learning_rate": 8.281350882489311e-07, "loss": 0.0597, "step": 8673 }, { "epoch": 1.9736063708759954, "grad_norm": 1.8652494247457443, "learning_rate": 8.280505858668222e-07, "loss": 0.1626, "step": 8674 }, { "epoch": 1.9738339021615472, "grad_norm": 2.1832358441304534, "learning_rate": 8.279660793347314e-07, "loss": 0.1992, "step": 8675 }, { "epoch": 1.974061433447099, "grad_norm": 1.4624364468009274, "learning_rate": 8.278815686543854e-07, "loss": 0.1492, "step": 8676 }, { "epoch": 1.9742889647326507, "grad_norm": 2.2089614656107153, "learning_rate": 8.277970538275118e-07, "loss": 0.067, "step": 8677 }, { "epoch": 1.9745164960182024, "grad_norm": 1.3650501445700982, "learning_rate": 8.277125348558376e-07, "loss": 0.0592, "step": 8678 }, { "epoch": 1.9747440273037542, "grad_norm": 3.0771392361969356, "learning_rate": 8.276280117410909e-07, "loss": 0.0427, "step": 8679 }, { "epoch": 1.974971558589306, "grad_norm": 1.2119852743527402, "learning_rate": 8.275434844849985e-07, "loss": 0.0656, "step": 8680 }, { "epoch": 1.9751990898748577, "grad_norm": 2.2806841761919965, "learning_rate": 8.274589530892881e-07, "loss": 0.0588, "step": 8681 }, { "epoch": 1.9754266211604095, "grad_norm": 2.55583141126003, "learning_rate": 8.273744175556877e-07, "loss": 0.1209, "step": 8682 }, { "epoch": 1.9756541524459612, "grad_norm": 1.4254587602708517, "learning_rate": 8.272898778859247e-07, "loss": 0.0723, "step": 8683 }, { "epoch": 1.975881683731513, "grad_norm": 2.673288718810534, "learning_rate": 8.272053340817271e-07, "loss": 0.0835, "step": 8684 }, { "epoch": 1.9761092150170647, "grad_norm": 0.9119109227202765, "learning_rate": 8.271207861448227e-07, "loss": 0.0867, "step": 8685 }, { "epoch": 1.9763367463026165, "grad_norm": 1.5946016036601556, "learning_rate": 8.270362340769397e-07, "loss": 0.1654, "step": 8686 }, { "epoch": 1.9765642775881682, "grad_norm": 1.4365129045779053, "learning_rate": 8.269516778798062e-07, "loss": 0.1164, "step": 8687 }, { "epoch": 1.9767918088737202, "grad_norm": 1.4889068357273756, "learning_rate": 8.268671175551499e-07, "loss": 0.0874, "step": 8688 }, { "epoch": 1.977019340159272, "grad_norm": 1.886486084945335, "learning_rate": 8.267825531046997e-07, "loss": 0.0753, "step": 8689 }, { "epoch": 1.9772468714448237, "grad_norm": 2.4827800257148493, "learning_rate": 8.266979845301837e-07, "loss": 0.1679, "step": 8690 }, { "epoch": 1.9774744027303754, "grad_norm": 1.3579765440263711, "learning_rate": 8.266134118333302e-07, "loss": 0.0974, "step": 8691 }, { "epoch": 1.9777019340159272, "grad_norm": 2.7996405259060584, "learning_rate": 8.265288350158677e-07, "loss": 0.1313, "step": 8692 }, { "epoch": 1.977929465301479, "grad_norm": 1.7784935591636652, "learning_rate": 8.26444254079525e-07, "loss": 0.0611, "step": 8693 }, { "epoch": 1.9781569965870307, "grad_norm": 2.153563806186732, "learning_rate": 8.263596690260306e-07, "loss": 0.1406, "step": 8694 }, { "epoch": 1.9783845278725825, "grad_norm": 1.4147170215746951, "learning_rate": 8.262750798571134e-07, "loss": 0.0482, "step": 8695 }, { "epoch": 1.9786120591581342, "grad_norm": 1.7619991838965041, "learning_rate": 8.261904865745022e-07, "loss": 0.1815, "step": 8696 }, { "epoch": 1.978839590443686, "grad_norm": 1.1331623903514012, "learning_rate": 8.26105889179926e-07, "loss": 0.0692, "step": 8697 }, { "epoch": 1.979067121729238, "grad_norm": 1.253627258938659, "learning_rate": 8.260212876751137e-07, "loss": 0.1328, "step": 8698 }, { "epoch": 1.9792946530147897, "grad_norm": 2.0095171447623783, "learning_rate": 8.259366820617943e-07, "loss": 0.0824, "step": 8699 }, { "epoch": 1.9795221843003414, "grad_norm": 1.7847243273657507, "learning_rate": 8.258520723416972e-07, "loss": 0.0839, "step": 8700 }, { "epoch": 1.9797497155858932, "grad_norm": 1.1223884821522332, "learning_rate": 8.257674585165515e-07, "loss": 0.0501, "step": 8701 }, { "epoch": 1.979977246871445, "grad_norm": 0.7103291026724687, "learning_rate": 8.256828405880868e-07, "loss": 0.0882, "step": 8702 }, { "epoch": 1.9802047781569967, "grad_norm": 1.529906844117204, "learning_rate": 8.255982185580323e-07, "loss": 0.053, "step": 8703 }, { "epoch": 1.9804323094425484, "grad_norm": 1.51643427579225, "learning_rate": 8.255135924281175e-07, "loss": 0.0542, "step": 8704 }, { "epoch": 1.9806598407281002, "grad_norm": 1.93250010433533, "learning_rate": 8.254289622000724e-07, "loss": 0.0969, "step": 8705 }, { "epoch": 1.980887372013652, "grad_norm": 2.222065038425281, "learning_rate": 8.253443278756261e-07, "loss": 0.0797, "step": 8706 }, { "epoch": 1.9811149032992037, "grad_norm": 1.6875050782240095, "learning_rate": 8.252596894565088e-07, "loss": 0.1117, "step": 8707 }, { "epoch": 1.9813424345847555, "grad_norm": 1.6021453556306384, "learning_rate": 8.251750469444498e-07, "loss": 0.062, "step": 8708 }, { "epoch": 1.9815699658703072, "grad_norm": 1.3794430805592486, "learning_rate": 8.250904003411798e-07, "loss": 0.1158, "step": 8709 }, { "epoch": 1.981797497155859, "grad_norm": 2.223816644362845, "learning_rate": 8.250057496484285e-07, "loss": 0.1028, "step": 8710 }, { "epoch": 1.9820250284414107, "grad_norm": 1.902049328564107, "learning_rate": 8.24921094867926e-07, "loss": 0.0457, "step": 8711 }, { "epoch": 1.9822525597269625, "grad_norm": 2.159891289436667, "learning_rate": 8.248364360014023e-07, "loss": 0.0958, "step": 8712 }, { "epoch": 1.9824800910125142, "grad_norm": 2.5114187966886097, "learning_rate": 8.247517730505879e-07, "loss": 0.1056, "step": 8713 }, { "epoch": 1.982707622298066, "grad_norm": 2.2504820631273086, "learning_rate": 8.24667106017213e-07, "loss": 0.0671, "step": 8714 }, { "epoch": 1.9829351535836177, "grad_norm": 1.2019567535363975, "learning_rate": 8.245824349030082e-07, "loss": 0.0722, "step": 8715 }, { "epoch": 1.9831626848691695, "grad_norm": 1.6629097763213867, "learning_rate": 8.244977597097039e-07, "loss": 0.1164, "step": 8716 }, { "epoch": 1.9833902161547212, "grad_norm": 1.2374423438478135, "learning_rate": 8.244130804390311e-07, "loss": 0.051, "step": 8717 }, { "epoch": 1.983617747440273, "grad_norm": 1.5062111724253495, "learning_rate": 8.243283970927196e-07, "loss": 0.1171, "step": 8718 }, { "epoch": 1.9838452787258247, "grad_norm": 2.3205296578644656, "learning_rate": 8.242437096725009e-07, "loss": 0.0971, "step": 8719 }, { "epoch": 1.9840728100113765, "grad_norm": 1.1268486989632436, "learning_rate": 8.241590181801059e-07, "loss": 0.0205, "step": 8720 }, { "epoch": 1.9843003412969282, "grad_norm": 2.539102510420942, "learning_rate": 8.240743226172651e-07, "loss": 0.1488, "step": 8721 }, { "epoch": 1.98452787258248, "grad_norm": 1.3846449421864462, "learning_rate": 8.239896229857096e-07, "loss": 0.1043, "step": 8722 }, { "epoch": 1.9847554038680317, "grad_norm": 1.9851920729714756, "learning_rate": 8.23904919287171e-07, "loss": 0.2263, "step": 8723 }, { "epoch": 1.9849829351535835, "grad_norm": 2.221709013257648, "learning_rate": 8.2382021152338e-07, "loss": 0.0977, "step": 8724 }, { "epoch": 1.9852104664391352, "grad_norm": 1.6218224330242628, "learning_rate": 8.237354996960678e-07, "loss": 0.0854, "step": 8725 }, { "epoch": 1.985437997724687, "grad_norm": 1.5029991830245644, "learning_rate": 8.23650783806966e-07, "loss": 0.1117, "step": 8726 }, { "epoch": 1.985665529010239, "grad_norm": 2.9301067848601767, "learning_rate": 8.235660638578061e-07, "loss": 0.1432, "step": 8727 }, { "epoch": 1.9858930602957907, "grad_norm": 3.2845770288624614, "learning_rate": 8.234813398503194e-07, "loss": 0.1129, "step": 8728 }, { "epoch": 1.9861205915813425, "grad_norm": 0.8591511147372719, "learning_rate": 8.233966117862378e-07, "loss": 0.0442, "step": 8729 }, { "epoch": 1.9863481228668942, "grad_norm": 1.2599805504047201, "learning_rate": 8.233118796672929e-07, "loss": 0.0706, "step": 8730 }, { "epoch": 1.986575654152446, "grad_norm": 1.832085019662094, "learning_rate": 8.23227143495216e-07, "loss": 0.0444, "step": 8731 }, { "epoch": 1.9868031854379977, "grad_norm": 2.725371399737415, "learning_rate": 8.231424032717395e-07, "loss": 0.1039, "step": 8732 }, { "epoch": 1.9870307167235495, "grad_norm": 1.0608242847643243, "learning_rate": 8.230576589985951e-07, "loss": 0.0605, "step": 8733 }, { "epoch": 1.9872582480091012, "grad_norm": 2.265480959394933, "learning_rate": 8.22972910677515e-07, "loss": 0.0924, "step": 8734 }, { "epoch": 1.987485779294653, "grad_norm": 1.5908850088022235, "learning_rate": 8.22888158310231e-07, "loss": 0.084, "step": 8735 }, { "epoch": 1.9877133105802047, "grad_norm": 1.5149168873070165, "learning_rate": 8.228034018984757e-07, "loss": 0.1045, "step": 8736 }, { "epoch": 1.9879408418657567, "grad_norm": 1.601832771127956, "learning_rate": 8.227186414439812e-07, "loss": 0.1341, "step": 8737 }, { "epoch": 1.9881683731513085, "grad_norm": 1.8254229787531513, "learning_rate": 8.226338769484793e-07, "loss": 0.07, "step": 8738 }, { "epoch": 1.9883959044368602, "grad_norm": 1.364448391816041, "learning_rate": 8.225491084137032e-07, "loss": 0.0783, "step": 8739 }, { "epoch": 1.988623435722412, "grad_norm": 2.3290448585354575, "learning_rate": 8.22464335841385e-07, "loss": 0.0726, "step": 8740 }, { "epoch": 1.9888509670079637, "grad_norm": 1.5552876815848529, "learning_rate": 8.223795592332575e-07, "loss": 0.0713, "step": 8741 }, { "epoch": 1.9890784982935155, "grad_norm": 1.5238339414276638, "learning_rate": 8.222947785910534e-07, "loss": 0.0605, "step": 8742 }, { "epoch": 1.9893060295790672, "grad_norm": 1.8533393839233188, "learning_rate": 8.222099939165053e-07, "loss": 0.0658, "step": 8743 }, { "epoch": 1.989533560864619, "grad_norm": 1.636769542071038, "learning_rate": 8.221252052113461e-07, "loss": 0.0686, "step": 8744 }, { "epoch": 1.9897610921501707, "grad_norm": 1.1784979000680322, "learning_rate": 8.220404124773084e-07, "loss": 0.0454, "step": 8745 }, { "epoch": 1.9899886234357225, "grad_norm": 1.207734253241908, "learning_rate": 8.21955615716126e-07, "loss": 0.0862, "step": 8746 }, { "epoch": 1.9902161547212742, "grad_norm": 1.4545277819121272, "learning_rate": 8.218708149295312e-07, "loss": 0.0549, "step": 8747 }, { "epoch": 1.990443686006826, "grad_norm": 2.7172887991799524, "learning_rate": 8.217860101192578e-07, "loss": 0.1243, "step": 8748 }, { "epoch": 1.9906712172923777, "grad_norm": 1.702889527991951, "learning_rate": 8.217012012870384e-07, "loss": 0.1054, "step": 8749 }, { "epoch": 1.9908987485779295, "grad_norm": 1.271613927088692, "learning_rate": 8.21616388434607e-07, "loss": 0.1486, "step": 8750 }, { "epoch": 1.9911262798634812, "grad_norm": 1.8988243187589482, "learning_rate": 8.215315715636965e-07, "loss": 0.1423, "step": 8751 }, { "epoch": 1.991353811149033, "grad_norm": 2.055876805575002, "learning_rate": 8.214467506760407e-07, "loss": 0.115, "step": 8752 }, { "epoch": 1.9915813424345847, "grad_norm": 2.638508117319233, "learning_rate": 8.213619257733729e-07, "loss": 0.0905, "step": 8753 }, { "epoch": 1.9918088737201365, "grad_norm": 2.567328517530897, "learning_rate": 8.212770968574274e-07, "loss": 0.1084, "step": 8754 }, { "epoch": 1.9920364050056882, "grad_norm": 1.7075683935276236, "learning_rate": 8.211922639299372e-07, "loss": 0.1013, "step": 8755 }, { "epoch": 1.99226393629124, "grad_norm": 1.648301967423068, "learning_rate": 8.211074269926364e-07, "loss": 0.0839, "step": 8756 }, { "epoch": 1.9924914675767917, "grad_norm": 1.434701677591572, "learning_rate": 8.21022586047259e-07, "loss": 0.1214, "step": 8757 }, { "epoch": 1.9927189988623435, "grad_norm": 1.8420000043262064, "learning_rate": 8.209377410955388e-07, "loss": 0.1467, "step": 8758 }, { "epoch": 1.9929465301478952, "grad_norm": 2.52652405311473, "learning_rate": 8.208528921392101e-07, "loss": 0.1096, "step": 8759 }, { "epoch": 1.993174061433447, "grad_norm": 1.1848532764347495, "learning_rate": 8.207680391800071e-07, "loss": 0.0315, "step": 8760 }, { "epoch": 1.9934015927189987, "grad_norm": 1.6017535978855324, "learning_rate": 8.206831822196639e-07, "loss": 0.1236, "step": 8761 }, { "epoch": 1.9936291240045505, "grad_norm": 1.649573992216395, "learning_rate": 8.205983212599147e-07, "loss": 0.0656, "step": 8762 }, { "epoch": 1.9938566552901023, "grad_norm": 2.890250389013289, "learning_rate": 8.205134563024942e-07, "loss": 0.1369, "step": 8763 }, { "epoch": 1.994084186575654, "grad_norm": 2.140472700627925, "learning_rate": 8.204285873491366e-07, "loss": 0.1049, "step": 8764 }, { "epoch": 1.9943117178612058, "grad_norm": 2.7508207881108455, "learning_rate": 8.203437144015766e-07, "loss": 0.15, "step": 8765 }, { "epoch": 1.9945392491467577, "grad_norm": 1.5774029621418546, "learning_rate": 8.202588374615489e-07, "loss": 0.0621, "step": 8766 }, { "epoch": 1.9947667804323095, "grad_norm": 4.257497987165835, "learning_rate": 8.201739565307881e-07, "loss": 0.1181, "step": 8767 }, { "epoch": 1.9949943117178612, "grad_norm": 1.54072123867852, "learning_rate": 8.200890716110291e-07, "loss": 0.0576, "step": 8768 }, { "epoch": 1.995221843003413, "grad_norm": 1.3992445871747745, "learning_rate": 8.200041827040067e-07, "loss": 0.1309, "step": 8769 }, { "epoch": 1.9954493742889647, "grad_norm": 2.107248853389515, "learning_rate": 8.19919289811456e-07, "loss": 0.074, "step": 8770 }, { "epoch": 1.9956769055745165, "grad_norm": 1.7026980270034522, "learning_rate": 8.19834392935112e-07, "loss": 0.1173, "step": 8771 }, { "epoch": 1.9959044368600682, "grad_norm": 1.8419011010512982, "learning_rate": 8.197494920767098e-07, "loss": 0.1412, "step": 8772 }, { "epoch": 1.99613196814562, "grad_norm": 1.3318756882986866, "learning_rate": 8.196645872379847e-07, "loss": 0.0862, "step": 8773 }, { "epoch": 1.9963594994311717, "grad_norm": 1.5624969864715754, "learning_rate": 8.19579678420672e-07, "loss": 0.0824, "step": 8774 }, { "epoch": 1.9965870307167235, "grad_norm": 2.530990456405567, "learning_rate": 8.194947656265068e-07, "loss": 0.1119, "step": 8775 }, { "epoch": 1.9968145620022755, "grad_norm": 1.0514160033556272, "learning_rate": 8.19409848857225e-07, "loss": 0.0535, "step": 8776 }, { "epoch": 1.9970420932878272, "grad_norm": 1.4347125548012525, "learning_rate": 8.193249281145618e-07, "loss": 0.0278, "step": 8777 }, { "epoch": 1.997269624573379, "grad_norm": 1.3797341750105707, "learning_rate": 8.19240003400253e-07, "loss": 0.0479, "step": 8778 }, { "epoch": 1.9974971558589307, "grad_norm": 2.0241199844319366, "learning_rate": 8.191550747160343e-07, "loss": 0.1545, "step": 8779 }, { "epoch": 1.9977246871444825, "grad_norm": 1.5886296816722167, "learning_rate": 8.190701420636415e-07, "loss": 0.1636, "step": 8780 }, { "epoch": 1.9979522184300342, "grad_norm": 2.8866673579296904, "learning_rate": 8.189852054448104e-07, "loss": 0.0926, "step": 8781 }, { "epoch": 1.998179749715586, "grad_norm": 1.8291149977747132, "learning_rate": 8.189002648612768e-07, "loss": 0.0636, "step": 8782 }, { "epoch": 1.9984072810011377, "grad_norm": 1.891164270992027, "learning_rate": 8.188153203147769e-07, "loss": 0.1051, "step": 8783 }, { "epoch": 1.9986348122866895, "grad_norm": 1.8788867320034368, "learning_rate": 8.18730371807047e-07, "loss": 0.0737, "step": 8784 }, { "epoch": 1.9988623435722412, "grad_norm": 1.7781478435152995, "learning_rate": 8.18645419339823e-07, "loss": 0.0693, "step": 8785 }, { "epoch": 1.999089874857793, "grad_norm": 2.3742153897723957, "learning_rate": 8.185604629148413e-07, "loss": 0.1433, "step": 8786 }, { "epoch": 1.9993174061433447, "grad_norm": 1.5317856218091108, "learning_rate": 8.184755025338384e-07, "loss": 0.1055, "step": 8787 }, { "epoch": 1.9995449374288965, "grad_norm": 1.9943110303869713, "learning_rate": 8.183905381985503e-07, "loss": 0.1895, "step": 8788 }, { "epoch": 1.9997724687144482, "grad_norm": 1.6863939890780129, "learning_rate": 8.183055699107139e-07, "loss": 0.0721, "step": 8789 }, { "epoch": 2.0, "grad_norm": 1.1227135701228614, "learning_rate": 8.182205976720656e-07, "loss": 0.0323, "step": 8790 }, { "epoch": 2.0002275312855518, "grad_norm": 1.639248317330326, "learning_rate": 8.181356214843422e-07, "loss": 0.0745, "step": 8791 }, { "epoch": 2.0004550625711035, "grad_norm": 1.1149408311128595, "learning_rate": 8.180506413492804e-07, "loss": 0.0451, "step": 8792 }, { "epoch": 2.0006825938566553, "grad_norm": 1.6891048132315853, "learning_rate": 8.17965657268617e-07, "loss": 0.0988, "step": 8793 }, { "epoch": 2.000910125142207, "grad_norm": 1.27443946664208, "learning_rate": 8.178806692440891e-07, "loss": 0.0492, "step": 8794 }, { "epoch": 2.0011376564277588, "grad_norm": 1.2380161041122009, "learning_rate": 8.177956772774334e-07, "loss": 0.0448, "step": 8795 }, { "epoch": 2.0013651877133105, "grad_norm": 1.7709227835300692, "learning_rate": 8.177106813703872e-07, "loss": 0.0489, "step": 8796 }, { "epoch": 2.0015927189988623, "grad_norm": 1.524886744541349, "learning_rate": 8.176256815246878e-07, "loss": 0.058, "step": 8797 }, { "epoch": 2.001820250284414, "grad_norm": 1.5196347556733598, "learning_rate": 8.175406777420721e-07, "loss": 0.0764, "step": 8798 }, { "epoch": 2.0020477815699658, "grad_norm": 0.8129924881751084, "learning_rate": 8.174556700242775e-07, "loss": 0.0473, "step": 8799 }, { "epoch": 2.0022753128555175, "grad_norm": 0.9728735219017393, "learning_rate": 8.173706583730414e-07, "loss": 0.0632, "step": 8800 }, { "epoch": 2.0025028441410693, "grad_norm": 0.825161854835932, "learning_rate": 8.172856427901015e-07, "loss": 0.0627, "step": 8801 }, { "epoch": 2.002730375426621, "grad_norm": 1.1071985751788505, "learning_rate": 8.17200623277195e-07, "loss": 0.0827, "step": 8802 }, { "epoch": 2.0029579067121728, "grad_norm": 0.9408215131976463, "learning_rate": 8.171155998360601e-07, "loss": 0.0691, "step": 8803 }, { "epoch": 2.0031854379977245, "grad_norm": 1.2887530621852699, "learning_rate": 8.170305724684341e-07, "loss": 0.0919, "step": 8804 }, { "epoch": 2.0034129692832763, "grad_norm": 1.6152645826005223, "learning_rate": 8.169455411760547e-07, "loss": 0.0437, "step": 8805 }, { "epoch": 2.003640500568828, "grad_norm": 1.8198581728956609, "learning_rate": 8.168605059606601e-07, "loss": 0.1383, "step": 8806 }, { "epoch": 2.00386803185438, "grad_norm": 1.4179904940921069, "learning_rate": 8.167754668239883e-07, "loss": 0.0828, "step": 8807 }, { "epoch": 2.0040955631399315, "grad_norm": 1.5926951533212381, "learning_rate": 8.16690423767777e-07, "loss": 0.1072, "step": 8808 }, { "epoch": 2.0043230944254833, "grad_norm": 1.2223116840843713, "learning_rate": 8.166053767937643e-07, "loss": 0.0738, "step": 8809 }, { "epoch": 2.0045506257110355, "grad_norm": 1.445344991442188, "learning_rate": 8.165203259036888e-07, "loss": 0.0428, "step": 8810 }, { "epoch": 2.0047781569965872, "grad_norm": 0.9090576750966644, "learning_rate": 8.164352710992887e-07, "loss": 0.0741, "step": 8811 }, { "epoch": 2.005005688282139, "grad_norm": 1.7276762171111015, "learning_rate": 8.163502123823021e-07, "loss": 0.0435, "step": 8812 }, { "epoch": 2.0052332195676907, "grad_norm": 1.492141595193191, "learning_rate": 8.162651497544677e-07, "loss": 0.0351, "step": 8813 }, { "epoch": 2.0054607508532425, "grad_norm": 1.3715675122970237, "learning_rate": 8.161800832175239e-07, "loss": 0.0424, "step": 8814 }, { "epoch": 2.0056882821387942, "grad_norm": 1.7638913521749375, "learning_rate": 8.160950127732093e-07, "loss": 0.0776, "step": 8815 }, { "epoch": 2.005915813424346, "grad_norm": 0.735937669675378, "learning_rate": 8.160099384232625e-07, "loss": 0.0524, "step": 8816 }, { "epoch": 2.0061433447098977, "grad_norm": 1.4131055981317602, "learning_rate": 8.159248601694226e-07, "loss": 0.0509, "step": 8817 }, { "epoch": 2.0063708759954495, "grad_norm": 1.4558188081430434, "learning_rate": 8.158397780134281e-07, "loss": 0.0303, "step": 8818 }, { "epoch": 2.0065984072810013, "grad_norm": 2.320515554826066, "learning_rate": 8.157546919570181e-07, "loss": 0.0373, "step": 8819 }, { "epoch": 2.006825938566553, "grad_norm": 2.027702445818353, "learning_rate": 8.156696020019314e-07, "loss": 0.0575, "step": 8820 }, { "epoch": 2.0070534698521048, "grad_norm": 2.6298986980382972, "learning_rate": 8.155845081499074e-07, "loss": 0.1308, "step": 8821 }, { "epoch": 2.0072810011376565, "grad_norm": 1.694566079523619, "learning_rate": 8.154994104026849e-07, "loss": 0.0606, "step": 8822 }, { "epoch": 2.0075085324232083, "grad_norm": 1.4030073007426522, "learning_rate": 8.154143087620035e-07, "loss": 0.0316, "step": 8823 }, { "epoch": 2.00773606370876, "grad_norm": 1.6338153612852564, "learning_rate": 8.153292032296025e-07, "loss": 0.0435, "step": 8824 }, { "epoch": 2.0079635949943118, "grad_norm": 1.4876129806903389, "learning_rate": 8.152440938072208e-07, "loss": 0.1292, "step": 8825 }, { "epoch": 2.0081911262798635, "grad_norm": 1.9032960662440848, "learning_rate": 8.151589804965984e-07, "loss": 0.0339, "step": 8826 }, { "epoch": 2.0084186575654153, "grad_norm": 1.4548831058798386, "learning_rate": 8.150738632994748e-07, "loss": 0.1377, "step": 8827 }, { "epoch": 2.008646188850967, "grad_norm": 2.4131691841176623, "learning_rate": 8.149887422175895e-07, "loss": 0.0382, "step": 8828 }, { "epoch": 2.0088737201365188, "grad_norm": 1.3122056977883902, "learning_rate": 8.149036172526821e-07, "loss": 0.0725, "step": 8829 }, { "epoch": 2.0091012514220705, "grad_norm": 1.0099883818234812, "learning_rate": 8.148184884064928e-07, "loss": 0.0446, "step": 8830 }, { "epoch": 2.0093287827076223, "grad_norm": 1.8582106873286008, "learning_rate": 8.14733355680761e-07, "loss": 0.0424, "step": 8831 }, { "epoch": 2.009556313993174, "grad_norm": 1.6924905733400868, "learning_rate": 8.146482190772271e-07, "loss": 0.1153, "step": 8832 }, { "epoch": 2.0097838452787258, "grad_norm": 1.8432234472230091, "learning_rate": 8.145630785976307e-07, "loss": 0.0517, "step": 8833 }, { "epoch": 2.0100113765642775, "grad_norm": 1.7375043175991889, "learning_rate": 8.144779342437123e-07, "loss": 0.0457, "step": 8834 }, { "epoch": 2.0102389078498293, "grad_norm": 1.342260023405727, "learning_rate": 8.143927860172118e-07, "loss": 0.0304, "step": 8835 }, { "epoch": 2.010466439135381, "grad_norm": 0.911422332247528, "learning_rate": 8.143076339198698e-07, "loss": 0.0395, "step": 8836 }, { "epoch": 2.010693970420933, "grad_norm": 3.5170367489675023, "learning_rate": 8.142224779534263e-07, "loss": 0.0322, "step": 8837 }, { "epoch": 2.0109215017064845, "grad_norm": 2.261756172333368, "learning_rate": 8.14137318119622e-07, "loss": 0.0608, "step": 8838 }, { "epoch": 2.0111490329920363, "grad_norm": 1.2494008164346324, "learning_rate": 8.140521544201972e-07, "loss": 0.0542, "step": 8839 }, { "epoch": 2.011376564277588, "grad_norm": 0.9523857994791479, "learning_rate": 8.139669868568927e-07, "loss": 0.0295, "step": 8840 }, { "epoch": 2.01160409556314, "grad_norm": 1.838077742603398, "learning_rate": 8.13881815431449e-07, "loss": 0.16, "step": 8841 }, { "epoch": 2.0118316268486915, "grad_norm": 0.8507938682088919, "learning_rate": 8.13796640145607e-07, "loss": 0.0488, "step": 8842 }, { "epoch": 2.0120591581342433, "grad_norm": 1.9073506063694765, "learning_rate": 8.137114610011074e-07, "loss": 0.1008, "step": 8843 }, { "epoch": 2.012286689419795, "grad_norm": 2.059551748427677, "learning_rate": 8.136262779996912e-07, "loss": 0.0701, "step": 8844 }, { "epoch": 2.012514220705347, "grad_norm": 1.713946499618614, "learning_rate": 8.135410911430992e-07, "loss": 0.0454, "step": 8845 }, { "epoch": 2.0127417519908986, "grad_norm": 1.8394902270979199, "learning_rate": 8.134559004330725e-07, "loss": 0.0409, "step": 8846 }, { "epoch": 2.0129692832764503, "grad_norm": 1.276868434763315, "learning_rate": 8.133707058713525e-07, "loss": 0.0334, "step": 8847 }, { "epoch": 2.013196814562002, "grad_norm": 1.2184236082444977, "learning_rate": 8.132855074596803e-07, "loss": 0.0253, "step": 8848 }, { "epoch": 2.0134243458475543, "grad_norm": 1.5754488122016854, "learning_rate": 8.132003051997972e-07, "loss": 0.0844, "step": 8849 }, { "epoch": 2.013651877133106, "grad_norm": 1.491733201506274, "learning_rate": 8.131150990934445e-07, "loss": 0.0533, "step": 8850 }, { "epoch": 2.0138794084186578, "grad_norm": 1.3402104757934845, "learning_rate": 8.130298891423636e-07, "loss": 0.0915, "step": 8851 }, { "epoch": 2.0141069397042095, "grad_norm": 1.1312502357304939, "learning_rate": 8.12944675348296e-07, "loss": 0.0338, "step": 8852 }, { "epoch": 2.0143344709897613, "grad_norm": 1.67824625782941, "learning_rate": 8.128594577129836e-07, "loss": 0.0259, "step": 8853 }, { "epoch": 2.014562002275313, "grad_norm": 1.785006820022791, "learning_rate": 8.12774236238168e-07, "loss": 0.0629, "step": 8854 }, { "epoch": 2.0147895335608648, "grad_norm": 1.5476841430485058, "learning_rate": 8.126890109255908e-07, "loss": 0.0813, "step": 8855 }, { "epoch": 2.0150170648464165, "grad_norm": 1.3137594059313582, "learning_rate": 8.126037817769939e-07, "loss": 0.0803, "step": 8856 }, { "epoch": 2.0152445961319683, "grad_norm": 1.0884237412764168, "learning_rate": 8.125185487941195e-07, "loss": 0.0542, "step": 8857 }, { "epoch": 2.01547212741752, "grad_norm": 3.13843937228986, "learning_rate": 8.124333119787093e-07, "loss": 0.1141, "step": 8858 }, { "epoch": 2.0156996587030718, "grad_norm": 1.3093146134482476, "learning_rate": 8.123480713325053e-07, "loss": 0.1013, "step": 8859 }, { "epoch": 2.0159271899886235, "grad_norm": 0.9028993616744687, "learning_rate": 8.122628268572499e-07, "loss": 0.046, "step": 8860 }, { "epoch": 2.0161547212741753, "grad_norm": 1.532618420082054, "learning_rate": 8.121775785546855e-07, "loss": 0.0421, "step": 8861 }, { "epoch": 2.016382252559727, "grad_norm": 1.2710114861205974, "learning_rate": 8.120923264265539e-07, "loss": 0.0201, "step": 8862 }, { "epoch": 2.016609783845279, "grad_norm": 1.1152451222395094, "learning_rate": 8.120070704745979e-07, "loss": 0.0302, "step": 8863 }, { "epoch": 2.0168373151308305, "grad_norm": 1.5461921042299296, "learning_rate": 8.1192181070056e-07, "loss": 0.1264, "step": 8864 }, { "epoch": 2.0170648464163823, "grad_norm": 1.7138961394140304, "learning_rate": 8.118365471061825e-07, "loss": 0.0841, "step": 8865 }, { "epoch": 2.017292377701934, "grad_norm": 3.0348422801696717, "learning_rate": 8.117512796932079e-07, "loss": 0.0509, "step": 8866 }, { "epoch": 2.017519908987486, "grad_norm": 1.4907253023289126, "learning_rate": 8.116660084633796e-07, "loss": 0.0468, "step": 8867 }, { "epoch": 2.0177474402730375, "grad_norm": 1.3836711005142381, "learning_rate": 8.115807334184398e-07, "loss": 0.0216, "step": 8868 }, { "epoch": 2.0179749715585893, "grad_norm": 1.1496152961442507, "learning_rate": 8.114954545601314e-07, "loss": 0.0308, "step": 8869 }, { "epoch": 2.018202502844141, "grad_norm": 1.3218587437148022, "learning_rate": 8.114101718901976e-07, "loss": 0.04, "step": 8870 }, { "epoch": 2.018430034129693, "grad_norm": 1.7977004770876162, "learning_rate": 8.113248854103811e-07, "loss": 0.1233, "step": 8871 }, { "epoch": 2.0186575654152445, "grad_norm": 1.0030429934118974, "learning_rate": 8.112395951224254e-07, "loss": 0.0318, "step": 8872 }, { "epoch": 2.0188850967007963, "grad_norm": 1.3374556340738952, "learning_rate": 8.111543010280733e-07, "loss": 0.1082, "step": 8873 }, { "epoch": 2.019112627986348, "grad_norm": 1.2754626107915659, "learning_rate": 8.110690031290683e-07, "loss": 0.0238, "step": 8874 }, { "epoch": 2.0193401592719, "grad_norm": 1.1302821237073781, "learning_rate": 8.109837014271536e-07, "loss": 0.0518, "step": 8875 }, { "epoch": 2.0195676905574516, "grad_norm": 2.3241974039177973, "learning_rate": 8.108983959240726e-07, "loss": 0.0477, "step": 8876 }, { "epoch": 2.0197952218430033, "grad_norm": 2.012356940370276, "learning_rate": 8.108130866215689e-07, "loss": 0.0759, "step": 8877 }, { "epoch": 2.020022753128555, "grad_norm": 1.2928306795197322, "learning_rate": 8.107277735213861e-07, "loss": 0.0985, "step": 8878 }, { "epoch": 2.020250284414107, "grad_norm": 1.616751693476569, "learning_rate": 8.106424566252675e-07, "loss": 0.1537, "step": 8879 }, { "epoch": 2.0204778156996586, "grad_norm": 1.1982048192000676, "learning_rate": 8.105571359349575e-07, "loss": 0.0761, "step": 8880 }, { "epoch": 2.0207053469852103, "grad_norm": 1.5225924618859668, "learning_rate": 8.104718114521993e-07, "loss": 0.097, "step": 8881 }, { "epoch": 2.020932878270762, "grad_norm": 1.5669304640435022, "learning_rate": 8.103864831787367e-07, "loss": 0.0511, "step": 8882 }, { "epoch": 2.021160409556314, "grad_norm": 1.810778789132058, "learning_rate": 8.103011511163141e-07, "loss": 0.0375, "step": 8883 }, { "epoch": 2.0213879408418656, "grad_norm": 1.557172516994359, "learning_rate": 8.102158152666753e-07, "loss": 0.0917, "step": 8884 }, { "epoch": 2.0216154721274173, "grad_norm": 1.3301375818507017, "learning_rate": 8.101304756315645e-07, "loss": 0.0776, "step": 8885 }, { "epoch": 2.021843003412969, "grad_norm": 1.760767637387474, "learning_rate": 8.100451322127257e-07, "loss": 0.0576, "step": 8886 }, { "epoch": 2.022070534698521, "grad_norm": 1.265408011042625, "learning_rate": 8.099597850119035e-07, "loss": 0.0131, "step": 8887 }, { "epoch": 2.022298065984073, "grad_norm": 1.2303519344090885, "learning_rate": 8.098744340308419e-07, "loss": 0.0284, "step": 8888 }, { "epoch": 2.0225255972696248, "grad_norm": 0.7748711148160067, "learning_rate": 8.097890792712853e-07, "loss": 0.0354, "step": 8889 }, { "epoch": 2.0227531285551765, "grad_norm": 1.5478258333880945, "learning_rate": 8.097037207349785e-07, "loss": 0.1163, "step": 8890 }, { "epoch": 2.0229806598407283, "grad_norm": 1.0405193819995862, "learning_rate": 8.096183584236659e-07, "loss": 0.1061, "step": 8891 }, { "epoch": 2.02320819112628, "grad_norm": 2.1481594666987185, "learning_rate": 8.095329923390924e-07, "loss": 0.0545, "step": 8892 }, { "epoch": 2.023435722411832, "grad_norm": 1.1859826176533887, "learning_rate": 8.094476224830022e-07, "loss": 0.0936, "step": 8893 }, { "epoch": 2.0236632536973835, "grad_norm": 3.5992574346212485, "learning_rate": 8.093622488571405e-07, "loss": 0.1488, "step": 8894 }, { "epoch": 2.0238907849829353, "grad_norm": 1.8619071779147156, "learning_rate": 8.09276871463252e-07, "loss": 0.1311, "step": 8895 }, { "epoch": 2.024118316268487, "grad_norm": 1.9699566241656805, "learning_rate": 8.091914903030818e-07, "loss": 0.0539, "step": 8896 }, { "epoch": 2.024345847554039, "grad_norm": 1.4290101274644949, "learning_rate": 8.091061053783748e-07, "loss": 0.1257, "step": 8897 }, { "epoch": 2.0245733788395905, "grad_norm": 1.63019832829888, "learning_rate": 8.090207166908763e-07, "loss": 0.0497, "step": 8898 }, { "epoch": 2.0248009101251423, "grad_norm": 1.3568352935794203, "learning_rate": 8.089353242423313e-07, "loss": 0.0555, "step": 8899 }, { "epoch": 2.025028441410694, "grad_norm": 1.8337008340406296, "learning_rate": 8.088499280344851e-07, "loss": 0.0439, "step": 8900 }, { "epoch": 2.025255972696246, "grad_norm": 0.8472940461819305, "learning_rate": 8.087645280690831e-07, "loss": 0.0156, "step": 8901 }, { "epoch": 2.0254835039817976, "grad_norm": 1.5355922509172337, "learning_rate": 8.086791243478709e-07, "loss": 0.0257, "step": 8902 }, { "epoch": 2.0257110352673493, "grad_norm": 1.8813496487292052, "learning_rate": 8.085937168725934e-07, "loss": 0.0322, "step": 8903 }, { "epoch": 2.025938566552901, "grad_norm": 1.324514290999542, "learning_rate": 8.085083056449968e-07, "loss": 0.0517, "step": 8904 }, { "epoch": 2.026166097838453, "grad_norm": 1.123634941418051, "learning_rate": 8.084228906668267e-07, "loss": 0.0402, "step": 8905 }, { "epoch": 2.0263936291240046, "grad_norm": 1.984050494640443, "learning_rate": 8.083374719398282e-07, "loss": 0.0454, "step": 8906 }, { "epoch": 2.0266211604095563, "grad_norm": 1.0585652976995195, "learning_rate": 8.082520494657478e-07, "loss": 0.1032, "step": 8907 }, { "epoch": 2.026848691695108, "grad_norm": 2.0027178523200737, "learning_rate": 8.08166623246331e-07, "loss": 0.1118, "step": 8908 }, { "epoch": 2.02707622298066, "grad_norm": 1.0927974697343035, "learning_rate": 8.08081193283324e-07, "loss": 0.0853, "step": 8909 }, { "epoch": 2.0273037542662116, "grad_norm": 1.9775497901190555, "learning_rate": 8.079957595784727e-07, "loss": 0.0341, "step": 8910 }, { "epoch": 2.0275312855517633, "grad_norm": 0.7191262298610074, "learning_rate": 8.07910322133523e-07, "loss": 0.0447, "step": 8911 }, { "epoch": 2.027758816837315, "grad_norm": 1.1461280136279222, "learning_rate": 8.078248809502215e-07, "loss": 0.0261, "step": 8912 }, { "epoch": 2.027986348122867, "grad_norm": 1.4199751658771598, "learning_rate": 8.077394360303143e-07, "loss": 0.0391, "step": 8913 }, { "epoch": 2.0282138794084186, "grad_norm": 2.4467428915964144, "learning_rate": 8.076539873755476e-07, "loss": 0.0847, "step": 8914 }, { "epoch": 2.0284414106939703, "grad_norm": 1.2357965996381937, "learning_rate": 8.07568534987668e-07, "loss": 0.0474, "step": 8915 }, { "epoch": 2.028668941979522, "grad_norm": 1.2427640347071134, "learning_rate": 8.074830788684218e-07, "loss": 0.0401, "step": 8916 }, { "epoch": 2.028896473265074, "grad_norm": 2.4817322255843157, "learning_rate": 8.073976190195557e-07, "loss": 0.069, "step": 8917 }, { "epoch": 2.0291240045506256, "grad_norm": 1.9727789886388352, "learning_rate": 8.073121554428165e-07, "loss": 0.0611, "step": 8918 }, { "epoch": 2.0293515358361773, "grad_norm": 1.3750702848213363, "learning_rate": 8.072266881399504e-07, "loss": 0.0209, "step": 8919 }, { "epoch": 2.029579067121729, "grad_norm": 2.008802936258141, "learning_rate": 8.071412171127047e-07, "loss": 0.0649, "step": 8920 }, { "epoch": 2.029806598407281, "grad_norm": 2.1349094497728998, "learning_rate": 8.070557423628262e-07, "loss": 0.083, "step": 8921 }, { "epoch": 2.0300341296928326, "grad_norm": 0.9866346052842123, "learning_rate": 8.069702638920615e-07, "loss": 0.018, "step": 8922 }, { "epoch": 2.0302616609783843, "grad_norm": 1.324608361219327, "learning_rate": 8.06884781702158e-07, "loss": 0.0222, "step": 8923 }, { "epoch": 2.030489192263936, "grad_norm": 0.7576778134924638, "learning_rate": 8.067992957948628e-07, "loss": 0.0521, "step": 8924 }, { "epoch": 2.030716723549488, "grad_norm": 0.8122934536776792, "learning_rate": 8.067138061719227e-07, "loss": 0.019, "step": 8925 }, { "epoch": 2.03094425483504, "grad_norm": 1.3291243418602547, "learning_rate": 8.066283128350854e-07, "loss": 0.0298, "step": 8926 }, { "epoch": 2.031171786120592, "grad_norm": 2.042240688145885, "learning_rate": 8.065428157860978e-07, "loss": 0.0386, "step": 8927 }, { "epoch": 2.0313993174061435, "grad_norm": 1.640167432849453, "learning_rate": 8.064573150267077e-07, "loss": 0.0307, "step": 8928 }, { "epoch": 2.0316268486916953, "grad_norm": 1.6054240756394935, "learning_rate": 8.063718105586624e-07, "loss": 0.0256, "step": 8929 }, { "epoch": 2.031854379977247, "grad_norm": 4.192156659893677, "learning_rate": 8.062863023837093e-07, "loss": 0.0681, "step": 8930 }, { "epoch": 2.032081911262799, "grad_norm": 1.572181011752906, "learning_rate": 8.062007905035965e-07, "loss": 0.0228, "step": 8931 }, { "epoch": 2.0323094425483506, "grad_norm": 1.943062464709226, "learning_rate": 8.061152749200713e-07, "loss": 0.0923, "step": 8932 }, { "epoch": 2.0325369738339023, "grad_norm": 1.5336636120970002, "learning_rate": 8.060297556348812e-07, "loss": 0.0834, "step": 8933 }, { "epoch": 2.032764505119454, "grad_norm": 2.282119148292395, "learning_rate": 8.059442326497748e-07, "loss": 0.0549, "step": 8934 }, { "epoch": 2.032992036405006, "grad_norm": 2.624530880383298, "learning_rate": 8.058587059664996e-07, "loss": 0.0505, "step": 8935 }, { "epoch": 2.0332195676905576, "grad_norm": 1.1785663028124431, "learning_rate": 8.057731755868036e-07, "loss": 0.095, "step": 8936 }, { "epoch": 2.0334470989761093, "grad_norm": 1.4395348877574188, "learning_rate": 8.056876415124352e-07, "loss": 0.0292, "step": 8937 }, { "epoch": 2.033674630261661, "grad_norm": 1.2287277310831204, "learning_rate": 8.056021037451422e-07, "loss": 0.0334, "step": 8938 }, { "epoch": 2.033902161547213, "grad_norm": 1.8255268955888289, "learning_rate": 8.055165622866726e-07, "loss": 0.0331, "step": 8939 }, { "epoch": 2.0341296928327646, "grad_norm": 1.5879690663346466, "learning_rate": 8.054310171387756e-07, "loss": 0.0242, "step": 8940 }, { "epoch": 2.0343572241183163, "grad_norm": 2.0235019064610724, "learning_rate": 8.053454683031987e-07, "loss": 0.1372, "step": 8941 }, { "epoch": 2.034584755403868, "grad_norm": 1.181125885769763, "learning_rate": 8.052599157816908e-07, "loss": 0.0227, "step": 8942 }, { "epoch": 2.03481228668942, "grad_norm": 3.9382854212462934, "learning_rate": 8.051743595760005e-07, "loss": 0.0995, "step": 8943 }, { "epoch": 2.0350398179749716, "grad_norm": 2.1906054815912035, "learning_rate": 8.050887996878761e-07, "loss": 0.0406, "step": 8944 }, { "epoch": 2.0352673492605233, "grad_norm": 1.8522349512738712, "learning_rate": 8.050032361190666e-07, "loss": 0.0627, "step": 8945 }, { "epoch": 2.035494880546075, "grad_norm": 1.6617262239448711, "learning_rate": 8.049176688713203e-07, "loss": 0.0573, "step": 8946 }, { "epoch": 2.035722411831627, "grad_norm": 2.872919050675009, "learning_rate": 8.048320979463867e-07, "loss": 0.1393, "step": 8947 }, { "epoch": 2.0359499431171786, "grad_norm": 1.6577751858636431, "learning_rate": 8.047465233460141e-07, "loss": 0.1273, "step": 8948 }, { "epoch": 2.0361774744027303, "grad_norm": 0.9839107736775068, "learning_rate": 8.04660945071952e-07, "loss": 0.0172, "step": 8949 }, { "epoch": 2.036405005688282, "grad_norm": 1.6974546856016852, "learning_rate": 8.045753631259491e-07, "loss": 0.0249, "step": 8950 }, { "epoch": 2.036632536973834, "grad_norm": 1.2769205319660486, "learning_rate": 8.044897775097548e-07, "loss": 0.0307, "step": 8951 }, { "epoch": 2.0368600682593856, "grad_norm": 1.2296353160559101, "learning_rate": 8.04404188225118e-07, "loss": 0.0148, "step": 8952 }, { "epoch": 2.0370875995449373, "grad_norm": 1.4095932818883932, "learning_rate": 8.043185952737881e-07, "loss": 0.1281, "step": 8953 }, { "epoch": 2.037315130830489, "grad_norm": 1.6892658920866102, "learning_rate": 8.042329986575145e-07, "loss": 0.0239, "step": 8954 }, { "epoch": 2.037542662116041, "grad_norm": 1.7860108163689632, "learning_rate": 8.041473983780467e-07, "loss": 0.0833, "step": 8955 }, { "epoch": 2.0377701934015926, "grad_norm": 1.3974190234214017, "learning_rate": 8.040617944371343e-07, "loss": 0.0225, "step": 8956 }, { "epoch": 2.0379977246871444, "grad_norm": 1.7684063697572843, "learning_rate": 8.039761868365267e-07, "loss": 0.0471, "step": 8957 }, { "epoch": 2.038225255972696, "grad_norm": 2.016680107128214, "learning_rate": 8.038905755779737e-07, "loss": 0.0653, "step": 8958 }, { "epoch": 2.038452787258248, "grad_norm": 1.1038807275490525, "learning_rate": 8.038049606632248e-07, "loss": 0.0609, "step": 8959 }, { "epoch": 2.0386803185437996, "grad_norm": 1.2448753957934051, "learning_rate": 8.0371934209403e-07, "loss": 0.0176, "step": 8960 }, { "epoch": 2.0389078498293514, "grad_norm": 1.741936926501403, "learning_rate": 8.036337198721392e-07, "loss": 0.0457, "step": 8961 }, { "epoch": 2.039135381114903, "grad_norm": 1.3952274993028444, "learning_rate": 8.035480939993025e-07, "loss": 0.0676, "step": 8962 }, { "epoch": 2.039362912400455, "grad_norm": 1.0455820280644919, "learning_rate": 8.034624644772694e-07, "loss": 0.0438, "step": 8963 }, { "epoch": 2.0395904436860066, "grad_norm": 1.139554219107509, "learning_rate": 8.033768313077905e-07, "loss": 0.0235, "step": 8964 }, { "epoch": 2.039817974971559, "grad_norm": 1.9335144343525827, "learning_rate": 8.03291194492616e-07, "loss": 0.0556, "step": 8965 }, { "epoch": 2.0400455062571106, "grad_norm": 1.1042452402293887, "learning_rate": 8.032055540334958e-07, "loss": 0.0378, "step": 8966 }, { "epoch": 2.0402730375426623, "grad_norm": 2.009877223396931, "learning_rate": 8.031199099321804e-07, "loss": 0.0416, "step": 8967 }, { "epoch": 2.040500568828214, "grad_norm": 1.848101717007317, "learning_rate": 8.030342621904205e-07, "loss": 0.125, "step": 8968 }, { "epoch": 2.040728100113766, "grad_norm": 1.5200120739584286, "learning_rate": 8.029486108099663e-07, "loss": 0.0732, "step": 8969 }, { "epoch": 2.0409556313993176, "grad_norm": 0.9588206533321003, "learning_rate": 8.028629557925683e-07, "loss": 0.0376, "step": 8970 }, { "epoch": 2.0411831626848693, "grad_norm": 1.8713116880826133, "learning_rate": 8.027772971399773e-07, "loss": 0.0575, "step": 8971 }, { "epoch": 2.041410693970421, "grad_norm": 1.4634893162573441, "learning_rate": 8.026916348539438e-07, "loss": 0.0571, "step": 8972 }, { "epoch": 2.041638225255973, "grad_norm": 2.215319961433251, "learning_rate": 8.026059689362186e-07, "loss": 0.1205, "step": 8973 }, { "epoch": 2.0418657565415246, "grad_norm": 0.8852613105021321, "learning_rate": 8.025202993885528e-07, "loss": 0.0347, "step": 8974 }, { "epoch": 2.0420932878270763, "grad_norm": 2.0572452023597747, "learning_rate": 8.024346262126976e-07, "loss": 0.0445, "step": 8975 }, { "epoch": 2.042320819112628, "grad_norm": 0.8397897258684369, "learning_rate": 8.02348949410403e-07, "loss": 0.0304, "step": 8976 }, { "epoch": 2.04254835039818, "grad_norm": 1.60407005281846, "learning_rate": 8.022632689834209e-07, "loss": 0.0969, "step": 8977 }, { "epoch": 2.0427758816837316, "grad_norm": 0.9333464874995563, "learning_rate": 8.021775849335023e-07, "loss": 0.0318, "step": 8978 }, { "epoch": 2.0430034129692833, "grad_norm": 2.263807067343164, "learning_rate": 8.020918972623983e-07, "loss": 0.0944, "step": 8979 }, { "epoch": 2.043230944254835, "grad_norm": 1.9712926683450096, "learning_rate": 8.0200620597186e-07, "loss": 0.0317, "step": 8980 }, { "epoch": 2.043458475540387, "grad_norm": 1.6767964098327561, "learning_rate": 8.019205110636394e-07, "loss": 0.0765, "step": 8981 }, { "epoch": 2.0436860068259386, "grad_norm": 1.3875052253841036, "learning_rate": 8.018348125394874e-07, "loss": 0.1099, "step": 8982 }, { "epoch": 2.0439135381114903, "grad_norm": 2.3749906044977758, "learning_rate": 8.017491104011557e-07, "loss": 0.044, "step": 8983 }, { "epoch": 2.044141069397042, "grad_norm": 1.7636354504511285, "learning_rate": 8.016634046503958e-07, "loss": 0.0402, "step": 8984 }, { "epoch": 2.044368600682594, "grad_norm": 1.4913896041513928, "learning_rate": 8.015776952889596e-07, "loss": 0.0517, "step": 8985 }, { "epoch": 2.0445961319681456, "grad_norm": 2.511436444195971, "learning_rate": 8.014919823185987e-07, "loss": 0.101, "step": 8986 }, { "epoch": 2.0448236632536974, "grad_norm": 0.9461858874889127, "learning_rate": 8.014062657410648e-07, "loss": 0.0146, "step": 8987 }, { "epoch": 2.045051194539249, "grad_norm": 1.6882002541921113, "learning_rate": 8.0132054555811e-07, "loss": 0.0279, "step": 8988 }, { "epoch": 2.045278725824801, "grad_norm": 2.164694201901671, "learning_rate": 8.012348217714861e-07, "loss": 0.0687, "step": 8989 }, { "epoch": 2.0455062571103526, "grad_norm": 0.8913950871887149, "learning_rate": 8.011490943829451e-07, "loss": 0.0545, "step": 8990 }, { "epoch": 2.0457337883959044, "grad_norm": 1.7822662572491148, "learning_rate": 8.010633633942394e-07, "loss": 0.082, "step": 8991 }, { "epoch": 2.045961319681456, "grad_norm": 1.6849998182193533, "learning_rate": 8.00977628807121e-07, "loss": 0.1276, "step": 8992 }, { "epoch": 2.046188850967008, "grad_norm": 0.9499780954546638, "learning_rate": 8.008918906233421e-07, "loss": 0.0252, "step": 8993 }, { "epoch": 2.0464163822525596, "grad_norm": 1.1148052194865308, "learning_rate": 8.00806148844655e-07, "loss": 0.0184, "step": 8994 }, { "epoch": 2.0466439135381114, "grad_norm": 1.4157448520587377, "learning_rate": 8.007204034728123e-07, "loss": 0.0119, "step": 8995 }, { "epoch": 2.046871444823663, "grad_norm": 1.8161237313706846, "learning_rate": 8.006346545095664e-07, "loss": 0.0361, "step": 8996 }, { "epoch": 2.047098976109215, "grad_norm": 1.6145593767095079, "learning_rate": 8.005489019566697e-07, "loss": 0.0853, "step": 8997 }, { "epoch": 2.0473265073947666, "grad_norm": 1.5070509103543634, "learning_rate": 8.004631458158749e-07, "loss": 0.0219, "step": 8998 }, { "epoch": 2.0475540386803184, "grad_norm": 1.5212533805532744, "learning_rate": 8.00377386088935e-07, "loss": 0.0308, "step": 8999 }, { "epoch": 2.04778156996587, "grad_norm": 1.7683804948094972, "learning_rate": 8.002916227776023e-07, "loss": 0.061, "step": 9000 }, { "epoch": 2.048009101251422, "grad_norm": 1.1676965297854693, "learning_rate": 8.002058558836298e-07, "loss": 0.0179, "step": 9001 }, { "epoch": 2.0482366325369736, "grad_norm": 1.1725218533720503, "learning_rate": 8.001200854087707e-07, "loss": 0.0619, "step": 9002 }, { "epoch": 2.0484641638225254, "grad_norm": 1.5277769596056463, "learning_rate": 8.000343113547777e-07, "loss": 0.1207, "step": 9003 }, { "epoch": 2.0486916951080776, "grad_norm": 1.2876329386633896, "learning_rate": 7.999485337234038e-07, "loss": 0.129, "step": 9004 }, { "epoch": 2.0489192263936293, "grad_norm": 1.4316387359992495, "learning_rate": 7.998627525164024e-07, "loss": 0.0308, "step": 9005 }, { "epoch": 2.049146757679181, "grad_norm": 2.0391400130019366, "learning_rate": 7.997769677355266e-07, "loss": 0.146, "step": 9006 }, { "epoch": 2.049374288964733, "grad_norm": 2.5254020354583044, "learning_rate": 7.996911793825296e-07, "loss": 0.0958, "step": 9007 }, { "epoch": 2.0496018202502846, "grad_norm": 1.859705882278126, "learning_rate": 7.996053874591649e-07, "loss": 0.034, "step": 9008 }, { "epoch": 2.0498293515358363, "grad_norm": 1.4013398825771965, "learning_rate": 7.995195919671858e-07, "loss": 0.0355, "step": 9009 }, { "epoch": 2.050056882821388, "grad_norm": 1.8066008322109752, "learning_rate": 7.994337929083458e-07, "loss": 0.0322, "step": 9010 }, { "epoch": 2.05028441410694, "grad_norm": 1.3387518936152152, "learning_rate": 7.993479902843987e-07, "loss": 0.0192, "step": 9011 }, { "epoch": 2.0505119453924916, "grad_norm": 1.3836163125157097, "learning_rate": 7.99262184097098e-07, "loss": 0.0723, "step": 9012 }, { "epoch": 2.0507394766780433, "grad_norm": 2.182460664922858, "learning_rate": 7.991763743481971e-07, "loss": 0.0427, "step": 9013 }, { "epoch": 2.050967007963595, "grad_norm": 3.715086981999057, "learning_rate": 7.990905610394503e-07, "loss": 0.0436, "step": 9014 }, { "epoch": 2.051194539249147, "grad_norm": 1.3497563155973114, "learning_rate": 7.990047441726114e-07, "loss": 0.1029, "step": 9015 }, { "epoch": 2.0514220705346986, "grad_norm": 1.8241562674210596, "learning_rate": 7.989189237494339e-07, "loss": 0.1718, "step": 9016 }, { "epoch": 2.0516496018202504, "grad_norm": 2.1318260927651576, "learning_rate": 7.988330997716723e-07, "loss": 0.0298, "step": 9017 }, { "epoch": 2.051877133105802, "grad_norm": 1.625880671121143, "learning_rate": 7.987472722410805e-07, "loss": 0.1317, "step": 9018 }, { "epoch": 2.052104664391354, "grad_norm": 3.0419852460456105, "learning_rate": 7.986614411594126e-07, "loss": 0.0902, "step": 9019 }, { "epoch": 2.0523321956769056, "grad_norm": 1.1566723855079115, "learning_rate": 7.98575606528423e-07, "loss": 0.0287, "step": 9020 }, { "epoch": 2.0525597269624574, "grad_norm": 2.173986589113051, "learning_rate": 7.984897683498658e-07, "loss": 0.1862, "step": 9021 }, { "epoch": 2.052787258248009, "grad_norm": 2.657098105490752, "learning_rate": 7.984039266254955e-07, "loss": 0.0605, "step": 9022 }, { "epoch": 2.053014789533561, "grad_norm": 1.3439299079894502, "learning_rate": 7.983180813570665e-07, "loss": 0.0776, "step": 9023 }, { "epoch": 2.0532423208191126, "grad_norm": 1.8443990231321319, "learning_rate": 7.982322325463331e-07, "loss": 0.0335, "step": 9024 }, { "epoch": 2.0534698521046644, "grad_norm": 1.6303694233983472, "learning_rate": 7.981463801950507e-07, "loss": 0.0379, "step": 9025 }, { "epoch": 2.053697383390216, "grad_norm": 1.7700467233785628, "learning_rate": 7.98060524304973e-07, "loss": 0.0474, "step": 9026 }, { "epoch": 2.053924914675768, "grad_norm": 1.7842786850819012, "learning_rate": 7.97974664877855e-07, "loss": 0.0705, "step": 9027 }, { "epoch": 2.0541524459613196, "grad_norm": 1.608702386328513, "learning_rate": 7.978888019154518e-07, "loss": 0.0466, "step": 9028 }, { "epoch": 2.0543799772468714, "grad_norm": 1.2148934424447984, "learning_rate": 7.978029354195182e-07, "loss": 0.0286, "step": 9029 }, { "epoch": 2.054607508532423, "grad_norm": 1.9444035598114653, "learning_rate": 7.977170653918088e-07, "loss": 0.1644, "step": 9030 }, { "epoch": 2.054835039817975, "grad_norm": 1.1480733728097936, "learning_rate": 7.976311918340792e-07, "loss": 0.0166, "step": 9031 }, { "epoch": 2.0550625711035266, "grad_norm": 1.6450968119134726, "learning_rate": 7.975453147480843e-07, "loss": 0.0259, "step": 9032 }, { "epoch": 2.0552901023890784, "grad_norm": 1.8067992560101058, "learning_rate": 7.974594341355787e-07, "loss": 0.0429, "step": 9033 }, { "epoch": 2.05551763367463, "grad_norm": 1.7079755352057893, "learning_rate": 7.973735499983185e-07, "loss": 0.0405, "step": 9034 }, { "epoch": 2.055745164960182, "grad_norm": 1.494102609790233, "learning_rate": 7.972876623380585e-07, "loss": 0.067, "step": 9035 }, { "epoch": 2.0559726962457336, "grad_norm": 1.0921037707533194, "learning_rate": 7.972017711565543e-07, "loss": 0.082, "step": 9036 }, { "epoch": 2.0562002275312854, "grad_norm": 2.564234211322546, "learning_rate": 7.971158764555611e-07, "loss": 0.0424, "step": 9037 }, { "epoch": 2.056427758816837, "grad_norm": 1.3145382795708485, "learning_rate": 7.970299782368347e-07, "loss": 0.0835, "step": 9038 }, { "epoch": 2.056655290102389, "grad_norm": 2.060414155748251, "learning_rate": 7.969440765021306e-07, "loss": 0.0792, "step": 9039 }, { "epoch": 2.0568828213879407, "grad_norm": 0.9272262973137503, "learning_rate": 7.968581712532044e-07, "loss": 0.0751, "step": 9040 }, { "epoch": 2.0571103526734924, "grad_norm": 0.9430514246449326, "learning_rate": 7.96772262491812e-07, "loss": 0.0422, "step": 9041 }, { "epoch": 2.057337883959044, "grad_norm": 1.817014778048195, "learning_rate": 7.966863502197092e-07, "loss": 0.0684, "step": 9042 }, { "epoch": 2.0575654152445964, "grad_norm": 1.372358221318876, "learning_rate": 7.966004344386517e-07, "loss": 0.0597, "step": 9043 }, { "epoch": 2.057792946530148, "grad_norm": 2.5726795542399232, "learning_rate": 7.965145151503957e-07, "loss": 0.1362, "step": 9044 }, { "epoch": 2.0580204778157, "grad_norm": 2.0362358762589654, "learning_rate": 7.964285923566971e-07, "loss": 0.0824, "step": 9045 }, { "epoch": 2.0582480091012516, "grad_norm": 1.3864717525542436, "learning_rate": 7.963426660593121e-07, "loss": 0.0541, "step": 9046 }, { "epoch": 2.0584755403868034, "grad_norm": 1.2558444766460526, "learning_rate": 7.962567362599965e-07, "loss": 0.0847, "step": 9047 }, { "epoch": 2.058703071672355, "grad_norm": 1.9079873952492432, "learning_rate": 7.961708029605072e-07, "loss": 0.0994, "step": 9048 }, { "epoch": 2.058930602957907, "grad_norm": 2.486622958190756, "learning_rate": 7.960848661626e-07, "loss": 0.0611, "step": 9049 }, { "epoch": 2.0591581342434586, "grad_norm": 1.786473375243381, "learning_rate": 7.959989258680314e-07, "loss": 0.1021, "step": 9050 }, { "epoch": 2.0593856655290104, "grad_norm": 1.2647734446477317, "learning_rate": 7.959129820785581e-07, "loss": 0.0246, "step": 9051 }, { "epoch": 2.059613196814562, "grad_norm": 2.055755652392172, "learning_rate": 7.958270347959365e-07, "loss": 0.0318, "step": 9052 }, { "epoch": 2.059840728100114, "grad_norm": 1.7331547478611729, "learning_rate": 7.95741084021923e-07, "loss": 0.0504, "step": 9053 }, { "epoch": 2.0600682593856656, "grad_norm": 2.0810800670078553, "learning_rate": 7.956551297582744e-07, "loss": 0.1526, "step": 9054 }, { "epoch": 2.0602957906712174, "grad_norm": 1.2473878307051924, "learning_rate": 7.955691720067476e-07, "loss": 0.068, "step": 9055 }, { "epoch": 2.060523321956769, "grad_norm": 1.1892730990813287, "learning_rate": 7.954832107690994e-07, "loss": 0.0193, "step": 9056 }, { "epoch": 2.060750853242321, "grad_norm": 2.625497516100211, "learning_rate": 7.953972460470865e-07, "loss": 0.081, "step": 9057 }, { "epoch": 2.0609783845278726, "grad_norm": 1.0465781093855349, "learning_rate": 7.953112778424658e-07, "loss": 0.0323, "step": 9058 }, { "epoch": 2.0612059158134244, "grad_norm": 1.9502856330253195, "learning_rate": 7.952253061569946e-07, "loss": 0.0778, "step": 9059 }, { "epoch": 2.061433447098976, "grad_norm": 1.2985376747533437, "learning_rate": 7.951393309924299e-07, "loss": 0.0355, "step": 9060 }, { "epoch": 2.061660978384528, "grad_norm": 2.0180691734194545, "learning_rate": 7.950533523505288e-07, "loss": 0.0283, "step": 9061 }, { "epoch": 2.0618885096700796, "grad_norm": 1.3043138217729549, "learning_rate": 7.949673702330487e-07, "loss": 0.0212, "step": 9062 }, { "epoch": 2.0621160409556314, "grad_norm": 2.2842920182686943, "learning_rate": 7.948813846417469e-07, "loss": 0.0585, "step": 9063 }, { "epoch": 2.062343572241183, "grad_norm": 1.0500746839458064, "learning_rate": 7.947953955783808e-07, "loss": 0.0598, "step": 9064 }, { "epoch": 2.062571103526735, "grad_norm": 1.3277227542346568, "learning_rate": 7.947094030447077e-07, "loss": 0.0845, "step": 9065 }, { "epoch": 2.0627986348122866, "grad_norm": 1.253186260522492, "learning_rate": 7.946234070424852e-07, "loss": 0.0546, "step": 9066 }, { "epoch": 2.0630261660978384, "grad_norm": 3.1941381013094934, "learning_rate": 7.945374075734706e-07, "loss": 0.0531, "step": 9067 }, { "epoch": 2.06325369738339, "grad_norm": 2.170365099074321, "learning_rate": 7.944514046394222e-07, "loss": 0.0558, "step": 9068 }, { "epoch": 2.063481228668942, "grad_norm": 1.267710151283297, "learning_rate": 7.943653982420976e-07, "loss": 0.1233, "step": 9069 }, { "epoch": 2.0637087599544937, "grad_norm": 2.934322988147777, "learning_rate": 7.942793883832541e-07, "loss": 0.0118, "step": 9070 }, { "epoch": 2.0639362912400454, "grad_norm": 1.7674927965665588, "learning_rate": 7.9419337506465e-07, "loss": 0.037, "step": 9071 }, { "epoch": 2.064163822525597, "grad_norm": 1.5262962717343906, "learning_rate": 7.941073582880431e-07, "loss": 0.0296, "step": 9072 }, { "epoch": 2.064391353811149, "grad_norm": 2.4846929499037236, "learning_rate": 7.940213380551918e-07, "loss": 0.0798, "step": 9073 }, { "epoch": 2.0646188850967007, "grad_norm": 1.7093651494630786, "learning_rate": 7.939353143678535e-07, "loss": 0.068, "step": 9074 }, { "epoch": 2.0648464163822524, "grad_norm": 0.947770878057256, "learning_rate": 7.938492872277872e-07, "loss": 0.0332, "step": 9075 }, { "epoch": 2.065073947667804, "grad_norm": 3.037640012456654, "learning_rate": 7.937632566367505e-07, "loss": 0.1178, "step": 9076 }, { "epoch": 2.065301478953356, "grad_norm": 1.4987170595019004, "learning_rate": 7.936772225965018e-07, "loss": 0.0143, "step": 9077 }, { "epoch": 2.0655290102389077, "grad_norm": 1.2376842513542412, "learning_rate": 7.935911851087996e-07, "loss": 0.0199, "step": 9078 }, { "epoch": 2.0657565415244594, "grad_norm": 1.6224691256067159, "learning_rate": 7.935051441754024e-07, "loss": 0.0436, "step": 9079 }, { "epoch": 2.065984072810011, "grad_norm": 2.088654020758353, "learning_rate": 7.934190997980687e-07, "loss": 0.1661, "step": 9080 }, { "epoch": 2.066211604095563, "grad_norm": 1.42065406988388, "learning_rate": 7.933330519785569e-07, "loss": 0.0226, "step": 9081 }, { "epoch": 2.066439135381115, "grad_norm": 1.3806344487018214, "learning_rate": 7.93247000718626e-07, "loss": 0.0488, "step": 9082 }, { "epoch": 2.066666666666667, "grad_norm": 1.063319184761991, "learning_rate": 7.931609460200345e-07, "loss": 0.0771, "step": 9083 }, { "epoch": 2.0668941979522186, "grad_norm": 1.0127117965184074, "learning_rate": 7.930748878845411e-07, "loss": 0.0173, "step": 9084 }, { "epoch": 2.0671217292377704, "grad_norm": 1.4019625398505184, "learning_rate": 7.929888263139049e-07, "loss": 0.0336, "step": 9085 }, { "epoch": 2.067349260523322, "grad_norm": 1.1131020167883046, "learning_rate": 7.929027613098848e-07, "loss": 0.0456, "step": 9086 }, { "epoch": 2.067576791808874, "grad_norm": 1.733949004627717, "learning_rate": 7.928166928742398e-07, "loss": 0.1259, "step": 9087 }, { "epoch": 2.0678043230944256, "grad_norm": 1.043896409482535, "learning_rate": 7.927306210087287e-07, "loss": 0.0583, "step": 9088 }, { "epoch": 2.0680318543799774, "grad_norm": 1.1197448277741158, "learning_rate": 7.926445457151111e-07, "loss": 0.0828, "step": 9089 }, { "epoch": 2.068259385665529, "grad_norm": 1.9678162359488363, "learning_rate": 7.925584669951459e-07, "loss": 0.0623, "step": 9090 }, { "epoch": 2.068486916951081, "grad_norm": 1.5132352396941313, "learning_rate": 7.924723848505925e-07, "loss": 0.0237, "step": 9091 }, { "epoch": 2.0687144482366326, "grad_norm": 1.646682794518182, "learning_rate": 7.923862992832103e-07, "loss": 0.1909, "step": 9092 }, { "epoch": 2.0689419795221844, "grad_norm": 2.3077472062054585, "learning_rate": 7.923002102947587e-07, "loss": 0.0745, "step": 9093 }, { "epoch": 2.069169510807736, "grad_norm": 0.9780808903063914, "learning_rate": 7.922141178869973e-07, "loss": 0.0152, "step": 9094 }, { "epoch": 2.069397042093288, "grad_norm": 1.4315578858964255, "learning_rate": 7.921280220616855e-07, "loss": 0.1481, "step": 9095 }, { "epoch": 2.0696245733788396, "grad_norm": 1.2423627879635764, "learning_rate": 7.920419228205829e-07, "loss": 0.1512, "step": 9096 }, { "epoch": 2.0698521046643914, "grad_norm": 2.285717832479959, "learning_rate": 7.919558201654493e-07, "loss": 0.0467, "step": 9097 }, { "epoch": 2.070079635949943, "grad_norm": 1.7444028206997109, "learning_rate": 7.918697140980447e-07, "loss": 0.0286, "step": 9098 }, { "epoch": 2.070307167235495, "grad_norm": 1.9074989101870283, "learning_rate": 7.917836046201285e-07, "loss": 0.0639, "step": 9099 }, { "epoch": 2.0705346985210467, "grad_norm": 1.3169246364354064, "learning_rate": 7.916974917334612e-07, "loss": 0.0769, "step": 9100 }, { "epoch": 2.0707622298065984, "grad_norm": 1.8006086624089455, "learning_rate": 7.916113754398022e-07, "loss": 0.1236, "step": 9101 }, { "epoch": 2.07098976109215, "grad_norm": 1.1154065737416012, "learning_rate": 7.91525255740912e-07, "loss": 0.0396, "step": 9102 }, { "epoch": 2.071217292377702, "grad_norm": 1.6098922197697982, "learning_rate": 7.914391326385504e-07, "loss": 0.1823, "step": 9103 }, { "epoch": 2.0714448236632537, "grad_norm": 1.7142868450675366, "learning_rate": 7.913530061344778e-07, "loss": 0.043, "step": 9104 }, { "epoch": 2.0716723549488054, "grad_norm": 1.5874061597081095, "learning_rate": 7.912668762304544e-07, "loss": 0.0792, "step": 9105 }, { "epoch": 2.071899886234357, "grad_norm": 1.509108721934602, "learning_rate": 7.911807429282406e-07, "loss": 0.0848, "step": 9106 }, { "epoch": 2.072127417519909, "grad_norm": 2.749770216447884, "learning_rate": 7.910946062295967e-07, "loss": 0.0584, "step": 9107 }, { "epoch": 2.0723549488054607, "grad_norm": 0.7878630269766755, "learning_rate": 7.910084661362832e-07, "loss": 0.0688, "step": 9108 }, { "epoch": 2.0725824800910124, "grad_norm": 1.2346259917745928, "learning_rate": 7.909223226500609e-07, "loss": 0.0327, "step": 9109 }, { "epoch": 2.072810011376564, "grad_norm": 1.3686367464991218, "learning_rate": 7.908361757726901e-07, "loss": 0.0668, "step": 9110 }, { "epoch": 2.073037542662116, "grad_norm": 1.8352613867965457, "learning_rate": 7.907500255059314e-07, "loss": 0.0672, "step": 9111 }, { "epoch": 2.0732650739476677, "grad_norm": 2.697406308758313, "learning_rate": 7.906638718515458e-07, "loss": 0.0489, "step": 9112 }, { "epoch": 2.0734926052332194, "grad_norm": 1.098914714863621, "learning_rate": 7.905777148112943e-07, "loss": 0.031, "step": 9113 }, { "epoch": 2.073720136518771, "grad_norm": 3.2621670369013933, "learning_rate": 7.904915543869372e-07, "loss": 0.0521, "step": 9114 }, { "epoch": 2.073947667804323, "grad_norm": 2.479039575608758, "learning_rate": 7.904053905802362e-07, "loss": 0.0878, "step": 9115 }, { "epoch": 2.0741751990898747, "grad_norm": 1.418821664800718, "learning_rate": 7.903192233929515e-07, "loss": 0.0567, "step": 9116 }, { "epoch": 2.0744027303754264, "grad_norm": 1.9336354771265998, "learning_rate": 7.90233052826845e-07, "loss": 0.0442, "step": 9117 }, { "epoch": 2.074630261660978, "grad_norm": 1.637993635401336, "learning_rate": 7.901468788836771e-07, "loss": 0.1124, "step": 9118 }, { "epoch": 2.07485779294653, "grad_norm": 1.1804507773489084, "learning_rate": 7.900607015652101e-07, "loss": 0.0512, "step": 9119 }, { "epoch": 2.0750853242320817, "grad_norm": 1.5299470508494952, "learning_rate": 7.899745208732043e-07, "loss": 0.0354, "step": 9120 }, { "epoch": 2.075312855517634, "grad_norm": 2.053834900698496, "learning_rate": 7.898883368094213e-07, "loss": 0.0438, "step": 9121 }, { "epoch": 2.0755403868031856, "grad_norm": 1.6512024163216108, "learning_rate": 7.898021493756228e-07, "loss": 0.1283, "step": 9122 }, { "epoch": 2.0757679180887374, "grad_norm": 1.2485011334703044, "learning_rate": 7.897159585735702e-07, "loss": 0.0559, "step": 9123 }, { "epoch": 2.075995449374289, "grad_norm": 1.718240711541217, "learning_rate": 7.896297644050249e-07, "loss": 0.0463, "step": 9124 }, { "epoch": 2.076222980659841, "grad_norm": 1.9656083707282161, "learning_rate": 7.895435668717488e-07, "loss": 0.0604, "step": 9125 }, { "epoch": 2.0764505119453927, "grad_norm": 2.0888372710238703, "learning_rate": 7.894573659755038e-07, "loss": 0.0895, "step": 9126 }, { "epoch": 2.0766780432309444, "grad_norm": 1.8353650592615016, "learning_rate": 7.89371161718051e-07, "loss": 0.0522, "step": 9127 }, { "epoch": 2.076905574516496, "grad_norm": 2.5236097562873403, "learning_rate": 7.892849541011531e-07, "loss": 0.0367, "step": 9128 }, { "epoch": 2.077133105802048, "grad_norm": 1.1616657344858807, "learning_rate": 7.891987431265714e-07, "loss": 0.0174, "step": 9129 }, { "epoch": 2.0773606370875997, "grad_norm": 1.578144325050575, "learning_rate": 7.891125287960682e-07, "loss": 0.0812, "step": 9130 }, { "epoch": 2.0775881683731514, "grad_norm": 1.1517242687449327, "learning_rate": 7.890263111114052e-07, "loss": 0.0452, "step": 9131 }, { "epoch": 2.077815699658703, "grad_norm": 1.8810114460736644, "learning_rate": 7.889400900743452e-07, "loss": 0.0528, "step": 9132 }, { "epoch": 2.078043230944255, "grad_norm": 2.0473626545484116, "learning_rate": 7.888538656866498e-07, "loss": 0.0368, "step": 9133 }, { "epoch": 2.0782707622298067, "grad_norm": 1.8958294920231127, "learning_rate": 7.887676379500814e-07, "loss": 0.1566, "step": 9134 }, { "epoch": 2.0784982935153584, "grad_norm": 2.387957751750927, "learning_rate": 7.886814068664025e-07, "loss": 0.1222, "step": 9135 }, { "epoch": 2.07872582480091, "grad_norm": 1.7538970479706861, "learning_rate": 7.885951724373754e-07, "loss": 0.1517, "step": 9136 }, { "epoch": 2.078953356086462, "grad_norm": 2.3686037145553342, "learning_rate": 7.885089346647625e-07, "loss": 0.153, "step": 9137 }, { "epoch": 2.0791808873720137, "grad_norm": 2.0979094686177095, "learning_rate": 7.884226935503267e-07, "loss": 0.0604, "step": 9138 }, { "epoch": 2.0794084186575654, "grad_norm": 1.3953638862022424, "learning_rate": 7.8833644909583e-07, "loss": 0.0337, "step": 9139 }, { "epoch": 2.079635949943117, "grad_norm": 1.179189164442287, "learning_rate": 7.882502013030358e-07, "loss": 0.0312, "step": 9140 }, { "epoch": 2.079863481228669, "grad_norm": 1.182582252605836, "learning_rate": 7.881639501737059e-07, "loss": 0.0649, "step": 9141 }, { "epoch": 2.0800910125142207, "grad_norm": 0.6775958909095499, "learning_rate": 7.880776957096041e-07, "loss": 0.0095, "step": 9142 }, { "epoch": 2.0803185437997724, "grad_norm": 1.2891614755595928, "learning_rate": 7.879914379124928e-07, "loss": 0.0367, "step": 9143 }, { "epoch": 2.080546075085324, "grad_norm": 1.4798323666152837, "learning_rate": 7.879051767841351e-07, "loss": 0.0229, "step": 9144 }, { "epoch": 2.080773606370876, "grad_norm": 1.575524970401019, "learning_rate": 7.878189123262937e-07, "loss": 0.059, "step": 9145 }, { "epoch": 2.0810011376564277, "grad_norm": 1.0135897054386669, "learning_rate": 7.877326445407321e-07, "loss": 0.0563, "step": 9146 }, { "epoch": 2.0812286689419794, "grad_norm": 2.891970697742299, "learning_rate": 7.876463734292133e-07, "loss": 0.0909, "step": 9147 }, { "epoch": 2.081456200227531, "grad_norm": 1.3653078247569157, "learning_rate": 7.875600989935004e-07, "loss": 0.0474, "step": 9148 }, { "epoch": 2.081683731513083, "grad_norm": 2.5159777114813062, "learning_rate": 7.874738212353567e-07, "loss": 0.047, "step": 9149 }, { "epoch": 2.0819112627986347, "grad_norm": 1.4073584073202479, "learning_rate": 7.873875401565458e-07, "loss": 0.0439, "step": 9150 }, { "epoch": 2.0821387940841865, "grad_norm": 1.1811930490134919, "learning_rate": 7.873012557588309e-07, "loss": 0.036, "step": 9151 }, { "epoch": 2.082366325369738, "grad_norm": 1.4703966938214825, "learning_rate": 7.872149680439756e-07, "loss": 0.1374, "step": 9152 }, { "epoch": 2.08259385665529, "grad_norm": 1.5401455211601165, "learning_rate": 7.871286770137434e-07, "loss": 0.0349, "step": 9153 }, { "epoch": 2.0828213879408417, "grad_norm": 2.0287849754461793, "learning_rate": 7.870423826698981e-07, "loss": 0.0689, "step": 9154 }, { "epoch": 2.0830489192263935, "grad_norm": 1.2804858837836415, "learning_rate": 7.869560850142031e-07, "loss": 0.0316, "step": 9155 }, { "epoch": 2.083276450511945, "grad_norm": 2.730136139700032, "learning_rate": 7.868697840484225e-07, "loss": 0.0411, "step": 9156 }, { "epoch": 2.083503981797497, "grad_norm": 1.4714407773423903, "learning_rate": 7.867834797743199e-07, "loss": 0.0537, "step": 9157 }, { "epoch": 2.0837315130830487, "grad_norm": 2.2903325365713587, "learning_rate": 7.866971721936592e-07, "loss": 0.0266, "step": 9158 }, { "epoch": 2.0839590443686005, "grad_norm": 2.387960948734622, "learning_rate": 7.866108613082045e-07, "loss": 0.0897, "step": 9159 }, { "epoch": 2.0841865756541527, "grad_norm": 2.0120140017901584, "learning_rate": 7.865245471197198e-07, "loss": 0.0986, "step": 9160 }, { "epoch": 2.0844141069397044, "grad_norm": 1.1629231276556602, "learning_rate": 7.864382296299689e-07, "loss": 0.1165, "step": 9161 }, { "epoch": 2.084641638225256, "grad_norm": 1.2142308313846102, "learning_rate": 7.863519088407164e-07, "loss": 0.0208, "step": 9162 }, { "epoch": 2.084869169510808, "grad_norm": 2.4135273275384908, "learning_rate": 7.862655847537265e-07, "loss": 0.0456, "step": 9163 }, { "epoch": 2.0850967007963597, "grad_norm": 1.793821084419417, "learning_rate": 7.861792573707634e-07, "loss": 0.0241, "step": 9164 }, { "epoch": 2.0853242320819114, "grad_norm": 1.4462244269890792, "learning_rate": 7.860929266935914e-07, "loss": 0.0216, "step": 9165 }, { "epoch": 2.085551763367463, "grad_norm": 1.8524138928143818, "learning_rate": 7.860065927239752e-07, "loss": 0.0659, "step": 9166 }, { "epoch": 2.085779294653015, "grad_norm": 1.5037523706054334, "learning_rate": 7.859202554636788e-07, "loss": 0.0533, "step": 9167 }, { "epoch": 2.0860068259385667, "grad_norm": 1.269697638544373, "learning_rate": 7.858339149144671e-07, "loss": 0.0749, "step": 9168 }, { "epoch": 2.0862343572241184, "grad_norm": 1.8488925262658285, "learning_rate": 7.857475710781048e-07, "loss": 0.0428, "step": 9169 }, { "epoch": 2.08646188850967, "grad_norm": 1.1660865685004795, "learning_rate": 7.856612239563568e-07, "loss": 0.0405, "step": 9170 }, { "epoch": 2.086689419795222, "grad_norm": 1.1594702561901014, "learning_rate": 7.855748735509873e-07, "loss": 0.0301, "step": 9171 }, { "epoch": 2.0869169510807737, "grad_norm": 1.7929484121993156, "learning_rate": 7.854885198637615e-07, "loss": 0.031, "step": 9172 }, { "epoch": 2.0871444823663254, "grad_norm": 1.9650678687188834, "learning_rate": 7.854021628964445e-07, "loss": 0.0825, "step": 9173 }, { "epoch": 2.087372013651877, "grad_norm": 1.5320972901943375, "learning_rate": 7.853158026508009e-07, "loss": 0.042, "step": 9174 }, { "epoch": 2.087599544937429, "grad_norm": 1.125421549574689, "learning_rate": 7.852294391285959e-07, "loss": 0.0277, "step": 9175 }, { "epoch": 2.0878270762229807, "grad_norm": 1.8538364240405865, "learning_rate": 7.851430723315946e-07, "loss": 0.0852, "step": 9176 }, { "epoch": 2.0880546075085324, "grad_norm": 0.9958089698704002, "learning_rate": 7.850567022615623e-07, "loss": 0.0159, "step": 9177 }, { "epoch": 2.088282138794084, "grad_norm": 2.4409117643496354, "learning_rate": 7.84970328920264e-07, "loss": 0.1737, "step": 9178 }, { "epoch": 2.088509670079636, "grad_norm": 1.6812234925733471, "learning_rate": 7.848839523094651e-07, "loss": 0.0558, "step": 9179 }, { "epoch": 2.0887372013651877, "grad_norm": 1.9036372254475549, "learning_rate": 7.847975724309312e-07, "loss": 0.0563, "step": 9180 }, { "epoch": 2.0889647326507395, "grad_norm": 2.337524718407092, "learning_rate": 7.847111892864275e-07, "loss": 0.1269, "step": 9181 }, { "epoch": 2.089192263936291, "grad_norm": 1.1593951789956227, "learning_rate": 7.846248028777195e-07, "loss": 0.0389, "step": 9182 }, { "epoch": 2.089419795221843, "grad_norm": 2.7307935985707568, "learning_rate": 7.845384132065731e-07, "loss": 0.127, "step": 9183 }, { "epoch": 2.0896473265073947, "grad_norm": 1.9933713250868286, "learning_rate": 7.844520202747534e-07, "loss": 0.0581, "step": 9184 }, { "epoch": 2.0898748577929465, "grad_norm": 0.960440095213362, "learning_rate": 7.843656240840265e-07, "loss": 0.0621, "step": 9185 }, { "epoch": 2.090102389078498, "grad_norm": 1.3040238998096036, "learning_rate": 7.842792246361583e-07, "loss": 0.0401, "step": 9186 }, { "epoch": 2.09032992036405, "grad_norm": 2.9926707670526156, "learning_rate": 7.841928219329144e-07, "loss": 0.143, "step": 9187 }, { "epoch": 2.0905574516496017, "grad_norm": 1.8199233717870194, "learning_rate": 7.841064159760605e-07, "loss": 0.065, "step": 9188 }, { "epoch": 2.0907849829351535, "grad_norm": 1.310924972378196, "learning_rate": 7.840200067673632e-07, "loss": 0.0489, "step": 9189 }, { "epoch": 2.091012514220705, "grad_norm": 1.3121302997582052, "learning_rate": 7.839335943085882e-07, "loss": 0.019, "step": 9190 }, { "epoch": 2.091240045506257, "grad_norm": 1.0244602061787904, "learning_rate": 7.838471786015013e-07, "loss": 0.0361, "step": 9191 }, { "epoch": 2.0914675767918087, "grad_norm": 2.161668507744929, "learning_rate": 7.837607596478691e-07, "loss": 0.0548, "step": 9192 }, { "epoch": 2.0916951080773605, "grad_norm": 1.287337927794009, "learning_rate": 7.836743374494579e-07, "loss": 0.072, "step": 9193 }, { "epoch": 2.0919226393629122, "grad_norm": 1.3083130207692248, "learning_rate": 7.835879120080337e-07, "loss": 0.0175, "step": 9194 }, { "epoch": 2.092150170648464, "grad_norm": 1.4580985938386846, "learning_rate": 7.83501483325363e-07, "loss": 0.0385, "step": 9195 }, { "epoch": 2.0923777019340157, "grad_norm": 1.7573973967743006, "learning_rate": 7.834150514032124e-07, "loss": 0.0702, "step": 9196 }, { "epoch": 2.0926052332195675, "grad_norm": 0.9961034152160794, "learning_rate": 7.833286162433481e-07, "loss": 0.0192, "step": 9197 }, { "epoch": 2.0928327645051192, "grad_norm": 1.8957486861488144, "learning_rate": 7.832421778475369e-07, "loss": 0.056, "step": 9198 }, { "epoch": 2.0930602957906714, "grad_norm": 1.352577067070104, "learning_rate": 7.831557362175454e-07, "loss": 0.1217, "step": 9199 }, { "epoch": 2.093287827076223, "grad_norm": 1.5536317526274144, "learning_rate": 7.830692913551403e-07, "loss": 0.1074, "step": 9200 }, { "epoch": 2.093515358361775, "grad_norm": 1.8333157486267224, "learning_rate": 7.829828432620884e-07, "loss": 0.0442, "step": 9201 }, { "epoch": 2.0937428896473267, "grad_norm": 1.3639882761041908, "learning_rate": 7.828963919401567e-07, "loss": 0.0254, "step": 9202 }, { "epoch": 2.0939704209328784, "grad_norm": 1.171910362710783, "learning_rate": 7.828099373911116e-07, "loss": 0.063, "step": 9203 }, { "epoch": 2.09419795221843, "grad_norm": 1.1519817333073965, "learning_rate": 7.827234796167206e-07, "loss": 0.0642, "step": 9204 }, { "epoch": 2.094425483503982, "grad_norm": 1.486828533039071, "learning_rate": 7.826370186187503e-07, "loss": 0.0555, "step": 9205 }, { "epoch": 2.0946530147895337, "grad_norm": 1.4307695048245372, "learning_rate": 7.825505543989682e-07, "loss": 0.0521, "step": 9206 }, { "epoch": 2.0948805460750854, "grad_norm": 2.2027508824528765, "learning_rate": 7.824640869591412e-07, "loss": 0.1103, "step": 9207 }, { "epoch": 2.095108077360637, "grad_norm": 2.1915433152702546, "learning_rate": 7.823776163010369e-07, "loss": 0.0485, "step": 9208 }, { "epoch": 2.095335608646189, "grad_norm": 1.1490720786451287, "learning_rate": 7.822911424264222e-07, "loss": 0.0243, "step": 9209 }, { "epoch": 2.0955631399317407, "grad_norm": 1.3690667315155065, "learning_rate": 7.822046653370647e-07, "loss": 0.0666, "step": 9210 }, { "epoch": 2.0957906712172925, "grad_norm": 1.9260718490822129, "learning_rate": 7.821181850347316e-07, "loss": 0.0282, "step": 9211 }, { "epoch": 2.096018202502844, "grad_norm": 2.13769511073873, "learning_rate": 7.820317015211905e-07, "loss": 0.1475, "step": 9212 }, { "epoch": 2.096245733788396, "grad_norm": 1.6462915109688485, "learning_rate": 7.819452147982091e-07, "loss": 0.0544, "step": 9213 }, { "epoch": 2.0964732650739477, "grad_norm": 1.4228717351251425, "learning_rate": 7.81858724867555e-07, "loss": 0.0302, "step": 9214 }, { "epoch": 2.0967007963594995, "grad_norm": 2.0362478269213815, "learning_rate": 7.817722317309958e-07, "loss": 0.0908, "step": 9215 }, { "epoch": 2.096928327645051, "grad_norm": 3.2784385787483243, "learning_rate": 7.816857353902993e-07, "loss": 0.0597, "step": 9216 }, { "epoch": 2.097155858930603, "grad_norm": 1.5586908834050757, "learning_rate": 7.815992358472332e-07, "loss": 0.0292, "step": 9217 }, { "epoch": 2.0973833902161547, "grad_norm": 1.547293545985965, "learning_rate": 7.815127331035656e-07, "loss": 0.0405, "step": 9218 }, { "epoch": 2.0976109215017065, "grad_norm": 1.532042401197724, "learning_rate": 7.814262271610645e-07, "loss": 0.0261, "step": 9219 }, { "epoch": 2.0978384527872582, "grad_norm": 2.2259657887934465, "learning_rate": 7.813397180214978e-07, "loss": 0.1, "step": 9220 }, { "epoch": 2.09806598407281, "grad_norm": 1.656783599799285, "learning_rate": 7.812532056866334e-07, "loss": 0.1656, "step": 9221 }, { "epoch": 2.0982935153583617, "grad_norm": 1.9188569759547522, "learning_rate": 7.811666901582397e-07, "loss": 0.1067, "step": 9222 }, { "epoch": 2.0985210466439135, "grad_norm": 1.6568588645485047, "learning_rate": 7.81080171438085e-07, "loss": 0.0211, "step": 9223 }, { "epoch": 2.0987485779294652, "grad_norm": 1.6822198634989907, "learning_rate": 7.809936495279373e-07, "loss": 0.035, "step": 9224 }, { "epoch": 2.098976109215017, "grad_norm": 1.7191687731002951, "learning_rate": 7.80907124429565e-07, "loss": 0.042, "step": 9225 }, { "epoch": 2.0992036405005687, "grad_norm": 1.8710917610254834, "learning_rate": 7.808205961447368e-07, "loss": 0.0381, "step": 9226 }, { "epoch": 2.0994311717861205, "grad_norm": 2.3408149009690513, "learning_rate": 7.80734064675221e-07, "loss": 0.1432, "step": 9227 }, { "epoch": 2.0996587030716722, "grad_norm": 1.9889802010588338, "learning_rate": 7.806475300227859e-07, "loss": 0.056, "step": 9228 }, { "epoch": 2.099886234357224, "grad_norm": 2.674385690194074, "learning_rate": 7.805609921892006e-07, "loss": 0.0483, "step": 9229 }, { "epoch": 2.1001137656427757, "grad_norm": 1.02491445767166, "learning_rate": 7.804744511762334e-07, "loss": 0.0736, "step": 9230 }, { "epoch": 2.1003412969283275, "grad_norm": 1.117188844229928, "learning_rate": 7.803879069856532e-07, "loss": 0.0221, "step": 9231 }, { "epoch": 2.1005688282138792, "grad_norm": 1.3889245732873237, "learning_rate": 7.803013596192285e-07, "loss": 0.1145, "step": 9232 }, { "epoch": 2.100796359499431, "grad_norm": 2.7664038135593243, "learning_rate": 7.802148090787289e-07, "loss": 0.0757, "step": 9233 }, { "epoch": 2.1010238907849828, "grad_norm": 1.717634586587934, "learning_rate": 7.801282553659225e-07, "loss": 0.0867, "step": 9234 }, { "epoch": 2.1012514220705345, "grad_norm": 1.604396344801769, "learning_rate": 7.800416984825787e-07, "loss": 0.0786, "step": 9235 }, { "epoch": 2.1014789533560863, "grad_norm": 1.3900155303859807, "learning_rate": 7.799551384304666e-07, "loss": 0.0755, "step": 9236 }, { "epoch": 2.101706484641638, "grad_norm": 1.6256090464414028, "learning_rate": 7.798685752113553e-07, "loss": 0.0839, "step": 9237 }, { "epoch": 2.10193401592719, "grad_norm": 1.504365372068615, "learning_rate": 7.797820088270141e-07, "loss": 0.0649, "step": 9238 }, { "epoch": 2.102161547212742, "grad_norm": 1.9296763686094016, "learning_rate": 7.796954392792119e-07, "loss": 0.1081, "step": 9239 }, { "epoch": 2.1023890784982937, "grad_norm": 1.52904792022275, "learning_rate": 7.796088665697183e-07, "loss": 0.1283, "step": 9240 }, { "epoch": 2.1026166097838455, "grad_norm": 1.089545196735548, "learning_rate": 7.795222907003025e-07, "loss": 0.0625, "step": 9241 }, { "epoch": 2.102844141069397, "grad_norm": 1.2066058203186265, "learning_rate": 7.794357116727342e-07, "loss": 0.092, "step": 9242 }, { "epoch": 2.103071672354949, "grad_norm": 1.3844205195318093, "learning_rate": 7.793491294887827e-07, "loss": 0.0141, "step": 9243 }, { "epoch": 2.1032992036405007, "grad_norm": 1.3781651085528592, "learning_rate": 7.792625441502178e-07, "loss": 0.1123, "step": 9244 }, { "epoch": 2.1035267349260525, "grad_norm": 1.5402665201462948, "learning_rate": 7.791759556588088e-07, "loss": 0.1873, "step": 9245 }, { "epoch": 2.103754266211604, "grad_norm": 2.3134223307321857, "learning_rate": 7.790893640163258e-07, "loss": 0.1023, "step": 9246 }, { "epoch": 2.103981797497156, "grad_norm": 1.9875973334417596, "learning_rate": 7.790027692245383e-07, "loss": 0.0567, "step": 9247 }, { "epoch": 2.1042093287827077, "grad_norm": 2.7137902344214337, "learning_rate": 7.789161712852163e-07, "loss": 0.068, "step": 9248 }, { "epoch": 2.1044368600682595, "grad_norm": 1.054420198034236, "learning_rate": 7.788295702001296e-07, "loss": 0.0787, "step": 9249 }, { "epoch": 2.1046643913538112, "grad_norm": 1.5842620339906497, "learning_rate": 7.787429659710483e-07, "loss": 0.0612, "step": 9250 }, { "epoch": 2.104891922639363, "grad_norm": 1.1935488217069934, "learning_rate": 7.786563585997422e-07, "loss": 0.028, "step": 9251 }, { "epoch": 2.1051194539249147, "grad_norm": 1.6264059074191337, "learning_rate": 7.785697480879817e-07, "loss": 0.0296, "step": 9252 }, { "epoch": 2.1053469852104665, "grad_norm": 1.945992851236711, "learning_rate": 7.784831344375368e-07, "loss": 0.0292, "step": 9253 }, { "epoch": 2.1055745164960182, "grad_norm": 1.64066328910728, "learning_rate": 7.783965176501776e-07, "loss": 0.0496, "step": 9254 }, { "epoch": 2.10580204778157, "grad_norm": 1.7098989672937894, "learning_rate": 7.783098977276746e-07, "loss": 0.1689, "step": 9255 }, { "epoch": 2.1060295790671217, "grad_norm": 2.1049711830611346, "learning_rate": 7.78223274671798e-07, "loss": 0.0274, "step": 9256 }, { "epoch": 2.1062571103526735, "grad_norm": 1.1594944674223808, "learning_rate": 7.781366484843184e-07, "loss": 0.0183, "step": 9257 }, { "epoch": 2.1064846416382252, "grad_norm": 1.8663119260676362, "learning_rate": 7.78050019167006e-07, "loss": 0.0367, "step": 9258 }, { "epoch": 2.106712172923777, "grad_norm": 2.013908637721363, "learning_rate": 7.779633867216316e-07, "loss": 0.0723, "step": 9259 }, { "epoch": 2.1069397042093287, "grad_norm": 1.8907967348439711, "learning_rate": 7.778767511499657e-07, "loss": 0.1024, "step": 9260 }, { "epoch": 2.1071672354948805, "grad_norm": 2.7819586912495864, "learning_rate": 7.77790112453779e-07, "loss": 0.0349, "step": 9261 }, { "epoch": 2.1073947667804322, "grad_norm": 1.8492682898613977, "learning_rate": 7.77703470634842e-07, "loss": 0.0492, "step": 9262 }, { "epoch": 2.107622298065984, "grad_norm": 1.6053819092913735, "learning_rate": 7.77616825694926e-07, "loss": 0.089, "step": 9263 }, { "epoch": 2.1078498293515358, "grad_norm": 1.3193114208099739, "learning_rate": 7.775301776358017e-07, "loss": 0.1046, "step": 9264 }, { "epoch": 2.1080773606370875, "grad_norm": 2.1875301567197623, "learning_rate": 7.774435264592396e-07, "loss": 0.121, "step": 9265 }, { "epoch": 2.1083048919226393, "grad_norm": 0.6687916654549803, "learning_rate": 7.773568721670111e-07, "loss": 0.0259, "step": 9266 }, { "epoch": 2.108532423208191, "grad_norm": 1.8008293534548026, "learning_rate": 7.772702147608873e-07, "loss": 0.0264, "step": 9267 }, { "epoch": 2.1087599544937428, "grad_norm": 1.933657792193932, "learning_rate": 7.771835542426389e-07, "loss": 0.0621, "step": 9268 }, { "epoch": 2.1089874857792945, "grad_norm": 1.9376188439217816, "learning_rate": 7.770968906140376e-07, "loss": 0.1101, "step": 9269 }, { "epoch": 2.1092150170648463, "grad_norm": 1.5472534560706894, "learning_rate": 7.770102238768543e-07, "loss": 0.0526, "step": 9270 }, { "epoch": 2.109442548350398, "grad_norm": 0.7997691185086151, "learning_rate": 7.769235540328607e-07, "loss": 0.0095, "step": 9271 }, { "epoch": 2.1096700796359498, "grad_norm": 1.267434805419255, "learning_rate": 7.768368810838275e-07, "loss": 0.0345, "step": 9272 }, { "epoch": 2.1098976109215015, "grad_norm": 2.3345124217744986, "learning_rate": 7.767502050315266e-07, "loss": 0.0527, "step": 9273 }, { "epoch": 2.1101251422070533, "grad_norm": 1.6574598892540606, "learning_rate": 7.766635258777296e-07, "loss": 0.0457, "step": 9274 }, { "epoch": 2.110352673492605, "grad_norm": 1.8446041861565192, "learning_rate": 7.765768436242077e-07, "loss": 0.0545, "step": 9275 }, { "epoch": 2.1105802047781568, "grad_norm": 1.9922484224033918, "learning_rate": 7.764901582727328e-07, "loss": 0.1015, "step": 9276 }, { "epoch": 2.110807736063709, "grad_norm": 1.4663333368461369, "learning_rate": 7.764034698250767e-07, "loss": 0.0403, "step": 9277 }, { "epoch": 2.1110352673492607, "grad_norm": 1.7587593839166777, "learning_rate": 7.763167782830107e-07, "loss": 0.0442, "step": 9278 }, { "epoch": 2.1112627986348125, "grad_norm": 1.5505472155812654, "learning_rate": 7.762300836483069e-07, "loss": 0.0297, "step": 9279 }, { "epoch": 2.1114903299203642, "grad_norm": 4.552718774084342, "learning_rate": 7.761433859227373e-07, "loss": 0.1291, "step": 9280 }, { "epoch": 2.111717861205916, "grad_norm": 1.7308443259341895, "learning_rate": 7.760566851080736e-07, "loss": 0.1098, "step": 9281 }, { "epoch": 2.1119453924914677, "grad_norm": 1.4546359242636488, "learning_rate": 7.759699812060878e-07, "loss": 0.0327, "step": 9282 }, { "epoch": 2.1121729237770195, "grad_norm": 1.3589593002123062, "learning_rate": 7.758832742185523e-07, "loss": 0.0461, "step": 9283 }, { "epoch": 2.1124004550625712, "grad_norm": 1.7187535172779498, "learning_rate": 7.75796564147239e-07, "loss": 0.0301, "step": 9284 }, { "epoch": 2.112627986348123, "grad_norm": 1.8877137255874747, "learning_rate": 7.757098509939198e-07, "loss": 0.1051, "step": 9285 }, { "epoch": 2.1128555176336747, "grad_norm": 1.6167586830480725, "learning_rate": 7.756231347603674e-07, "loss": 0.0257, "step": 9286 }, { "epoch": 2.1130830489192265, "grad_norm": 4.788451240481285, "learning_rate": 7.755364154483541e-07, "loss": 0.0366, "step": 9287 }, { "epoch": 2.1133105802047782, "grad_norm": 1.6206474647519005, "learning_rate": 7.754496930596521e-07, "loss": 0.062, "step": 9288 }, { "epoch": 2.11353811149033, "grad_norm": 1.907574892689514, "learning_rate": 7.753629675960339e-07, "loss": 0.0288, "step": 9289 }, { "epoch": 2.1137656427758817, "grad_norm": 0.9073674986740545, "learning_rate": 7.752762390592721e-07, "loss": 0.0232, "step": 9290 }, { "epoch": 2.1139931740614335, "grad_norm": 1.880735312372201, "learning_rate": 7.751895074511391e-07, "loss": 0.0821, "step": 9291 }, { "epoch": 2.1142207053469853, "grad_norm": 1.3482497632602575, "learning_rate": 7.751027727734075e-07, "loss": 0.0833, "step": 9292 }, { "epoch": 2.114448236632537, "grad_norm": 1.0427717471233335, "learning_rate": 7.750160350278503e-07, "loss": 0.0175, "step": 9293 }, { "epoch": 2.1146757679180888, "grad_norm": 2.3483958958765068, "learning_rate": 7.7492929421624e-07, "loss": 0.0537, "step": 9294 }, { "epoch": 2.1149032992036405, "grad_norm": 2.161396815830777, "learning_rate": 7.748425503403497e-07, "loss": 0.0859, "step": 9295 }, { "epoch": 2.1151308304891923, "grad_norm": 1.8146383508840411, "learning_rate": 7.747558034019518e-07, "loss": 0.0833, "step": 9296 }, { "epoch": 2.115358361774744, "grad_norm": 2.5248523466150554, "learning_rate": 7.746690534028198e-07, "loss": 0.0817, "step": 9297 }, { "epoch": 2.1155858930602958, "grad_norm": 1.9570064726403718, "learning_rate": 7.745823003447262e-07, "loss": 0.0172, "step": 9298 }, { "epoch": 2.1158134243458475, "grad_norm": 1.5616533451594679, "learning_rate": 7.744955442294444e-07, "loss": 0.0845, "step": 9299 }, { "epoch": 2.1160409556313993, "grad_norm": 1.114550302147854, "learning_rate": 7.744087850587476e-07, "loss": 0.0609, "step": 9300 }, { "epoch": 2.116268486916951, "grad_norm": 1.4756084667763665, "learning_rate": 7.743220228344088e-07, "loss": 0.0667, "step": 9301 }, { "epoch": 2.1164960182025028, "grad_norm": 0.5628312222017164, "learning_rate": 7.742352575582014e-07, "loss": 0.006, "step": 9302 }, { "epoch": 2.1167235494880545, "grad_norm": 1.2782286449244458, "learning_rate": 7.741484892318986e-07, "loss": 0.0482, "step": 9303 }, { "epoch": 2.1169510807736063, "grad_norm": 1.3831854833490138, "learning_rate": 7.740617178572738e-07, "loss": 0.0322, "step": 9304 }, { "epoch": 2.117178612059158, "grad_norm": 1.9633493269534374, "learning_rate": 7.739749434361003e-07, "loss": 0.2001, "step": 9305 }, { "epoch": 2.11740614334471, "grad_norm": 1.6257913797625625, "learning_rate": 7.738881659701521e-07, "loss": 0.0571, "step": 9306 }, { "epoch": 2.1176336746302615, "grad_norm": 1.9442813740192841, "learning_rate": 7.738013854612023e-07, "loss": 0.0408, "step": 9307 }, { "epoch": 2.1178612059158133, "grad_norm": 1.2053114079894778, "learning_rate": 7.737146019110247e-07, "loss": 0.0525, "step": 9308 }, { "epoch": 2.118088737201365, "grad_norm": 0.9481311933193868, "learning_rate": 7.736278153213932e-07, "loss": 0.0736, "step": 9309 }, { "epoch": 2.118316268486917, "grad_norm": 1.4583880459240584, "learning_rate": 7.735410256940812e-07, "loss": 0.0236, "step": 9310 }, { "epoch": 2.1185437997724685, "grad_norm": 1.6654794619107185, "learning_rate": 7.734542330308626e-07, "loss": 0.0201, "step": 9311 }, { "epoch": 2.1187713310580203, "grad_norm": 1.9559494752862399, "learning_rate": 7.733674373335112e-07, "loss": 0.0497, "step": 9312 }, { "epoch": 2.118998862343572, "grad_norm": 1.6291429217634994, "learning_rate": 7.732806386038013e-07, "loss": 0.076, "step": 9313 }, { "epoch": 2.119226393629124, "grad_norm": 1.6769086835362415, "learning_rate": 7.731938368435069e-07, "loss": 0.1245, "step": 9314 }, { "epoch": 2.1194539249146755, "grad_norm": 1.7261062993980787, "learning_rate": 7.731070320544017e-07, "loss": 0.0645, "step": 9315 }, { "epoch": 2.1196814562002277, "grad_norm": 1.354263270160637, "learning_rate": 7.7302022423826e-07, "loss": 0.0473, "step": 9316 }, { "epoch": 2.1199089874857795, "grad_norm": 1.144116211948735, "learning_rate": 7.72933413396856e-07, "loss": 0.0403, "step": 9317 }, { "epoch": 2.1201365187713312, "grad_norm": 0.7540237291195536, "learning_rate": 7.728465995319641e-07, "loss": 0.0095, "step": 9318 }, { "epoch": 2.120364050056883, "grad_norm": 1.9035585388260063, "learning_rate": 7.727597826453583e-07, "loss": 0.0668, "step": 9319 }, { "epoch": 2.1205915813424348, "grad_norm": 1.8238624012381768, "learning_rate": 7.726729627388132e-07, "loss": 0.0504, "step": 9320 }, { "epoch": 2.1208191126279865, "grad_norm": 1.9086678345242842, "learning_rate": 7.725861398141032e-07, "loss": 0.047, "step": 9321 }, { "epoch": 2.1210466439135383, "grad_norm": 1.5033310708941, "learning_rate": 7.724993138730027e-07, "loss": 0.0843, "step": 9322 }, { "epoch": 2.12127417519909, "grad_norm": 1.1496840426498787, "learning_rate": 7.724124849172865e-07, "loss": 0.0972, "step": 9323 }, { "epoch": 2.1215017064846418, "grad_norm": 2.3831184390276308, "learning_rate": 7.723256529487291e-07, "loss": 0.0543, "step": 9324 }, { "epoch": 2.1217292377701935, "grad_norm": 1.3729829894259622, "learning_rate": 7.722388179691051e-07, "loss": 0.1203, "step": 9325 }, { "epoch": 2.1219567690557453, "grad_norm": 1.5554020234867307, "learning_rate": 7.721519799801892e-07, "loss": 0.1493, "step": 9326 }, { "epoch": 2.122184300341297, "grad_norm": 1.5741715519315977, "learning_rate": 7.720651389837566e-07, "loss": 0.0454, "step": 9327 }, { "epoch": 2.1224118316268488, "grad_norm": 2.105704821929588, "learning_rate": 7.719782949815817e-07, "loss": 0.0964, "step": 9328 }, { "epoch": 2.1226393629124005, "grad_norm": 1.6909820738720005, "learning_rate": 7.718914479754395e-07, "loss": 0.0381, "step": 9329 }, { "epoch": 2.1228668941979523, "grad_norm": 1.3862966404078314, "learning_rate": 7.718045979671054e-07, "loss": 0.03, "step": 9330 }, { "epoch": 2.123094425483504, "grad_norm": 1.4632245048086763, "learning_rate": 7.717177449583541e-07, "loss": 0.0194, "step": 9331 }, { "epoch": 2.1233219567690558, "grad_norm": 1.7957475628487232, "learning_rate": 7.716308889509608e-07, "loss": 0.0962, "step": 9332 }, { "epoch": 2.1235494880546075, "grad_norm": 1.4308951859223038, "learning_rate": 7.715440299467006e-07, "loss": 0.075, "step": 9333 }, { "epoch": 2.1237770193401593, "grad_norm": 1.827115383065602, "learning_rate": 7.714571679473489e-07, "loss": 0.079, "step": 9334 }, { "epoch": 2.124004550625711, "grad_norm": 1.7496968559853514, "learning_rate": 7.713703029546809e-07, "loss": 0.0694, "step": 9335 }, { "epoch": 2.124232081911263, "grad_norm": 1.3700989172510394, "learning_rate": 7.712834349704718e-07, "loss": 0.0216, "step": 9336 }, { "epoch": 2.1244596131968145, "grad_norm": 1.5251962233140202, "learning_rate": 7.711965639964971e-07, "loss": 0.0336, "step": 9337 }, { "epoch": 2.1246871444823663, "grad_norm": 2.07406916221973, "learning_rate": 7.711096900345327e-07, "loss": 0.0501, "step": 9338 }, { "epoch": 2.124914675767918, "grad_norm": 1.5684792220083636, "learning_rate": 7.710228130863537e-07, "loss": 0.1003, "step": 9339 }, { "epoch": 2.12514220705347, "grad_norm": 1.1999205767233307, "learning_rate": 7.70935933153736e-07, "loss": 0.0676, "step": 9340 }, { "epoch": 2.1253697383390215, "grad_norm": 0.9945934713928603, "learning_rate": 7.70849050238455e-07, "loss": 0.0183, "step": 9341 }, { "epoch": 2.1255972696245733, "grad_norm": 1.5438017475505683, "learning_rate": 7.707621643422862e-07, "loss": 0.0237, "step": 9342 }, { "epoch": 2.125824800910125, "grad_norm": 1.334350766967659, "learning_rate": 7.706752754670061e-07, "loss": 0.1407, "step": 9343 }, { "epoch": 2.126052332195677, "grad_norm": 1.1002643363414366, "learning_rate": 7.7058838361439e-07, "loss": 0.0318, "step": 9344 }, { "epoch": 2.1262798634812285, "grad_norm": 1.1860539424250727, "learning_rate": 7.70501488786214e-07, "loss": 0.0162, "step": 9345 }, { "epoch": 2.1265073947667803, "grad_norm": 0.8553559475992251, "learning_rate": 7.70414590984254e-07, "loss": 0.0175, "step": 9346 }, { "epoch": 2.126734926052332, "grad_norm": 1.4510114726435523, "learning_rate": 7.703276902102863e-07, "loss": 0.0439, "step": 9347 }, { "epoch": 2.126962457337884, "grad_norm": 1.91645784490155, "learning_rate": 7.702407864660865e-07, "loss": 0.0397, "step": 9348 }, { "epoch": 2.1271899886234356, "grad_norm": 2.0171474300527814, "learning_rate": 7.701538797534311e-07, "loss": 0.0188, "step": 9349 }, { "epoch": 2.1274175199089873, "grad_norm": 2.6771909067136868, "learning_rate": 7.700669700740962e-07, "loss": 0.0433, "step": 9350 }, { "epoch": 2.127645051194539, "grad_norm": 2.3833218184332954, "learning_rate": 7.699800574298582e-07, "loss": 0.0888, "step": 9351 }, { "epoch": 2.127872582480091, "grad_norm": 1.2784843894285451, "learning_rate": 7.698931418224934e-07, "loss": 0.0384, "step": 9352 }, { "epoch": 2.128100113765643, "grad_norm": 1.0439730225104034, "learning_rate": 7.69806223253778e-07, "loss": 0.0149, "step": 9353 }, { "epoch": 2.1283276450511943, "grad_norm": 1.1905001337091723, "learning_rate": 7.697193017254886e-07, "loss": 0.0552, "step": 9354 }, { "epoch": 2.1285551763367465, "grad_norm": 2.263266920896213, "learning_rate": 7.696323772394018e-07, "loss": 0.1084, "step": 9355 }, { "epoch": 2.1287827076222983, "grad_norm": 2.2829384441719722, "learning_rate": 7.69545449797294e-07, "loss": 0.0593, "step": 9356 }, { "epoch": 2.12901023890785, "grad_norm": 1.2589596813152582, "learning_rate": 7.694585194009419e-07, "loss": 0.0679, "step": 9357 }, { "epoch": 2.1292377701934018, "grad_norm": 2.1008274118717063, "learning_rate": 7.693715860521224e-07, "loss": 0.0494, "step": 9358 }, { "epoch": 2.1294653014789535, "grad_norm": 2.093089778207093, "learning_rate": 7.69284649752612e-07, "loss": 0.0417, "step": 9359 }, { "epoch": 2.1296928327645053, "grad_norm": 1.6706339428594954, "learning_rate": 7.691977105041876e-07, "loss": 0.0568, "step": 9360 }, { "epoch": 2.129920364050057, "grad_norm": 1.61865596608991, "learning_rate": 7.69110768308626e-07, "loss": 0.0462, "step": 9361 }, { "epoch": 2.1301478953356088, "grad_norm": 1.3800817073437315, "learning_rate": 7.690238231677045e-07, "loss": 0.0691, "step": 9362 }, { "epoch": 2.1303754266211605, "grad_norm": 1.4972416473429389, "learning_rate": 7.689368750831994e-07, "loss": 0.0413, "step": 9363 }, { "epoch": 2.1306029579067123, "grad_norm": 1.467718571259731, "learning_rate": 7.688499240568883e-07, "loss": 0.0178, "step": 9364 }, { "epoch": 2.130830489192264, "grad_norm": 1.1384642957667104, "learning_rate": 7.687629700905485e-07, "loss": 0.019, "step": 9365 }, { "epoch": 2.131058020477816, "grad_norm": 1.495499693656789, "learning_rate": 7.686760131859566e-07, "loss": 0.0816, "step": 9366 }, { "epoch": 2.1312855517633675, "grad_norm": 1.0987974074095415, "learning_rate": 7.685890533448901e-07, "loss": 0.0739, "step": 9367 }, { "epoch": 2.1315130830489193, "grad_norm": 1.328379571268298, "learning_rate": 7.685020905691265e-07, "loss": 0.0311, "step": 9368 }, { "epoch": 2.131740614334471, "grad_norm": 1.8858406859241228, "learning_rate": 7.684151248604428e-07, "loss": 0.086, "step": 9369 }, { "epoch": 2.131968145620023, "grad_norm": 2.1099407081741903, "learning_rate": 7.683281562206167e-07, "loss": 0.0636, "step": 9370 }, { "epoch": 2.1321956769055745, "grad_norm": 1.8010112143230375, "learning_rate": 7.682411846514258e-07, "loss": 0.1258, "step": 9371 }, { "epoch": 2.1324232081911263, "grad_norm": 0.908234135058555, "learning_rate": 7.68154210154647e-07, "loss": 0.0164, "step": 9372 }, { "epoch": 2.132650739476678, "grad_norm": 2.1527881845029926, "learning_rate": 7.680672327320586e-07, "loss": 0.1047, "step": 9373 }, { "epoch": 2.13287827076223, "grad_norm": 1.6232277831727786, "learning_rate": 7.679802523854379e-07, "loss": 0.0372, "step": 9374 }, { "epoch": 2.1331058020477816, "grad_norm": 1.4897316167472505, "learning_rate": 7.678932691165628e-07, "loss": 0.0354, "step": 9375 }, { "epoch": 2.1333333333333333, "grad_norm": 0.8040641557234913, "learning_rate": 7.678062829272107e-07, "loss": 0.0386, "step": 9376 }, { "epoch": 2.133560864618885, "grad_norm": 2.1506293627331585, "learning_rate": 7.677192938191599e-07, "loss": 0.0708, "step": 9377 }, { "epoch": 2.133788395904437, "grad_norm": 1.217271987394615, "learning_rate": 7.676323017941882e-07, "loss": 0.0444, "step": 9378 }, { "epoch": 2.1340159271899886, "grad_norm": 1.640052554857382, "learning_rate": 7.675453068540733e-07, "loss": 0.0304, "step": 9379 }, { "epoch": 2.1342434584755403, "grad_norm": 1.396330359096103, "learning_rate": 7.674583090005934e-07, "loss": 0.0766, "step": 9380 }, { "epoch": 2.134470989761092, "grad_norm": 1.2593153400616426, "learning_rate": 7.673713082355267e-07, "loss": 0.0612, "step": 9381 }, { "epoch": 2.134698521046644, "grad_norm": 1.9196581659153655, "learning_rate": 7.672843045606511e-07, "loss": 0.0925, "step": 9382 }, { "epoch": 2.1349260523321956, "grad_norm": 2.424430667024588, "learning_rate": 7.671972979777448e-07, "loss": 0.0443, "step": 9383 }, { "epoch": 2.1351535836177473, "grad_norm": 1.789095012568431, "learning_rate": 7.671102884885865e-07, "loss": 0.1368, "step": 9384 }, { "epoch": 2.135381114903299, "grad_norm": 1.9908795772804533, "learning_rate": 7.67023276094954e-07, "loss": 0.0493, "step": 9385 }, { "epoch": 2.135608646188851, "grad_norm": 1.544089389991087, "learning_rate": 7.669362607986256e-07, "loss": 0.0939, "step": 9386 }, { "epoch": 2.1358361774744026, "grad_norm": 1.5223363268950432, "learning_rate": 7.668492426013802e-07, "loss": 0.0738, "step": 9387 }, { "epoch": 2.1360637087599543, "grad_norm": 2.590422659932699, "learning_rate": 7.667622215049959e-07, "loss": 0.094, "step": 9388 }, { "epoch": 2.136291240045506, "grad_norm": 0.9533778360174558, "learning_rate": 7.666751975112515e-07, "loss": 0.0747, "step": 9389 }, { "epoch": 2.136518771331058, "grad_norm": 1.6869229827903554, "learning_rate": 7.665881706219256e-07, "loss": 0.1335, "step": 9390 }, { "epoch": 2.1367463026166096, "grad_norm": 1.3404189139443545, "learning_rate": 7.665011408387968e-07, "loss": 0.0379, "step": 9391 }, { "epoch": 2.136973833902162, "grad_norm": 0.90948302588563, "learning_rate": 7.664141081636438e-07, "loss": 0.0367, "step": 9392 }, { "epoch": 2.137201365187713, "grad_norm": 1.7494800758004867, "learning_rate": 7.663270725982452e-07, "loss": 0.053, "step": 9393 }, { "epoch": 2.1374288964732653, "grad_norm": 1.1693153351230774, "learning_rate": 7.662400341443804e-07, "loss": 0.0583, "step": 9394 }, { "epoch": 2.137656427758817, "grad_norm": 1.557485317691199, "learning_rate": 7.661529928038277e-07, "loss": 0.015, "step": 9395 }, { "epoch": 2.137883959044369, "grad_norm": 1.6827033189928462, "learning_rate": 7.660659485783667e-07, "loss": 0.0508, "step": 9396 }, { "epoch": 2.1381114903299205, "grad_norm": 1.1102334222602452, "learning_rate": 7.659789014697758e-07, "loss": 0.0271, "step": 9397 }, { "epoch": 2.1383390216154723, "grad_norm": 1.774247472400488, "learning_rate": 7.658918514798345e-07, "loss": 0.0445, "step": 9398 }, { "epoch": 2.138566552901024, "grad_norm": 1.5178141737496575, "learning_rate": 7.658047986103215e-07, "loss": 0.1275, "step": 9399 }, { "epoch": 2.138794084186576, "grad_norm": 1.2540145922087464, "learning_rate": 7.657177428630164e-07, "loss": 0.0362, "step": 9400 }, { "epoch": 2.1390216154721275, "grad_norm": 1.5881320882876477, "learning_rate": 7.656306842396985e-07, "loss": 0.0329, "step": 9401 }, { "epoch": 2.1392491467576793, "grad_norm": 1.0347622447579183, "learning_rate": 7.655436227421468e-07, "loss": 0.0442, "step": 9402 }, { "epoch": 2.139476678043231, "grad_norm": 0.9942062765597185, "learning_rate": 7.65456558372141e-07, "loss": 0.0444, "step": 9403 }, { "epoch": 2.139704209328783, "grad_norm": 1.4894334597650998, "learning_rate": 7.653694911314602e-07, "loss": 0.0939, "step": 9404 }, { "epoch": 2.1399317406143346, "grad_norm": 2.5553012707365985, "learning_rate": 7.652824210218842e-07, "loss": 0.0582, "step": 9405 }, { "epoch": 2.1401592718998863, "grad_norm": 2.2207688664464396, "learning_rate": 7.651953480451923e-07, "loss": 0.0822, "step": 9406 }, { "epoch": 2.140386803185438, "grad_norm": 1.459662417091181, "learning_rate": 7.651082722031644e-07, "loss": 0.0415, "step": 9407 }, { "epoch": 2.14061433447099, "grad_norm": 0.9799504903348215, "learning_rate": 7.6502119349758e-07, "loss": 0.0176, "step": 9408 }, { "epoch": 2.1408418657565416, "grad_norm": 2.452607093339156, "learning_rate": 7.649341119302188e-07, "loss": 0.0475, "step": 9409 }, { "epoch": 2.1410693970420933, "grad_norm": 2.091931225218873, "learning_rate": 7.648470275028607e-07, "loss": 0.0558, "step": 9410 }, { "epoch": 2.141296928327645, "grad_norm": 1.5658581552329647, "learning_rate": 7.647599402172854e-07, "loss": 0.0998, "step": 9411 }, { "epoch": 2.141524459613197, "grad_norm": 1.823426513047552, "learning_rate": 7.646728500752729e-07, "loss": 0.0354, "step": 9412 }, { "epoch": 2.1417519908987486, "grad_norm": 2.2777896486497293, "learning_rate": 7.645857570786029e-07, "loss": 0.0759, "step": 9413 }, { "epoch": 2.1419795221843003, "grad_norm": 0.6840226560483477, "learning_rate": 7.64498661229056e-07, "loss": 0.0283, "step": 9414 }, { "epoch": 2.142207053469852, "grad_norm": 3.3359064141459296, "learning_rate": 7.644115625284119e-07, "loss": 0.0575, "step": 9415 }, { "epoch": 2.142434584755404, "grad_norm": 1.2843331590612495, "learning_rate": 7.643244609784506e-07, "loss": 0.0796, "step": 9416 }, { "epoch": 2.1426621160409556, "grad_norm": 1.9411472132833747, "learning_rate": 7.642373565809527e-07, "loss": 0.0293, "step": 9417 }, { "epoch": 2.1428896473265073, "grad_norm": 1.769227281674406, "learning_rate": 7.641502493376981e-07, "loss": 0.0649, "step": 9418 }, { "epoch": 2.143117178612059, "grad_norm": 2.6692341252820286, "learning_rate": 7.640631392504673e-07, "loss": 0.0838, "step": 9419 }, { "epoch": 2.143344709897611, "grad_norm": 1.375710303841949, "learning_rate": 7.639760263210405e-07, "loss": 0.0745, "step": 9420 }, { "epoch": 2.1435722411831626, "grad_norm": 1.1020386493301202, "learning_rate": 7.638889105511983e-07, "loss": 0.0374, "step": 9421 }, { "epoch": 2.1437997724687143, "grad_norm": 1.5165134244613883, "learning_rate": 7.638017919427212e-07, "loss": 0.0962, "step": 9422 }, { "epoch": 2.144027303754266, "grad_norm": 2.263419238613544, "learning_rate": 7.637146704973897e-07, "loss": 0.0475, "step": 9423 }, { "epoch": 2.144254835039818, "grad_norm": 1.6063544321457297, "learning_rate": 7.636275462169843e-07, "loss": 0.0676, "step": 9424 }, { "epoch": 2.1444823663253696, "grad_norm": 1.535827142260173, "learning_rate": 7.635404191032858e-07, "loss": 0.044, "step": 9425 }, { "epoch": 2.1447098976109213, "grad_norm": 1.3474563003971842, "learning_rate": 7.634532891580748e-07, "loss": 0.0468, "step": 9426 }, { "epoch": 2.144937428896473, "grad_norm": 1.225183736294941, "learning_rate": 7.633661563831321e-07, "loss": 0.0297, "step": 9427 }, { "epoch": 2.145164960182025, "grad_norm": 2.238719040829829, "learning_rate": 7.632790207802388e-07, "loss": 0.0631, "step": 9428 }, { "epoch": 2.1453924914675766, "grad_norm": 1.395162871308499, "learning_rate": 7.631918823511751e-07, "loss": 0.0335, "step": 9429 }, { "epoch": 2.1456200227531284, "grad_norm": 2.092423508040525, "learning_rate": 7.631047410977227e-07, "loss": 0.0465, "step": 9430 }, { "epoch": 2.1458475540386805, "grad_norm": 1.4291616728321297, "learning_rate": 7.630175970216625e-07, "loss": 0.0265, "step": 9431 }, { "epoch": 2.146075085324232, "grad_norm": 1.6015952917569425, "learning_rate": 7.629304501247751e-07, "loss": 0.0778, "step": 9432 }, { "epoch": 2.146302616609784, "grad_norm": 1.2256299952561287, "learning_rate": 7.628433004088419e-07, "loss": 0.0905, "step": 9433 }, { "epoch": 2.146530147895336, "grad_norm": 2.5100871022265974, "learning_rate": 7.627561478756443e-07, "loss": 0.0732, "step": 9434 }, { "epoch": 2.1467576791808876, "grad_norm": 1.29392860753592, "learning_rate": 7.626689925269633e-07, "loss": 0.0405, "step": 9435 }, { "epoch": 2.1469852104664393, "grad_norm": 1.498969777012569, "learning_rate": 7.625818343645799e-07, "loss": 0.0371, "step": 9436 }, { "epoch": 2.147212741751991, "grad_norm": 1.3141756876340118, "learning_rate": 7.624946733902762e-07, "loss": 0.0516, "step": 9437 }, { "epoch": 2.147440273037543, "grad_norm": 2.3200031750786443, "learning_rate": 7.624075096058329e-07, "loss": 0.0871, "step": 9438 }, { "epoch": 2.1476678043230946, "grad_norm": 1.9264770280478787, "learning_rate": 7.623203430130319e-07, "loss": 0.0878, "step": 9439 }, { "epoch": 2.1478953356086463, "grad_norm": 1.444710945875574, "learning_rate": 7.622331736136546e-07, "loss": 0.0194, "step": 9440 }, { "epoch": 2.148122866894198, "grad_norm": 2.1435596203430145, "learning_rate": 7.621460014094825e-07, "loss": 0.1218, "step": 9441 }, { "epoch": 2.14835039817975, "grad_norm": 2.1048982631125717, "learning_rate": 7.620588264022973e-07, "loss": 0.1983, "step": 9442 }, { "epoch": 2.1485779294653016, "grad_norm": 1.1427311898981718, "learning_rate": 7.619716485938805e-07, "loss": 0.0926, "step": 9443 }, { "epoch": 2.1488054607508533, "grad_norm": 3.2663065997266867, "learning_rate": 7.618844679860144e-07, "loss": 0.06, "step": 9444 }, { "epoch": 2.149032992036405, "grad_norm": 1.6766682228640024, "learning_rate": 7.617972845804802e-07, "loss": 0.0417, "step": 9445 }, { "epoch": 2.149260523321957, "grad_norm": 0.8605363751316158, "learning_rate": 7.617100983790603e-07, "loss": 0.0167, "step": 9446 }, { "epoch": 2.1494880546075086, "grad_norm": 1.7274126311028568, "learning_rate": 7.616229093835361e-07, "loss": 0.0464, "step": 9447 }, { "epoch": 2.1497155858930603, "grad_norm": 1.4531075823883306, "learning_rate": 7.6153571759569e-07, "loss": 0.0901, "step": 9448 }, { "epoch": 2.149943117178612, "grad_norm": 1.2017254026999795, "learning_rate": 7.61448523017304e-07, "loss": 0.0454, "step": 9449 }, { "epoch": 2.150170648464164, "grad_norm": 1.9031272001920252, "learning_rate": 7.613613256501598e-07, "loss": 0.0372, "step": 9450 }, { "epoch": 2.1503981797497156, "grad_norm": 1.7048614482256514, "learning_rate": 7.612741254960398e-07, "loss": 0.0214, "step": 9451 }, { "epoch": 2.1506257110352673, "grad_norm": 1.6154795712268546, "learning_rate": 7.611869225567266e-07, "loss": 0.0722, "step": 9452 }, { "epoch": 2.150853242320819, "grad_norm": 1.6394253082305872, "learning_rate": 7.610997168340019e-07, "loss": 0.0546, "step": 9453 }, { "epoch": 2.151080773606371, "grad_norm": 1.8045044276515763, "learning_rate": 7.610125083296482e-07, "loss": 0.0614, "step": 9454 }, { "epoch": 2.1513083048919226, "grad_norm": 1.4902043316498548, "learning_rate": 7.609252970454481e-07, "loss": 0.023, "step": 9455 }, { "epoch": 2.1515358361774743, "grad_norm": 1.5827470563432342, "learning_rate": 7.608380829831838e-07, "loss": 0.0824, "step": 9456 }, { "epoch": 2.151763367463026, "grad_norm": 1.3928939708222616, "learning_rate": 7.607508661446378e-07, "loss": 0.0377, "step": 9457 }, { "epoch": 2.151990898748578, "grad_norm": 4.359928182637225, "learning_rate": 7.606636465315927e-07, "loss": 0.0445, "step": 9458 }, { "epoch": 2.1522184300341296, "grad_norm": 2.136122219824162, "learning_rate": 7.605764241458313e-07, "loss": 0.0309, "step": 9459 }, { "epoch": 2.1524459613196814, "grad_norm": 1.0921428417713035, "learning_rate": 7.604891989891358e-07, "loss": 0.0478, "step": 9460 }, { "epoch": 2.152673492605233, "grad_norm": 1.7350343198023745, "learning_rate": 7.604019710632895e-07, "loss": 0.0412, "step": 9461 }, { "epoch": 2.152901023890785, "grad_norm": 1.8683402579419655, "learning_rate": 7.603147403700746e-07, "loss": 0.0324, "step": 9462 }, { "epoch": 2.1531285551763366, "grad_norm": 2.03049621141726, "learning_rate": 7.602275069112742e-07, "loss": 0.0318, "step": 9463 }, { "epoch": 2.1533560864618884, "grad_norm": 1.6146820302973948, "learning_rate": 7.601402706886716e-07, "loss": 0.0887, "step": 9464 }, { "epoch": 2.15358361774744, "grad_norm": 1.4186738617417172, "learning_rate": 7.60053031704049e-07, "loss": 0.0231, "step": 9465 }, { "epoch": 2.153811149032992, "grad_norm": 1.8502692538793657, "learning_rate": 7.5996578995919e-07, "loss": 0.0447, "step": 9466 }, { "epoch": 2.1540386803185436, "grad_norm": 1.869073291535232, "learning_rate": 7.598785454558773e-07, "loss": 0.0502, "step": 9467 }, { "epoch": 2.1542662116040954, "grad_norm": 1.934302737222525, "learning_rate": 7.597912981958943e-07, "loss": 0.1231, "step": 9468 }, { "epoch": 2.154493742889647, "grad_norm": 1.4767373950442406, "learning_rate": 7.597040481810239e-07, "loss": 0.0249, "step": 9469 }, { "epoch": 2.1547212741751993, "grad_norm": 1.675161104911878, "learning_rate": 7.596167954130493e-07, "loss": 0.1211, "step": 9470 }, { "epoch": 2.1549488054607506, "grad_norm": 2.1630884838781133, "learning_rate": 7.595295398937541e-07, "loss": 0.0713, "step": 9471 }, { "epoch": 2.155176336746303, "grad_norm": 1.1951201714125987, "learning_rate": 7.594422816249217e-07, "loss": 0.0165, "step": 9472 }, { "epoch": 2.1554038680318546, "grad_norm": 1.4566850582274007, "learning_rate": 7.59355020608335e-07, "loss": 0.075, "step": 9473 }, { "epoch": 2.1556313993174063, "grad_norm": 1.1604749857711565, "learning_rate": 7.592677568457778e-07, "loss": 0.1078, "step": 9474 }, { "epoch": 2.155858930602958, "grad_norm": 1.6373713082842598, "learning_rate": 7.591804903390336e-07, "loss": 0.0598, "step": 9475 }, { "epoch": 2.15608646188851, "grad_norm": 1.6959815284412367, "learning_rate": 7.590932210898859e-07, "loss": 0.0874, "step": 9476 }, { "epoch": 2.1563139931740616, "grad_norm": 1.8014682740885186, "learning_rate": 7.590059491001183e-07, "loss": 0.0382, "step": 9477 }, { "epoch": 2.1565415244596133, "grad_norm": 1.1796467197904394, "learning_rate": 7.589186743715146e-07, "loss": 0.0794, "step": 9478 }, { "epoch": 2.156769055745165, "grad_norm": 1.756926717056566, "learning_rate": 7.588313969058584e-07, "loss": 0.0816, "step": 9479 }, { "epoch": 2.156996587030717, "grad_norm": 2.6269960117902325, "learning_rate": 7.587441167049335e-07, "loss": 0.1693, "step": 9480 }, { "epoch": 2.1572241183162686, "grad_norm": 0.42069891550323985, "learning_rate": 7.586568337705239e-07, "loss": 0.0029, "step": 9481 }, { "epoch": 2.1574516496018203, "grad_norm": 1.6964780873318768, "learning_rate": 7.585695481044133e-07, "loss": 0.05, "step": 9482 }, { "epoch": 2.157679180887372, "grad_norm": 2.3486378716222305, "learning_rate": 7.584822597083859e-07, "loss": 0.0782, "step": 9483 }, { "epoch": 2.157906712172924, "grad_norm": 1.7234854015521162, "learning_rate": 7.583949685842253e-07, "loss": 0.0746, "step": 9484 }, { "epoch": 2.1581342434584756, "grad_norm": 1.3851929183291392, "learning_rate": 7.583076747337162e-07, "loss": 0.0769, "step": 9485 }, { "epoch": 2.1583617747440274, "grad_norm": 1.042012257312453, "learning_rate": 7.582203781586422e-07, "loss": 0.0309, "step": 9486 }, { "epoch": 2.158589306029579, "grad_norm": 1.4771989092824398, "learning_rate": 7.581330788607875e-07, "loss": 0.0618, "step": 9487 }, { "epoch": 2.158816837315131, "grad_norm": 0.7828197948837077, "learning_rate": 7.580457768419367e-07, "loss": 0.0172, "step": 9488 }, { "epoch": 2.1590443686006826, "grad_norm": 1.7129026868418487, "learning_rate": 7.579584721038738e-07, "loss": 0.0445, "step": 9489 }, { "epoch": 2.1592718998862344, "grad_norm": 1.0927664918168947, "learning_rate": 7.578711646483832e-07, "loss": 0.081, "step": 9490 }, { "epoch": 2.159499431171786, "grad_norm": 1.8048150160951877, "learning_rate": 7.577838544772495e-07, "loss": 0.0855, "step": 9491 }, { "epoch": 2.159726962457338, "grad_norm": 1.8082573615440634, "learning_rate": 7.576965415922569e-07, "loss": 0.0604, "step": 9492 }, { "epoch": 2.1599544937428896, "grad_norm": 1.0695862916240448, "learning_rate": 7.576092259951899e-07, "loss": 0.0177, "step": 9493 }, { "epoch": 2.1601820250284414, "grad_norm": 1.3988895145486373, "learning_rate": 7.575219076878332e-07, "loss": 0.0829, "step": 9494 }, { "epoch": 2.160409556313993, "grad_norm": 3.021161121575875, "learning_rate": 7.574345866719715e-07, "loss": 0.0731, "step": 9495 }, { "epoch": 2.160637087599545, "grad_norm": 1.2035602500367737, "learning_rate": 7.573472629493894e-07, "loss": 0.0161, "step": 9496 }, { "epoch": 2.1608646188850966, "grad_norm": 1.9335725739239158, "learning_rate": 7.572599365218716e-07, "loss": 0.0783, "step": 9497 }, { "epoch": 2.1610921501706484, "grad_norm": 2.508763547609667, "learning_rate": 7.571726073912031e-07, "loss": 0.1002, "step": 9498 }, { "epoch": 2.1613196814562, "grad_norm": 1.279431099983586, "learning_rate": 7.570852755591683e-07, "loss": 0.1574, "step": 9499 }, { "epoch": 2.161547212741752, "grad_norm": 2.664533934222619, "learning_rate": 7.569979410275525e-07, "loss": 0.143, "step": 9500 }, { "epoch": 2.1617747440273036, "grad_norm": 2.137344675810386, "learning_rate": 7.569106037981403e-07, "loss": 0.0362, "step": 9501 }, { "epoch": 2.1620022753128554, "grad_norm": 1.8263233406423385, "learning_rate": 7.568232638727173e-07, "loss": 0.0452, "step": 9502 }, { "epoch": 2.162229806598407, "grad_norm": 1.7908284046425997, "learning_rate": 7.56735921253068e-07, "loss": 0.0948, "step": 9503 }, { "epoch": 2.162457337883959, "grad_norm": 1.1055638980278877, "learning_rate": 7.566485759409778e-07, "loss": 0.0671, "step": 9504 }, { "epoch": 2.1626848691695106, "grad_norm": 1.966283016527124, "learning_rate": 7.565612279382318e-07, "loss": 0.0748, "step": 9505 }, { "epoch": 2.1629124004550624, "grad_norm": 0.9699618369480882, "learning_rate": 7.564738772466153e-07, "loss": 0.0355, "step": 9506 }, { "epoch": 2.163139931740614, "grad_norm": 2.234936145616755, "learning_rate": 7.563865238679133e-07, "loss": 0.0425, "step": 9507 }, { "epoch": 2.163367463026166, "grad_norm": 0.6953923227624036, "learning_rate": 7.562991678039116e-07, "loss": 0.0053, "step": 9508 }, { "epoch": 2.163594994311718, "grad_norm": 1.4927692917033228, "learning_rate": 7.562118090563953e-07, "loss": 0.0753, "step": 9509 }, { "epoch": 2.1638225255972694, "grad_norm": 1.4045146878025545, "learning_rate": 7.5612444762715e-07, "loss": 0.034, "step": 9510 }, { "epoch": 2.1640500568828216, "grad_norm": 1.2359805545350997, "learning_rate": 7.560370835179611e-07, "loss": 0.051, "step": 9511 }, { "epoch": 2.1642775881683733, "grad_norm": 1.299803781802618, "learning_rate": 7.559497167306141e-07, "loss": 0.0344, "step": 9512 }, { "epoch": 2.164505119453925, "grad_norm": 1.5909800541101666, "learning_rate": 7.558623472668948e-07, "loss": 0.0332, "step": 9513 }, { "epoch": 2.164732650739477, "grad_norm": 1.7244695675540986, "learning_rate": 7.557749751285887e-07, "loss": 0.0317, "step": 9514 }, { "epoch": 2.1649601820250286, "grad_norm": 1.2855169129998656, "learning_rate": 7.556876003174816e-07, "loss": 0.055, "step": 9515 }, { "epoch": 2.1651877133105804, "grad_norm": 1.4606586922640823, "learning_rate": 7.556002228353595e-07, "loss": 0.0864, "step": 9516 }, { "epoch": 2.165415244596132, "grad_norm": 1.9120924782453184, "learning_rate": 7.555128426840078e-07, "loss": 0.0369, "step": 9517 }, { "epoch": 2.165642775881684, "grad_norm": 1.0365258067432568, "learning_rate": 7.554254598652127e-07, "loss": 0.0748, "step": 9518 }, { "epoch": 2.1658703071672356, "grad_norm": 2.1208575506873366, "learning_rate": 7.553380743807602e-07, "loss": 0.0278, "step": 9519 }, { "epoch": 2.1660978384527874, "grad_norm": 1.5769455246998723, "learning_rate": 7.552506862324358e-07, "loss": 0.0658, "step": 9520 }, { "epoch": 2.166325369738339, "grad_norm": 1.7764530341674318, "learning_rate": 7.551632954220263e-07, "loss": 0.0241, "step": 9521 }, { "epoch": 2.166552901023891, "grad_norm": 2.0564282685963278, "learning_rate": 7.550759019513173e-07, "loss": 0.1122, "step": 9522 }, { "epoch": 2.1667804323094426, "grad_norm": 1.4509585231688706, "learning_rate": 7.54988505822095e-07, "loss": 0.0771, "step": 9523 }, { "epoch": 2.1670079635949944, "grad_norm": 1.4519579808787662, "learning_rate": 7.549011070361459e-07, "loss": 0.1164, "step": 9524 }, { "epoch": 2.167235494880546, "grad_norm": 1.6549730773752167, "learning_rate": 7.548137055952559e-07, "loss": 0.1115, "step": 9525 }, { "epoch": 2.167463026166098, "grad_norm": 2.924801535098329, "learning_rate": 7.547263015012116e-07, "loss": 0.0509, "step": 9526 }, { "epoch": 2.1676905574516496, "grad_norm": 2.1844739743189523, "learning_rate": 7.54638894755799e-07, "loss": 0.1027, "step": 9527 }, { "epoch": 2.1679180887372014, "grad_norm": 1.3046122618011156, "learning_rate": 7.54551485360805e-07, "loss": 0.0093, "step": 9528 }, { "epoch": 2.168145620022753, "grad_norm": 1.856004275183145, "learning_rate": 7.544640733180161e-07, "loss": 0.0433, "step": 9529 }, { "epoch": 2.168373151308305, "grad_norm": 1.451278219696552, "learning_rate": 7.543766586292185e-07, "loss": 0.0844, "step": 9530 }, { "epoch": 2.1686006825938566, "grad_norm": 1.594240571498456, "learning_rate": 7.542892412961988e-07, "loss": 0.0425, "step": 9531 }, { "epoch": 2.1688282138794084, "grad_norm": 1.1423295038645647, "learning_rate": 7.54201821320744e-07, "loss": 0.037, "step": 9532 }, { "epoch": 2.16905574516496, "grad_norm": 1.558015800490758, "learning_rate": 7.541143987046406e-07, "loss": 0.0509, "step": 9533 }, { "epoch": 2.169283276450512, "grad_norm": 1.59035521399315, "learning_rate": 7.540269734496751e-07, "loss": 0.0237, "step": 9534 }, { "epoch": 2.1695108077360636, "grad_norm": 2.0807067699266133, "learning_rate": 7.539395455576348e-07, "loss": 0.0415, "step": 9535 }, { "epoch": 2.1697383390216154, "grad_norm": 2.067919182776939, "learning_rate": 7.538521150303063e-07, "loss": 0.1002, "step": 9536 }, { "epoch": 2.169965870307167, "grad_norm": 1.9992834288059516, "learning_rate": 7.537646818694764e-07, "loss": 0.1106, "step": 9537 }, { "epoch": 2.170193401592719, "grad_norm": 1.4123785047184512, "learning_rate": 7.536772460769324e-07, "loss": 0.0233, "step": 9538 }, { "epoch": 2.1704209328782706, "grad_norm": 1.665422220667401, "learning_rate": 7.535898076544611e-07, "loss": 0.0888, "step": 9539 }, { "epoch": 2.1706484641638224, "grad_norm": 1.5148200586824807, "learning_rate": 7.535023666038497e-07, "loss": 0.0309, "step": 9540 }, { "epoch": 2.170875995449374, "grad_norm": 1.482270971190226, "learning_rate": 7.534149229268852e-07, "loss": 0.0813, "step": 9541 }, { "epoch": 2.171103526734926, "grad_norm": 1.291244523440277, "learning_rate": 7.533274766253548e-07, "loss": 0.0724, "step": 9542 }, { "epoch": 2.1713310580204777, "grad_norm": 2.126739299406321, "learning_rate": 7.532400277010458e-07, "loss": 0.0552, "step": 9543 }, { "epoch": 2.1715585893060294, "grad_norm": 1.6666769692922412, "learning_rate": 7.531525761557454e-07, "loss": 0.0782, "step": 9544 }, { "epoch": 2.171786120591581, "grad_norm": 1.6514959116034478, "learning_rate": 7.530651219912413e-07, "loss": 0.0491, "step": 9545 }, { "epoch": 2.172013651877133, "grad_norm": 2.1927974860136437, "learning_rate": 7.529776652093204e-07, "loss": 0.0727, "step": 9546 }, { "epoch": 2.1722411831626847, "grad_norm": 1.9098009631719026, "learning_rate": 7.528902058117707e-07, "loss": 0.0791, "step": 9547 }, { "epoch": 2.172468714448237, "grad_norm": 1.3068065250915974, "learning_rate": 7.528027438003792e-07, "loss": 0.1249, "step": 9548 }, { "epoch": 2.172696245733788, "grad_norm": 2.503945304655778, "learning_rate": 7.527152791769338e-07, "loss": 0.1367, "step": 9549 }, { "epoch": 2.1729237770193404, "grad_norm": 1.991208013783355, "learning_rate": 7.526278119432219e-07, "loss": 0.0383, "step": 9550 }, { "epoch": 2.173151308304892, "grad_norm": 1.6803154280209256, "learning_rate": 7.52540342101031e-07, "loss": 0.0183, "step": 9551 }, { "epoch": 2.173378839590444, "grad_norm": 1.7516963775448646, "learning_rate": 7.524528696521495e-07, "loss": 0.0816, "step": 9552 }, { "epoch": 2.1736063708759956, "grad_norm": 1.257819638263749, "learning_rate": 7.523653945983645e-07, "loss": 0.0534, "step": 9553 }, { "epoch": 2.1738339021615474, "grad_norm": 0.8986766894755777, "learning_rate": 7.522779169414643e-07, "loss": 0.0686, "step": 9554 }, { "epoch": 2.174061433447099, "grad_norm": 2.0461962583918147, "learning_rate": 7.521904366832365e-07, "loss": 0.1134, "step": 9555 }, { "epoch": 2.174288964732651, "grad_norm": 2.114234582962323, "learning_rate": 7.521029538254692e-07, "loss": 0.0189, "step": 9556 }, { "epoch": 2.1745164960182026, "grad_norm": 1.2764140322075164, "learning_rate": 7.520154683699501e-07, "loss": 0.0358, "step": 9557 }, { "epoch": 2.1747440273037544, "grad_norm": 1.7779424078634884, "learning_rate": 7.519279803184676e-07, "loss": 0.0713, "step": 9558 }, { "epoch": 2.174971558589306, "grad_norm": 2.405996992965834, "learning_rate": 7.518404896728096e-07, "loss": 0.1248, "step": 9559 }, { "epoch": 2.175199089874858, "grad_norm": 2.1106260889167503, "learning_rate": 7.517529964347642e-07, "loss": 0.0597, "step": 9560 }, { "epoch": 2.1754266211604096, "grad_norm": 1.8519619466574628, "learning_rate": 7.516655006061198e-07, "loss": 0.1083, "step": 9561 }, { "epoch": 2.1756541524459614, "grad_norm": 1.2364667323648664, "learning_rate": 7.515780021886646e-07, "loss": 0.0205, "step": 9562 }, { "epoch": 2.175881683731513, "grad_norm": 1.3925058952327758, "learning_rate": 7.514905011841867e-07, "loss": 0.0338, "step": 9563 }, { "epoch": 2.176109215017065, "grad_norm": 1.3204649550810612, "learning_rate": 7.514029975944746e-07, "loss": 0.0548, "step": 9564 }, { "epoch": 2.1763367463026166, "grad_norm": 1.711066372302418, "learning_rate": 7.513154914213168e-07, "loss": 0.104, "step": 9565 }, { "epoch": 2.1765642775881684, "grad_norm": 1.8364607608657266, "learning_rate": 7.512279826665018e-07, "loss": 0.065, "step": 9566 }, { "epoch": 2.17679180887372, "grad_norm": 1.7375124140504703, "learning_rate": 7.511404713318178e-07, "loss": 0.087, "step": 9567 }, { "epoch": 2.177019340159272, "grad_norm": 1.1281803318894696, "learning_rate": 7.510529574190537e-07, "loss": 0.1356, "step": 9568 }, { "epoch": 2.1772468714448237, "grad_norm": 1.6726980185155025, "learning_rate": 7.50965440929998e-07, "loss": 0.0254, "step": 9569 }, { "epoch": 2.1774744027303754, "grad_norm": 2.3018997634205527, "learning_rate": 7.508779218664395e-07, "loss": 0.0494, "step": 9570 }, { "epoch": 2.177701934015927, "grad_norm": 1.7798912942310836, "learning_rate": 7.507904002301665e-07, "loss": 0.1451, "step": 9571 }, { "epoch": 2.177929465301479, "grad_norm": 1.3289332852047717, "learning_rate": 7.507028760229683e-07, "loss": 0.0204, "step": 9572 }, { "epoch": 2.1781569965870307, "grad_norm": 2.1303411735158977, "learning_rate": 7.506153492466337e-07, "loss": 0.0386, "step": 9573 }, { "epoch": 2.1783845278725824, "grad_norm": 1.664882393547048, "learning_rate": 7.505278199029511e-07, "loss": 0.0678, "step": 9574 }, { "epoch": 2.178612059158134, "grad_norm": 1.7529702135397573, "learning_rate": 7.504402879937098e-07, "loss": 0.0297, "step": 9575 }, { "epoch": 2.178839590443686, "grad_norm": 1.2339158207363576, "learning_rate": 7.503527535206989e-07, "loss": 0.0203, "step": 9576 }, { "epoch": 2.1790671217292377, "grad_norm": 1.3485489170953888, "learning_rate": 7.502652164857072e-07, "loss": 0.0769, "step": 9577 }, { "epoch": 2.1792946530147894, "grad_norm": 1.3897911338084257, "learning_rate": 7.501776768905238e-07, "loss": 0.1447, "step": 9578 }, { "epoch": 2.179522184300341, "grad_norm": 1.6876611981736775, "learning_rate": 7.500901347369382e-07, "loss": 0.0485, "step": 9579 }, { "epoch": 2.179749715585893, "grad_norm": 1.0360630778350237, "learning_rate": 7.500025900267391e-07, "loss": 0.0107, "step": 9580 }, { "epoch": 2.1799772468714447, "grad_norm": 1.2384392901934855, "learning_rate": 7.49915042761716e-07, "loss": 0.0607, "step": 9581 }, { "epoch": 2.1802047781569964, "grad_norm": 2.594760956019517, "learning_rate": 7.498274929436583e-07, "loss": 0.0766, "step": 9582 }, { "epoch": 2.180432309442548, "grad_norm": 2.04775190126401, "learning_rate": 7.497399405743552e-07, "loss": 0.0527, "step": 9583 }, { "epoch": 2.1806598407281, "grad_norm": 2.3937978714203854, "learning_rate": 7.49652385655596e-07, "loss": 0.0353, "step": 9584 }, { "epoch": 2.1808873720136517, "grad_norm": 1.8812082815224427, "learning_rate": 7.495648281891707e-07, "loss": 0.0713, "step": 9585 }, { "epoch": 2.1811149032992034, "grad_norm": 2.6575784223226364, "learning_rate": 7.494772681768683e-07, "loss": 0.0716, "step": 9586 }, { "epoch": 2.1813424345847556, "grad_norm": 1.165442203890328, "learning_rate": 7.493897056204784e-07, "loss": 0.0208, "step": 9587 }, { "epoch": 2.181569965870307, "grad_norm": 1.371548163191165, "learning_rate": 7.493021405217907e-07, "loss": 0.0672, "step": 9588 }, { "epoch": 2.181797497155859, "grad_norm": 1.6820420633054833, "learning_rate": 7.49214572882595e-07, "loss": 0.0641, "step": 9589 }, { "epoch": 2.182025028441411, "grad_norm": 2.7580081413468744, "learning_rate": 7.49127002704681e-07, "loss": 0.0575, "step": 9590 }, { "epoch": 2.1822525597269626, "grad_norm": 0.8828889868952942, "learning_rate": 7.490394299898382e-07, "loss": 0.0146, "step": 9591 }, { "epoch": 2.1824800910125144, "grad_norm": 1.2354119944929676, "learning_rate": 7.489518547398568e-07, "loss": 0.0524, "step": 9592 }, { "epoch": 2.182707622298066, "grad_norm": 1.8210810705906149, "learning_rate": 7.488642769565264e-07, "loss": 0.0098, "step": 9593 }, { "epoch": 2.182935153583618, "grad_norm": 2.141462464205735, "learning_rate": 7.48776696641637e-07, "loss": 0.0781, "step": 9594 }, { "epoch": 2.1831626848691696, "grad_norm": 1.8866884388185365, "learning_rate": 7.486891137969786e-07, "loss": 0.1051, "step": 9595 }, { "epoch": 2.1833902161547214, "grad_norm": 1.2232011243579046, "learning_rate": 7.486015284243413e-07, "loss": 0.0779, "step": 9596 }, { "epoch": 2.183617747440273, "grad_norm": 1.3069532595453608, "learning_rate": 7.485139405255151e-07, "loss": 0.0205, "step": 9597 }, { "epoch": 2.183845278725825, "grad_norm": 1.7750761019098342, "learning_rate": 7.484263501022902e-07, "loss": 0.0922, "step": 9598 }, { "epoch": 2.1840728100113767, "grad_norm": 2.067377867721793, "learning_rate": 7.483387571564567e-07, "loss": 0.0567, "step": 9599 }, { "epoch": 2.1843003412969284, "grad_norm": 1.0342518095213404, "learning_rate": 7.48251161689805e-07, "loss": 0.0337, "step": 9600 }, { "epoch": 2.18452787258248, "grad_norm": 1.7104776462824303, "learning_rate": 7.48163563704125e-07, "loss": 0.0797, "step": 9601 }, { "epoch": 2.184755403868032, "grad_norm": 2.8112183343579664, "learning_rate": 7.480759632012074e-07, "loss": 0.0406, "step": 9602 }, { "epoch": 2.1849829351535837, "grad_norm": 2.4812297741799116, "learning_rate": 7.479883601828428e-07, "loss": 0.0405, "step": 9603 }, { "epoch": 2.1852104664391354, "grad_norm": 1.2978523279780194, "learning_rate": 7.479007546508211e-07, "loss": 0.0556, "step": 9604 }, { "epoch": 2.185437997724687, "grad_norm": 1.4854935371422038, "learning_rate": 7.478131466069331e-07, "loss": 0.0575, "step": 9605 }, { "epoch": 2.185665529010239, "grad_norm": 1.6313942260549827, "learning_rate": 7.477255360529695e-07, "loss": 0.1904, "step": 9606 }, { "epoch": 2.1858930602957907, "grad_norm": 1.216152900468775, "learning_rate": 7.476379229907205e-07, "loss": 0.0696, "step": 9607 }, { "epoch": 2.1861205915813424, "grad_norm": 0.6737250650335149, "learning_rate": 7.475503074219769e-07, "loss": 0.0123, "step": 9608 }, { "epoch": 2.186348122866894, "grad_norm": 1.422171400837565, "learning_rate": 7.474626893485295e-07, "loss": 0.0241, "step": 9609 }, { "epoch": 2.186575654152446, "grad_norm": 1.7517935884683415, "learning_rate": 7.473750687721692e-07, "loss": 0.0414, "step": 9610 }, { "epoch": 2.1868031854379977, "grad_norm": 1.5468806772704229, "learning_rate": 7.472874456946865e-07, "loss": 0.0736, "step": 9611 }, { "epoch": 2.1870307167235494, "grad_norm": 2.435102851240498, "learning_rate": 7.471998201178724e-07, "loss": 0.0396, "step": 9612 }, { "epoch": 2.187258248009101, "grad_norm": 0.9156841203689913, "learning_rate": 7.471121920435176e-07, "loss": 0.013, "step": 9613 }, { "epoch": 2.187485779294653, "grad_norm": 1.3462951849139937, "learning_rate": 7.470245614734132e-07, "loss": 0.0366, "step": 9614 }, { "epoch": 2.1877133105802047, "grad_norm": 2.113392361520175, "learning_rate": 7.469369284093504e-07, "loss": 0.074, "step": 9615 }, { "epoch": 2.1879408418657564, "grad_norm": 1.6015486857455614, "learning_rate": 7.468492928531201e-07, "loss": 0.0441, "step": 9616 }, { "epoch": 2.188168373151308, "grad_norm": 1.8594041276412403, "learning_rate": 7.467616548065134e-07, "loss": 0.0563, "step": 9617 }, { "epoch": 2.18839590443686, "grad_norm": 1.479593231636543, "learning_rate": 7.466740142713217e-07, "loss": 0.0785, "step": 9618 }, { "epoch": 2.1886234357224117, "grad_norm": 1.5060560248711237, "learning_rate": 7.465863712493357e-07, "loss": 0.069, "step": 9619 }, { "epoch": 2.1888509670079634, "grad_norm": 1.9353817486116598, "learning_rate": 7.46498725742347e-07, "loss": 0.0429, "step": 9620 }, { "epoch": 2.189078498293515, "grad_norm": 1.9847954336755138, "learning_rate": 7.464110777521467e-07, "loss": 0.0418, "step": 9621 }, { "epoch": 2.189306029579067, "grad_norm": 1.7208996320826648, "learning_rate": 7.463234272805265e-07, "loss": 0.0688, "step": 9622 }, { "epoch": 2.1895335608646187, "grad_norm": 0.9602382872558913, "learning_rate": 7.462357743292778e-07, "loss": 0.0472, "step": 9623 }, { "epoch": 2.1897610921501705, "grad_norm": 1.7044560878181323, "learning_rate": 7.461481189001917e-07, "loss": 0.0338, "step": 9624 }, { "epoch": 2.189988623435722, "grad_norm": 1.2760275994057422, "learning_rate": 7.460604609950599e-07, "loss": 0.0439, "step": 9625 }, { "epoch": 2.1902161547212744, "grad_norm": 1.6892058616447903, "learning_rate": 7.459728006156741e-07, "loss": 0.0128, "step": 9626 }, { "epoch": 2.1904436860068257, "grad_norm": 2.0382940083903316, "learning_rate": 7.458851377638257e-07, "loss": 0.1025, "step": 9627 }, { "epoch": 2.190671217292378, "grad_norm": 1.068888687313207, "learning_rate": 7.457974724413065e-07, "loss": 0.0652, "step": 9628 }, { "epoch": 2.1908987485779297, "grad_norm": 1.3272239815035083, "learning_rate": 7.457098046499084e-07, "loss": 0.0216, "step": 9629 }, { "epoch": 2.1911262798634814, "grad_norm": 3.1492661389940086, "learning_rate": 7.456221343914228e-07, "loss": 0.1145, "step": 9630 }, { "epoch": 2.191353811149033, "grad_norm": 2.5980285749900074, "learning_rate": 7.455344616676415e-07, "loss": 0.0466, "step": 9631 }, { "epoch": 2.191581342434585, "grad_norm": 0.9492209432938257, "learning_rate": 7.454467864803567e-07, "loss": 0.0564, "step": 9632 }, { "epoch": 2.1918088737201367, "grad_norm": 0.9923431282879376, "learning_rate": 7.453591088313603e-07, "loss": 0.0216, "step": 9633 }, { "epoch": 2.1920364050056884, "grad_norm": 1.8548331462860232, "learning_rate": 7.45271428722444e-07, "loss": 0.046, "step": 9634 }, { "epoch": 2.19226393629124, "grad_norm": 2.08701457162056, "learning_rate": 7.451837461553998e-07, "loss": 0.0339, "step": 9635 }, { "epoch": 2.192491467576792, "grad_norm": 1.6404266103350365, "learning_rate": 7.450960611320204e-07, "loss": 0.0795, "step": 9636 }, { "epoch": 2.1927189988623437, "grad_norm": 1.7091531647594718, "learning_rate": 7.450083736540972e-07, "loss": 0.0306, "step": 9637 }, { "epoch": 2.1929465301478954, "grad_norm": 1.2599120065586638, "learning_rate": 7.449206837234224e-07, "loss": 0.0403, "step": 9638 }, { "epoch": 2.193174061433447, "grad_norm": 3.36104823873805, "learning_rate": 7.448329913417887e-07, "loss": 0.014, "step": 9639 }, { "epoch": 2.193401592718999, "grad_norm": 1.0454678044508545, "learning_rate": 7.447452965109882e-07, "loss": 0.0705, "step": 9640 }, { "epoch": 2.1936291240045507, "grad_norm": 1.2907620588951814, "learning_rate": 7.446575992328128e-07, "loss": 0.0724, "step": 9641 }, { "epoch": 2.1938566552901024, "grad_norm": 6.017704364504656, "learning_rate": 7.445698995090557e-07, "loss": 0.0277, "step": 9642 }, { "epoch": 2.194084186575654, "grad_norm": 1.486887890857098, "learning_rate": 7.444821973415086e-07, "loss": 0.0952, "step": 9643 }, { "epoch": 2.194311717861206, "grad_norm": 1.145657339951393, "learning_rate": 7.443944927319641e-07, "loss": 0.0685, "step": 9644 }, { "epoch": 2.1945392491467577, "grad_norm": 1.3397259790508476, "learning_rate": 7.443067856822149e-07, "loss": 0.0506, "step": 9645 }, { "epoch": 2.1947667804323094, "grad_norm": 1.7411937518722584, "learning_rate": 7.442190761940535e-07, "loss": 0.0531, "step": 9646 }, { "epoch": 2.194994311717861, "grad_norm": 1.192381975108253, "learning_rate": 7.441313642692726e-07, "loss": 0.0218, "step": 9647 }, { "epoch": 2.195221843003413, "grad_norm": 1.6976397972723314, "learning_rate": 7.440436499096647e-07, "loss": 0.0816, "step": 9648 }, { "epoch": 2.1954493742889647, "grad_norm": 1.6314851417235372, "learning_rate": 7.439559331170226e-07, "loss": 0.0651, "step": 9649 }, { "epoch": 2.1956769055745164, "grad_norm": 1.2445680459975024, "learning_rate": 7.438682138931393e-07, "loss": 0.0537, "step": 9650 }, { "epoch": 2.195904436860068, "grad_norm": 1.2745143884595345, "learning_rate": 7.43780492239807e-07, "loss": 0.0983, "step": 9651 }, { "epoch": 2.19613196814562, "grad_norm": 1.8918216438837654, "learning_rate": 7.436927681588192e-07, "loss": 0.0729, "step": 9652 }, { "epoch": 2.1963594994311717, "grad_norm": 1.4902332980583612, "learning_rate": 7.436050416519687e-07, "loss": 0.1101, "step": 9653 }, { "epoch": 2.1965870307167235, "grad_norm": 1.698538000206574, "learning_rate": 7.435173127210482e-07, "loss": 0.0906, "step": 9654 }, { "epoch": 2.196814562002275, "grad_norm": 2.441854587325201, "learning_rate": 7.43429581367851e-07, "loss": 0.0519, "step": 9655 }, { "epoch": 2.197042093287827, "grad_norm": 2.0217096794105065, "learning_rate": 7.4334184759417e-07, "loss": 0.0802, "step": 9656 }, { "epoch": 2.1972696245733787, "grad_norm": 0.5667236791158886, "learning_rate": 7.432541114017984e-07, "loss": 0.009, "step": 9657 }, { "epoch": 2.1974971558589305, "grad_norm": 1.2024057232721754, "learning_rate": 7.431663727925293e-07, "loss": 0.0705, "step": 9658 }, { "epoch": 2.197724687144482, "grad_norm": 1.4816985098359061, "learning_rate": 7.430786317681559e-07, "loss": 0.0331, "step": 9659 }, { "epoch": 2.197952218430034, "grad_norm": 1.2982682390365592, "learning_rate": 7.429908883304716e-07, "loss": 0.1017, "step": 9660 }, { "epoch": 2.1981797497155857, "grad_norm": 1.479198664737689, "learning_rate": 7.429031424812697e-07, "loss": 0.0379, "step": 9661 }, { "epoch": 2.1984072810011375, "grad_norm": 2.452143558116467, "learning_rate": 7.428153942223433e-07, "loss": 0.0716, "step": 9662 }, { "epoch": 2.198634812286689, "grad_norm": 1.8634538906957014, "learning_rate": 7.427276435554861e-07, "loss": 0.0993, "step": 9663 }, { "epoch": 2.198862343572241, "grad_norm": 1.9471649792562606, "learning_rate": 7.426398904824916e-07, "loss": 0.1173, "step": 9664 }, { "epoch": 2.199089874857793, "grad_norm": 2.1771342194370216, "learning_rate": 7.425521350051529e-07, "loss": 0.1255, "step": 9665 }, { "epoch": 2.1993174061433445, "grad_norm": 2.055664142746377, "learning_rate": 7.42464377125264e-07, "loss": 0.1055, "step": 9666 }, { "epoch": 2.1995449374288967, "grad_norm": 0.56759498734142, "learning_rate": 7.423766168446187e-07, "loss": 0.007, "step": 9667 }, { "epoch": 2.1997724687144484, "grad_norm": 2.608154903722345, "learning_rate": 7.422888541650097e-07, "loss": 0.0689, "step": 9668 }, { "epoch": 2.2, "grad_norm": 1.7413005487049034, "learning_rate": 7.422010890882317e-07, "loss": 0.0467, "step": 9669 }, { "epoch": 2.200227531285552, "grad_norm": 1.0901352604030778, "learning_rate": 7.421133216160781e-07, "loss": 0.0136, "step": 9670 }, { "epoch": 2.2004550625711037, "grad_norm": 1.4326097632701875, "learning_rate": 7.420255517503424e-07, "loss": 0.0294, "step": 9671 }, { "epoch": 2.2006825938566554, "grad_norm": 1.7809393341390753, "learning_rate": 7.41937779492819e-07, "loss": 0.0249, "step": 9672 }, { "epoch": 2.200910125142207, "grad_norm": 1.6838160667034856, "learning_rate": 7.418500048453016e-07, "loss": 0.0326, "step": 9673 }, { "epoch": 2.201137656427759, "grad_norm": 1.3809458996390551, "learning_rate": 7.417622278095838e-07, "loss": 0.0794, "step": 9674 }, { "epoch": 2.2013651877133107, "grad_norm": 2.4547422069992657, "learning_rate": 7.416744483874602e-07, "loss": 0.0687, "step": 9675 }, { "epoch": 2.2015927189988624, "grad_norm": 1.0275726858368945, "learning_rate": 7.415866665807245e-07, "loss": 0.0722, "step": 9676 }, { "epoch": 2.201820250284414, "grad_norm": 1.2538058634007505, "learning_rate": 7.414988823911708e-07, "loss": 0.1062, "step": 9677 }, { "epoch": 2.202047781569966, "grad_norm": 1.1193489447576097, "learning_rate": 7.41411095820593e-07, "loss": 0.0275, "step": 9678 }, { "epoch": 2.2022753128555177, "grad_norm": 1.6408552393319376, "learning_rate": 7.41323306870786e-07, "loss": 0.1016, "step": 9679 }, { "epoch": 2.2025028441410694, "grad_norm": 1.474030368936237, "learning_rate": 7.412355155435437e-07, "loss": 0.0171, "step": 9680 }, { "epoch": 2.202730375426621, "grad_norm": 1.6839679677626955, "learning_rate": 7.411477218406602e-07, "loss": 0.0297, "step": 9681 }, { "epoch": 2.202957906712173, "grad_norm": 1.7100779752934014, "learning_rate": 7.410599257639299e-07, "loss": 0.0816, "step": 9682 }, { "epoch": 2.2031854379977247, "grad_norm": 1.7401050895523356, "learning_rate": 7.409721273151473e-07, "loss": 0.0679, "step": 9683 }, { "epoch": 2.2034129692832765, "grad_norm": 1.0215883331858133, "learning_rate": 7.408843264961068e-07, "loss": 0.0356, "step": 9684 }, { "epoch": 2.203640500568828, "grad_norm": 1.85268010108362, "learning_rate": 7.407965233086029e-07, "loss": 0.1243, "step": 9685 }, { "epoch": 2.20386803185438, "grad_norm": 2.511210838270463, "learning_rate": 7.407087177544304e-07, "loss": 0.0713, "step": 9686 }, { "epoch": 2.2040955631399317, "grad_norm": 2.284806877904906, "learning_rate": 7.406209098353834e-07, "loss": 0.072, "step": 9687 }, { "epoch": 2.2043230944254835, "grad_norm": 1.3709918922264874, "learning_rate": 7.405330995532566e-07, "loss": 0.1123, "step": 9688 }, { "epoch": 2.204550625711035, "grad_norm": 1.7365254377542252, "learning_rate": 7.40445286909845e-07, "loss": 0.0289, "step": 9689 }, { "epoch": 2.204778156996587, "grad_norm": 2.709186003181355, "learning_rate": 7.403574719069431e-07, "loss": 0.0781, "step": 9690 }, { "epoch": 2.2050056882821387, "grad_norm": 1.6901995066750213, "learning_rate": 7.402696545463458e-07, "loss": 0.0449, "step": 9691 }, { "epoch": 2.2052332195676905, "grad_norm": 1.0710209839388836, "learning_rate": 7.401818348298478e-07, "loss": 0.0376, "step": 9692 }, { "epoch": 2.2054607508532422, "grad_norm": 1.7365012428305326, "learning_rate": 7.400940127592441e-07, "loss": 0.0298, "step": 9693 }, { "epoch": 2.205688282138794, "grad_norm": 1.4249477504420347, "learning_rate": 7.400061883363297e-07, "loss": 0.0229, "step": 9694 }, { "epoch": 2.2059158134243457, "grad_norm": 2.0061713425028365, "learning_rate": 7.399183615628991e-07, "loss": 0.0284, "step": 9695 }, { "epoch": 2.2061433447098975, "grad_norm": 2.028487499424723, "learning_rate": 7.398305324407479e-07, "loss": 0.0797, "step": 9696 }, { "epoch": 2.2063708759954492, "grad_norm": 0.8043899684970321, "learning_rate": 7.397427009716709e-07, "loss": 0.0031, "step": 9697 }, { "epoch": 2.206598407281001, "grad_norm": 2.1450208855852666, "learning_rate": 7.396548671574632e-07, "loss": 0.0562, "step": 9698 }, { "epoch": 2.2068259385665527, "grad_norm": 1.1377903816480621, "learning_rate": 7.395670309999201e-07, "loss": 0.0342, "step": 9699 }, { "epoch": 2.2070534698521045, "grad_norm": 1.8889917881251588, "learning_rate": 7.394791925008366e-07, "loss": 0.023, "step": 9700 }, { "epoch": 2.2072810011376562, "grad_norm": 1.4591739416194378, "learning_rate": 7.39391351662008e-07, "loss": 0.0581, "step": 9701 }, { "epoch": 2.207508532423208, "grad_norm": 2.1030733777122066, "learning_rate": 7.393035084852296e-07, "loss": 0.0693, "step": 9702 }, { "epoch": 2.2077360637087597, "grad_norm": 1.3391074219072032, "learning_rate": 7.39215662972297e-07, "loss": 0.0739, "step": 9703 }, { "epoch": 2.207963594994312, "grad_norm": 1.8189202859693807, "learning_rate": 7.391278151250053e-07, "loss": 0.0673, "step": 9704 }, { "epoch": 2.2081911262798632, "grad_norm": 1.8331683339714793, "learning_rate": 7.390399649451501e-07, "loss": 0.0411, "step": 9705 }, { "epoch": 2.2084186575654154, "grad_norm": 2.013278806253988, "learning_rate": 7.389521124345271e-07, "loss": 0.0931, "step": 9706 }, { "epoch": 2.208646188850967, "grad_norm": 1.5027122811390399, "learning_rate": 7.388642575949315e-07, "loss": 0.0791, "step": 9707 }, { "epoch": 2.208873720136519, "grad_norm": 2.2217244601224015, "learning_rate": 7.387764004281588e-07, "loss": 0.0684, "step": 9708 }, { "epoch": 2.2091012514220707, "grad_norm": 0.5824954666368191, "learning_rate": 7.38688540936005e-07, "loss": 0.0079, "step": 9709 }, { "epoch": 2.2093287827076225, "grad_norm": 1.642309980878137, "learning_rate": 7.386006791202656e-07, "loss": 0.078, "step": 9710 }, { "epoch": 2.209556313993174, "grad_norm": 1.86901387440969, "learning_rate": 7.385128149827364e-07, "loss": 0.1119, "step": 9711 }, { "epoch": 2.209783845278726, "grad_norm": 1.4169879392035163, "learning_rate": 7.384249485252132e-07, "loss": 0.0482, "step": 9712 }, { "epoch": 2.2100113765642777, "grad_norm": 1.9100739304666876, "learning_rate": 7.383370797494918e-07, "loss": 0.0434, "step": 9713 }, { "epoch": 2.2102389078498295, "grad_norm": 2.399990436019343, "learning_rate": 7.382492086573679e-07, "loss": 0.0694, "step": 9714 }, { "epoch": 2.210466439135381, "grad_norm": 1.812050241754321, "learning_rate": 7.381613352506376e-07, "loss": 0.1388, "step": 9715 }, { "epoch": 2.210693970420933, "grad_norm": 1.5722994482086958, "learning_rate": 7.380734595310969e-07, "loss": 0.0942, "step": 9716 }, { "epoch": 2.2109215017064847, "grad_norm": 1.9028886715125728, "learning_rate": 7.37985581500542e-07, "loss": 0.038, "step": 9717 }, { "epoch": 2.2111490329920365, "grad_norm": 1.6522125900547038, "learning_rate": 7.378977011607684e-07, "loss": 0.0784, "step": 9718 }, { "epoch": 2.211376564277588, "grad_norm": 1.353064902067639, "learning_rate": 7.378098185135727e-07, "loss": 0.0707, "step": 9719 }, { "epoch": 2.21160409556314, "grad_norm": 1.6426673688489113, "learning_rate": 7.377219335607509e-07, "loss": 0.0498, "step": 9720 }, { "epoch": 2.2118316268486917, "grad_norm": 1.589301263115234, "learning_rate": 7.376340463040993e-07, "loss": 0.1053, "step": 9721 }, { "epoch": 2.2120591581342435, "grad_norm": 1.9195414892558205, "learning_rate": 7.375461567454138e-07, "loss": 0.0299, "step": 9722 }, { "epoch": 2.2122866894197952, "grad_norm": 1.9133285965390077, "learning_rate": 7.374582648864912e-07, "loss": 0.0623, "step": 9723 }, { "epoch": 2.212514220705347, "grad_norm": 1.384011575068315, "learning_rate": 7.373703707291277e-07, "loss": 0.0974, "step": 9724 }, { "epoch": 2.2127417519908987, "grad_norm": 1.2982385454864285, "learning_rate": 7.372824742751194e-07, "loss": 0.0605, "step": 9725 }, { "epoch": 2.2129692832764505, "grad_norm": 1.7065357308487878, "learning_rate": 7.37194575526263e-07, "loss": 0.1313, "step": 9726 }, { "epoch": 2.2131968145620022, "grad_norm": 1.374205231380851, "learning_rate": 7.371066744843551e-07, "loss": 0.0545, "step": 9727 }, { "epoch": 2.213424345847554, "grad_norm": 1.689381913821839, "learning_rate": 7.37018771151192e-07, "loss": 0.0475, "step": 9728 }, { "epoch": 2.2136518771331057, "grad_norm": 1.9299964339226994, "learning_rate": 7.369308655285702e-07, "loss": 0.0377, "step": 9729 }, { "epoch": 2.2138794084186575, "grad_norm": 1.126734400343911, "learning_rate": 7.368429576182869e-07, "loss": 0.0373, "step": 9730 }, { "epoch": 2.2141069397042092, "grad_norm": 2.1669924188436376, "learning_rate": 7.367550474221381e-07, "loss": 0.0428, "step": 9731 }, { "epoch": 2.214334470989761, "grad_norm": 1.3739401667507931, "learning_rate": 7.366671349419207e-07, "loss": 0.0837, "step": 9732 }, { "epoch": 2.2145620022753127, "grad_norm": 1.4806133911538504, "learning_rate": 7.365792201794318e-07, "loss": 0.0595, "step": 9733 }, { "epoch": 2.2147895335608645, "grad_norm": 1.8386823423252745, "learning_rate": 7.364913031364679e-07, "loss": 0.0383, "step": 9734 }, { "epoch": 2.2150170648464163, "grad_norm": 1.1325561103587034, "learning_rate": 7.364033838148258e-07, "loss": 0.1147, "step": 9735 }, { "epoch": 2.215244596131968, "grad_norm": 1.5302834735325364, "learning_rate": 7.363154622163029e-07, "loss": 0.0474, "step": 9736 }, { "epoch": 2.2154721274175198, "grad_norm": 0.9544798037125929, "learning_rate": 7.362275383426956e-07, "loss": 0.0199, "step": 9737 }, { "epoch": 2.2156996587030715, "grad_norm": 1.5671573786227384, "learning_rate": 7.361396121958011e-07, "loss": 0.0574, "step": 9738 }, { "epoch": 2.2159271899886233, "grad_norm": 0.8337324725004281, "learning_rate": 7.360516837774165e-07, "loss": 0.0488, "step": 9739 }, { "epoch": 2.216154721274175, "grad_norm": 1.7156210086056303, "learning_rate": 7.359637530893389e-07, "loss": 0.0481, "step": 9740 }, { "epoch": 2.2163822525597268, "grad_norm": 1.8430166484430528, "learning_rate": 7.358758201333654e-07, "loss": 0.1091, "step": 9741 }, { "epoch": 2.2166097838452785, "grad_norm": 1.7462111277595131, "learning_rate": 7.357878849112932e-07, "loss": 0.1213, "step": 9742 }, { "epoch": 2.2168373151308307, "grad_norm": 0.9610959896146984, "learning_rate": 7.356999474249196e-07, "loss": 0.0181, "step": 9743 }, { "epoch": 2.217064846416382, "grad_norm": 2.0783946435552605, "learning_rate": 7.356120076760417e-07, "loss": 0.0559, "step": 9744 }, { "epoch": 2.217292377701934, "grad_norm": 1.207894693989277, "learning_rate": 7.35524065666457e-07, "loss": 0.0635, "step": 9745 }, { "epoch": 2.217519908987486, "grad_norm": 1.4518995713090144, "learning_rate": 7.354361213979627e-07, "loss": 0.102, "step": 9746 }, { "epoch": 2.2177474402730377, "grad_norm": 2.0477258265422487, "learning_rate": 7.353481748723565e-07, "loss": 0.0964, "step": 9747 }, { "epoch": 2.2179749715585895, "grad_norm": 1.5116308436141248, "learning_rate": 7.352602260914356e-07, "loss": 0.024, "step": 9748 }, { "epoch": 2.218202502844141, "grad_norm": 1.1216515051005054, "learning_rate": 7.351722750569976e-07, "loss": 0.0968, "step": 9749 }, { "epoch": 2.218430034129693, "grad_norm": 1.1057340754528662, "learning_rate": 7.350843217708402e-07, "loss": 0.0489, "step": 9750 }, { "epoch": 2.2186575654152447, "grad_norm": 1.6239021685625052, "learning_rate": 7.349963662347608e-07, "loss": 0.0491, "step": 9751 }, { "epoch": 2.2188850967007965, "grad_norm": 1.5597764106867107, "learning_rate": 7.349084084505569e-07, "loss": 0.0729, "step": 9752 }, { "epoch": 2.2191126279863482, "grad_norm": 1.6577340113661923, "learning_rate": 7.348204484200267e-07, "loss": 0.0583, "step": 9753 }, { "epoch": 2.2193401592719, "grad_norm": 1.4670468899875693, "learning_rate": 7.347324861449676e-07, "loss": 0.0628, "step": 9754 }, { "epoch": 2.2195676905574517, "grad_norm": 1.272090927322844, "learning_rate": 7.346445216271773e-07, "loss": 0.0516, "step": 9755 }, { "epoch": 2.2197952218430035, "grad_norm": 1.6793841175002284, "learning_rate": 7.34556554868454e-07, "loss": 0.0401, "step": 9756 }, { "epoch": 2.2200227531285552, "grad_norm": 1.1523066967480309, "learning_rate": 7.344685858705952e-07, "loss": 0.0843, "step": 9757 }, { "epoch": 2.220250284414107, "grad_norm": 1.2505093583404552, "learning_rate": 7.343806146353991e-07, "loss": 0.0474, "step": 9758 }, { "epoch": 2.2204778156996587, "grad_norm": 1.5414448891525965, "learning_rate": 7.342926411646634e-07, "loss": 0.0872, "step": 9759 }, { "epoch": 2.2207053469852105, "grad_norm": 1.1602468762362055, "learning_rate": 7.342046654601864e-07, "loss": 0.0455, "step": 9760 }, { "epoch": 2.2209328782707622, "grad_norm": 1.6372674271227037, "learning_rate": 7.341166875237661e-07, "loss": 0.0357, "step": 9761 }, { "epoch": 2.221160409556314, "grad_norm": 1.3260888967521394, "learning_rate": 7.340287073572004e-07, "loss": 0.072, "step": 9762 }, { "epoch": 2.2213879408418657, "grad_norm": 2.102233929435973, "learning_rate": 7.339407249622878e-07, "loss": 0.0621, "step": 9763 }, { "epoch": 2.2216154721274175, "grad_norm": 2.1229429716481145, "learning_rate": 7.33852740340826e-07, "loss": 0.0383, "step": 9764 }, { "epoch": 2.2218430034129693, "grad_norm": 1.1120423681299842, "learning_rate": 7.337647534946137e-07, "loss": 0.0634, "step": 9765 }, { "epoch": 2.222070534698521, "grad_norm": 1.2874234004712588, "learning_rate": 7.33676764425449e-07, "loss": 0.0163, "step": 9766 }, { "epoch": 2.2222980659840728, "grad_norm": 1.6403793909671274, "learning_rate": 7.335887731351303e-07, "loss": 0.1012, "step": 9767 }, { "epoch": 2.2225255972696245, "grad_norm": 1.4790826574202585, "learning_rate": 7.33500779625456e-07, "loss": 0.0641, "step": 9768 }, { "epoch": 2.2227531285551763, "grad_norm": 1.831512684653163, "learning_rate": 7.334127838982244e-07, "loss": 0.0289, "step": 9769 }, { "epoch": 2.222980659840728, "grad_norm": 1.1976119278530164, "learning_rate": 7.333247859552343e-07, "loss": 0.1138, "step": 9770 }, { "epoch": 2.2232081911262798, "grad_norm": 1.42079083380393, "learning_rate": 7.332367857982836e-07, "loss": 0.0193, "step": 9771 }, { "epoch": 2.2234357224118315, "grad_norm": 0.8551325789309228, "learning_rate": 7.331487834291712e-07, "loss": 0.0093, "step": 9772 }, { "epoch": 2.2236632536973833, "grad_norm": 1.5245253810997095, "learning_rate": 7.33060778849696e-07, "loss": 0.064, "step": 9773 }, { "epoch": 2.223890784982935, "grad_norm": 2.3375577939766847, "learning_rate": 7.329727720616564e-07, "loss": 0.0725, "step": 9774 }, { "epoch": 2.2241183162684868, "grad_norm": 1.6557009780459495, "learning_rate": 7.328847630668508e-07, "loss": 0.1068, "step": 9775 }, { "epoch": 2.2243458475540385, "grad_norm": 1.5981644410565476, "learning_rate": 7.327967518670784e-07, "loss": 0.1051, "step": 9776 }, { "epoch": 2.2245733788395903, "grad_norm": 1.3488788647667986, "learning_rate": 7.327087384641378e-07, "loss": 0.0914, "step": 9777 }, { "epoch": 2.224800910125142, "grad_norm": 2.0507278721482987, "learning_rate": 7.326207228598278e-07, "loss": 0.0369, "step": 9778 }, { "epoch": 2.225028441410694, "grad_norm": 1.8708311122404728, "learning_rate": 7.325327050559473e-07, "loss": 0.1559, "step": 9779 }, { "epoch": 2.2252559726962455, "grad_norm": 1.341988589812003, "learning_rate": 7.324446850542954e-07, "loss": 0.0766, "step": 9780 }, { "epoch": 2.2254835039817973, "grad_norm": 1.654650910486216, "learning_rate": 7.32356662856671e-07, "loss": 0.0753, "step": 9781 }, { "epoch": 2.2257110352673495, "grad_norm": 1.8807522907636354, "learning_rate": 7.322686384648727e-07, "loss": 0.0791, "step": 9782 }, { "epoch": 2.225938566552901, "grad_norm": 1.5036944036892335, "learning_rate": 7.321806118807e-07, "loss": 0.035, "step": 9783 }, { "epoch": 2.226166097838453, "grad_norm": 1.4208447880112791, "learning_rate": 7.320925831059519e-07, "loss": 0.0778, "step": 9784 }, { "epoch": 2.2263936291240047, "grad_norm": 1.7838232242465537, "learning_rate": 7.320045521424277e-07, "loss": 0.0627, "step": 9785 }, { "epoch": 2.2266211604095565, "grad_norm": 1.803026894813878, "learning_rate": 7.31916518991926e-07, "loss": 0.086, "step": 9786 }, { "epoch": 2.2268486916951082, "grad_norm": 3.3696694661970064, "learning_rate": 7.318284836562469e-07, "loss": 0.1026, "step": 9787 }, { "epoch": 2.22707622298066, "grad_norm": 2.4538791249337257, "learning_rate": 7.31740446137189e-07, "loss": 0.0489, "step": 9788 }, { "epoch": 2.2273037542662117, "grad_norm": 1.3973711922977448, "learning_rate": 7.31652406436552e-07, "loss": 0.0868, "step": 9789 }, { "epoch": 2.2275312855517635, "grad_norm": 1.0066511313590052, "learning_rate": 7.315643645561351e-07, "loss": 0.0544, "step": 9790 }, { "epoch": 2.2277588168373152, "grad_norm": 1.0743552925218625, "learning_rate": 7.314763204977376e-07, "loss": 0.0299, "step": 9791 }, { "epoch": 2.227986348122867, "grad_norm": 2.483939066987129, "learning_rate": 7.313882742631594e-07, "loss": 0.1224, "step": 9792 }, { "epoch": 2.2282138794084188, "grad_norm": 2.3064273686519816, "learning_rate": 7.313002258541994e-07, "loss": 0.0925, "step": 9793 }, { "epoch": 2.2284414106939705, "grad_norm": 1.2767050376656814, "learning_rate": 7.312121752726577e-07, "loss": 0.0508, "step": 9794 }, { "epoch": 2.2286689419795223, "grad_norm": 1.0868279370785345, "learning_rate": 7.311241225203336e-07, "loss": 0.0548, "step": 9795 }, { "epoch": 2.228896473265074, "grad_norm": 1.1202440281984138, "learning_rate": 7.310360675990266e-07, "loss": 0.0295, "step": 9796 }, { "epoch": 2.2291240045506258, "grad_norm": 1.502226360489142, "learning_rate": 7.309480105105368e-07, "loss": 0.0578, "step": 9797 }, { "epoch": 2.2293515358361775, "grad_norm": 1.4792232919585468, "learning_rate": 7.308599512566636e-07, "loss": 0.0384, "step": 9798 }, { "epoch": 2.2295790671217293, "grad_norm": 1.3290497926242055, "learning_rate": 7.30771889839207e-07, "loss": 0.0159, "step": 9799 }, { "epoch": 2.229806598407281, "grad_norm": 1.2580296624029728, "learning_rate": 7.306838262599666e-07, "loss": 0.0841, "step": 9800 }, { "epoch": 2.2300341296928328, "grad_norm": 2.081060327990041, "learning_rate": 7.305957605207423e-07, "loss": 0.0494, "step": 9801 }, { "epoch": 2.2302616609783845, "grad_norm": 2.374571970889199, "learning_rate": 7.30507692623334e-07, "loss": 0.0498, "step": 9802 }, { "epoch": 2.2304891922639363, "grad_norm": 1.4096838482715424, "learning_rate": 7.304196225695417e-07, "loss": 0.033, "step": 9803 }, { "epoch": 2.230716723549488, "grad_norm": 1.2938763635639914, "learning_rate": 7.303315503611654e-07, "loss": 0.0559, "step": 9804 }, { "epoch": 2.2309442548350398, "grad_norm": 2.2710267938224122, "learning_rate": 7.302434760000053e-07, "loss": 0.1357, "step": 9805 }, { "epoch": 2.2311717861205915, "grad_norm": 1.2615744956314672, "learning_rate": 7.301553994878613e-07, "loss": 0.035, "step": 9806 }, { "epoch": 2.2313993174061433, "grad_norm": 1.9084302832787015, "learning_rate": 7.300673208265332e-07, "loss": 0.0298, "step": 9807 }, { "epoch": 2.231626848691695, "grad_norm": 1.80520200352483, "learning_rate": 7.299792400178219e-07, "loss": 0.0513, "step": 9808 }, { "epoch": 2.231854379977247, "grad_norm": 2.324964360887839, "learning_rate": 7.298911570635267e-07, "loss": 0.1249, "step": 9809 }, { "epoch": 2.2320819112627985, "grad_norm": 0.5890416085092136, "learning_rate": 7.298030719654487e-07, "loss": 0.0085, "step": 9810 }, { "epoch": 2.2323094425483503, "grad_norm": 1.2366273798903231, "learning_rate": 7.297149847253877e-07, "loss": 0.0566, "step": 9811 }, { "epoch": 2.232536973833902, "grad_norm": 1.321596022173302, "learning_rate": 7.296268953451443e-07, "loss": 0.063, "step": 9812 }, { "epoch": 2.232764505119454, "grad_norm": 1.6276366578385928, "learning_rate": 7.295388038265188e-07, "loss": 0.0917, "step": 9813 }, { "epoch": 2.2329920364050055, "grad_norm": 1.6677993307189287, "learning_rate": 7.294507101713115e-07, "loss": 0.042, "step": 9814 }, { "epoch": 2.2332195676905573, "grad_norm": 1.6822351005486869, "learning_rate": 7.29362614381323e-07, "loss": 0.0311, "step": 9815 }, { "epoch": 2.233447098976109, "grad_norm": 1.3732093547178756, "learning_rate": 7.292745164583537e-07, "loss": 0.0491, "step": 9816 }, { "epoch": 2.233674630261661, "grad_norm": 1.2500554936588006, "learning_rate": 7.291864164042042e-07, "loss": 0.0308, "step": 9817 }, { "epoch": 2.2339021615472126, "grad_norm": 1.857020354428366, "learning_rate": 7.290983142206756e-07, "loss": 0.0297, "step": 9818 }, { "epoch": 2.2341296928327643, "grad_norm": 1.9841408180487183, "learning_rate": 7.290102099095676e-07, "loss": 0.1278, "step": 9819 }, { "epoch": 2.234357224118316, "grad_norm": 1.4379399634472614, "learning_rate": 7.289221034726816e-07, "loss": 0.0671, "step": 9820 }, { "epoch": 2.2345847554038683, "grad_norm": 1.473057794580955, "learning_rate": 7.288339949118182e-07, "loss": 0.1103, "step": 9821 }, { "epoch": 2.23481228668942, "grad_norm": 1.0442658715530824, "learning_rate": 7.287458842287781e-07, "loss": 0.0531, "step": 9822 }, { "epoch": 2.2350398179749718, "grad_norm": 2.5068843676249326, "learning_rate": 7.286577714253619e-07, "loss": 0.1222, "step": 9823 }, { "epoch": 2.2352673492605235, "grad_norm": 1.4522134035301333, "learning_rate": 7.285696565033711e-07, "loss": 0.0874, "step": 9824 }, { "epoch": 2.2354948805460753, "grad_norm": 1.2481878278233278, "learning_rate": 7.284815394646058e-07, "loss": 0.0815, "step": 9825 }, { "epoch": 2.235722411831627, "grad_norm": 1.5249838750004165, "learning_rate": 7.283934203108675e-07, "loss": 0.1, "step": 9826 }, { "epoch": 2.2359499431171788, "grad_norm": 1.3030221063732124, "learning_rate": 7.283052990439571e-07, "loss": 0.0594, "step": 9827 }, { "epoch": 2.2361774744027305, "grad_norm": 2.3216748010911283, "learning_rate": 7.282171756656756e-07, "loss": 0.0255, "step": 9828 }, { "epoch": 2.2364050056882823, "grad_norm": 1.9443385917771203, "learning_rate": 7.281290501778238e-07, "loss": 0.0457, "step": 9829 }, { "epoch": 2.236632536973834, "grad_norm": 1.0056931152888324, "learning_rate": 7.280409225822033e-07, "loss": 0.0233, "step": 9830 }, { "epoch": 2.2368600682593858, "grad_norm": 1.3155469661761876, "learning_rate": 7.279527928806152e-07, "loss": 0.1135, "step": 9831 }, { "epoch": 2.2370875995449375, "grad_norm": 0.7904031792767765, "learning_rate": 7.278646610748602e-07, "loss": 0.0064, "step": 9832 }, { "epoch": 2.2373151308304893, "grad_norm": 1.9904079589452548, "learning_rate": 7.277765271667402e-07, "loss": 0.1661, "step": 9833 }, { "epoch": 2.237542662116041, "grad_norm": 1.4170094295444529, "learning_rate": 7.276883911580561e-07, "loss": 0.0539, "step": 9834 }, { "epoch": 2.2377701934015928, "grad_norm": 1.4207997584092764, "learning_rate": 7.276002530506094e-07, "loss": 0.0577, "step": 9835 }, { "epoch": 2.2379977246871445, "grad_norm": 1.2605270035527005, "learning_rate": 7.275121128462012e-07, "loss": 0.0456, "step": 9836 }, { "epoch": 2.2382252559726963, "grad_norm": 2.3260293746886114, "learning_rate": 7.274239705466336e-07, "loss": 0.0451, "step": 9837 }, { "epoch": 2.238452787258248, "grad_norm": 1.3997759929972946, "learning_rate": 7.273358261537074e-07, "loss": 0.0354, "step": 9838 }, { "epoch": 2.2386803185438, "grad_norm": 1.2172453709257616, "learning_rate": 7.272476796692242e-07, "loss": 0.0561, "step": 9839 }, { "epoch": 2.2389078498293515, "grad_norm": 1.4994253324301205, "learning_rate": 7.271595310949858e-07, "loss": 0.0566, "step": 9840 }, { "epoch": 2.2391353811149033, "grad_norm": 1.4451968807486255, "learning_rate": 7.270713804327937e-07, "loss": 0.0622, "step": 9841 }, { "epoch": 2.239362912400455, "grad_norm": 0.8515032871637725, "learning_rate": 7.269832276844494e-07, "loss": 0.0179, "step": 9842 }, { "epoch": 2.239590443686007, "grad_norm": 1.3621068936112914, "learning_rate": 7.26895072851755e-07, "loss": 0.0986, "step": 9843 }, { "epoch": 2.2398179749715585, "grad_norm": 1.5511523626297565, "learning_rate": 7.268069159365117e-07, "loss": 0.0241, "step": 9844 }, { "epoch": 2.2400455062571103, "grad_norm": 1.9676231000096438, "learning_rate": 7.267187569405215e-07, "loss": 0.0417, "step": 9845 }, { "epoch": 2.240273037542662, "grad_norm": 1.6958828791299405, "learning_rate": 7.266305958655861e-07, "loss": 0.0585, "step": 9846 }, { "epoch": 2.240500568828214, "grad_norm": 0.9741324819867013, "learning_rate": 7.265424327135077e-07, "loss": 0.0231, "step": 9847 }, { "epoch": 2.2407281001137656, "grad_norm": 1.630118897926355, "learning_rate": 7.264542674860878e-07, "loss": 0.0695, "step": 9848 }, { "epoch": 2.2409556313993173, "grad_norm": 1.9962775184840824, "learning_rate": 7.263661001851284e-07, "loss": 0.0488, "step": 9849 }, { "epoch": 2.241183162684869, "grad_norm": 0.9182636614486046, "learning_rate": 7.262779308124317e-07, "loss": 0.0788, "step": 9850 }, { "epoch": 2.241410693970421, "grad_norm": 1.3622488518858606, "learning_rate": 7.261897593697995e-07, "loss": 0.0771, "step": 9851 }, { "epoch": 2.2416382252559726, "grad_norm": 1.3539481154987774, "learning_rate": 7.26101585859034e-07, "loss": 0.075, "step": 9852 }, { "epoch": 2.2418657565415243, "grad_norm": 1.7429889114509627, "learning_rate": 7.26013410281937e-07, "loss": 0.0889, "step": 9853 }, { "epoch": 2.242093287827076, "grad_norm": 2.5125690754750236, "learning_rate": 7.259252326403111e-07, "loss": 0.0732, "step": 9854 }, { "epoch": 2.242320819112628, "grad_norm": 2.6576290109840848, "learning_rate": 7.258370529359583e-07, "loss": 0.0606, "step": 9855 }, { "epoch": 2.2425483503981796, "grad_norm": 1.1706931835111798, "learning_rate": 7.257488711706808e-07, "loss": 0.0737, "step": 9856 }, { "epoch": 2.2427758816837313, "grad_norm": 1.6960326501685519, "learning_rate": 7.256606873462808e-07, "loss": 0.0368, "step": 9857 }, { "epoch": 2.243003412969283, "grad_norm": 2.6125585430856084, "learning_rate": 7.255725014645608e-07, "loss": 0.0674, "step": 9858 }, { "epoch": 2.243230944254835, "grad_norm": 2.1835334473959938, "learning_rate": 7.254843135273229e-07, "loss": 0.0862, "step": 9859 }, { "epoch": 2.243458475540387, "grad_norm": 2.3970061321251235, "learning_rate": 7.253961235363699e-07, "loss": 0.0495, "step": 9860 }, { "epoch": 2.2436860068259388, "grad_norm": 2.074108416187049, "learning_rate": 7.253079314935038e-07, "loss": 0.0454, "step": 9861 }, { "epoch": 2.2439135381114905, "grad_norm": 1.328079819897513, "learning_rate": 7.252197374005273e-07, "loss": 0.0191, "step": 9862 }, { "epoch": 2.2441410693970423, "grad_norm": 1.2939855825394784, "learning_rate": 7.251315412592431e-07, "loss": 0.0578, "step": 9863 }, { "epoch": 2.244368600682594, "grad_norm": 2.2509045818859548, "learning_rate": 7.250433430714534e-07, "loss": 0.1045, "step": 9864 }, { "epoch": 2.244596131968146, "grad_norm": 1.4898212729805753, "learning_rate": 7.249551428389612e-07, "loss": 0.1267, "step": 9865 }, { "epoch": 2.2448236632536975, "grad_norm": 1.6614659712378599, "learning_rate": 7.248669405635686e-07, "loss": 0.0553, "step": 9866 }, { "epoch": 2.2450511945392493, "grad_norm": 1.8869151203162269, "learning_rate": 7.247787362470789e-07, "loss": 0.0916, "step": 9867 }, { "epoch": 2.245278725824801, "grad_norm": 1.5458943538465553, "learning_rate": 7.246905298912945e-07, "loss": 0.0554, "step": 9868 }, { "epoch": 2.245506257110353, "grad_norm": 0.7392169094288412, "learning_rate": 7.246023214980183e-07, "loss": 0.0524, "step": 9869 }, { "epoch": 2.2457337883959045, "grad_norm": 1.8209684923561502, "learning_rate": 7.245141110690529e-07, "loss": 0.0322, "step": 9870 }, { "epoch": 2.2459613196814563, "grad_norm": 1.522212197488696, "learning_rate": 7.244258986062015e-07, "loss": 0.0645, "step": 9871 }, { "epoch": 2.246188850967008, "grad_norm": 1.3876316248473302, "learning_rate": 7.243376841112668e-07, "loss": 0.0603, "step": 9872 }, { "epoch": 2.24641638225256, "grad_norm": 1.5317231710378987, "learning_rate": 7.242494675860515e-07, "loss": 0.1296, "step": 9873 }, { "epoch": 2.2466439135381115, "grad_norm": 1.1426129960504483, "learning_rate": 7.241612490323591e-07, "loss": 0.0823, "step": 9874 }, { "epoch": 2.2468714448236633, "grad_norm": 2.3052770604857344, "learning_rate": 7.240730284519924e-07, "loss": 0.1218, "step": 9875 }, { "epoch": 2.247098976109215, "grad_norm": 1.5940210980654792, "learning_rate": 7.239848058467544e-07, "loss": 0.1389, "step": 9876 }, { "epoch": 2.247326507394767, "grad_norm": 1.7269644276056755, "learning_rate": 7.238965812184482e-07, "loss": 0.0697, "step": 9877 }, { "epoch": 2.2475540386803186, "grad_norm": 1.0238053294267258, "learning_rate": 7.23808354568877e-07, "loss": 0.0301, "step": 9878 }, { "epoch": 2.2477815699658703, "grad_norm": 1.7187898246569568, "learning_rate": 7.23720125899844e-07, "loss": 0.1406, "step": 9879 }, { "epoch": 2.248009101251422, "grad_norm": 1.5000066250827442, "learning_rate": 7.236318952131524e-07, "loss": 0.1786, "step": 9880 }, { "epoch": 2.248236632536974, "grad_norm": 1.463942565947484, "learning_rate": 7.235436625106057e-07, "loss": 0.0793, "step": 9881 }, { "epoch": 2.2484641638225256, "grad_norm": 1.7880537918522657, "learning_rate": 7.234554277940067e-07, "loss": 0.0448, "step": 9882 }, { "epoch": 2.2486916951080773, "grad_norm": 2.260998293452545, "learning_rate": 7.233671910651592e-07, "loss": 0.0529, "step": 9883 }, { "epoch": 2.248919226393629, "grad_norm": 1.2807372862338948, "learning_rate": 7.232789523258665e-07, "loss": 0.059, "step": 9884 }, { "epoch": 2.249146757679181, "grad_norm": 1.4950384427415302, "learning_rate": 7.23190711577932e-07, "loss": 0.0574, "step": 9885 }, { "epoch": 2.2493742889647326, "grad_norm": 1.5598319789485426, "learning_rate": 7.23102468823159e-07, "loss": 0.047, "step": 9886 }, { "epoch": 2.2496018202502843, "grad_norm": 1.9796818456053953, "learning_rate": 7.230142240633515e-07, "loss": 0.0707, "step": 9887 }, { "epoch": 2.249829351535836, "grad_norm": 1.9752382025732782, "learning_rate": 7.229259773003128e-07, "loss": 0.0386, "step": 9888 }, { "epoch": 2.250056882821388, "grad_norm": 7.475740126394822, "learning_rate": 7.228377285358461e-07, "loss": 0.1604, "step": 9889 }, { "epoch": 2.2502844141069396, "grad_norm": 1.7369815637099226, "learning_rate": 7.227494777717555e-07, "loss": 0.1107, "step": 9890 }, { "epoch": 2.2505119453924913, "grad_norm": 1.7058053834872264, "learning_rate": 7.226612250098449e-07, "loss": 0.0415, "step": 9891 }, { "epoch": 2.250739476678043, "grad_norm": 1.9920451641621941, "learning_rate": 7.225729702519174e-07, "loss": 0.0864, "step": 9892 }, { "epoch": 2.250967007963595, "grad_norm": 1.5859261163302527, "learning_rate": 7.224847134997772e-07, "loss": 0.0746, "step": 9893 }, { "epoch": 2.2511945392491466, "grad_norm": 3.0570926687797613, "learning_rate": 7.223964547552281e-07, "loss": 0.1796, "step": 9894 }, { "epoch": 2.2514220705346983, "grad_norm": 2.1411531385665046, "learning_rate": 7.223081940200738e-07, "loss": 0.1186, "step": 9895 }, { "epoch": 2.25164960182025, "grad_norm": 2.5605029239540857, "learning_rate": 7.22219931296118e-07, "loss": 0.0903, "step": 9896 }, { "epoch": 2.2518771331058023, "grad_norm": 1.4942403697676752, "learning_rate": 7.22131666585165e-07, "loss": 0.0936, "step": 9897 }, { "epoch": 2.2521046643913536, "grad_norm": 3.5156967237443735, "learning_rate": 7.220433998890188e-07, "loss": 0.0587, "step": 9898 }, { "epoch": 2.252332195676906, "grad_norm": 1.0127705571222378, "learning_rate": 7.21955131209483e-07, "loss": 0.0144, "step": 9899 }, { "epoch": 2.252559726962457, "grad_norm": 1.8410734362290129, "learning_rate": 7.21866860548362e-07, "loss": 0.0524, "step": 9900 }, { "epoch": 2.2527872582480093, "grad_norm": 1.80027236056254, "learning_rate": 7.217785879074597e-07, "loss": 0.0522, "step": 9901 }, { "epoch": 2.253014789533561, "grad_norm": 2.5337764729931918, "learning_rate": 7.216903132885803e-07, "loss": 0.0441, "step": 9902 }, { "epoch": 2.253242320819113, "grad_norm": 1.5470448377651977, "learning_rate": 7.216020366935279e-07, "loss": 0.0217, "step": 9903 }, { "epoch": 2.2534698521046646, "grad_norm": 1.2354501880350004, "learning_rate": 7.215137581241071e-07, "loss": 0.0748, "step": 9904 }, { "epoch": 2.2536973833902163, "grad_norm": 1.0999499177289918, "learning_rate": 7.214254775821216e-07, "loss": 0.035, "step": 9905 }, { "epoch": 2.253924914675768, "grad_norm": 1.0690591094007649, "learning_rate": 7.213371950693759e-07, "loss": 0.0284, "step": 9906 }, { "epoch": 2.25415244596132, "grad_norm": 0.6173208377977818, "learning_rate": 7.212489105876745e-07, "loss": 0.0074, "step": 9907 }, { "epoch": 2.2543799772468716, "grad_norm": 1.7166506684911709, "learning_rate": 7.211606241388217e-07, "loss": 0.0831, "step": 9908 }, { "epoch": 2.2546075085324233, "grad_norm": 2.062793189329472, "learning_rate": 7.210723357246216e-07, "loss": 0.0573, "step": 9909 }, { "epoch": 2.254835039817975, "grad_norm": 1.0112757233221028, "learning_rate": 7.20984045346879e-07, "loss": 0.0392, "step": 9910 }, { "epoch": 2.255062571103527, "grad_norm": 1.6933376215077947, "learning_rate": 7.208957530073983e-07, "loss": 0.0544, "step": 9911 }, { "epoch": 2.2552901023890786, "grad_norm": 1.7129631404596137, "learning_rate": 7.208074587079841e-07, "loss": 0.0418, "step": 9912 }, { "epoch": 2.2555176336746303, "grad_norm": 1.2546125790474387, "learning_rate": 7.207191624504409e-07, "loss": 0.0552, "step": 9913 }, { "epoch": 2.255745164960182, "grad_norm": 2.2119715662073527, "learning_rate": 7.206308642365733e-07, "loss": 0.0487, "step": 9914 }, { "epoch": 2.255972696245734, "grad_norm": 1.4917211182583106, "learning_rate": 7.20542564068186e-07, "loss": 0.0244, "step": 9915 }, { "epoch": 2.2562002275312856, "grad_norm": 2.0171286803125867, "learning_rate": 7.204542619470837e-07, "loss": 0.1563, "step": 9916 }, { "epoch": 2.2564277588168373, "grad_norm": 1.1436411224819762, "learning_rate": 7.203659578750709e-07, "loss": 0.0513, "step": 9917 }, { "epoch": 2.256655290102389, "grad_norm": 1.8735502255584853, "learning_rate": 7.202776518539527e-07, "loss": 0.0469, "step": 9918 }, { "epoch": 2.256882821387941, "grad_norm": 1.4066962208218972, "learning_rate": 7.20189343885534e-07, "loss": 0.0474, "step": 9919 }, { "epoch": 2.2571103526734926, "grad_norm": 1.9691489628955297, "learning_rate": 7.201010339716191e-07, "loss": 0.1539, "step": 9920 }, { "epoch": 2.2573378839590443, "grad_norm": 1.7835434199911864, "learning_rate": 7.200127221140134e-07, "loss": 0.0579, "step": 9921 }, { "epoch": 2.257565415244596, "grad_norm": 1.0668305086393692, "learning_rate": 7.199244083145217e-07, "loss": 0.0534, "step": 9922 }, { "epoch": 2.257792946530148, "grad_norm": 2.2770530807974465, "learning_rate": 7.198360925749488e-07, "loss": 0.044, "step": 9923 }, { "epoch": 2.2580204778156996, "grad_norm": 1.7399756467597929, "learning_rate": 7.197477748971e-07, "loss": 0.0153, "step": 9924 }, { "epoch": 2.2582480091012513, "grad_norm": 1.7431788996917812, "learning_rate": 7.196594552827803e-07, "loss": 0.1163, "step": 9925 }, { "epoch": 2.258475540386803, "grad_norm": 1.5634561554047466, "learning_rate": 7.195711337337943e-07, "loss": 0.0367, "step": 9926 }, { "epoch": 2.258703071672355, "grad_norm": 1.3915344740514322, "learning_rate": 7.194828102519479e-07, "loss": 0.0284, "step": 9927 }, { "epoch": 2.2589306029579066, "grad_norm": 1.6271005234824392, "learning_rate": 7.193944848390458e-07, "loss": 0.0229, "step": 9928 }, { "epoch": 2.2591581342434583, "grad_norm": 1.928042580660145, "learning_rate": 7.193061574968932e-07, "loss": 0.0689, "step": 9929 }, { "epoch": 2.25938566552901, "grad_norm": 2.177865049462624, "learning_rate": 7.192178282272955e-07, "loss": 0.0598, "step": 9930 }, { "epoch": 2.259613196814562, "grad_norm": 1.151007882661663, "learning_rate": 7.191294970320581e-07, "loss": 0.0597, "step": 9931 }, { "epoch": 2.2598407281001136, "grad_norm": 1.6218107645944073, "learning_rate": 7.19041163912986e-07, "loss": 0.0395, "step": 9932 }, { "epoch": 2.2600682593856654, "grad_norm": 1.673841673978979, "learning_rate": 7.189528288718846e-07, "loss": 0.0161, "step": 9933 }, { "epoch": 2.260295790671217, "grad_norm": 1.0345413033273134, "learning_rate": 7.188644919105597e-07, "loss": 0.0244, "step": 9934 }, { "epoch": 2.260523321956769, "grad_norm": 1.906104579654973, "learning_rate": 7.187761530308164e-07, "loss": 0.0512, "step": 9935 }, { "epoch": 2.260750853242321, "grad_norm": 1.7059121788224236, "learning_rate": 7.186878122344602e-07, "loss": 0.0422, "step": 9936 }, { "epoch": 2.2609783845278724, "grad_norm": 1.6416412531110258, "learning_rate": 7.185994695232967e-07, "loss": 0.0401, "step": 9937 }, { "epoch": 2.2612059158134246, "grad_norm": 1.4075819784417, "learning_rate": 7.185111248991318e-07, "loss": 0.067, "step": 9938 }, { "epoch": 2.261433447098976, "grad_norm": 2.1174815038953616, "learning_rate": 7.184227783637705e-07, "loss": 0.1327, "step": 9939 }, { "epoch": 2.261660978384528, "grad_norm": 1.542504117878399, "learning_rate": 7.183344299190186e-07, "loss": 0.1173, "step": 9940 }, { "epoch": 2.26188850967008, "grad_norm": 2.101680236434913, "learning_rate": 7.182460795666821e-07, "loss": 0.0369, "step": 9941 }, { "epoch": 2.2621160409556316, "grad_norm": 1.356841882599635, "learning_rate": 7.181577273085663e-07, "loss": 0.0405, "step": 9942 }, { "epoch": 2.2623435722411833, "grad_norm": 1.5454684769120097, "learning_rate": 7.180693731464773e-07, "loss": 0.0643, "step": 9943 }, { "epoch": 2.262571103526735, "grad_norm": 1.3510060039314944, "learning_rate": 7.179810170822208e-07, "loss": 0.1395, "step": 9944 }, { "epoch": 2.262798634812287, "grad_norm": 1.4931031761170002, "learning_rate": 7.178926591176025e-07, "loss": 0.0457, "step": 9945 }, { "epoch": 2.2630261660978386, "grad_norm": 1.7016546510133408, "learning_rate": 7.178042992544284e-07, "loss": 0.1187, "step": 9946 }, { "epoch": 2.2632536973833903, "grad_norm": 1.9574623874457986, "learning_rate": 7.177159374945043e-07, "loss": 0.2011, "step": 9947 }, { "epoch": 2.263481228668942, "grad_norm": 2.460061761843094, "learning_rate": 7.176275738396363e-07, "loss": 0.0683, "step": 9948 }, { "epoch": 2.263708759954494, "grad_norm": 1.4173305961331562, "learning_rate": 7.175392082916305e-07, "loss": 0.0494, "step": 9949 }, { "epoch": 2.2639362912400456, "grad_norm": 1.8854550657499727, "learning_rate": 7.174508408522926e-07, "loss": 0.1446, "step": 9950 }, { "epoch": 2.2641638225255973, "grad_norm": 1.5115714147812573, "learning_rate": 7.173624715234288e-07, "loss": 0.063, "step": 9951 }, { "epoch": 2.264391353811149, "grad_norm": 1.870748040561277, "learning_rate": 7.172741003068454e-07, "loss": 0.0529, "step": 9952 }, { "epoch": 2.264618885096701, "grad_norm": 3.4700874302820917, "learning_rate": 7.171857272043481e-07, "loss": 0.054, "step": 9953 }, { "epoch": 2.2648464163822526, "grad_norm": 3.077671443557571, "learning_rate": 7.170973522177435e-07, "loss": 0.0668, "step": 9954 }, { "epoch": 2.2650739476678043, "grad_norm": 1.2234363200663947, "learning_rate": 7.170089753488378e-07, "loss": 0.0145, "step": 9955 }, { "epoch": 2.265301478953356, "grad_norm": 2.615122626044104, "learning_rate": 7.169205965994371e-07, "loss": 0.111, "step": 9956 }, { "epoch": 2.265529010238908, "grad_norm": 1.455531441061188, "learning_rate": 7.168322159713477e-07, "loss": 0.0388, "step": 9957 }, { "epoch": 2.2657565415244596, "grad_norm": 1.8868133759128798, "learning_rate": 7.16743833466376e-07, "loss": 0.0208, "step": 9958 }, { "epoch": 2.2659840728100114, "grad_norm": 1.8986978689090965, "learning_rate": 7.166554490863283e-07, "loss": 0.0548, "step": 9959 }, { "epoch": 2.266211604095563, "grad_norm": 2.0257615128365547, "learning_rate": 7.165670628330112e-07, "loss": 0.0377, "step": 9960 }, { "epoch": 2.266439135381115, "grad_norm": 1.221895596184263, "learning_rate": 7.16478674708231e-07, "loss": 0.0228, "step": 9961 }, { "epoch": 2.2666666666666666, "grad_norm": 2.0120406197398744, "learning_rate": 7.163902847137942e-07, "loss": 0.0733, "step": 9962 }, { "epoch": 2.2668941979522184, "grad_norm": 1.1094453517774197, "learning_rate": 7.163018928515074e-07, "loss": 0.1032, "step": 9963 }, { "epoch": 2.26712172923777, "grad_norm": 1.641632508142861, "learning_rate": 7.16213499123177e-07, "loss": 0.1367, "step": 9964 }, { "epoch": 2.267349260523322, "grad_norm": 1.175803720791843, "learning_rate": 7.161251035306099e-07, "loss": 0.0277, "step": 9965 }, { "epoch": 2.2675767918088736, "grad_norm": 1.6048600629963754, "learning_rate": 7.160367060756125e-07, "loss": 0.124, "step": 9966 }, { "epoch": 2.2678043230944254, "grad_norm": 0.9414032827252117, "learning_rate": 7.159483067599913e-07, "loss": 0.0664, "step": 9967 }, { "epoch": 2.268031854379977, "grad_norm": 1.5401276895833678, "learning_rate": 7.158599055855536e-07, "loss": 0.0298, "step": 9968 }, { "epoch": 2.268259385665529, "grad_norm": 1.8654507290663085, "learning_rate": 7.157715025541059e-07, "loss": 0.1168, "step": 9969 }, { "epoch": 2.2684869169510806, "grad_norm": 2.888492273928279, "learning_rate": 7.156830976674547e-07, "loss": 0.0642, "step": 9970 }, { "epoch": 2.2687144482366324, "grad_norm": 1.4216840942395765, "learning_rate": 7.155946909274071e-07, "loss": 0.0355, "step": 9971 }, { "epoch": 2.268941979522184, "grad_norm": 1.9071513514995047, "learning_rate": 7.1550628233577e-07, "loss": 0.1619, "step": 9972 }, { "epoch": 2.269169510807736, "grad_norm": 2.4450924226682402, "learning_rate": 7.154178718943502e-07, "loss": 0.0472, "step": 9973 }, { "epoch": 2.2693970420932876, "grad_norm": 1.6468107756072237, "learning_rate": 7.153294596049547e-07, "loss": 0.0422, "step": 9974 }, { "epoch": 2.26962457337884, "grad_norm": 1.8138360151761403, "learning_rate": 7.152410454693905e-07, "loss": 0.0327, "step": 9975 }, { "epoch": 2.269852104664391, "grad_norm": 2.0459442632750062, "learning_rate": 7.151526294894646e-07, "loss": 0.0352, "step": 9976 }, { "epoch": 2.2700796359499433, "grad_norm": 1.982952688424646, "learning_rate": 7.150642116669839e-07, "loss": 0.0648, "step": 9977 }, { "epoch": 2.2703071672354946, "grad_norm": 2.55458641638833, "learning_rate": 7.149757920037558e-07, "loss": 0.0707, "step": 9978 }, { "epoch": 2.270534698521047, "grad_norm": 1.7074553739199554, "learning_rate": 7.148873705015875e-07, "loss": 0.0593, "step": 9979 }, { "epoch": 2.2707622298065986, "grad_norm": 2.074424873267346, "learning_rate": 7.147989471622856e-07, "loss": 0.0227, "step": 9980 }, { "epoch": 2.2709897610921503, "grad_norm": 1.3083781772002405, "learning_rate": 7.147105219876578e-07, "loss": 0.0459, "step": 9981 }, { "epoch": 2.271217292377702, "grad_norm": 1.4915122509404122, "learning_rate": 7.146220949795114e-07, "loss": 0.0341, "step": 9982 }, { "epoch": 2.271444823663254, "grad_norm": 2.8184334181010637, "learning_rate": 7.145336661396532e-07, "loss": 0.1324, "step": 9983 }, { "epoch": 2.2716723549488056, "grad_norm": 1.6353015872148624, "learning_rate": 7.144452354698911e-07, "loss": 0.0517, "step": 9984 }, { "epoch": 2.2718998862343573, "grad_norm": 2.9208976878906863, "learning_rate": 7.143568029720321e-07, "loss": 0.0848, "step": 9985 }, { "epoch": 2.272127417519909, "grad_norm": 2.0928691734337086, "learning_rate": 7.142683686478838e-07, "loss": 0.0609, "step": 9986 }, { "epoch": 2.272354948805461, "grad_norm": 1.8743162895870351, "learning_rate": 7.141799324992532e-07, "loss": 0.1895, "step": 9987 }, { "epoch": 2.2725824800910126, "grad_norm": 2.3999558451914784, "learning_rate": 7.140914945279486e-07, "loss": 0.0508, "step": 9988 }, { "epoch": 2.2728100113765644, "grad_norm": 2.049360168071473, "learning_rate": 7.140030547357768e-07, "loss": 0.1587, "step": 9989 }, { "epoch": 2.273037542662116, "grad_norm": 1.0111727363641771, "learning_rate": 7.139146131245453e-07, "loss": 0.0275, "step": 9990 }, { "epoch": 2.273265073947668, "grad_norm": 1.438736486549141, "learning_rate": 7.138261696960624e-07, "loss": 0.0405, "step": 9991 }, { "epoch": 2.2734926052332196, "grad_norm": 1.4944431094548225, "learning_rate": 7.137377244521348e-07, "loss": 0.0337, "step": 9992 }, { "epoch": 2.2737201365187714, "grad_norm": 1.9316880879496054, "learning_rate": 7.136492773945711e-07, "loss": 0.0879, "step": 9993 }, { "epoch": 2.273947667804323, "grad_norm": 1.9559043924097677, "learning_rate": 7.135608285251782e-07, "loss": 0.1422, "step": 9994 }, { "epoch": 2.274175199089875, "grad_norm": 1.5340336317413974, "learning_rate": 7.134723778457643e-07, "loss": 0.1182, "step": 9995 }, { "epoch": 2.2744027303754266, "grad_norm": 1.9683779705330084, "learning_rate": 7.13383925358137e-07, "loss": 0.0946, "step": 9996 }, { "epoch": 2.2746302616609784, "grad_norm": 1.4407703401361611, "learning_rate": 7.13295471064104e-07, "loss": 0.047, "step": 9997 }, { "epoch": 2.27485779294653, "grad_norm": 1.3620126880714782, "learning_rate": 7.132070149654734e-07, "loss": 0.0754, "step": 9998 }, { "epoch": 2.275085324232082, "grad_norm": 1.323713417182486, "learning_rate": 7.131185570640529e-07, "loss": 0.0945, "step": 9999 }, { "epoch": 2.2753128555176336, "grad_norm": 1.1379200276931136, "learning_rate": 7.130300973616506e-07, "loss": 0.0491, "step": 10000 }, { "epoch": 2.2755403868031854, "grad_norm": 2.159440972890442, "learning_rate": 7.129416358600742e-07, "loss": 0.0566, "step": 10001 }, { "epoch": 2.275767918088737, "grad_norm": 1.0553680750834689, "learning_rate": 7.12853172561132e-07, "loss": 0.0398, "step": 10002 }, { "epoch": 2.275995449374289, "grad_norm": 1.4740891265096314, "learning_rate": 7.127647074666316e-07, "loss": 0.0803, "step": 10003 }, { "epoch": 2.2762229806598406, "grad_norm": 1.683324086486543, "learning_rate": 7.126762405783813e-07, "loss": 0.0685, "step": 10004 }, { "epoch": 2.2764505119453924, "grad_norm": 1.3073877630721384, "learning_rate": 7.125877718981894e-07, "loss": 0.0517, "step": 10005 }, { "epoch": 2.276678043230944, "grad_norm": 1.2100626161922907, "learning_rate": 7.124993014278639e-07, "loss": 0.0368, "step": 10006 }, { "epoch": 2.276905574516496, "grad_norm": 2.799869689973577, "learning_rate": 7.124108291692128e-07, "loss": 0.0783, "step": 10007 }, { "epoch": 2.2771331058020476, "grad_norm": 1.0624859753841573, "learning_rate": 7.123223551240445e-07, "loss": 0.0171, "step": 10008 }, { "epoch": 2.2773606370875994, "grad_norm": 1.6051031330142305, "learning_rate": 7.122338792941671e-07, "loss": 0.0452, "step": 10009 }, { "epoch": 2.277588168373151, "grad_norm": 1.8050367214300154, "learning_rate": 7.121454016813889e-07, "loss": 0.0238, "step": 10010 }, { "epoch": 2.277815699658703, "grad_norm": 1.1666362583854997, "learning_rate": 7.120569222875184e-07, "loss": 0.0867, "step": 10011 }, { "epoch": 2.2780432309442546, "grad_norm": 1.2099871046712096, "learning_rate": 7.119684411143638e-07, "loss": 0.0876, "step": 10012 }, { "epoch": 2.2782707622298064, "grad_norm": 1.6031122595119038, "learning_rate": 7.118799581637336e-07, "loss": 0.0441, "step": 10013 }, { "epoch": 2.2784982935153586, "grad_norm": 1.6220347650096594, "learning_rate": 7.117914734374362e-07, "loss": 0.0662, "step": 10014 }, { "epoch": 2.27872582480091, "grad_norm": 2.308298130308238, "learning_rate": 7.1170298693728e-07, "loss": 0.0595, "step": 10015 }, { "epoch": 2.278953356086462, "grad_norm": 1.402203199616304, "learning_rate": 7.116144986650736e-07, "loss": 0.0963, "step": 10016 }, { "epoch": 2.2791808873720134, "grad_norm": 1.5311548749634378, "learning_rate": 7.115260086226253e-07, "loss": 0.0623, "step": 10017 }, { "epoch": 2.2794084186575656, "grad_norm": 1.7957851850677895, "learning_rate": 7.114375168117439e-07, "loss": 0.1566, "step": 10018 }, { "epoch": 2.2796359499431174, "grad_norm": 2.6418268330958146, "learning_rate": 7.113490232342381e-07, "loss": 0.0325, "step": 10019 }, { "epoch": 2.279863481228669, "grad_norm": 1.8574341568779602, "learning_rate": 7.112605278919163e-07, "loss": 0.0733, "step": 10020 }, { "epoch": 2.280091012514221, "grad_norm": 1.3796562818632219, "learning_rate": 7.111720307865872e-07, "loss": 0.1017, "step": 10021 }, { "epoch": 2.2803185437997726, "grad_norm": 3.0821744563477265, "learning_rate": 7.110835319200598e-07, "loss": 0.0544, "step": 10022 }, { "epoch": 2.2805460750853244, "grad_norm": 1.669297258209566, "learning_rate": 7.109950312941426e-07, "loss": 0.0952, "step": 10023 }, { "epoch": 2.280773606370876, "grad_norm": 2.0627172751780956, "learning_rate": 7.109065289106443e-07, "loss": 0.1216, "step": 10024 }, { "epoch": 2.281001137656428, "grad_norm": 1.207721341919061, "learning_rate": 7.10818024771374e-07, "loss": 0.0216, "step": 10025 }, { "epoch": 2.2812286689419796, "grad_norm": 1.6718852900111274, "learning_rate": 7.107295188781406e-07, "loss": 0.0838, "step": 10026 }, { "epoch": 2.2814562002275314, "grad_norm": 1.139793068897223, "learning_rate": 7.106410112327526e-07, "loss": 0.0447, "step": 10027 }, { "epoch": 2.281683731513083, "grad_norm": 1.3475479150898466, "learning_rate": 7.105525018370193e-07, "loss": 0.0773, "step": 10028 }, { "epoch": 2.281911262798635, "grad_norm": 1.1890249790286738, "learning_rate": 7.104639906927495e-07, "loss": 0.0419, "step": 10029 }, { "epoch": 2.2821387940841866, "grad_norm": 0.9030696064001631, "learning_rate": 7.103754778017522e-07, "loss": 0.0243, "step": 10030 }, { "epoch": 2.2823663253697384, "grad_norm": 1.9060237921462815, "learning_rate": 7.102869631658366e-07, "loss": 0.0857, "step": 10031 }, { "epoch": 2.28259385665529, "grad_norm": 1.1331184828802838, "learning_rate": 7.101984467868117e-07, "loss": 0.0765, "step": 10032 }, { "epoch": 2.282821387940842, "grad_norm": 4.987682033694193, "learning_rate": 7.101099286664864e-07, "loss": 0.1121, "step": 10033 }, { "epoch": 2.2830489192263936, "grad_norm": 1.3189295840667192, "learning_rate": 7.100214088066701e-07, "loss": 0.0292, "step": 10034 }, { "epoch": 2.2832764505119454, "grad_norm": 1.992738152444656, "learning_rate": 7.09932887209172e-07, "loss": 0.0824, "step": 10035 }, { "epoch": 2.283503981797497, "grad_norm": 1.2439271811185566, "learning_rate": 7.098443638758011e-07, "loss": 0.0374, "step": 10036 }, { "epoch": 2.283731513083049, "grad_norm": 1.449787819190392, "learning_rate": 7.097558388083669e-07, "loss": 0.0513, "step": 10037 }, { "epoch": 2.2839590443686006, "grad_norm": 2.03306034401581, "learning_rate": 7.096673120086786e-07, "loss": 0.0923, "step": 10038 }, { "epoch": 2.2841865756541524, "grad_norm": 1.2820139309970926, "learning_rate": 7.095787834785454e-07, "loss": 0.0831, "step": 10039 }, { "epoch": 2.284414106939704, "grad_norm": 2.3143883924302924, "learning_rate": 7.094902532197768e-07, "loss": 0.0592, "step": 10040 }, { "epoch": 2.284641638225256, "grad_norm": 1.96797682709101, "learning_rate": 7.094017212341821e-07, "loss": 0.0428, "step": 10041 }, { "epoch": 2.2848691695108077, "grad_norm": 2.0363815858478587, "learning_rate": 7.093131875235709e-07, "loss": 0.0329, "step": 10042 }, { "epoch": 2.2850967007963594, "grad_norm": 1.5373732609424149, "learning_rate": 7.092246520897525e-07, "loss": 0.0434, "step": 10043 }, { "epoch": 2.285324232081911, "grad_norm": 1.1273123585609461, "learning_rate": 7.091361149345364e-07, "loss": 0.0703, "step": 10044 }, { "epoch": 2.285551763367463, "grad_norm": 1.7123497664607472, "learning_rate": 7.090475760597323e-07, "loss": 0.0404, "step": 10045 }, { "epoch": 2.2857792946530147, "grad_norm": 1.6485543468584964, "learning_rate": 7.089590354671496e-07, "loss": 0.0305, "step": 10046 }, { "epoch": 2.2860068259385664, "grad_norm": 1.430496169992461, "learning_rate": 7.088704931585981e-07, "loss": 0.0712, "step": 10047 }, { "epoch": 2.286234357224118, "grad_norm": 1.5768349646484414, "learning_rate": 7.087819491358871e-07, "loss": 0.0426, "step": 10048 }, { "epoch": 2.28646188850967, "grad_norm": 1.0949635311954438, "learning_rate": 7.086934034008268e-07, "loss": 0.0229, "step": 10049 }, { "epoch": 2.2866894197952217, "grad_norm": 1.493275805313581, "learning_rate": 7.086048559552265e-07, "loss": 0.0467, "step": 10050 }, { "epoch": 2.2869169510807734, "grad_norm": 1.1998635039426, "learning_rate": 7.08516306800896e-07, "loss": 0.0803, "step": 10051 }, { "epoch": 2.287144482366325, "grad_norm": 1.1485306499221362, "learning_rate": 7.084277559396452e-07, "loss": 0.0274, "step": 10052 }, { "epoch": 2.2873720136518774, "grad_norm": 1.4765522299693974, "learning_rate": 7.083392033732839e-07, "loss": 0.0816, "step": 10053 }, { "epoch": 2.2875995449374287, "grad_norm": 3.201735581076717, "learning_rate": 7.082506491036216e-07, "loss": 0.1219, "step": 10054 }, { "epoch": 2.287827076222981, "grad_norm": 1.8492532949073477, "learning_rate": 7.081620931324687e-07, "loss": 0.0745, "step": 10055 }, { "epoch": 2.288054607508532, "grad_norm": 1.0218501279370673, "learning_rate": 7.080735354616349e-07, "loss": 0.1123, "step": 10056 }, { "epoch": 2.2882821387940844, "grad_norm": 1.5532113854842495, "learning_rate": 7.079849760929304e-07, "loss": 0.0358, "step": 10057 }, { "epoch": 2.288509670079636, "grad_norm": 0.9743767439398741, "learning_rate": 7.078964150281647e-07, "loss": 0.0769, "step": 10058 }, { "epoch": 2.288737201365188, "grad_norm": 1.3212847161018841, "learning_rate": 7.078078522691482e-07, "loss": 0.0921, "step": 10059 }, { "epoch": 2.2889647326507396, "grad_norm": 1.9147415766072364, "learning_rate": 7.07719287817691e-07, "loss": 0.0441, "step": 10060 }, { "epoch": 2.2891922639362914, "grad_norm": 1.1039619524571742, "learning_rate": 7.076307216756028e-07, "loss": 0.0322, "step": 10061 }, { "epoch": 2.289419795221843, "grad_norm": 2.3783154417331445, "learning_rate": 7.07542153844694e-07, "loss": 0.0593, "step": 10062 }, { "epoch": 2.289647326507395, "grad_norm": 2.422361220063779, "learning_rate": 7.074535843267749e-07, "loss": 0.1003, "step": 10063 }, { "epoch": 2.2898748577929466, "grad_norm": 1.7141078067367503, "learning_rate": 7.073650131236555e-07, "loss": 0.1274, "step": 10064 }, { "epoch": 2.2901023890784984, "grad_norm": 1.7069402366544915, "learning_rate": 7.07276440237146e-07, "loss": 0.0991, "step": 10065 }, { "epoch": 2.29032992036405, "grad_norm": 1.6129815915034778, "learning_rate": 7.071878656690567e-07, "loss": 0.0363, "step": 10066 }, { "epoch": 2.290557451649602, "grad_norm": 1.6072817085648465, "learning_rate": 7.070992894211981e-07, "loss": 0.141, "step": 10067 }, { "epoch": 2.2907849829351536, "grad_norm": 2.398030301241516, "learning_rate": 7.070107114953802e-07, "loss": 0.0602, "step": 10068 }, { "epoch": 2.2910125142207054, "grad_norm": 1.9998561195581437, "learning_rate": 7.069221318934137e-07, "loss": 0.0511, "step": 10069 }, { "epoch": 2.291240045506257, "grad_norm": 2.8922879413023708, "learning_rate": 7.068335506171089e-07, "loss": 0.0468, "step": 10070 }, { "epoch": 2.291467576791809, "grad_norm": 0.8405600450433354, "learning_rate": 7.067449676682761e-07, "loss": 0.0325, "step": 10071 }, { "epoch": 2.2916951080773607, "grad_norm": 1.147158665440208, "learning_rate": 7.066563830487259e-07, "loss": 0.0425, "step": 10072 }, { "epoch": 2.2919226393629124, "grad_norm": 1.054979192201, "learning_rate": 7.065677967602688e-07, "loss": 0.1022, "step": 10073 }, { "epoch": 2.292150170648464, "grad_norm": 3.2704257511897694, "learning_rate": 7.064792088047151e-07, "loss": 0.0712, "step": 10074 }, { "epoch": 2.292377701934016, "grad_norm": 1.3241895742777792, "learning_rate": 7.063906191838758e-07, "loss": 0.1161, "step": 10075 }, { "epoch": 2.2926052332195677, "grad_norm": 2.170950985447108, "learning_rate": 7.063020278995615e-07, "loss": 0.0512, "step": 10076 }, { "epoch": 2.2928327645051194, "grad_norm": 5.0787296093196295, "learning_rate": 7.062134349535822e-07, "loss": 0.0417, "step": 10077 }, { "epoch": 2.293060295790671, "grad_norm": 1.450519148716169, "learning_rate": 7.061248403477493e-07, "loss": 0.1035, "step": 10078 }, { "epoch": 2.293287827076223, "grad_norm": 1.0335660522453471, "learning_rate": 7.060362440838732e-07, "loss": 0.0682, "step": 10079 }, { "epoch": 2.2935153583617747, "grad_norm": 1.7219680743403527, "learning_rate": 7.059476461637647e-07, "loss": 0.0501, "step": 10080 }, { "epoch": 2.2937428896473264, "grad_norm": 1.3395445975809865, "learning_rate": 7.058590465892344e-07, "loss": 0.0524, "step": 10081 }, { "epoch": 2.293970420932878, "grad_norm": 1.6637046562372, "learning_rate": 7.057704453620934e-07, "loss": 0.1022, "step": 10082 }, { "epoch": 2.29419795221843, "grad_norm": 1.458889969476899, "learning_rate": 7.056818424841526e-07, "loss": 0.0257, "step": 10083 }, { "epoch": 2.2944254835039817, "grad_norm": 1.7554511970810964, "learning_rate": 7.055932379572225e-07, "loss": 0.0065, "step": 10084 }, { "epoch": 2.2946530147895334, "grad_norm": 2.1773005283315436, "learning_rate": 7.055046317831142e-07, "loss": 0.0702, "step": 10085 }, { "epoch": 2.294880546075085, "grad_norm": 4.317779856586926, "learning_rate": 7.054160239636387e-07, "loss": 0.0618, "step": 10086 }, { "epoch": 2.295108077360637, "grad_norm": 1.7896226389962315, "learning_rate": 7.05327414500607e-07, "loss": 0.0288, "step": 10087 }, { "epoch": 2.2953356086461887, "grad_norm": 1.3495030196993232, "learning_rate": 7.052388033958299e-07, "loss": 0.1267, "step": 10088 }, { "epoch": 2.2955631399317404, "grad_norm": 3.1537967131140094, "learning_rate": 7.051501906511189e-07, "loss": 0.077, "step": 10089 }, { "epoch": 2.295790671217292, "grad_norm": 1.3866776838236015, "learning_rate": 7.050615762682845e-07, "loss": 0.0212, "step": 10090 }, { "epoch": 2.296018202502844, "grad_norm": 1.712567216083561, "learning_rate": 7.049729602491381e-07, "loss": 0.0691, "step": 10091 }, { "epoch": 2.296245733788396, "grad_norm": 1.8720464388897193, "learning_rate": 7.048843425954911e-07, "loss": 0.0773, "step": 10092 }, { "epoch": 2.2964732650739474, "grad_norm": 1.7376347225843973, "learning_rate": 7.047957233091543e-07, "loss": 0.0369, "step": 10093 }, { "epoch": 2.2967007963594996, "grad_norm": 1.5352656131792628, "learning_rate": 7.047071023919391e-07, "loss": 0.0437, "step": 10094 }, { "epoch": 2.296928327645051, "grad_norm": 1.4800851511735222, "learning_rate": 7.046184798456566e-07, "loss": 0.0513, "step": 10095 }, { "epoch": 2.297155858930603, "grad_norm": 0.9481631934344069, "learning_rate": 7.045298556721184e-07, "loss": 0.0245, "step": 10096 }, { "epoch": 2.297383390216155, "grad_norm": 2.059395352873947, "learning_rate": 7.044412298731354e-07, "loss": 0.0654, "step": 10097 }, { "epoch": 2.2976109215017066, "grad_norm": 1.566533347687481, "learning_rate": 7.043526024505191e-07, "loss": 0.0959, "step": 10098 }, { "epoch": 2.2978384527872584, "grad_norm": 1.98758901008803, "learning_rate": 7.042639734060811e-07, "loss": 0.0572, "step": 10099 }, { "epoch": 2.29806598407281, "grad_norm": 0.8716641372376241, "learning_rate": 7.041753427416326e-07, "loss": 0.0165, "step": 10100 }, { "epoch": 2.298293515358362, "grad_norm": 1.7701070819210463, "learning_rate": 7.040867104589852e-07, "loss": 0.044, "step": 10101 }, { "epoch": 2.2985210466439137, "grad_norm": 1.191527553532166, "learning_rate": 7.0399807655995e-07, "loss": 0.0244, "step": 10102 }, { "epoch": 2.2987485779294654, "grad_norm": 1.6399352190142107, "learning_rate": 7.03909441046339e-07, "loss": 0.0504, "step": 10103 }, { "epoch": 2.298976109215017, "grad_norm": 1.9999448132226203, "learning_rate": 7.038208039199634e-07, "loss": 0.0501, "step": 10104 }, { "epoch": 2.299203640500569, "grad_norm": 1.3417488814515268, "learning_rate": 7.037321651826351e-07, "loss": 0.0403, "step": 10105 }, { "epoch": 2.2994311717861207, "grad_norm": 3.9039504510784613, "learning_rate": 7.036435248361655e-07, "loss": 0.0347, "step": 10106 }, { "epoch": 2.2996587030716724, "grad_norm": 2.3893293146461345, "learning_rate": 7.035548828823662e-07, "loss": 0.1475, "step": 10107 }, { "epoch": 2.299886234357224, "grad_norm": 1.6916934633424234, "learning_rate": 7.03466239323049e-07, "loss": 0.0383, "step": 10108 }, { "epoch": 2.300113765642776, "grad_norm": 3.4172866070423247, "learning_rate": 7.033775941600257e-07, "loss": 0.0622, "step": 10109 }, { "epoch": 2.3003412969283277, "grad_norm": 1.6726203425653483, "learning_rate": 7.032889473951078e-07, "loss": 0.039, "step": 10110 }, { "epoch": 2.3005688282138794, "grad_norm": 2.33938310576764, "learning_rate": 7.032002990301071e-07, "loss": 0.0599, "step": 10111 }, { "epoch": 2.300796359499431, "grad_norm": 4.091327884543059, "learning_rate": 7.031116490668355e-07, "loss": 0.0838, "step": 10112 }, { "epoch": 2.301023890784983, "grad_norm": 2.5933876046357214, "learning_rate": 7.030229975071049e-07, "loss": 0.0486, "step": 10113 }, { "epoch": 2.3012514220705347, "grad_norm": 1.874560356299034, "learning_rate": 7.029343443527273e-07, "loss": 0.0466, "step": 10114 }, { "epoch": 2.3014789533560864, "grad_norm": 2.2935452149335642, "learning_rate": 7.028456896055143e-07, "loss": 0.0768, "step": 10115 }, { "epoch": 2.301706484641638, "grad_norm": 2.688109074386162, "learning_rate": 7.02757033267278e-07, "loss": 0.0786, "step": 10116 }, { "epoch": 2.30193401592719, "grad_norm": 1.1151270718575637, "learning_rate": 7.026683753398303e-07, "loss": 0.0631, "step": 10117 }, { "epoch": 2.3021615472127417, "grad_norm": 1.3473426685731544, "learning_rate": 7.02579715824983e-07, "loss": 0.0905, "step": 10118 }, { "epoch": 2.3023890784982934, "grad_norm": 1.1242956105928719, "learning_rate": 7.024910547245488e-07, "loss": 0.0143, "step": 10119 }, { "epoch": 2.302616609783845, "grad_norm": 1.870730120407611, "learning_rate": 7.024023920403395e-07, "loss": 0.024, "step": 10120 }, { "epoch": 2.302844141069397, "grad_norm": 1.1529219808523674, "learning_rate": 7.023137277741665e-07, "loss": 0.0558, "step": 10121 }, { "epoch": 2.3030716723549487, "grad_norm": 1.839284993413365, "learning_rate": 7.022250619278428e-07, "loss": 0.0594, "step": 10122 }, { "epoch": 2.3032992036405004, "grad_norm": 1.0954860678635077, "learning_rate": 7.021363945031804e-07, "loss": 0.0236, "step": 10123 }, { "epoch": 2.303526734926052, "grad_norm": 1.5779464396709175, "learning_rate": 7.020477255019911e-07, "loss": 0.0506, "step": 10124 }, { "epoch": 2.303754266211604, "grad_norm": 1.9392962932491264, "learning_rate": 7.019590549260874e-07, "loss": 0.0737, "step": 10125 }, { "epoch": 2.3039817974971557, "grad_norm": 1.8043832425297857, "learning_rate": 7.018703827772816e-07, "loss": 0.0635, "step": 10126 }, { "epoch": 2.3042093287827075, "grad_norm": 1.4711456805325058, "learning_rate": 7.017817090573863e-07, "loss": 0.0183, "step": 10127 }, { "epoch": 2.304436860068259, "grad_norm": 2.2512359681063963, "learning_rate": 7.016930337682131e-07, "loss": 0.1504, "step": 10128 }, { "epoch": 2.304664391353811, "grad_norm": 1.677884166750546, "learning_rate": 7.016043569115747e-07, "loss": 0.0738, "step": 10129 }, { "epoch": 2.3048919226393627, "grad_norm": 1.854098351346858, "learning_rate": 7.015156784892838e-07, "loss": 0.0667, "step": 10130 }, { "epoch": 2.305119453924915, "grad_norm": 1.693603994712442, "learning_rate": 7.014269985031523e-07, "loss": 0.0725, "step": 10131 }, { "epoch": 2.305346985210466, "grad_norm": 2.06112326539677, "learning_rate": 7.01338316954993e-07, "loss": 0.0728, "step": 10132 }, { "epoch": 2.3055745164960184, "grad_norm": 1.2319994558308323, "learning_rate": 7.012496338466186e-07, "loss": 0.0208, "step": 10133 }, { "epoch": 2.3058020477815697, "grad_norm": 1.1966728922907925, "learning_rate": 7.01160949179841e-07, "loss": 0.0136, "step": 10134 }, { "epoch": 2.306029579067122, "grad_norm": 1.595694119966933, "learning_rate": 7.010722629564732e-07, "loss": 0.0335, "step": 10135 }, { "epoch": 2.3062571103526737, "grad_norm": 1.3753962254891392, "learning_rate": 7.009835751783277e-07, "loss": 0.0374, "step": 10136 }, { "epoch": 2.3064846416382254, "grad_norm": 3.0798061098172087, "learning_rate": 7.00894885847217e-07, "loss": 0.0793, "step": 10137 }, { "epoch": 2.306712172923777, "grad_norm": 0.9753511578079058, "learning_rate": 7.008061949649537e-07, "loss": 0.0082, "step": 10138 }, { "epoch": 2.306939704209329, "grad_norm": 1.1312077766122042, "learning_rate": 7.00717502533351e-07, "loss": 0.0657, "step": 10139 }, { "epoch": 2.3071672354948807, "grad_norm": 1.2848887656888683, "learning_rate": 7.00628808554221e-07, "loss": 0.0403, "step": 10140 }, { "epoch": 2.3073947667804324, "grad_norm": 1.832572487338452, "learning_rate": 7.005401130293765e-07, "loss": 0.0457, "step": 10141 }, { "epoch": 2.307622298065984, "grad_norm": 1.0083600291135606, "learning_rate": 7.004514159606307e-07, "loss": 0.0603, "step": 10142 }, { "epoch": 2.307849829351536, "grad_norm": 1.4021911649167007, "learning_rate": 7.00362717349796e-07, "loss": 0.0664, "step": 10143 }, { "epoch": 2.3080773606370877, "grad_norm": 1.7458432050105481, "learning_rate": 7.002740171986853e-07, "loss": 0.0351, "step": 10144 }, { "epoch": 2.3083048919226394, "grad_norm": 1.124439338286419, "learning_rate": 7.001853155091117e-07, "loss": 0.0584, "step": 10145 }, { "epoch": 2.308532423208191, "grad_norm": 1.138275465729119, "learning_rate": 7.00096612282888e-07, "loss": 0.0856, "step": 10146 }, { "epoch": 2.308759954493743, "grad_norm": 2.0102108957457134, "learning_rate": 7.000079075218269e-07, "loss": 0.0256, "step": 10147 }, { "epoch": 2.3089874857792947, "grad_norm": 1.9643995708564213, "learning_rate": 6.999192012277416e-07, "loss": 0.0562, "step": 10148 }, { "epoch": 2.3092150170648464, "grad_norm": 2.025784661414506, "learning_rate": 6.99830493402445e-07, "loss": 0.0352, "step": 10149 }, { "epoch": 2.309442548350398, "grad_norm": 2.254375944695357, "learning_rate": 6.997417840477502e-07, "loss": 0.0765, "step": 10150 }, { "epoch": 2.30967007963595, "grad_norm": 1.7845894886209006, "learning_rate": 6.996530731654704e-07, "loss": 0.0879, "step": 10151 }, { "epoch": 2.3098976109215017, "grad_norm": 1.4978389960366234, "learning_rate": 6.995643607574182e-07, "loss": 0.0643, "step": 10152 }, { "epoch": 2.3101251422070535, "grad_norm": 3.0634914492451153, "learning_rate": 6.994756468254072e-07, "loss": 0.0674, "step": 10153 }, { "epoch": 2.310352673492605, "grad_norm": 1.553686778765929, "learning_rate": 6.993869313712504e-07, "loss": 0.1393, "step": 10154 }, { "epoch": 2.310580204778157, "grad_norm": 0.8618737168268622, "learning_rate": 6.992982143967607e-07, "loss": 0.0284, "step": 10155 }, { "epoch": 2.3108077360637087, "grad_norm": 0.7854142579646235, "learning_rate": 6.992094959037518e-07, "loss": 0.0595, "step": 10156 }, { "epoch": 2.3110352673492605, "grad_norm": 1.3907922375502866, "learning_rate": 6.991207758940367e-07, "loss": 0.0631, "step": 10157 }, { "epoch": 2.311262798634812, "grad_norm": 1.3702732962873834, "learning_rate": 6.990320543694287e-07, "loss": 0.0381, "step": 10158 }, { "epoch": 2.311490329920364, "grad_norm": 1.048321945459076, "learning_rate": 6.98943331331741e-07, "loss": 0.1012, "step": 10159 }, { "epoch": 2.3117178612059157, "grad_norm": 1.8533975651346535, "learning_rate": 6.988546067827872e-07, "loss": 0.1625, "step": 10160 }, { "epoch": 2.3119453924914675, "grad_norm": 1.0225727774122166, "learning_rate": 6.987658807243803e-07, "loss": 0.0508, "step": 10161 }, { "epoch": 2.312172923777019, "grad_norm": 1.6201709012239547, "learning_rate": 6.986771531583339e-07, "loss": 0.1398, "step": 10162 }, { "epoch": 2.312400455062571, "grad_norm": 1.8130798331051952, "learning_rate": 6.985884240864614e-07, "loss": 0.0778, "step": 10163 }, { "epoch": 2.3126279863481227, "grad_norm": 1.774029908656697, "learning_rate": 6.984996935105765e-07, "loss": 0.0541, "step": 10164 }, { "epoch": 2.3128555176336745, "grad_norm": 1.4729286003642776, "learning_rate": 6.984109614324923e-07, "loss": 0.0911, "step": 10165 }, { "epoch": 2.3130830489192262, "grad_norm": 2.3564239816606407, "learning_rate": 6.983222278540225e-07, "loss": 0.1191, "step": 10166 }, { "epoch": 2.313310580204778, "grad_norm": 1.2995952701598417, "learning_rate": 6.982334927769807e-07, "loss": 0.0581, "step": 10167 }, { "epoch": 2.3135381114903297, "grad_norm": 2.1883321448395154, "learning_rate": 6.981447562031804e-07, "loss": 0.1078, "step": 10168 }, { "epoch": 2.3137656427758815, "grad_norm": 2.292356538303416, "learning_rate": 6.980560181344352e-07, "loss": 0.1524, "step": 10169 }, { "epoch": 2.3139931740614337, "grad_norm": 1.1786745483805585, "learning_rate": 6.979672785725588e-07, "loss": 0.0543, "step": 10170 }, { "epoch": 2.314220705346985, "grad_norm": 1.628133287353302, "learning_rate": 6.97878537519365e-07, "loss": 0.0975, "step": 10171 }, { "epoch": 2.314448236632537, "grad_norm": 1.115136308714282, "learning_rate": 6.977897949766673e-07, "loss": 0.0918, "step": 10172 }, { "epoch": 2.3146757679180885, "grad_norm": 1.097723287992714, "learning_rate": 6.977010509462795e-07, "loss": 0.0925, "step": 10173 }, { "epoch": 2.3149032992036407, "grad_norm": 2.046566107351678, "learning_rate": 6.976123054300153e-07, "loss": 0.0507, "step": 10174 }, { "epoch": 2.3151308304891924, "grad_norm": 1.4087649609051183, "learning_rate": 6.975235584296884e-07, "loss": 0.1052, "step": 10175 }, { "epoch": 2.315358361774744, "grad_norm": 1.6218734642931316, "learning_rate": 6.97434809947113e-07, "loss": 0.0525, "step": 10176 }, { "epoch": 2.315585893060296, "grad_norm": 1.8035795155258856, "learning_rate": 6.973460599841029e-07, "loss": 0.0809, "step": 10177 }, { "epoch": 2.3158134243458477, "grad_norm": 1.6433672023606338, "learning_rate": 6.972573085424715e-07, "loss": 0.0698, "step": 10178 }, { "epoch": 2.3160409556313994, "grad_norm": 2.0510882021247077, "learning_rate": 6.971685556240331e-07, "loss": 0.0596, "step": 10179 }, { "epoch": 2.316268486916951, "grad_norm": 1.2412693951098324, "learning_rate": 6.970798012306018e-07, "loss": 0.0868, "step": 10180 }, { "epoch": 2.316496018202503, "grad_norm": 1.528722328660584, "learning_rate": 6.969910453639912e-07, "loss": 0.1172, "step": 10181 }, { "epoch": 2.3167235494880547, "grad_norm": 2.2801723786674217, "learning_rate": 6.969022880260155e-07, "loss": 0.0233, "step": 10182 }, { "epoch": 2.3169510807736065, "grad_norm": 2.743946029399533, "learning_rate": 6.968135292184889e-07, "loss": 0.0475, "step": 10183 }, { "epoch": 2.317178612059158, "grad_norm": 1.1287020117603226, "learning_rate": 6.967247689432252e-07, "loss": 0.0601, "step": 10184 }, { "epoch": 2.31740614334471, "grad_norm": 2.6164682812761852, "learning_rate": 6.966360072020384e-07, "loss": 0.0769, "step": 10185 }, { "epoch": 2.3176336746302617, "grad_norm": 1.4709909524809845, "learning_rate": 6.965472439967428e-07, "loss": 0.032, "step": 10186 }, { "epoch": 2.3178612059158135, "grad_norm": 1.334787807805681, "learning_rate": 6.964584793291527e-07, "loss": 0.0662, "step": 10187 }, { "epoch": 2.318088737201365, "grad_norm": 1.2806736191782186, "learning_rate": 6.963697132010822e-07, "loss": 0.0579, "step": 10188 }, { "epoch": 2.318316268486917, "grad_norm": 2.1529014904395396, "learning_rate": 6.962809456143453e-07, "loss": 0.0572, "step": 10189 }, { "epoch": 2.3185437997724687, "grad_norm": 1.6323391990780214, "learning_rate": 6.961921765707567e-07, "loss": 0.0922, "step": 10190 }, { "epoch": 2.3187713310580205, "grad_norm": 1.138389230455129, "learning_rate": 6.961034060721303e-07, "loss": 0.0391, "step": 10191 }, { "epoch": 2.318998862343572, "grad_norm": 1.3181284758375673, "learning_rate": 6.960146341202802e-07, "loss": 0.1295, "step": 10192 }, { "epoch": 2.319226393629124, "grad_norm": 1.6807350928785871, "learning_rate": 6.959258607170213e-07, "loss": 0.026, "step": 10193 }, { "epoch": 2.3194539249146757, "grad_norm": 1.26778168705304, "learning_rate": 6.958370858641676e-07, "loss": 0.078, "step": 10194 }, { "epoch": 2.3196814562002275, "grad_norm": 1.2176158924301097, "learning_rate": 6.957483095635335e-07, "loss": 0.0882, "step": 10195 }, { "epoch": 2.3199089874857792, "grad_norm": 1.257297282404605, "learning_rate": 6.956595318169339e-07, "loss": 0.1025, "step": 10196 }, { "epoch": 2.320136518771331, "grad_norm": 1.9543429712640359, "learning_rate": 6.955707526261826e-07, "loss": 0.047, "step": 10197 }, { "epoch": 2.3203640500568827, "grad_norm": 1.6798414963335275, "learning_rate": 6.954819719930944e-07, "loss": 0.0603, "step": 10198 }, { "epoch": 2.3205915813424345, "grad_norm": 1.687521388398465, "learning_rate": 6.953931899194838e-07, "loss": 0.0584, "step": 10199 }, { "epoch": 2.3208191126279862, "grad_norm": 1.8006313152407738, "learning_rate": 6.953044064071653e-07, "loss": 0.0943, "step": 10200 }, { "epoch": 2.321046643913538, "grad_norm": 2.0515418147475764, "learning_rate": 6.952156214579535e-07, "loss": 0.0726, "step": 10201 }, { "epoch": 2.3212741751990897, "grad_norm": 1.4252738231772164, "learning_rate": 6.95126835073663e-07, "loss": 0.0378, "step": 10202 }, { "epoch": 2.3215017064846415, "grad_norm": 1.4816934761450637, "learning_rate": 6.950380472561084e-07, "loss": 0.0311, "step": 10203 }, { "epoch": 2.3217292377701932, "grad_norm": 2.201113295909613, "learning_rate": 6.949492580071044e-07, "loss": 0.0399, "step": 10204 }, { "epoch": 2.321956769055745, "grad_norm": 1.951993390816971, "learning_rate": 6.948604673284655e-07, "loss": 0.1165, "step": 10205 }, { "epoch": 2.3221843003412967, "grad_norm": 1.9014797322372625, "learning_rate": 6.947716752220069e-07, "loss": 0.0814, "step": 10206 }, { "epoch": 2.3224118316268485, "grad_norm": 1.7386795623318834, "learning_rate": 6.946828816895428e-07, "loss": 0.0365, "step": 10207 }, { "epoch": 2.3226393629124003, "grad_norm": 1.4151791465942025, "learning_rate": 6.945940867328883e-07, "loss": 0.0396, "step": 10208 }, { "epoch": 2.3228668941979524, "grad_norm": 2.0806909803120726, "learning_rate": 6.945052903538582e-07, "loss": 0.0498, "step": 10209 }, { "epoch": 2.3230944254835038, "grad_norm": 1.3075092377123654, "learning_rate": 6.944164925542672e-07, "loss": 0.0909, "step": 10210 }, { "epoch": 2.323321956769056, "grad_norm": 2.0010878949395643, "learning_rate": 6.943276933359302e-07, "loss": 0.0453, "step": 10211 }, { "epoch": 2.3235494880546073, "grad_norm": 0.947341885880175, "learning_rate": 6.94238892700662e-07, "loss": 0.0202, "step": 10212 }, { "epoch": 2.3237770193401595, "grad_norm": 1.1837389422504694, "learning_rate": 6.941500906502778e-07, "loss": 0.0261, "step": 10213 }, { "epoch": 2.324004550625711, "grad_norm": 2.0267488048839684, "learning_rate": 6.940612871865924e-07, "loss": 0.0339, "step": 10214 }, { "epoch": 2.324232081911263, "grad_norm": 1.1447099379950065, "learning_rate": 6.939724823114206e-07, "loss": 0.0521, "step": 10215 }, { "epoch": 2.3244596131968147, "grad_norm": 1.1693330043500185, "learning_rate": 6.938836760265778e-07, "loss": 0.0981, "step": 10216 }, { "epoch": 2.3246871444823665, "grad_norm": 1.6897289722976172, "learning_rate": 6.937948683338787e-07, "loss": 0.0825, "step": 10217 }, { "epoch": 2.324914675767918, "grad_norm": 1.2395248507723478, "learning_rate": 6.937060592351386e-07, "loss": 0.0201, "step": 10218 }, { "epoch": 2.32514220705347, "grad_norm": 1.9277027392111943, "learning_rate": 6.936172487321722e-07, "loss": 0.1542, "step": 10219 }, { "epoch": 2.3253697383390217, "grad_norm": 1.2088750775925792, "learning_rate": 6.935284368267951e-07, "loss": 0.0565, "step": 10220 }, { "epoch": 2.3255972696245735, "grad_norm": 1.8880837573650477, "learning_rate": 6.934396235208224e-07, "loss": 0.1229, "step": 10221 }, { "epoch": 2.3258248009101252, "grad_norm": 1.1402610991841549, "learning_rate": 6.933508088160689e-07, "loss": 0.0786, "step": 10222 }, { "epoch": 2.326052332195677, "grad_norm": 1.4819716197064237, "learning_rate": 6.932619927143501e-07, "loss": 0.0479, "step": 10223 }, { "epoch": 2.3262798634812287, "grad_norm": 1.8724274607064795, "learning_rate": 6.931731752174813e-07, "loss": 0.0522, "step": 10224 }, { "epoch": 2.3265073947667805, "grad_norm": 2.7618583509610133, "learning_rate": 6.930843563272774e-07, "loss": 0.0369, "step": 10225 }, { "epoch": 2.3267349260523322, "grad_norm": 1.4961723432110914, "learning_rate": 6.929955360455542e-07, "loss": 0.0653, "step": 10226 }, { "epoch": 2.326962457337884, "grad_norm": 1.2489142942883422, "learning_rate": 6.929067143741267e-07, "loss": 0.0176, "step": 10227 }, { "epoch": 2.3271899886234357, "grad_norm": 1.8688671407267392, "learning_rate": 6.928178913148101e-07, "loss": 0.151, "step": 10228 }, { "epoch": 2.3274175199089875, "grad_norm": 1.2547087381362572, "learning_rate": 6.927290668694203e-07, "loss": 0.0447, "step": 10229 }, { "epoch": 2.3276450511945392, "grad_norm": 2.547317737539808, "learning_rate": 6.926402410397723e-07, "loss": 0.073, "step": 10230 }, { "epoch": 2.327872582480091, "grad_norm": 2.212436434130153, "learning_rate": 6.925514138276816e-07, "loss": 0.1046, "step": 10231 }, { "epoch": 2.3281001137656427, "grad_norm": 1.5109396462937263, "learning_rate": 6.924625852349637e-07, "loss": 0.0639, "step": 10232 }, { "epoch": 2.3283276450511945, "grad_norm": 1.1343876043701355, "learning_rate": 6.923737552634342e-07, "loss": 0.0993, "step": 10233 }, { "epoch": 2.3285551763367462, "grad_norm": 2.1883740880583913, "learning_rate": 6.922849239149087e-07, "loss": 0.0581, "step": 10234 }, { "epoch": 2.328782707622298, "grad_norm": 2.3715039601045573, "learning_rate": 6.921960911912024e-07, "loss": 0.0779, "step": 10235 }, { "epoch": 2.3290102389078498, "grad_norm": 1.1629067708971725, "learning_rate": 6.921072570941311e-07, "loss": 0.0381, "step": 10236 }, { "epoch": 2.3292377701934015, "grad_norm": 1.3144806707499532, "learning_rate": 6.920184216255102e-07, "loss": 0.1268, "step": 10237 }, { "epoch": 2.3294653014789533, "grad_norm": 1.3928998391045477, "learning_rate": 6.919295847871557e-07, "loss": 0.0693, "step": 10238 }, { "epoch": 2.329692832764505, "grad_norm": 2.8158021702923874, "learning_rate": 6.918407465808828e-07, "loss": 0.0889, "step": 10239 }, { "epoch": 2.3299203640500568, "grad_norm": 1.2226379251498047, "learning_rate": 6.917519070085078e-07, "loss": 0.0782, "step": 10240 }, { "epoch": 2.3301478953356085, "grad_norm": 1.1831458582331267, "learning_rate": 6.91663066071846e-07, "loss": 0.0294, "step": 10241 }, { "epoch": 2.3303754266211603, "grad_norm": 1.5032665133997754, "learning_rate": 6.91574223772713e-07, "loss": 0.0239, "step": 10242 }, { "epoch": 2.330602957906712, "grad_norm": 1.7402310819181557, "learning_rate": 6.914853801129249e-07, "loss": 0.0997, "step": 10243 }, { "epoch": 2.3308304891922638, "grad_norm": 0.9440754659422651, "learning_rate": 6.913965350942975e-07, "loss": 0.0656, "step": 10244 }, { "epoch": 2.3310580204778155, "grad_norm": 1.5156113119001404, "learning_rate": 6.913076887186465e-07, "loss": 0.0386, "step": 10245 }, { "epoch": 2.3312855517633673, "grad_norm": 1.1621760685679863, "learning_rate": 6.912188409877876e-07, "loss": 0.0593, "step": 10246 }, { "epoch": 2.331513083048919, "grad_norm": 1.3907589276753458, "learning_rate": 6.91129991903537e-07, "loss": 0.11, "step": 10247 }, { "epoch": 2.331740614334471, "grad_norm": 1.3054115370213695, "learning_rate": 6.910411414677105e-07, "loss": 0.0994, "step": 10248 }, { "epoch": 2.3319681456200225, "grad_norm": 1.5335187923286828, "learning_rate": 6.909522896821239e-07, "loss": 0.0358, "step": 10249 }, { "epoch": 2.3321956769055747, "grad_norm": 1.8568287158029235, "learning_rate": 6.908634365485933e-07, "loss": 0.0282, "step": 10250 }, { "epoch": 2.3324232081911265, "grad_norm": 1.2864840099993982, "learning_rate": 6.907745820689349e-07, "loss": 0.0214, "step": 10251 }, { "epoch": 2.3326507394766782, "grad_norm": 1.634727551940298, "learning_rate": 6.906857262449642e-07, "loss": 0.0866, "step": 10252 }, { "epoch": 2.33287827076223, "grad_norm": 1.2768484301977154, "learning_rate": 6.905968690784978e-07, "loss": 0.0346, "step": 10253 }, { "epoch": 2.3331058020477817, "grad_norm": 1.2471433914175802, "learning_rate": 6.905080105713514e-07, "loss": 0.0364, "step": 10254 }, { "epoch": 2.3333333333333335, "grad_norm": 1.683253554991136, "learning_rate": 6.904191507253412e-07, "loss": 0.148, "step": 10255 }, { "epoch": 2.3335608646188852, "grad_norm": 1.1810479667758818, "learning_rate": 6.903302895422835e-07, "loss": 0.1034, "step": 10256 }, { "epoch": 2.333788395904437, "grad_norm": 1.518684420162794, "learning_rate": 6.902414270239942e-07, "loss": 0.1036, "step": 10257 }, { "epoch": 2.3340159271899887, "grad_norm": 1.767095021711705, "learning_rate": 6.901525631722896e-07, "loss": 0.0405, "step": 10258 }, { "epoch": 2.3342434584755405, "grad_norm": 1.2109482043628774, "learning_rate": 6.900636979889861e-07, "loss": 0.0558, "step": 10259 }, { "epoch": 2.3344709897610922, "grad_norm": 1.2183266640422594, "learning_rate": 6.899748314758997e-07, "loss": 0.0386, "step": 10260 }, { "epoch": 2.334698521046644, "grad_norm": 1.74579132112299, "learning_rate": 6.898859636348468e-07, "loss": 0.0557, "step": 10261 }, { "epoch": 2.3349260523321957, "grad_norm": 2.504356833529423, "learning_rate": 6.897970944676434e-07, "loss": 0.0518, "step": 10262 }, { "epoch": 2.3351535836177475, "grad_norm": 1.5461574518753687, "learning_rate": 6.897082239761063e-07, "loss": 0.1372, "step": 10263 }, { "epoch": 2.3353811149032992, "grad_norm": 1.6835284694355497, "learning_rate": 6.896193521620514e-07, "loss": 0.032, "step": 10264 }, { "epoch": 2.335608646188851, "grad_norm": 2.1364155279289823, "learning_rate": 6.895304790272956e-07, "loss": 0.0638, "step": 10265 }, { "epoch": 2.3358361774744028, "grad_norm": 1.3660435833531537, "learning_rate": 6.894416045736547e-07, "loss": 0.0973, "step": 10266 }, { "epoch": 2.3360637087599545, "grad_norm": 1.4432499747109002, "learning_rate": 6.893527288029456e-07, "loss": 0.0747, "step": 10267 }, { "epoch": 2.3362912400455063, "grad_norm": 1.6956561497830758, "learning_rate": 6.892638517169844e-07, "loss": 0.1693, "step": 10268 }, { "epoch": 2.336518771331058, "grad_norm": 1.6549834682911633, "learning_rate": 6.891749733175879e-07, "loss": 0.0667, "step": 10269 }, { "epoch": 2.3367463026166098, "grad_norm": 1.4037229719452258, "learning_rate": 6.890860936065724e-07, "loss": 0.0892, "step": 10270 }, { "epoch": 2.3369738339021615, "grad_norm": 1.2477080327455767, "learning_rate": 6.889972125857547e-07, "loss": 0.0572, "step": 10271 }, { "epoch": 2.3372013651877133, "grad_norm": 1.813293932133548, "learning_rate": 6.889083302569511e-07, "loss": 0.1296, "step": 10272 }, { "epoch": 2.337428896473265, "grad_norm": 1.719958752451596, "learning_rate": 6.88819446621978e-07, "loss": 0.1362, "step": 10273 }, { "epoch": 2.3376564277588168, "grad_norm": 0.9069044556525777, "learning_rate": 6.887305616826525e-07, "loss": 0.0344, "step": 10274 }, { "epoch": 2.3378839590443685, "grad_norm": 2.093450119488748, "learning_rate": 6.886416754407912e-07, "loss": 0.0539, "step": 10275 }, { "epoch": 2.3381114903299203, "grad_norm": 1.4251842254677067, "learning_rate": 6.885527878982103e-07, "loss": 0.0466, "step": 10276 }, { "epoch": 2.338339021615472, "grad_norm": 1.0877267410202085, "learning_rate": 6.88463899056727e-07, "loss": 0.0587, "step": 10277 }, { "epoch": 2.3385665529010238, "grad_norm": 1.4686012432471598, "learning_rate": 6.883750089181579e-07, "loss": 0.0338, "step": 10278 }, { "epoch": 2.3387940841865755, "grad_norm": 1.3154641679217756, "learning_rate": 6.882861174843194e-07, "loss": 0.0797, "step": 10279 }, { "epoch": 2.3390216154721273, "grad_norm": 1.3542498985255194, "learning_rate": 6.881972247570288e-07, "loss": 0.0888, "step": 10280 }, { "epoch": 2.339249146757679, "grad_norm": 2.303195591240399, "learning_rate": 6.881083307381026e-07, "loss": 0.0479, "step": 10281 }, { "epoch": 2.339476678043231, "grad_norm": 1.4884105157363325, "learning_rate": 6.880194354293577e-07, "loss": 0.1355, "step": 10282 }, { "epoch": 2.3397042093287825, "grad_norm": 1.459211571114225, "learning_rate": 6.879305388326109e-07, "loss": 0.0221, "step": 10283 }, { "epoch": 2.3399317406143343, "grad_norm": 1.8900595358093086, "learning_rate": 6.878416409496793e-07, "loss": 0.1154, "step": 10284 }, { "epoch": 2.3401592718998865, "grad_norm": 1.7133699210458022, "learning_rate": 6.877527417823795e-07, "loss": 0.0684, "step": 10285 }, { "epoch": 2.340386803185438, "grad_norm": 2.351694662533878, "learning_rate": 6.876638413325286e-07, "loss": 0.0569, "step": 10286 }, { "epoch": 2.34061433447099, "grad_norm": 2.4489783252942687, "learning_rate": 6.875749396019435e-07, "loss": 0.0939, "step": 10287 }, { "epoch": 2.3408418657565413, "grad_norm": 2.620683841381813, "learning_rate": 6.874860365924415e-07, "loss": 0.1409, "step": 10288 }, { "epoch": 2.3410693970420935, "grad_norm": 1.9696176755661552, "learning_rate": 6.873971323058389e-07, "loss": 0.1135, "step": 10289 }, { "epoch": 2.3412969283276452, "grad_norm": 1.666515835233714, "learning_rate": 6.873082267439536e-07, "loss": 0.019, "step": 10290 }, { "epoch": 2.341524459613197, "grad_norm": 0.9954620044682266, "learning_rate": 6.87219319908602e-07, "loss": 0.0415, "step": 10291 }, { "epoch": 2.3417519908987487, "grad_norm": 1.2820102800365247, "learning_rate": 6.871304118016015e-07, "loss": 0.0491, "step": 10292 }, { "epoch": 2.3419795221843005, "grad_norm": 1.7366173682816122, "learning_rate": 6.870415024247693e-07, "loss": 0.0473, "step": 10293 }, { "epoch": 2.3422070534698523, "grad_norm": 2.1789347674413646, "learning_rate": 6.869525917799223e-07, "loss": 0.0835, "step": 10294 }, { "epoch": 2.342434584755404, "grad_norm": 1.9513803531887899, "learning_rate": 6.868636798688778e-07, "loss": 0.0376, "step": 10295 }, { "epoch": 2.3426621160409558, "grad_norm": 1.7117804503129268, "learning_rate": 6.867747666934529e-07, "loss": 0.0462, "step": 10296 }, { "epoch": 2.3428896473265075, "grad_norm": 0.9348923089631415, "learning_rate": 6.866858522554652e-07, "loss": 0.0246, "step": 10297 }, { "epoch": 2.3431171786120593, "grad_norm": 1.9836947542230896, "learning_rate": 6.865969365567314e-07, "loss": 0.0833, "step": 10298 }, { "epoch": 2.343344709897611, "grad_norm": 2.112972121619996, "learning_rate": 6.865080195990691e-07, "loss": 0.0271, "step": 10299 }, { "epoch": 2.3435722411831628, "grad_norm": 1.292114056225606, "learning_rate": 6.864191013842955e-07, "loss": 0.0773, "step": 10300 }, { "epoch": 2.3437997724687145, "grad_norm": 1.1551357054172011, "learning_rate": 6.863301819142279e-07, "loss": 0.0215, "step": 10301 }, { "epoch": 2.3440273037542663, "grad_norm": 2.0761561059966183, "learning_rate": 6.862412611906838e-07, "loss": 0.1052, "step": 10302 }, { "epoch": 2.344254835039818, "grad_norm": 1.7150075711051953, "learning_rate": 6.861523392154805e-07, "loss": 0.0392, "step": 10303 }, { "epoch": 2.3444823663253698, "grad_norm": 2.0297239687712425, "learning_rate": 6.860634159904354e-07, "loss": 0.0412, "step": 10304 }, { "epoch": 2.3447098976109215, "grad_norm": 2.3473129194214954, "learning_rate": 6.859744915173658e-07, "loss": 0.1759, "step": 10305 }, { "epoch": 2.3449374288964733, "grad_norm": 0.9927135538329924, "learning_rate": 6.858855657980891e-07, "loss": 0.0623, "step": 10306 }, { "epoch": 2.345164960182025, "grad_norm": 1.0502860816941773, "learning_rate": 6.857966388344232e-07, "loss": 0.0986, "step": 10307 }, { "epoch": 2.345392491467577, "grad_norm": 1.8586572672920925, "learning_rate": 6.857077106281852e-07, "loss": 0.0868, "step": 10308 }, { "epoch": 2.3456200227531285, "grad_norm": 2.8671632145889974, "learning_rate": 6.856187811811929e-07, "loss": 0.05, "step": 10309 }, { "epoch": 2.3458475540386803, "grad_norm": 1.6482087827889116, "learning_rate": 6.855298504952637e-07, "loss": 0.0471, "step": 10310 }, { "epoch": 2.346075085324232, "grad_norm": 1.5900866985357793, "learning_rate": 6.854409185722152e-07, "loss": 0.0516, "step": 10311 }, { "epoch": 2.346302616609784, "grad_norm": 1.0992061635391959, "learning_rate": 6.853519854138652e-07, "loss": 0.0721, "step": 10312 }, { "epoch": 2.3465301478953355, "grad_norm": 1.297847017352353, "learning_rate": 6.852630510220308e-07, "loss": 0.0503, "step": 10313 }, { "epoch": 2.3467576791808873, "grad_norm": 1.2115690449850958, "learning_rate": 6.851741153985302e-07, "loss": 0.039, "step": 10314 }, { "epoch": 2.346985210466439, "grad_norm": 1.9199319473735892, "learning_rate": 6.850851785451809e-07, "loss": 0.0471, "step": 10315 }, { "epoch": 2.347212741751991, "grad_norm": 1.5680436429643736, "learning_rate": 6.849962404638005e-07, "loss": 0.0197, "step": 10316 }, { "epoch": 2.3474402730375425, "grad_norm": 3.0613039449943518, "learning_rate": 6.849073011562069e-07, "loss": 0.0542, "step": 10317 }, { "epoch": 2.3476678043230943, "grad_norm": 1.502510452484883, "learning_rate": 6.848183606242177e-07, "loss": 0.1118, "step": 10318 }, { "epoch": 2.347895335608646, "grad_norm": 1.273131133360515, "learning_rate": 6.847294188696507e-07, "loss": 0.0514, "step": 10319 }, { "epoch": 2.348122866894198, "grad_norm": 1.731481576304848, "learning_rate": 6.84640475894324e-07, "loss": 0.0805, "step": 10320 }, { "epoch": 2.3483503981797496, "grad_norm": 2.4925389495427104, "learning_rate": 6.845515317000551e-07, "loss": 0.0865, "step": 10321 }, { "epoch": 2.3485779294653013, "grad_norm": 1.7572616660739149, "learning_rate": 6.844625862886618e-07, "loss": 0.1315, "step": 10322 }, { "epoch": 2.348805460750853, "grad_norm": 1.8615518240805375, "learning_rate": 6.843736396619622e-07, "loss": 0.0476, "step": 10323 }, { "epoch": 2.3490329920364053, "grad_norm": 1.978938155796057, "learning_rate": 6.842846918217743e-07, "loss": 0.0427, "step": 10324 }, { "epoch": 2.3492605233219566, "grad_norm": 1.796764703808653, "learning_rate": 6.841957427699158e-07, "loss": 0.107, "step": 10325 }, { "epoch": 2.3494880546075088, "grad_norm": 2.291946513424975, "learning_rate": 6.841067925082046e-07, "loss": 0.0971, "step": 10326 }, { "epoch": 2.34971558589306, "grad_norm": 1.3626579338306246, "learning_rate": 6.840178410384591e-07, "loss": 0.0281, "step": 10327 }, { "epoch": 2.3499431171786123, "grad_norm": 1.508767760007627, "learning_rate": 6.839288883624969e-07, "loss": 0.1248, "step": 10328 }, { "epoch": 2.350170648464164, "grad_norm": 1.5004316046603756, "learning_rate": 6.838399344821359e-07, "loss": 0.0231, "step": 10329 }, { "epoch": 2.3503981797497158, "grad_norm": 1.601205357241486, "learning_rate": 6.837509793991946e-07, "loss": 0.1039, "step": 10330 }, { "epoch": 2.3506257110352675, "grad_norm": 1.3673686063479995, "learning_rate": 6.836620231154908e-07, "loss": 0.0402, "step": 10331 }, { "epoch": 2.3508532423208193, "grad_norm": 1.6027769574096573, "learning_rate": 6.835730656328429e-07, "loss": 0.0382, "step": 10332 }, { "epoch": 2.351080773606371, "grad_norm": 1.3406223894071378, "learning_rate": 6.834841069530686e-07, "loss": 0.0342, "step": 10333 }, { "epoch": 2.3513083048919228, "grad_norm": 1.3144468817365327, "learning_rate": 6.833951470779864e-07, "loss": 0.0631, "step": 10334 }, { "epoch": 2.3515358361774745, "grad_norm": 2.0359626840176595, "learning_rate": 6.833061860094142e-07, "loss": 0.065, "step": 10335 }, { "epoch": 2.3517633674630263, "grad_norm": 1.6705499922072669, "learning_rate": 6.832172237491703e-07, "loss": 0.1039, "step": 10336 }, { "epoch": 2.351990898748578, "grad_norm": 1.5039483741569506, "learning_rate": 6.831282602990731e-07, "loss": 0.0581, "step": 10337 }, { "epoch": 2.35221843003413, "grad_norm": 0.8204124970938125, "learning_rate": 6.830392956609406e-07, "loss": 0.0966, "step": 10338 }, { "epoch": 2.3524459613196815, "grad_norm": 2.162296188550802, "learning_rate": 6.829503298365913e-07, "loss": 0.1249, "step": 10339 }, { "epoch": 2.3526734926052333, "grad_norm": 1.4952317784774078, "learning_rate": 6.828613628278433e-07, "loss": 0.0304, "step": 10340 }, { "epoch": 2.352901023890785, "grad_norm": 1.802162035842996, "learning_rate": 6.827723946365153e-07, "loss": 0.1375, "step": 10341 }, { "epoch": 2.353128555176337, "grad_norm": 1.5797018260595177, "learning_rate": 6.826834252644251e-07, "loss": 0.02, "step": 10342 }, { "epoch": 2.3533560864618885, "grad_norm": 1.172831570434394, "learning_rate": 6.825944547133913e-07, "loss": 0.0199, "step": 10343 }, { "epoch": 2.3535836177474403, "grad_norm": 1.5883685356610204, "learning_rate": 6.825054829852323e-07, "loss": 0.0408, "step": 10344 }, { "epoch": 2.353811149032992, "grad_norm": 1.6460657275530246, "learning_rate": 6.824165100817667e-07, "loss": 0.106, "step": 10345 }, { "epoch": 2.354038680318544, "grad_norm": 1.590402614110204, "learning_rate": 6.823275360048126e-07, "loss": 0.0361, "step": 10346 }, { "epoch": 2.3542662116040955, "grad_norm": 1.6117724208524986, "learning_rate": 6.822385607561889e-07, "loss": 0.0455, "step": 10347 }, { "epoch": 2.3544937428896473, "grad_norm": 1.5289786611280218, "learning_rate": 6.821495843377138e-07, "loss": 0.0348, "step": 10348 }, { "epoch": 2.354721274175199, "grad_norm": 1.3615083313526894, "learning_rate": 6.820606067512056e-07, "loss": 0.0348, "step": 10349 }, { "epoch": 2.354948805460751, "grad_norm": 1.1035158333778545, "learning_rate": 6.819716279984833e-07, "loss": 0.0224, "step": 10350 }, { "epoch": 2.3551763367463026, "grad_norm": 1.6127580019064336, "learning_rate": 6.818826480813652e-07, "loss": 0.0402, "step": 10351 }, { "epoch": 2.3554038680318543, "grad_norm": 1.0067043003137657, "learning_rate": 6.8179366700167e-07, "loss": 0.0143, "step": 10352 }, { "epoch": 2.355631399317406, "grad_norm": 2.734268855996695, "learning_rate": 6.817046847612164e-07, "loss": 0.033, "step": 10353 }, { "epoch": 2.355858930602958, "grad_norm": 1.4410312234429459, "learning_rate": 6.816157013618227e-07, "loss": 0.0384, "step": 10354 }, { "epoch": 2.3560864618885096, "grad_norm": 1.3855904304090667, "learning_rate": 6.815267168053078e-07, "loss": 0.0453, "step": 10355 }, { "epoch": 2.3563139931740613, "grad_norm": 2.7515947683773003, "learning_rate": 6.814377310934901e-07, "loss": 0.0574, "step": 10356 }, { "epoch": 2.356541524459613, "grad_norm": 1.4031745709918233, "learning_rate": 6.813487442281888e-07, "loss": 0.0352, "step": 10357 }, { "epoch": 2.356769055745165, "grad_norm": 2.8484808815150218, "learning_rate": 6.812597562112223e-07, "loss": 0.0626, "step": 10358 }, { "epoch": 2.3569965870307166, "grad_norm": 1.534668915501644, "learning_rate": 6.811707670444095e-07, "loss": 0.0402, "step": 10359 }, { "epoch": 2.3572241183162683, "grad_norm": 1.765601543389788, "learning_rate": 6.810817767295691e-07, "loss": 0.103, "step": 10360 }, { "epoch": 2.35745164960182, "grad_norm": 1.7072836316197535, "learning_rate": 6.809927852685198e-07, "loss": 0.0764, "step": 10361 }, { "epoch": 2.357679180887372, "grad_norm": 1.1410970983956845, "learning_rate": 6.809037926630806e-07, "loss": 0.0179, "step": 10362 }, { "epoch": 2.357906712172924, "grad_norm": 0.8803541930038186, "learning_rate": 6.808147989150701e-07, "loss": 0.0799, "step": 10363 }, { "epoch": 2.3581342434584753, "grad_norm": 1.6949544656573008, "learning_rate": 6.807258040263075e-07, "loss": 0.0574, "step": 10364 }, { "epoch": 2.3583617747440275, "grad_norm": 2.6125927975379435, "learning_rate": 6.806368079986114e-07, "loss": 0.0772, "step": 10365 }, { "epoch": 2.358589306029579, "grad_norm": 1.667257128844991, "learning_rate": 6.805478108338009e-07, "loss": 0.1122, "step": 10366 }, { "epoch": 2.358816837315131, "grad_norm": 1.856152188586075, "learning_rate": 6.80458812533695e-07, "loss": 0.0294, "step": 10367 }, { "epoch": 2.359044368600683, "grad_norm": 1.8307329732533564, "learning_rate": 6.803698131001124e-07, "loss": 0.0588, "step": 10368 }, { "epoch": 2.3592718998862345, "grad_norm": 1.489190243200775, "learning_rate": 6.802808125348722e-07, "loss": 0.0632, "step": 10369 }, { "epoch": 2.3594994311717863, "grad_norm": 1.2333545950135139, "learning_rate": 6.801918108397934e-07, "loss": 0.0393, "step": 10370 }, { "epoch": 2.359726962457338, "grad_norm": 2.519109848016765, "learning_rate": 6.801028080166952e-07, "loss": 0.0418, "step": 10371 }, { "epoch": 2.35995449374289, "grad_norm": 1.6748861718812718, "learning_rate": 6.800138040673964e-07, "loss": 0.054, "step": 10372 }, { "epoch": 2.3601820250284415, "grad_norm": 1.822331215936714, "learning_rate": 6.799247989937163e-07, "loss": 0.0662, "step": 10373 }, { "epoch": 2.3604095563139933, "grad_norm": 1.4346477535407098, "learning_rate": 6.798357927974739e-07, "loss": 0.0922, "step": 10374 }, { "epoch": 2.360637087599545, "grad_norm": 1.252795785587945, "learning_rate": 6.797467854804884e-07, "loss": 0.0326, "step": 10375 }, { "epoch": 2.360864618885097, "grad_norm": 1.9896922356110807, "learning_rate": 6.796577770445785e-07, "loss": 0.046, "step": 10376 }, { "epoch": 2.3610921501706486, "grad_norm": 1.064666659792519, "learning_rate": 6.795687674915639e-07, "loss": 0.0476, "step": 10377 }, { "epoch": 2.3613196814562003, "grad_norm": 1.0207304433420497, "learning_rate": 6.794797568232639e-07, "loss": 0.0257, "step": 10378 }, { "epoch": 2.361547212741752, "grad_norm": 1.2416918419794614, "learning_rate": 6.793907450414972e-07, "loss": 0.0181, "step": 10379 }, { "epoch": 2.361774744027304, "grad_norm": 1.4477653807207993, "learning_rate": 6.793017321480834e-07, "loss": 0.0874, "step": 10380 }, { "epoch": 2.3620022753128556, "grad_norm": 1.1817989268786084, "learning_rate": 6.792127181448415e-07, "loss": 0.0337, "step": 10381 }, { "epoch": 2.3622298065984073, "grad_norm": 1.0375656397551531, "learning_rate": 6.791237030335911e-07, "loss": 0.0421, "step": 10382 }, { "epoch": 2.362457337883959, "grad_norm": 1.9502506528063932, "learning_rate": 6.790346868161511e-07, "loss": 0.0506, "step": 10383 }, { "epoch": 2.362684869169511, "grad_norm": 1.5800476766186669, "learning_rate": 6.789456694943413e-07, "loss": 0.1179, "step": 10384 }, { "epoch": 2.3629124004550626, "grad_norm": 0.9469539194251811, "learning_rate": 6.788566510699808e-07, "loss": 0.0319, "step": 10385 }, { "epoch": 2.3631399317406143, "grad_norm": 1.674166648976265, "learning_rate": 6.787676315448887e-07, "loss": 0.046, "step": 10386 }, { "epoch": 2.363367463026166, "grad_norm": 1.3057095334506177, "learning_rate": 6.78678610920885e-07, "loss": 0.0331, "step": 10387 }, { "epoch": 2.363594994311718, "grad_norm": 1.2221485169323887, "learning_rate": 6.785895891997887e-07, "loss": 0.0575, "step": 10388 }, { "epoch": 2.3638225255972696, "grad_norm": 2.47200812817322, "learning_rate": 6.785005663834193e-07, "loss": 0.0595, "step": 10389 }, { "epoch": 2.3640500568828213, "grad_norm": 1.2902949523847893, "learning_rate": 6.784115424735962e-07, "loss": 0.0295, "step": 10390 }, { "epoch": 2.364277588168373, "grad_norm": 2.026623907790253, "learning_rate": 6.783225174721393e-07, "loss": 0.0785, "step": 10391 }, { "epoch": 2.364505119453925, "grad_norm": 1.7188511270636622, "learning_rate": 6.782334913808678e-07, "loss": 0.0746, "step": 10392 }, { "epoch": 2.3647326507394766, "grad_norm": 1.7316607200710685, "learning_rate": 6.781444642016008e-07, "loss": 0.0309, "step": 10393 }, { "epoch": 2.3649601820250283, "grad_norm": 1.555419843322222, "learning_rate": 6.780554359361585e-07, "loss": 0.0505, "step": 10394 }, { "epoch": 2.36518771331058, "grad_norm": 1.0243836687399601, "learning_rate": 6.779664065863605e-07, "loss": 0.0602, "step": 10395 }, { "epoch": 2.365415244596132, "grad_norm": 1.349428242935904, "learning_rate": 6.778773761540258e-07, "loss": 0.0535, "step": 10396 }, { "epoch": 2.3656427758816836, "grad_norm": 1.3894434057799359, "learning_rate": 6.777883446409746e-07, "loss": 0.0621, "step": 10397 }, { "epoch": 2.3658703071672353, "grad_norm": 1.4582759889005104, "learning_rate": 6.776993120490262e-07, "loss": 0.0301, "step": 10398 }, { "epoch": 2.366097838452787, "grad_norm": 1.42088956232628, "learning_rate": 6.776102783800005e-07, "loss": 0.1321, "step": 10399 }, { "epoch": 2.366325369738339, "grad_norm": 1.040231240478876, "learning_rate": 6.775212436357167e-07, "loss": 0.0135, "step": 10400 }, { "epoch": 2.3665529010238906, "grad_norm": 1.441762410375783, "learning_rate": 6.774322078179953e-07, "loss": 0.0632, "step": 10401 }, { "epoch": 2.366780432309443, "grad_norm": 1.7463501720699834, "learning_rate": 6.773431709286554e-07, "loss": 0.0482, "step": 10402 }, { "epoch": 2.367007963594994, "grad_norm": 2.313903667162793, "learning_rate": 6.77254132969517e-07, "loss": 0.0684, "step": 10403 }, { "epoch": 2.3672354948805463, "grad_norm": 1.2809218776804336, "learning_rate": 6.771650939423999e-07, "loss": 0.043, "step": 10404 }, { "epoch": 2.3674630261660976, "grad_norm": 1.405519046760732, "learning_rate": 6.770760538491236e-07, "loss": 0.0632, "step": 10405 }, { "epoch": 2.36769055745165, "grad_norm": 1.18910682344233, "learning_rate": 6.769870126915083e-07, "loss": 0.0183, "step": 10406 }, { "epoch": 2.3679180887372016, "grad_norm": 1.5451868911236262, "learning_rate": 6.768979704713735e-07, "loss": 0.1525, "step": 10407 }, { "epoch": 2.3681456200227533, "grad_norm": 1.954012861557462, "learning_rate": 6.768089271905394e-07, "loss": 0.0648, "step": 10408 }, { "epoch": 2.368373151308305, "grad_norm": 1.9295472963366158, "learning_rate": 6.767198828508257e-07, "loss": 0.0324, "step": 10409 }, { "epoch": 2.368600682593857, "grad_norm": 1.660240641429707, "learning_rate": 6.766308374540523e-07, "loss": 0.0779, "step": 10410 }, { "epoch": 2.3688282138794086, "grad_norm": 1.169389631628708, "learning_rate": 6.76541791002039e-07, "loss": 0.0397, "step": 10411 }, { "epoch": 2.3690557451649603, "grad_norm": 1.8021577785523133, "learning_rate": 6.76452743496606e-07, "loss": 0.0365, "step": 10412 }, { "epoch": 2.369283276450512, "grad_norm": 1.6652405511680066, "learning_rate": 6.763636949395731e-07, "loss": 0.0882, "step": 10413 }, { "epoch": 2.369510807736064, "grad_norm": 1.5091100026402833, "learning_rate": 6.762746453327604e-07, "loss": 0.0486, "step": 10414 }, { "epoch": 2.3697383390216156, "grad_norm": 1.8699053258256377, "learning_rate": 6.761855946779879e-07, "loss": 0.0707, "step": 10415 }, { "epoch": 2.3699658703071673, "grad_norm": 1.5328100039491475, "learning_rate": 6.760965429770755e-07, "loss": 0.0689, "step": 10416 }, { "epoch": 2.370193401592719, "grad_norm": 1.314123434522282, "learning_rate": 6.760074902318435e-07, "loss": 0.0383, "step": 10417 }, { "epoch": 2.370420932878271, "grad_norm": 1.4855379347463653, "learning_rate": 6.759184364441117e-07, "loss": 0.0482, "step": 10418 }, { "epoch": 2.3706484641638226, "grad_norm": 2.670435917113975, "learning_rate": 6.758293816157003e-07, "loss": 0.0643, "step": 10419 }, { "epoch": 2.3708759954493743, "grad_norm": 1.9280635575174545, "learning_rate": 6.757403257484293e-07, "loss": 0.0199, "step": 10420 }, { "epoch": 2.371103526734926, "grad_norm": 0.9549318126905393, "learning_rate": 6.756512688441191e-07, "loss": 0.0355, "step": 10421 }, { "epoch": 2.371331058020478, "grad_norm": 1.332552454350489, "learning_rate": 6.7556221090459e-07, "loss": 0.031, "step": 10422 }, { "epoch": 2.3715585893060296, "grad_norm": 2.215723961040164, "learning_rate": 6.754731519316615e-07, "loss": 0.0522, "step": 10423 }, { "epoch": 2.3717861205915813, "grad_norm": 0.8980813897551289, "learning_rate": 6.753840919271542e-07, "loss": 0.0179, "step": 10424 }, { "epoch": 2.372013651877133, "grad_norm": 2.180420797793543, "learning_rate": 6.752950308928887e-07, "loss": 0.0434, "step": 10425 }, { "epoch": 2.372241183162685, "grad_norm": 1.0978645616605351, "learning_rate": 6.752059688306846e-07, "loss": 0.0436, "step": 10426 }, { "epoch": 2.3724687144482366, "grad_norm": 0.9931325799878876, "learning_rate": 6.751169057423625e-07, "loss": 0.0767, "step": 10427 }, { "epoch": 2.3726962457337883, "grad_norm": 1.5451840499172698, "learning_rate": 6.750278416297426e-07, "loss": 0.1591, "step": 10428 }, { "epoch": 2.37292377701934, "grad_norm": 1.5924635594374823, "learning_rate": 6.749387764946454e-07, "loss": 0.0657, "step": 10429 }, { "epoch": 2.373151308304892, "grad_norm": 2.1149792343170857, "learning_rate": 6.748497103388908e-07, "loss": 0.0922, "step": 10430 }, { "epoch": 2.3733788395904436, "grad_norm": 1.7817157981695644, "learning_rate": 6.747606431642996e-07, "loss": 0.0616, "step": 10431 }, { "epoch": 2.3736063708759954, "grad_norm": 1.1096939510401005, "learning_rate": 6.746715749726921e-07, "loss": 0.0148, "step": 10432 }, { "epoch": 2.373833902161547, "grad_norm": 2.0019869696931742, "learning_rate": 6.745825057658884e-07, "loss": 0.0595, "step": 10433 }, { "epoch": 2.374061433447099, "grad_norm": 1.2718307936644717, "learning_rate": 6.744934355457089e-07, "loss": 0.093, "step": 10434 }, { "epoch": 2.3742889647326506, "grad_norm": 1.3919586686320002, "learning_rate": 6.744043643139746e-07, "loss": 0.0363, "step": 10435 }, { "epoch": 2.3745164960182024, "grad_norm": 1.5809566609156156, "learning_rate": 6.743152920725054e-07, "loss": 0.0811, "step": 10436 }, { "epoch": 2.374744027303754, "grad_norm": 3.036678376933095, "learning_rate": 6.742262188231219e-07, "loss": 0.0378, "step": 10437 }, { "epoch": 2.374971558589306, "grad_norm": 1.8720444851573026, "learning_rate": 6.741371445676448e-07, "loss": 0.12, "step": 10438 }, { "epoch": 2.3751990898748576, "grad_norm": 1.5379917535938616, "learning_rate": 6.740480693078944e-07, "loss": 0.0351, "step": 10439 }, { "epoch": 2.3754266211604094, "grad_norm": 0.9430200124299626, "learning_rate": 6.739589930456911e-07, "loss": 0.0501, "step": 10440 }, { "epoch": 2.3756541524459616, "grad_norm": 1.9896257764938758, "learning_rate": 6.738699157828558e-07, "loss": 0.1009, "step": 10441 }, { "epoch": 2.375881683731513, "grad_norm": 2.018242344845083, "learning_rate": 6.737808375212091e-07, "loss": 0.053, "step": 10442 }, { "epoch": 2.376109215017065, "grad_norm": 1.584315374444164, "learning_rate": 6.73691758262571e-07, "loss": 0.0605, "step": 10443 }, { "epoch": 2.3763367463026164, "grad_norm": 1.6702742440286638, "learning_rate": 6.736026780087627e-07, "loss": 0.0228, "step": 10444 }, { "epoch": 2.3765642775881686, "grad_norm": 1.6115908004236295, "learning_rate": 6.735135967616048e-07, "loss": 0.0353, "step": 10445 }, { "epoch": 2.3767918088737203, "grad_norm": 2.2811883410106764, "learning_rate": 6.734245145229179e-07, "loss": 0.0676, "step": 10446 }, { "epoch": 2.377019340159272, "grad_norm": 2.3498223584151003, "learning_rate": 6.733354312945223e-07, "loss": 0.0503, "step": 10447 }, { "epoch": 2.377246871444824, "grad_norm": 1.445057700877811, "learning_rate": 6.732463470782394e-07, "loss": 0.1034, "step": 10448 }, { "epoch": 2.3774744027303756, "grad_norm": 2.1756792209653697, "learning_rate": 6.731572618758893e-07, "loss": 0.0487, "step": 10449 }, { "epoch": 2.3777019340159273, "grad_norm": 2.2820082997636826, "learning_rate": 6.730681756892929e-07, "loss": 0.0959, "step": 10450 }, { "epoch": 2.377929465301479, "grad_norm": 1.5032639741412177, "learning_rate": 6.729790885202712e-07, "loss": 0.0295, "step": 10451 }, { "epoch": 2.378156996587031, "grad_norm": 1.780461004855868, "learning_rate": 6.728900003706446e-07, "loss": 0.0498, "step": 10452 }, { "epoch": 2.3783845278725826, "grad_norm": 2.2662441775377253, "learning_rate": 6.728009112422341e-07, "loss": 0.0367, "step": 10453 }, { "epoch": 2.3786120591581343, "grad_norm": 2.067235913331479, "learning_rate": 6.727118211368607e-07, "loss": 0.0735, "step": 10454 }, { "epoch": 2.378839590443686, "grad_norm": 1.4605393416349512, "learning_rate": 6.72622730056345e-07, "loss": 0.0544, "step": 10455 }, { "epoch": 2.379067121729238, "grad_norm": 2.390078839111463, "learning_rate": 6.725336380025078e-07, "loss": 0.0907, "step": 10456 }, { "epoch": 2.3792946530147896, "grad_norm": 1.2800789058267308, "learning_rate": 6.724445449771702e-07, "loss": 0.045, "step": 10457 }, { "epoch": 2.3795221843003413, "grad_norm": 1.2874977725325683, "learning_rate": 6.72355450982153e-07, "loss": 0.0929, "step": 10458 }, { "epoch": 2.379749715585893, "grad_norm": 1.4661100617335616, "learning_rate": 6.72266356019277e-07, "loss": 0.1, "step": 10459 }, { "epoch": 2.379977246871445, "grad_norm": 1.396837121599532, "learning_rate": 6.721772600903634e-07, "loss": 0.1244, "step": 10460 }, { "epoch": 2.3802047781569966, "grad_norm": 1.2305615808874142, "learning_rate": 6.720881631972328e-07, "loss": 0.0425, "step": 10461 }, { "epoch": 2.3804323094425484, "grad_norm": 1.3978490997285564, "learning_rate": 6.719990653417066e-07, "loss": 0.0828, "step": 10462 }, { "epoch": 2.3806598407281, "grad_norm": 0.8497783443439443, "learning_rate": 6.719099665256056e-07, "loss": 0.0226, "step": 10463 }, { "epoch": 2.380887372013652, "grad_norm": 1.4514177373432697, "learning_rate": 6.718208667507506e-07, "loss": 0.0559, "step": 10464 }, { "epoch": 2.3811149032992036, "grad_norm": 2.480240492112608, "learning_rate": 6.717317660189629e-07, "loss": 0.048, "step": 10465 }, { "epoch": 2.3813424345847554, "grad_norm": 2.270491728647551, "learning_rate": 6.716426643320635e-07, "loss": 0.1405, "step": 10466 }, { "epoch": 2.381569965870307, "grad_norm": 2.518922614754743, "learning_rate": 6.715535616918735e-07, "loss": 0.1303, "step": 10467 }, { "epoch": 2.381797497155859, "grad_norm": 2.4748624828029095, "learning_rate": 6.714644581002139e-07, "loss": 0.0945, "step": 10468 }, { "epoch": 2.3820250284414106, "grad_norm": 1.2874037769841746, "learning_rate": 6.71375353558906e-07, "loss": 0.0298, "step": 10469 }, { "epoch": 2.3822525597269624, "grad_norm": 1.781652751677099, "learning_rate": 6.712862480697705e-07, "loss": 0.0323, "step": 10470 }, { "epoch": 2.382480091012514, "grad_norm": 1.3399476686901257, "learning_rate": 6.711971416346291e-07, "loss": 0.0251, "step": 10471 }, { "epoch": 2.382707622298066, "grad_norm": 1.089469592931769, "learning_rate": 6.711080342553027e-07, "loss": 0.0643, "step": 10472 }, { "epoch": 2.3829351535836176, "grad_norm": 1.4013619938263189, "learning_rate": 6.710189259336125e-07, "loss": 0.0922, "step": 10473 }, { "epoch": 2.3831626848691694, "grad_norm": 3.376435801233772, "learning_rate": 6.709298166713799e-07, "loss": 0.1387, "step": 10474 }, { "epoch": 2.383390216154721, "grad_norm": 1.9844579581141586, "learning_rate": 6.708407064704258e-07, "loss": 0.0767, "step": 10475 }, { "epoch": 2.383617747440273, "grad_norm": 1.9453254227786483, "learning_rate": 6.707515953325716e-07, "loss": 0.0635, "step": 10476 }, { "epoch": 2.3838452787258246, "grad_norm": 1.452221262310961, "learning_rate": 6.706624832596385e-07, "loss": 0.0494, "step": 10477 }, { "epoch": 2.3840728100113764, "grad_norm": 1.38605309294856, "learning_rate": 6.70573370253448e-07, "loss": 0.0276, "step": 10478 }, { "epoch": 2.384300341296928, "grad_norm": 1.7034378285942493, "learning_rate": 6.704842563158214e-07, "loss": 0.0224, "step": 10479 }, { "epoch": 2.3845278725824803, "grad_norm": 1.021182659891657, "learning_rate": 6.703951414485796e-07, "loss": 0.0162, "step": 10480 }, { "epoch": 2.3847554038680316, "grad_norm": 1.3913943329450087, "learning_rate": 6.703060256535445e-07, "loss": 0.0785, "step": 10481 }, { "epoch": 2.384982935153584, "grad_norm": 1.5931390005406347, "learning_rate": 6.702169089325371e-07, "loss": 0.1032, "step": 10482 }, { "epoch": 2.385210466439135, "grad_norm": 1.8369869104956416, "learning_rate": 6.70127791287379e-07, "loss": 0.0667, "step": 10483 }, { "epoch": 2.3854379977246873, "grad_norm": 1.7301862270354487, "learning_rate": 6.700386727198911e-07, "loss": 0.041, "step": 10484 }, { "epoch": 2.385665529010239, "grad_norm": 2.1651615466332315, "learning_rate": 6.699495532318957e-07, "loss": 0.0995, "step": 10485 }, { "epoch": 2.385893060295791, "grad_norm": 1.3938195707799108, "learning_rate": 6.698604328252137e-07, "loss": 0.0856, "step": 10486 }, { "epoch": 2.3861205915813426, "grad_norm": 1.7030373623693367, "learning_rate": 6.697713115016663e-07, "loss": 0.0881, "step": 10487 }, { "epoch": 2.3863481228668944, "grad_norm": 1.8946035407113386, "learning_rate": 6.696821892630754e-07, "loss": 0.0395, "step": 10488 }, { "epoch": 2.386575654152446, "grad_norm": 1.7859831641223316, "learning_rate": 6.695930661112625e-07, "loss": 0.0516, "step": 10489 }, { "epoch": 2.386803185437998, "grad_norm": 2.086999207048147, "learning_rate": 6.69503942048049e-07, "loss": 0.0306, "step": 10490 }, { "epoch": 2.3870307167235496, "grad_norm": 1.2645238222756374, "learning_rate": 6.694148170752562e-07, "loss": 0.0535, "step": 10491 }, { "epoch": 2.3872582480091014, "grad_norm": 1.28426810874403, "learning_rate": 6.693256911947063e-07, "loss": 0.0998, "step": 10492 }, { "epoch": 2.387485779294653, "grad_norm": 1.3537016819729306, "learning_rate": 6.692365644082202e-07, "loss": 0.0531, "step": 10493 }, { "epoch": 2.387713310580205, "grad_norm": 2.83060355995732, "learning_rate": 6.691474367176195e-07, "loss": 0.102, "step": 10494 }, { "epoch": 2.3879408418657566, "grad_norm": 1.4814180587134658, "learning_rate": 6.690583081247264e-07, "loss": 0.0652, "step": 10495 }, { "epoch": 2.3881683731513084, "grad_norm": 1.7368911499047102, "learning_rate": 6.68969178631362e-07, "loss": 0.0802, "step": 10496 }, { "epoch": 2.38839590443686, "grad_norm": 2.580003746707147, "learning_rate": 6.688800482393481e-07, "loss": 0.0751, "step": 10497 }, { "epoch": 2.388623435722412, "grad_norm": 1.6310291695861174, "learning_rate": 6.687909169505066e-07, "loss": 0.038, "step": 10498 }, { "epoch": 2.3888509670079636, "grad_norm": 2.1723786955879385, "learning_rate": 6.687017847666588e-07, "loss": 0.1669, "step": 10499 }, { "epoch": 2.3890784982935154, "grad_norm": 1.666439442594346, "learning_rate": 6.686126516896266e-07, "loss": 0.0962, "step": 10500 }, { "epoch": 2.389306029579067, "grad_norm": 2.4965699284748966, "learning_rate": 6.685235177212315e-07, "loss": 0.1345, "step": 10501 }, { "epoch": 2.389533560864619, "grad_norm": 1.2904438567954284, "learning_rate": 6.684343828632957e-07, "loss": 0.0433, "step": 10502 }, { "epoch": 2.3897610921501706, "grad_norm": 1.6373753924522154, "learning_rate": 6.683452471176405e-07, "loss": 0.1001, "step": 10503 }, { "epoch": 2.3899886234357224, "grad_norm": 1.7248619499584223, "learning_rate": 6.682561104860878e-07, "loss": 0.1004, "step": 10504 }, { "epoch": 2.390216154721274, "grad_norm": 1.4465268197949492, "learning_rate": 6.681669729704595e-07, "loss": 0.0779, "step": 10505 }, { "epoch": 2.390443686006826, "grad_norm": 1.7436657794943984, "learning_rate": 6.680778345725773e-07, "loss": 0.0303, "step": 10506 }, { "epoch": 2.3906712172923776, "grad_norm": 1.5225262043163035, "learning_rate": 6.679886952942629e-07, "loss": 0.0795, "step": 10507 }, { "epoch": 2.3908987485779294, "grad_norm": 1.329757770233145, "learning_rate": 6.678995551373385e-07, "loss": 0.0397, "step": 10508 }, { "epoch": 2.391126279863481, "grad_norm": 1.339207113075806, "learning_rate": 6.678104141036257e-07, "loss": 0.0317, "step": 10509 }, { "epoch": 2.391353811149033, "grad_norm": 1.2227312515171715, "learning_rate": 6.677212721949464e-07, "loss": 0.0318, "step": 10510 }, { "epoch": 2.3915813424345846, "grad_norm": 1.3724822590412142, "learning_rate": 6.676321294131226e-07, "loss": 0.0742, "step": 10511 }, { "epoch": 2.3918088737201364, "grad_norm": 1.560545283718502, "learning_rate": 6.675429857599762e-07, "loss": 0.1334, "step": 10512 }, { "epoch": 2.392036405005688, "grad_norm": 1.8795699309375529, "learning_rate": 6.674538412373289e-07, "loss": 0.032, "step": 10513 }, { "epoch": 2.39226393629124, "grad_norm": 2.829895580966993, "learning_rate": 6.673646958470029e-07, "loss": 0.153, "step": 10514 }, { "epoch": 2.3924914675767917, "grad_norm": 1.569501158041787, "learning_rate": 6.672755495908202e-07, "loss": 0.0261, "step": 10515 }, { "epoch": 2.3927189988623434, "grad_norm": 1.522202430622725, "learning_rate": 6.671864024706027e-07, "loss": 0.0793, "step": 10516 }, { "epoch": 2.392946530147895, "grad_norm": 4.090585940030713, "learning_rate": 6.670972544881723e-07, "loss": 0.0648, "step": 10517 }, { "epoch": 2.393174061433447, "grad_norm": 1.495096016038974, "learning_rate": 6.670081056453512e-07, "loss": 0.0507, "step": 10518 }, { "epoch": 2.393401592718999, "grad_norm": 2.71488569789709, "learning_rate": 6.669189559439613e-07, "loss": 0.0832, "step": 10519 }, { "epoch": 2.3936291240045504, "grad_norm": 2.179825439345257, "learning_rate": 6.668298053858248e-07, "loss": 0.0469, "step": 10520 }, { "epoch": 2.3938566552901026, "grad_norm": 1.2228033131349032, "learning_rate": 6.667406539727634e-07, "loss": 0.091, "step": 10521 }, { "epoch": 2.394084186575654, "grad_norm": 2.352962433677147, "learning_rate": 6.666515017065997e-07, "loss": 0.0412, "step": 10522 }, { "epoch": 2.394311717861206, "grad_norm": 2.3997551364175105, "learning_rate": 6.665623485891558e-07, "loss": 0.0582, "step": 10523 }, { "epoch": 2.394539249146758, "grad_norm": 1.1027134450061242, "learning_rate": 6.664731946222531e-07, "loss": 0.0206, "step": 10524 }, { "epoch": 2.3947667804323096, "grad_norm": 2.4549027128997642, "learning_rate": 6.663840398077146e-07, "loss": 0.0472, "step": 10525 }, { "epoch": 2.3949943117178614, "grad_norm": 2.285795803069614, "learning_rate": 6.662948841473621e-07, "loss": 0.1502, "step": 10526 }, { "epoch": 2.395221843003413, "grad_norm": 2.1733632351365633, "learning_rate": 6.662057276430179e-07, "loss": 0.1195, "step": 10527 }, { "epoch": 2.395449374288965, "grad_norm": 1.6885301323046775, "learning_rate": 6.661165702965037e-07, "loss": 0.1168, "step": 10528 }, { "epoch": 2.3956769055745166, "grad_norm": 1.5015771246663587, "learning_rate": 6.660274121096425e-07, "loss": 0.0375, "step": 10529 }, { "epoch": 2.3959044368600684, "grad_norm": 3.086439361778395, "learning_rate": 6.659382530842561e-07, "loss": 0.0481, "step": 10530 }, { "epoch": 2.39613196814562, "grad_norm": 0.9911636574779332, "learning_rate": 6.658490932221664e-07, "loss": 0.0445, "step": 10531 }, { "epoch": 2.396359499431172, "grad_norm": 1.0139124781259454, "learning_rate": 6.657599325251964e-07, "loss": 0.0703, "step": 10532 }, { "epoch": 2.3965870307167236, "grad_norm": 2.627123214882737, "learning_rate": 6.656707709951679e-07, "loss": 0.1473, "step": 10533 }, { "epoch": 2.3968145620022754, "grad_norm": 1.5488525003364622, "learning_rate": 6.655816086339033e-07, "loss": 0.0444, "step": 10534 }, { "epoch": 2.397042093287827, "grad_norm": 1.7357048098095746, "learning_rate": 6.654924454432251e-07, "loss": 0.1388, "step": 10535 }, { "epoch": 2.397269624573379, "grad_norm": 1.8321173442713896, "learning_rate": 6.654032814249554e-07, "loss": 0.0781, "step": 10536 }, { "epoch": 2.3974971558589306, "grad_norm": 1.5942316650306552, "learning_rate": 6.653141165809166e-07, "loss": 0.0406, "step": 10537 }, { "epoch": 2.3977246871444824, "grad_norm": 2.6834686068069757, "learning_rate": 6.65224950912931e-07, "loss": 0.1056, "step": 10538 }, { "epoch": 2.397952218430034, "grad_norm": 1.769237068832356, "learning_rate": 6.651357844228213e-07, "loss": 0.0813, "step": 10539 }, { "epoch": 2.398179749715586, "grad_norm": 1.5233688630082565, "learning_rate": 6.650466171124094e-07, "loss": 0.0386, "step": 10540 }, { "epoch": 2.3984072810011376, "grad_norm": 2.008063039797502, "learning_rate": 6.649574489835181e-07, "loss": 0.0273, "step": 10541 }, { "epoch": 2.3986348122866894, "grad_norm": 1.6343752435695706, "learning_rate": 6.648682800379698e-07, "loss": 0.0331, "step": 10542 }, { "epoch": 2.398862343572241, "grad_norm": 1.1926677364645177, "learning_rate": 6.647791102775869e-07, "loss": 0.0404, "step": 10543 }, { "epoch": 2.399089874857793, "grad_norm": 1.5172629638216661, "learning_rate": 6.646899397041915e-07, "loss": 0.0912, "step": 10544 }, { "epoch": 2.3993174061433447, "grad_norm": 1.5887104653749333, "learning_rate": 6.646007683196068e-07, "loss": 0.097, "step": 10545 }, { "epoch": 2.3995449374288964, "grad_norm": 1.52999208817766, "learning_rate": 6.645115961256549e-07, "loss": 0.0364, "step": 10546 }, { "epoch": 2.399772468714448, "grad_norm": 1.6407741258520294, "learning_rate": 6.644224231241582e-07, "loss": 0.1035, "step": 10547 }, { "epoch": 2.4, "grad_norm": 1.3227732207379344, "learning_rate": 6.643332493169393e-07, "loss": 0.0594, "step": 10548 }, { "epoch": 2.4002275312855517, "grad_norm": 1.4032556973087806, "learning_rate": 6.642440747058209e-07, "loss": 0.0276, "step": 10549 }, { "epoch": 2.4004550625711034, "grad_norm": 1.5628829654489371, "learning_rate": 6.641548992926256e-07, "loss": 0.071, "step": 10550 }, { "epoch": 2.400682593856655, "grad_norm": 2.642593705218311, "learning_rate": 6.640657230791757e-07, "loss": 0.0606, "step": 10551 }, { "epoch": 2.400910125142207, "grad_norm": 2.257245286182545, "learning_rate": 6.639765460672941e-07, "loss": 0.0452, "step": 10552 }, { "epoch": 2.4011376564277587, "grad_norm": 1.09570693200126, "learning_rate": 6.638873682588032e-07, "loss": 0.0459, "step": 10553 }, { "epoch": 2.4013651877133104, "grad_norm": 1.3975264910232386, "learning_rate": 6.637981896555257e-07, "loss": 0.0388, "step": 10554 }, { "epoch": 2.401592718998862, "grad_norm": 1.835536891122451, "learning_rate": 6.637090102592843e-07, "loss": 0.1115, "step": 10555 }, { "epoch": 2.401820250284414, "grad_norm": 2.040239846492686, "learning_rate": 6.636198300719017e-07, "loss": 0.0594, "step": 10556 }, { "epoch": 2.4020477815699657, "grad_norm": 1.9975693704452562, "learning_rate": 6.635306490952003e-07, "loss": 0.1658, "step": 10557 }, { "epoch": 2.402275312855518, "grad_norm": 1.923275401920876, "learning_rate": 6.63441467331003e-07, "loss": 0.1388, "step": 10558 }, { "epoch": 2.402502844141069, "grad_norm": 1.2983548889372092, "learning_rate": 6.633522847811327e-07, "loss": 0.0808, "step": 10559 }, { "epoch": 2.4027303754266214, "grad_norm": 1.236313620739703, "learning_rate": 6.63263101447412e-07, "loss": 0.091, "step": 10560 }, { "epoch": 2.4029579067121727, "grad_norm": 1.5806673071769493, "learning_rate": 6.631739173316634e-07, "loss": 0.0402, "step": 10561 }, { "epoch": 2.403185437997725, "grad_norm": 1.6003664986803758, "learning_rate": 6.630847324357098e-07, "loss": 0.1065, "step": 10562 }, { "epoch": 2.4034129692832766, "grad_norm": 1.2945758422502862, "learning_rate": 6.629955467613741e-07, "loss": 0.0235, "step": 10563 }, { "epoch": 2.4036405005688284, "grad_norm": 1.5211462068444157, "learning_rate": 6.629063603104789e-07, "loss": 0.0418, "step": 10564 }, { "epoch": 2.40386803185438, "grad_norm": 1.2126875345668893, "learning_rate": 6.628171730848474e-07, "loss": 0.0695, "step": 10565 }, { "epoch": 2.404095563139932, "grad_norm": 2.57120009931569, "learning_rate": 6.62727985086302e-07, "loss": 0.0576, "step": 10566 }, { "epoch": 2.4043230944254836, "grad_norm": 3.2348915583257583, "learning_rate": 6.626387963166655e-07, "loss": 0.0391, "step": 10567 }, { "epoch": 2.4045506257110354, "grad_norm": 1.7454433517441201, "learning_rate": 6.625496067777612e-07, "loss": 0.0921, "step": 10568 }, { "epoch": 2.404778156996587, "grad_norm": 2.321262013510454, "learning_rate": 6.624604164714115e-07, "loss": 0.0483, "step": 10569 }, { "epoch": 2.405005688282139, "grad_norm": 1.7997980777352869, "learning_rate": 6.623712253994397e-07, "loss": 0.031, "step": 10570 }, { "epoch": 2.4052332195676907, "grad_norm": 1.3828194024201597, "learning_rate": 6.622820335636683e-07, "loss": 0.0326, "step": 10571 }, { "epoch": 2.4054607508532424, "grad_norm": 1.7488919911490366, "learning_rate": 6.621928409659204e-07, "loss": 0.0156, "step": 10572 }, { "epoch": 2.405688282138794, "grad_norm": 1.7382412068729043, "learning_rate": 6.621036476080191e-07, "loss": 0.0979, "step": 10573 }, { "epoch": 2.405915813424346, "grad_norm": 1.6281716241666897, "learning_rate": 6.620144534917872e-07, "loss": 0.0827, "step": 10574 }, { "epoch": 2.4061433447098977, "grad_norm": 1.2394446083517534, "learning_rate": 6.619252586190477e-07, "loss": 0.0504, "step": 10575 }, { "epoch": 2.4063708759954494, "grad_norm": 1.1996299911059907, "learning_rate": 6.618360629916233e-07, "loss": 0.0089, "step": 10576 }, { "epoch": 2.406598407281001, "grad_norm": 1.6445535839940941, "learning_rate": 6.617468666113375e-07, "loss": 0.0632, "step": 10577 }, { "epoch": 2.406825938566553, "grad_norm": 1.7208377523448997, "learning_rate": 6.616576694800126e-07, "loss": 0.0335, "step": 10578 }, { "epoch": 2.4070534698521047, "grad_norm": 1.2991586455146025, "learning_rate": 6.615684715994725e-07, "loss": 0.0504, "step": 10579 }, { "epoch": 2.4072810011376564, "grad_norm": 2.387057251902132, "learning_rate": 6.614792729715398e-07, "loss": 0.0278, "step": 10580 }, { "epoch": 2.407508532423208, "grad_norm": 1.1634268950453206, "learning_rate": 6.613900735980374e-07, "loss": 0.0124, "step": 10581 }, { "epoch": 2.40773606370876, "grad_norm": 1.49682262983672, "learning_rate": 6.613008734807886e-07, "loss": 0.1172, "step": 10582 }, { "epoch": 2.4079635949943117, "grad_norm": 1.4685646699587809, "learning_rate": 6.612116726216164e-07, "loss": 0.0661, "step": 10583 }, { "epoch": 2.4081911262798634, "grad_norm": 1.5513479030600477, "learning_rate": 6.611224710223441e-07, "loss": 0.103, "step": 10584 }, { "epoch": 2.408418657565415, "grad_norm": 1.466169056624238, "learning_rate": 6.610332686847944e-07, "loss": 0.0416, "step": 10585 }, { "epoch": 2.408646188850967, "grad_norm": 2.137838763287238, "learning_rate": 6.609440656107909e-07, "loss": 0.0266, "step": 10586 }, { "epoch": 2.4088737201365187, "grad_norm": 1.5612872060850345, "learning_rate": 6.608548618021563e-07, "loss": 0.0277, "step": 10587 }, { "epoch": 2.4091012514220704, "grad_norm": 1.2888637789751431, "learning_rate": 6.60765657260714e-07, "loss": 0.0692, "step": 10588 }, { "epoch": 2.409328782707622, "grad_norm": 2.2910864201785692, "learning_rate": 6.606764519882874e-07, "loss": 0.0475, "step": 10589 }, { "epoch": 2.409556313993174, "grad_norm": 1.5458940848749017, "learning_rate": 6.605872459866993e-07, "loss": 0.0357, "step": 10590 }, { "epoch": 2.4097838452787257, "grad_norm": 1.8092381758797624, "learning_rate": 6.604980392577729e-07, "loss": 0.0737, "step": 10591 }, { "epoch": 2.4100113765642774, "grad_norm": 0.8472770721012022, "learning_rate": 6.604088318033321e-07, "loss": 0.0302, "step": 10592 }, { "epoch": 2.410238907849829, "grad_norm": 1.4934119759903768, "learning_rate": 6.603196236251993e-07, "loss": 0.0567, "step": 10593 }, { "epoch": 2.410466439135381, "grad_norm": 1.37423121528308, "learning_rate": 6.60230414725198e-07, "loss": 0.0387, "step": 10594 }, { "epoch": 2.4106939704209327, "grad_norm": 1.9350754144067868, "learning_rate": 6.601412051051516e-07, "loss": 0.0237, "step": 10595 }, { "epoch": 2.4109215017064844, "grad_norm": 2.5387441409983396, "learning_rate": 6.600519947668835e-07, "loss": 0.093, "step": 10596 }, { "epoch": 2.4111490329920366, "grad_norm": 1.3363002317558488, "learning_rate": 6.599627837122167e-07, "loss": 0.0593, "step": 10597 }, { "epoch": 2.411376564277588, "grad_norm": 2.3969188016027974, "learning_rate": 6.598735719429744e-07, "loss": 0.0512, "step": 10598 }, { "epoch": 2.41160409556314, "grad_norm": 1.609682451610226, "learning_rate": 6.597843594609806e-07, "loss": 0.0519, "step": 10599 }, { "epoch": 2.4118316268486915, "grad_norm": 2.072723128432861, "learning_rate": 6.59695146268058e-07, "loss": 0.1232, "step": 10600 }, { "epoch": 2.4120591581342437, "grad_norm": 1.817786471435142, "learning_rate": 6.596059323660299e-07, "loss": 0.1908, "step": 10601 }, { "epoch": 2.4122866894197954, "grad_norm": 1.4728070130617423, "learning_rate": 6.595167177567203e-07, "loss": 0.0191, "step": 10602 }, { "epoch": 2.412514220705347, "grad_norm": 1.842650549871717, "learning_rate": 6.594275024419521e-07, "loss": 0.0615, "step": 10603 }, { "epoch": 2.412741751990899, "grad_norm": 1.130832928406942, "learning_rate": 6.593382864235487e-07, "loss": 0.0198, "step": 10604 }, { "epoch": 2.4129692832764507, "grad_norm": 1.4138956609023778, "learning_rate": 6.592490697033337e-07, "loss": 0.0588, "step": 10605 }, { "epoch": 2.4131968145620024, "grad_norm": 1.6261557637810253, "learning_rate": 6.591598522831303e-07, "loss": 0.0368, "step": 10606 }, { "epoch": 2.413424345847554, "grad_norm": 0.803953439992238, "learning_rate": 6.590706341647623e-07, "loss": 0.0214, "step": 10607 }, { "epoch": 2.413651877133106, "grad_norm": 1.45102004082111, "learning_rate": 6.589814153500527e-07, "loss": 0.0688, "step": 10608 }, { "epoch": 2.4138794084186577, "grad_norm": 1.3071350386826537, "learning_rate": 6.588921958408254e-07, "loss": 0.1016, "step": 10609 }, { "epoch": 2.4141069397042094, "grad_norm": 1.561782085640104, "learning_rate": 6.588029756389037e-07, "loss": 0.0725, "step": 10610 }, { "epoch": 2.414334470989761, "grad_norm": 1.1935765970410221, "learning_rate": 6.587137547461108e-07, "loss": 0.0375, "step": 10611 }, { "epoch": 2.414562002275313, "grad_norm": 1.9599412696011829, "learning_rate": 6.586245331642707e-07, "loss": 0.1582, "step": 10612 }, { "epoch": 2.4147895335608647, "grad_norm": 2.8304494698055827, "learning_rate": 6.585353108952068e-07, "loss": 0.0454, "step": 10613 }, { "epoch": 2.4150170648464164, "grad_norm": 1.7453458925600094, "learning_rate": 6.584460879407425e-07, "loss": 0.0909, "step": 10614 }, { "epoch": 2.415244596131968, "grad_norm": 1.759146367940718, "learning_rate": 6.583568643027012e-07, "loss": 0.0695, "step": 10615 }, { "epoch": 2.41547212741752, "grad_norm": 1.621949265713317, "learning_rate": 6.58267639982907e-07, "loss": 0.0415, "step": 10616 }, { "epoch": 2.4156996587030717, "grad_norm": 2.39749211543786, "learning_rate": 6.58178414983183e-07, "loss": 0.0464, "step": 10617 }, { "epoch": 2.4159271899886234, "grad_norm": 1.6038931260049425, "learning_rate": 6.58089189305353e-07, "loss": 0.0539, "step": 10618 }, { "epoch": 2.416154721274175, "grad_norm": 1.0756461489983145, "learning_rate": 6.579999629512407e-07, "loss": 0.0148, "step": 10619 }, { "epoch": 2.416382252559727, "grad_norm": 1.540697181124578, "learning_rate": 6.579107359226695e-07, "loss": 0.0189, "step": 10620 }, { "epoch": 2.4166097838452787, "grad_norm": 1.3854580522572384, "learning_rate": 6.578215082214629e-07, "loss": 0.0533, "step": 10621 }, { "epoch": 2.4168373151308304, "grad_norm": 1.4341453049106634, "learning_rate": 6.577322798494449e-07, "loss": 0.0296, "step": 10622 }, { "epoch": 2.417064846416382, "grad_norm": 1.5702105258088734, "learning_rate": 6.576430508084393e-07, "loss": 0.031, "step": 10623 }, { "epoch": 2.417292377701934, "grad_norm": 1.3171292579547644, "learning_rate": 6.575538211002693e-07, "loss": 0.03, "step": 10624 }, { "epoch": 2.4175199089874857, "grad_norm": 1.4878630861481952, "learning_rate": 6.57464590726759e-07, "loss": 0.0325, "step": 10625 }, { "epoch": 2.4177474402730375, "grad_norm": 1.2318300992441433, "learning_rate": 6.573753596897318e-07, "loss": 0.0255, "step": 10626 }, { "epoch": 2.417974971558589, "grad_norm": 1.1438230000394354, "learning_rate": 6.572861279910114e-07, "loss": 0.0191, "step": 10627 }, { "epoch": 2.418202502844141, "grad_norm": 1.9748913576549123, "learning_rate": 6.571968956324218e-07, "loss": 0.0579, "step": 10628 }, { "epoch": 2.4184300341296927, "grad_norm": 2.548692414052215, "learning_rate": 6.571076626157866e-07, "loss": 0.0563, "step": 10629 }, { "epoch": 2.4186575654152445, "grad_norm": 1.841220556087748, "learning_rate": 6.570184289429297e-07, "loss": 0.1044, "step": 10630 }, { "epoch": 2.418885096700796, "grad_norm": 2.417142875175133, "learning_rate": 6.569291946156746e-07, "loss": 0.1108, "step": 10631 }, { "epoch": 2.419112627986348, "grad_norm": 1.3672577776495731, "learning_rate": 6.568399596358453e-07, "loss": 0.0347, "step": 10632 }, { "epoch": 2.4193401592718997, "grad_norm": 1.1848394405550018, "learning_rate": 6.567507240052655e-07, "loss": 0.0157, "step": 10633 }, { "epoch": 2.4195676905574515, "grad_norm": 0.7408360795309923, "learning_rate": 6.566614877257591e-07, "loss": 0.0515, "step": 10634 }, { "epoch": 2.419795221843003, "grad_norm": 3.5702580931507053, "learning_rate": 6.565722507991497e-07, "loss": 0.058, "step": 10635 }, { "epoch": 2.4200227531285554, "grad_norm": 1.0847262663520345, "learning_rate": 6.564830132272617e-07, "loss": 0.0718, "step": 10636 }, { "epoch": 2.4202502844141067, "grad_norm": 2.887173513609022, "learning_rate": 6.563937750119183e-07, "loss": 0.0981, "step": 10637 }, { "epoch": 2.420477815699659, "grad_norm": 1.5870916114857796, "learning_rate": 6.563045361549436e-07, "loss": 0.0485, "step": 10638 }, { "epoch": 2.4207053469852102, "grad_norm": 1.0970076404749562, "learning_rate": 6.562152966581615e-07, "loss": 0.1013, "step": 10639 }, { "epoch": 2.4209328782707624, "grad_norm": 2.0280376001713885, "learning_rate": 6.561260565233961e-07, "loss": 0.0529, "step": 10640 }, { "epoch": 2.421160409556314, "grad_norm": 1.1391187745397657, "learning_rate": 6.56036815752471e-07, "loss": 0.0148, "step": 10641 }, { "epoch": 2.421387940841866, "grad_norm": 1.7302436040811533, "learning_rate": 6.559475743472101e-07, "loss": 0.0572, "step": 10642 }, { "epoch": 2.4216154721274177, "grad_norm": 1.6632283107436028, "learning_rate": 6.558583323094378e-07, "loss": 0.0401, "step": 10643 }, { "epoch": 2.4218430034129694, "grad_norm": 1.0592254181343634, "learning_rate": 6.557690896409774e-07, "loss": 0.0568, "step": 10644 }, { "epoch": 2.422070534698521, "grad_norm": 2.3529914876801574, "learning_rate": 6.556798463436531e-07, "loss": 0.0776, "step": 10645 }, { "epoch": 2.422298065984073, "grad_norm": 1.6577698001492, "learning_rate": 6.555906024192892e-07, "loss": 0.0757, "step": 10646 }, { "epoch": 2.4225255972696247, "grad_norm": 0.9053635418106493, "learning_rate": 6.555013578697092e-07, "loss": 0.0306, "step": 10647 }, { "epoch": 2.4227531285551764, "grad_norm": 1.5058578957353632, "learning_rate": 6.554121126967375e-07, "loss": 0.131, "step": 10648 }, { "epoch": 2.422980659840728, "grad_norm": 1.870336040572531, "learning_rate": 6.553228669021977e-07, "loss": 0.1092, "step": 10649 }, { "epoch": 2.42320819112628, "grad_norm": 1.9542945047991342, "learning_rate": 6.552336204879142e-07, "loss": 0.0669, "step": 10650 }, { "epoch": 2.4234357224118317, "grad_norm": 1.7999047473753527, "learning_rate": 6.551443734557108e-07, "loss": 0.0424, "step": 10651 }, { "epoch": 2.4236632536973834, "grad_norm": 0.9827538108341685, "learning_rate": 6.550551258074115e-07, "loss": 0.0203, "step": 10652 }, { "epoch": 2.423890784982935, "grad_norm": 1.4152410262112376, "learning_rate": 6.549658775448406e-07, "loss": 0.0705, "step": 10653 }, { "epoch": 2.424118316268487, "grad_norm": 1.08826764352521, "learning_rate": 6.54876628669822e-07, "loss": 0.0267, "step": 10654 }, { "epoch": 2.4243458475540387, "grad_norm": 1.5384408674930627, "learning_rate": 6.547873791841799e-07, "loss": 0.0475, "step": 10655 }, { "epoch": 2.4245733788395905, "grad_norm": 1.6667793947535348, "learning_rate": 6.546981290897383e-07, "loss": 0.0392, "step": 10656 }, { "epoch": 2.424800910125142, "grad_norm": 1.5860211298246367, "learning_rate": 6.546088783883215e-07, "loss": 0.0739, "step": 10657 }, { "epoch": 2.425028441410694, "grad_norm": 1.8038406137544518, "learning_rate": 6.545196270817531e-07, "loss": 0.0512, "step": 10658 }, { "epoch": 2.4252559726962457, "grad_norm": 2.1572391868774745, "learning_rate": 6.544303751718577e-07, "loss": 0.134, "step": 10659 }, { "epoch": 2.4254835039817975, "grad_norm": 1.8006479014105914, "learning_rate": 6.543411226604595e-07, "loss": 0.0238, "step": 10660 }, { "epoch": 2.425711035267349, "grad_norm": 2.208639376759731, "learning_rate": 6.542518695493823e-07, "loss": 0.0769, "step": 10661 }, { "epoch": 2.425938566552901, "grad_norm": 1.9443418278552014, "learning_rate": 6.541626158404506e-07, "loss": 0.1386, "step": 10662 }, { "epoch": 2.4261660978384527, "grad_norm": 1.5754241156737616, "learning_rate": 6.540733615354882e-07, "loss": 0.0689, "step": 10663 }, { "epoch": 2.4263936291240045, "grad_norm": 2.078032308936215, "learning_rate": 6.539841066363198e-07, "loss": 0.0436, "step": 10664 }, { "epoch": 2.426621160409556, "grad_norm": 1.8687785949756106, "learning_rate": 6.538948511447692e-07, "loss": 0.0917, "step": 10665 }, { "epoch": 2.426848691695108, "grad_norm": 1.3398527107964846, "learning_rate": 6.538055950626608e-07, "loss": 0.0846, "step": 10666 }, { "epoch": 2.4270762229806597, "grad_norm": 1.2825273691433272, "learning_rate": 6.537163383918188e-07, "loss": 0.1239, "step": 10667 }, { "epoch": 2.4273037542662115, "grad_norm": 1.9280214421112567, "learning_rate": 6.536270811340674e-07, "loss": 0.0986, "step": 10668 }, { "epoch": 2.4275312855517632, "grad_norm": 1.4151310712289835, "learning_rate": 6.53537823291231e-07, "loss": 0.1475, "step": 10669 }, { "epoch": 2.427758816837315, "grad_norm": 1.8422843240846454, "learning_rate": 6.534485648651337e-07, "loss": 0.0652, "step": 10670 }, { "epoch": 2.4279863481228667, "grad_norm": 1.819895056833755, "learning_rate": 6.533593058575997e-07, "loss": 0.0312, "step": 10671 }, { "epoch": 2.4282138794084185, "grad_norm": 2.060412485749707, "learning_rate": 6.532700462704534e-07, "loss": 0.0885, "step": 10672 }, { "epoch": 2.4284414106939702, "grad_norm": 1.3171707458026929, "learning_rate": 6.531807861055194e-07, "loss": 0.0472, "step": 10673 }, { "epoch": 2.428668941979522, "grad_norm": 1.6014971080885667, "learning_rate": 6.530915253646219e-07, "loss": 0.0236, "step": 10674 }, { "epoch": 2.428896473265074, "grad_norm": 0.991228100862655, "learning_rate": 6.530022640495845e-07, "loss": 0.0325, "step": 10675 }, { "epoch": 2.4291240045506255, "grad_norm": 1.7731158077990916, "learning_rate": 6.529130021622324e-07, "loss": 0.08, "step": 10676 }, { "epoch": 2.4293515358361777, "grad_norm": 1.8359169565368136, "learning_rate": 6.528237397043896e-07, "loss": 0.119, "step": 10677 }, { "epoch": 2.429579067121729, "grad_norm": 1.2173501994298692, "learning_rate": 6.527344766778806e-07, "loss": 0.0519, "step": 10678 }, { "epoch": 2.429806598407281, "grad_norm": 0.8681978705300911, "learning_rate": 6.526452130845296e-07, "loss": 0.0122, "step": 10679 }, { "epoch": 2.430034129692833, "grad_norm": 1.4590692338752178, "learning_rate": 6.525559489261612e-07, "loss": 0.0723, "step": 10680 }, { "epoch": 2.4302616609783847, "grad_norm": 2.0795092949387683, "learning_rate": 6.524666842045997e-07, "loss": 0.1308, "step": 10681 }, { "epoch": 2.4304891922639364, "grad_norm": 1.772161161399589, "learning_rate": 6.523774189216692e-07, "loss": 0.0634, "step": 10682 }, { "epoch": 2.430716723549488, "grad_norm": 2.1812446826159766, "learning_rate": 6.522881530791945e-07, "loss": 0.1379, "step": 10683 }, { "epoch": 2.43094425483504, "grad_norm": 1.749884881161601, "learning_rate": 6.521988866790001e-07, "loss": 0.0363, "step": 10684 }, { "epoch": 2.4311717861205917, "grad_norm": 2.262473775430507, "learning_rate": 6.5210961972291e-07, "loss": 0.0289, "step": 10685 }, { "epoch": 2.4313993174061435, "grad_norm": 2.106821787850952, "learning_rate": 6.520203522127492e-07, "loss": 0.038, "step": 10686 }, { "epoch": 2.431626848691695, "grad_norm": 1.4823470626075748, "learning_rate": 6.519310841503419e-07, "loss": 0.0172, "step": 10687 }, { "epoch": 2.431854379977247, "grad_norm": 1.7654943667039744, "learning_rate": 6.518418155375123e-07, "loss": 0.0702, "step": 10688 }, { "epoch": 2.4320819112627987, "grad_norm": 1.6415241948042603, "learning_rate": 6.517525463760852e-07, "loss": 0.1197, "step": 10689 }, { "epoch": 2.4323094425483505, "grad_norm": 1.8510138218220162, "learning_rate": 6.516632766678853e-07, "loss": 0.0277, "step": 10690 }, { "epoch": 2.432536973833902, "grad_norm": 1.1099310953847858, "learning_rate": 6.515740064147366e-07, "loss": 0.0162, "step": 10691 }, { "epoch": 2.432764505119454, "grad_norm": 1.1581248515271256, "learning_rate": 6.514847356184639e-07, "loss": 0.0431, "step": 10692 }, { "epoch": 2.4329920364050057, "grad_norm": 1.1123917836464456, "learning_rate": 6.513954642808919e-07, "loss": 0.1107, "step": 10693 }, { "epoch": 2.4332195676905575, "grad_norm": 1.2671981504873233, "learning_rate": 6.513061924038448e-07, "loss": 0.0316, "step": 10694 }, { "epoch": 2.4334470989761092, "grad_norm": 1.2347140093114832, "learning_rate": 6.512169199891473e-07, "loss": 0.1102, "step": 10695 }, { "epoch": 2.433674630261661, "grad_norm": 1.7608166360822675, "learning_rate": 6.51127647038624e-07, "loss": 0.0927, "step": 10696 }, { "epoch": 2.4339021615472127, "grad_norm": 2.2439651714843816, "learning_rate": 6.510383735540994e-07, "loss": 0.0517, "step": 10697 }, { "epoch": 2.4341296928327645, "grad_norm": 2.016108585740252, "learning_rate": 6.509490995373983e-07, "loss": 0.026, "step": 10698 }, { "epoch": 2.4343572241183162, "grad_norm": 1.3594466625214487, "learning_rate": 6.50859824990345e-07, "loss": 0.063, "step": 10699 }, { "epoch": 2.434584755403868, "grad_norm": 1.5356254084154082, "learning_rate": 6.507705499147641e-07, "loss": 0.0333, "step": 10700 }, { "epoch": 2.4348122866894197, "grad_norm": 1.247460061236974, "learning_rate": 6.506812743124806e-07, "loss": 0.0434, "step": 10701 }, { "epoch": 2.4350398179749715, "grad_norm": 1.540275392451105, "learning_rate": 6.505919981853187e-07, "loss": 0.1154, "step": 10702 }, { "epoch": 2.4352673492605232, "grad_norm": 2.1244752670567317, "learning_rate": 6.505027215351033e-07, "loss": 0.0556, "step": 10703 }, { "epoch": 2.435494880546075, "grad_norm": 1.8341931503670963, "learning_rate": 6.504134443636591e-07, "loss": 0.1113, "step": 10704 }, { "epoch": 2.4357224118316267, "grad_norm": 2.3116116212807407, "learning_rate": 6.503241666728105e-07, "loss": 0.0521, "step": 10705 }, { "epoch": 2.4359499431171785, "grad_norm": 1.6178987248860397, "learning_rate": 6.502348884643824e-07, "loss": 0.0427, "step": 10706 }, { "epoch": 2.4361774744027302, "grad_norm": 1.9281110882142614, "learning_rate": 6.501456097401992e-07, "loss": 0.0455, "step": 10707 }, { "epoch": 2.436405005688282, "grad_norm": 1.8559225960543262, "learning_rate": 6.50056330502086e-07, "loss": 0.0454, "step": 10708 }, { "epoch": 2.4366325369738338, "grad_norm": 2.3483742537162424, "learning_rate": 6.499670507518671e-07, "loss": 0.0527, "step": 10709 }, { "epoch": 2.4368600682593855, "grad_norm": 1.4458951216701315, "learning_rate": 6.498777704913675e-07, "loss": 0.0354, "step": 10710 }, { "epoch": 2.4370875995449373, "grad_norm": 2.0427295101794942, "learning_rate": 6.497884897224119e-07, "loss": 0.0343, "step": 10711 }, { "epoch": 2.437315130830489, "grad_norm": 2.405025995931995, "learning_rate": 6.49699208446825e-07, "loss": 0.0823, "step": 10712 }, { "epoch": 2.4375426621160408, "grad_norm": 2.0468929323916676, "learning_rate": 6.496099266664314e-07, "loss": 0.0459, "step": 10713 }, { "epoch": 2.437770193401593, "grad_norm": 1.7648377287593435, "learning_rate": 6.495206443830561e-07, "loss": 0.0714, "step": 10714 }, { "epoch": 2.4379977246871443, "grad_norm": 1.8283934517937341, "learning_rate": 6.494313615985235e-07, "loss": 0.0499, "step": 10715 }, { "epoch": 2.4382252559726965, "grad_norm": 1.7208692980239855, "learning_rate": 6.493420783146587e-07, "loss": 0.1212, "step": 10716 }, { "epoch": 2.4384527872582478, "grad_norm": 1.716858466875207, "learning_rate": 6.492527945332865e-07, "loss": 0.1226, "step": 10717 }, { "epoch": 2.4386803185438, "grad_norm": 1.7351072488692236, "learning_rate": 6.491635102562315e-07, "loss": 0.082, "step": 10718 }, { "epoch": 2.4389078498293517, "grad_norm": 1.4982946300057065, "learning_rate": 6.490742254853187e-07, "loss": 0.0204, "step": 10719 }, { "epoch": 2.4391353811149035, "grad_norm": 0.9082073619983685, "learning_rate": 6.489849402223729e-07, "loss": 0.0261, "step": 10720 }, { "epoch": 2.439362912400455, "grad_norm": 1.6593591409181783, "learning_rate": 6.488956544692187e-07, "loss": 0.0362, "step": 10721 }, { "epoch": 2.439590443686007, "grad_norm": 1.5444429079340354, "learning_rate": 6.48806368227681e-07, "loss": 0.1274, "step": 10722 }, { "epoch": 2.4398179749715587, "grad_norm": 2.258854235516451, "learning_rate": 6.487170814995849e-07, "loss": 0.0541, "step": 10723 }, { "epoch": 2.4400455062571105, "grad_norm": 1.375819855271847, "learning_rate": 6.48627794286755e-07, "loss": 0.0255, "step": 10724 }, { "epoch": 2.4402730375426622, "grad_norm": 1.1788933902842234, "learning_rate": 6.485385065910163e-07, "loss": 0.0438, "step": 10725 }, { "epoch": 2.440500568828214, "grad_norm": 1.5680757015826552, "learning_rate": 6.484492184141937e-07, "loss": 0.1305, "step": 10726 }, { "epoch": 2.4407281001137657, "grad_norm": 1.476838607043402, "learning_rate": 6.48359929758112e-07, "loss": 0.0746, "step": 10727 }, { "epoch": 2.4409556313993175, "grad_norm": 2.3685586806747745, "learning_rate": 6.482706406245961e-07, "loss": 0.1306, "step": 10728 }, { "epoch": 2.4411831626848692, "grad_norm": 1.879831131236823, "learning_rate": 6.481813510154706e-07, "loss": 0.0547, "step": 10729 }, { "epoch": 2.441410693970421, "grad_norm": 1.3697853468516277, "learning_rate": 6.480920609325611e-07, "loss": 0.0208, "step": 10730 }, { "epoch": 2.4416382252559727, "grad_norm": 1.4657681407617524, "learning_rate": 6.480027703776923e-07, "loss": 0.0602, "step": 10731 }, { "epoch": 2.4418657565415245, "grad_norm": 1.9042919050218716, "learning_rate": 6.479134793526887e-07, "loss": 0.0388, "step": 10732 }, { "epoch": 2.4420932878270762, "grad_norm": 1.6406896281331167, "learning_rate": 6.478241878593755e-07, "loss": 0.0228, "step": 10733 }, { "epoch": 2.442320819112628, "grad_norm": 1.2483716306531623, "learning_rate": 6.47734895899578e-07, "loss": 0.0421, "step": 10734 }, { "epoch": 2.4425483503981797, "grad_norm": 2.785750578757238, "learning_rate": 6.476456034751207e-07, "loss": 0.0323, "step": 10735 }, { "epoch": 2.4427758816837315, "grad_norm": 1.876625132138733, "learning_rate": 6.475563105878285e-07, "loss": 0.0584, "step": 10736 }, { "epoch": 2.4430034129692833, "grad_norm": 1.3935791176038599, "learning_rate": 6.474670172395271e-07, "loss": 0.0377, "step": 10737 }, { "epoch": 2.443230944254835, "grad_norm": 1.275301134582811, "learning_rate": 6.473777234320408e-07, "loss": 0.0678, "step": 10738 }, { "epoch": 2.4434584755403868, "grad_norm": 1.5153471324070782, "learning_rate": 6.472884291671947e-07, "loss": 0.1395, "step": 10739 }, { "epoch": 2.4436860068259385, "grad_norm": 2.0215771823642883, "learning_rate": 6.47199134446814e-07, "loss": 0.0277, "step": 10740 }, { "epoch": 2.4439135381114903, "grad_norm": 1.248726231168551, "learning_rate": 6.471098392727238e-07, "loss": 0.0418, "step": 10741 }, { "epoch": 2.444141069397042, "grad_norm": 1.9909115972775195, "learning_rate": 6.470205436467487e-07, "loss": 0.0956, "step": 10742 }, { "epoch": 2.4443686006825938, "grad_norm": 1.1606253216543372, "learning_rate": 6.469312475707141e-07, "loss": 0.0451, "step": 10743 }, { "epoch": 2.4445961319681455, "grad_norm": 1.274108763143027, "learning_rate": 6.468419510464452e-07, "loss": 0.0842, "step": 10744 }, { "epoch": 2.4448236632536973, "grad_norm": 1.5163402041606309, "learning_rate": 6.467526540757666e-07, "loss": 0.0604, "step": 10745 }, { "epoch": 2.445051194539249, "grad_norm": 2.029191898687564, "learning_rate": 6.466633566605036e-07, "loss": 0.0587, "step": 10746 }, { "epoch": 2.4452787258248008, "grad_norm": 1.758333279331689, "learning_rate": 6.465740588024813e-07, "loss": 0.0511, "step": 10747 }, { "epoch": 2.4455062571103525, "grad_norm": 1.6379300702116257, "learning_rate": 6.464847605035247e-07, "loss": 0.1883, "step": 10748 }, { "epoch": 2.4457337883959043, "grad_norm": 1.9636421701081885, "learning_rate": 6.46395461765459e-07, "loss": 0.0359, "step": 10749 }, { "epoch": 2.445961319681456, "grad_norm": 1.9142765817445715, "learning_rate": 6.463061625901093e-07, "loss": 0.0755, "step": 10750 }, { "epoch": 2.4461888509670078, "grad_norm": 0.8841446043054305, "learning_rate": 6.462168629793008e-07, "loss": 0.013, "step": 10751 }, { "epoch": 2.4464163822525595, "grad_norm": 2.1394249546323945, "learning_rate": 6.461275629348581e-07, "loss": 0.1495, "step": 10752 }, { "epoch": 2.4466439135381117, "grad_norm": 0.7270738165419912, "learning_rate": 6.460382624586069e-07, "loss": 0.0074, "step": 10753 }, { "epoch": 2.446871444823663, "grad_norm": 1.15289704204101, "learning_rate": 6.45948961552372e-07, "loss": 0.0562, "step": 10754 }, { "epoch": 2.4470989761092152, "grad_norm": 1.6473230116533466, "learning_rate": 6.45859660217979e-07, "loss": 0.116, "step": 10755 }, { "epoch": 2.4473265073947665, "grad_norm": 2.547342183522831, "learning_rate": 6.457703584572525e-07, "loss": 0.0521, "step": 10756 }, { "epoch": 2.4475540386803187, "grad_norm": 1.6838663706265002, "learning_rate": 6.45681056272018e-07, "loss": 0.0455, "step": 10757 }, { "epoch": 2.4477815699658705, "grad_norm": 1.2212377724665087, "learning_rate": 6.455917536641006e-07, "loss": 0.04, "step": 10758 }, { "epoch": 2.4480091012514222, "grad_norm": 1.490475672931126, "learning_rate": 6.455024506353252e-07, "loss": 0.0671, "step": 10759 }, { "epoch": 2.448236632536974, "grad_norm": 1.5184591661740638, "learning_rate": 6.454131471875176e-07, "loss": 0.0294, "step": 10760 }, { "epoch": 2.4484641638225257, "grad_norm": 1.3938123308936536, "learning_rate": 6.453238433225026e-07, "loss": 0.1112, "step": 10761 }, { "epoch": 2.4486916951080775, "grad_norm": 1.252816498717164, "learning_rate": 6.452345390421054e-07, "loss": 0.0199, "step": 10762 }, { "epoch": 2.4489192263936292, "grad_norm": 2.3025806568067817, "learning_rate": 6.451452343481512e-07, "loss": 0.0483, "step": 10763 }, { "epoch": 2.449146757679181, "grad_norm": 2.2006329204755817, "learning_rate": 6.450559292424655e-07, "loss": 0.0595, "step": 10764 }, { "epoch": 2.4493742889647327, "grad_norm": 2.14628033721118, "learning_rate": 6.449666237268733e-07, "loss": 0.0297, "step": 10765 }, { "epoch": 2.4496018202502845, "grad_norm": 1.617248945564638, "learning_rate": 6.448773178031996e-07, "loss": 0.0448, "step": 10766 }, { "epoch": 2.4498293515358363, "grad_norm": 2.103623179671804, "learning_rate": 6.447880114732702e-07, "loss": 0.0973, "step": 10767 }, { "epoch": 2.450056882821388, "grad_norm": 1.0954074293093925, "learning_rate": 6.4469870473891e-07, "loss": 0.04, "step": 10768 }, { "epoch": 2.4502844141069398, "grad_norm": 1.2354183827654832, "learning_rate": 6.446093976019443e-07, "loss": 0.0806, "step": 10769 }, { "epoch": 2.4505119453924915, "grad_norm": 1.0217733571217007, "learning_rate": 6.445200900641986e-07, "loss": 0.0274, "step": 10770 }, { "epoch": 2.4507394766780433, "grad_norm": 1.0159088874973368, "learning_rate": 6.444307821274979e-07, "loss": 0.0392, "step": 10771 }, { "epoch": 2.450967007963595, "grad_norm": 1.9220407075099928, "learning_rate": 6.443414737936677e-07, "loss": 0.1153, "step": 10772 }, { "epoch": 2.4511945392491468, "grad_norm": 1.4222736350031056, "learning_rate": 6.442521650645329e-07, "loss": 0.025, "step": 10773 }, { "epoch": 2.4514220705346985, "grad_norm": 1.6802408563288893, "learning_rate": 6.441628559419194e-07, "loss": 0.0609, "step": 10774 }, { "epoch": 2.4516496018202503, "grad_norm": 1.2694793613626496, "learning_rate": 6.440735464276524e-07, "loss": 0.0164, "step": 10775 }, { "epoch": 2.451877133105802, "grad_norm": 3.622568264399461, "learning_rate": 6.439842365235566e-07, "loss": 0.0614, "step": 10776 }, { "epoch": 2.4521046643913538, "grad_norm": 1.0645642623729403, "learning_rate": 6.43894926231458e-07, "loss": 0.0541, "step": 10777 }, { "epoch": 2.4523321956769055, "grad_norm": 1.1049432103300538, "learning_rate": 6.438056155531816e-07, "loss": 0.0316, "step": 10778 }, { "epoch": 2.4525597269624573, "grad_norm": 2.6035304274976063, "learning_rate": 6.437163044905528e-07, "loss": 0.1614, "step": 10779 }, { "epoch": 2.452787258248009, "grad_norm": 1.6884683257989475, "learning_rate": 6.436269930453971e-07, "loss": 0.0496, "step": 10780 }, { "epoch": 2.453014789533561, "grad_norm": 1.3339295587517084, "learning_rate": 6.4353768121954e-07, "loss": 0.0516, "step": 10781 }, { "epoch": 2.4532423208191125, "grad_norm": 1.4020930989417688, "learning_rate": 6.434483690148063e-07, "loss": 0.0577, "step": 10782 }, { "epoch": 2.4534698521046643, "grad_norm": 1.3912072581744486, "learning_rate": 6.43359056433022e-07, "loss": 0.0385, "step": 10783 }, { "epoch": 2.453697383390216, "grad_norm": 1.38982276338953, "learning_rate": 6.43269743476012e-07, "loss": 0.0631, "step": 10784 }, { "epoch": 2.453924914675768, "grad_norm": 1.5348886513779152, "learning_rate": 6.43180430145602e-07, "loss": 0.0274, "step": 10785 }, { "epoch": 2.4541524459613195, "grad_norm": 3.7506311156621175, "learning_rate": 6.430911164436172e-07, "loss": 0.1153, "step": 10786 }, { "epoch": 2.4543799772468713, "grad_norm": 1.6448626628839786, "learning_rate": 6.430018023718833e-07, "loss": 0.0584, "step": 10787 }, { "epoch": 2.454607508532423, "grad_norm": 1.0534019966424395, "learning_rate": 6.429124879322256e-07, "loss": 0.0314, "step": 10788 }, { "epoch": 2.454835039817975, "grad_norm": 1.4640930397866614, "learning_rate": 6.42823173126469e-07, "loss": 0.1025, "step": 10789 }, { "epoch": 2.4550625711035265, "grad_norm": 1.8435906346136508, "learning_rate": 6.427338579564397e-07, "loss": 0.0702, "step": 10790 }, { "epoch": 2.4552901023890783, "grad_norm": 1.5390586109838404, "learning_rate": 6.426445424239629e-07, "loss": 0.0507, "step": 10791 }, { "epoch": 2.4555176336746305, "grad_norm": 1.792875825383981, "learning_rate": 6.425552265308639e-07, "loss": 0.0292, "step": 10792 }, { "epoch": 2.455745164960182, "grad_norm": 1.3350294846723614, "learning_rate": 6.424659102789681e-07, "loss": 0.0387, "step": 10793 }, { "epoch": 2.455972696245734, "grad_norm": 1.1981536555136512, "learning_rate": 6.423765936701012e-07, "loss": 0.0798, "step": 10794 }, { "epoch": 2.4562002275312853, "grad_norm": 1.5112908389859945, "learning_rate": 6.422872767060886e-07, "loss": 0.0387, "step": 10795 }, { "epoch": 2.4564277588168375, "grad_norm": 2.4397855264389885, "learning_rate": 6.421979593887555e-07, "loss": 0.0549, "step": 10796 }, { "epoch": 2.4566552901023893, "grad_norm": 1.7392320043842686, "learning_rate": 6.421086417199277e-07, "loss": 0.026, "step": 10797 }, { "epoch": 2.456882821387941, "grad_norm": 1.5748479447667432, "learning_rate": 6.420193237014306e-07, "loss": 0.0789, "step": 10798 }, { "epoch": 2.4571103526734928, "grad_norm": 1.275351095421082, "learning_rate": 6.419300053350898e-07, "loss": 0.0476, "step": 10799 }, { "epoch": 2.4573378839590445, "grad_norm": 1.5009957426345688, "learning_rate": 6.418406866227306e-07, "loss": 0.0412, "step": 10800 }, { "epoch": 2.4575654152445963, "grad_norm": 1.6812093111025637, "learning_rate": 6.417513675661787e-07, "loss": 0.0414, "step": 10801 }, { "epoch": 2.457792946530148, "grad_norm": 1.3673637868393416, "learning_rate": 6.416620481672595e-07, "loss": 0.0399, "step": 10802 }, { "epoch": 2.4580204778156998, "grad_norm": 1.8916693927031183, "learning_rate": 6.415727284277984e-07, "loss": 0.0895, "step": 10803 }, { "epoch": 2.4582480091012515, "grad_norm": 2.524607698907239, "learning_rate": 6.414834083496212e-07, "loss": 0.0465, "step": 10804 }, { "epoch": 2.4584755403868033, "grad_norm": 2.2817091471153605, "learning_rate": 6.413940879345533e-07, "loss": 0.1208, "step": 10805 }, { "epoch": 2.458703071672355, "grad_norm": 1.4010689281603355, "learning_rate": 6.413047671844203e-07, "loss": 0.082, "step": 10806 }, { "epoch": 2.4589306029579068, "grad_norm": 1.0169717727837417, "learning_rate": 6.412154461010477e-07, "loss": 0.0663, "step": 10807 }, { "epoch": 2.4591581342434585, "grad_norm": 1.6391566607979509, "learning_rate": 6.411261246862611e-07, "loss": 0.0499, "step": 10808 }, { "epoch": 2.4593856655290103, "grad_norm": 1.148975078128176, "learning_rate": 6.410368029418859e-07, "loss": 0.0659, "step": 10809 }, { "epoch": 2.459613196814562, "grad_norm": 1.99952874926377, "learning_rate": 6.40947480869748e-07, "loss": 0.0997, "step": 10810 }, { "epoch": 2.459840728100114, "grad_norm": 2.9458744665851273, "learning_rate": 6.408581584716728e-07, "loss": 0.0409, "step": 10811 }, { "epoch": 2.4600682593856655, "grad_norm": 1.5741732885923228, "learning_rate": 6.407688357494858e-07, "loss": 0.0375, "step": 10812 }, { "epoch": 2.4602957906712173, "grad_norm": 1.7425425192381223, "learning_rate": 6.406795127050126e-07, "loss": 0.0264, "step": 10813 }, { "epoch": 2.460523321956769, "grad_norm": 1.6044876619606474, "learning_rate": 6.405901893400791e-07, "loss": 0.0309, "step": 10814 }, { "epoch": 2.460750853242321, "grad_norm": 1.6192559761779082, "learning_rate": 6.405008656565105e-07, "loss": 0.062, "step": 10815 }, { "epoch": 2.4609783845278725, "grad_norm": 2.0358550989694773, "learning_rate": 6.404115416561326e-07, "loss": 0.0363, "step": 10816 }, { "epoch": 2.4612059158134243, "grad_norm": 1.9579573986723964, "learning_rate": 6.403222173407711e-07, "loss": 0.1106, "step": 10817 }, { "epoch": 2.461433447098976, "grad_norm": 1.103167386812476, "learning_rate": 6.402328927122514e-07, "loss": 0.0529, "step": 10818 }, { "epoch": 2.461660978384528, "grad_norm": 1.5010142939431699, "learning_rate": 6.401435677723995e-07, "loss": 0.0554, "step": 10819 }, { "epoch": 2.4618885096700796, "grad_norm": 2.3307306230733, "learning_rate": 6.400542425230407e-07, "loss": 0.0508, "step": 10820 }, { "epoch": 2.4621160409556313, "grad_norm": 0.7516582829505136, "learning_rate": 6.399649169660007e-07, "loss": 0.0305, "step": 10821 }, { "epoch": 2.462343572241183, "grad_norm": 1.4594580184464123, "learning_rate": 6.398755911031053e-07, "loss": 0.0645, "step": 10822 }, { "epoch": 2.462571103526735, "grad_norm": 2.1346944993056276, "learning_rate": 6.397862649361798e-07, "loss": 0.1862, "step": 10823 }, { "epoch": 2.4627986348122866, "grad_norm": 0.8743918396347649, "learning_rate": 6.396969384670504e-07, "loss": 0.0241, "step": 10824 }, { "epoch": 2.4630261660978383, "grad_norm": 1.6970079992379572, "learning_rate": 6.396076116975426e-07, "loss": 0.1211, "step": 10825 }, { "epoch": 2.46325369738339, "grad_norm": 2.2413093568023945, "learning_rate": 6.395182846294816e-07, "loss": 0.0887, "step": 10826 }, { "epoch": 2.463481228668942, "grad_norm": 1.9700672436219968, "learning_rate": 6.394289572646938e-07, "loss": 0.0618, "step": 10827 }, { "epoch": 2.4637087599544936, "grad_norm": 1.550471476068397, "learning_rate": 6.393396296050043e-07, "loss": 0.0597, "step": 10828 }, { "epoch": 2.4639362912400453, "grad_norm": 1.2971818428546398, "learning_rate": 6.392503016522392e-07, "loss": 0.0551, "step": 10829 }, { "epoch": 2.464163822525597, "grad_norm": 1.8438709238425102, "learning_rate": 6.391609734082238e-07, "loss": 0.1326, "step": 10830 }, { "epoch": 2.4643913538111493, "grad_norm": 2.29719579436221, "learning_rate": 6.390716448747841e-07, "loss": 0.0585, "step": 10831 }, { "epoch": 2.4646188850967006, "grad_norm": 1.1431082350176027, "learning_rate": 6.38982316053746e-07, "loss": 0.0283, "step": 10832 }, { "epoch": 2.4648464163822528, "grad_norm": 1.5486039126328415, "learning_rate": 6.388929869469348e-07, "loss": 0.118, "step": 10833 }, { "epoch": 2.465073947667804, "grad_norm": 1.2811949077748084, "learning_rate": 6.388036575561764e-07, "loss": 0.1024, "step": 10834 }, { "epoch": 2.4653014789533563, "grad_norm": 1.9528449906451266, "learning_rate": 6.387143278832964e-07, "loss": 0.0336, "step": 10835 }, { "epoch": 2.465529010238908, "grad_norm": 1.4934876792933103, "learning_rate": 6.386249979301207e-07, "loss": 0.0866, "step": 10836 }, { "epoch": 2.4657565415244598, "grad_norm": 1.346713413935413, "learning_rate": 6.385356676984751e-07, "loss": 0.0435, "step": 10837 }, { "epoch": 2.4659840728100115, "grad_norm": 1.8334525389627976, "learning_rate": 6.384463371901853e-07, "loss": 0.0259, "step": 10838 }, { "epoch": 2.4662116040955633, "grad_norm": 1.474866635603728, "learning_rate": 6.383570064070768e-07, "loss": 0.0437, "step": 10839 }, { "epoch": 2.466439135381115, "grad_norm": 1.0519673060463886, "learning_rate": 6.382676753509756e-07, "loss": 0.0566, "step": 10840 }, { "epoch": 2.466666666666667, "grad_norm": 1.7430158133885028, "learning_rate": 6.381783440237076e-07, "loss": 0.0712, "step": 10841 }, { "epoch": 2.4668941979522185, "grad_norm": 1.6182931250989727, "learning_rate": 6.380890124270982e-07, "loss": 0.0406, "step": 10842 }, { "epoch": 2.4671217292377703, "grad_norm": 1.4382384768853405, "learning_rate": 6.379996805629733e-07, "loss": 0.1288, "step": 10843 }, { "epoch": 2.467349260523322, "grad_norm": 1.2369613458375917, "learning_rate": 6.37910348433159e-07, "loss": 0.0774, "step": 10844 }, { "epoch": 2.467576791808874, "grad_norm": 0.8755195888088544, "learning_rate": 6.378210160394807e-07, "loss": 0.044, "step": 10845 }, { "epoch": 2.4678043230944255, "grad_norm": 1.7498352397362558, "learning_rate": 6.37731683383764e-07, "loss": 0.0733, "step": 10846 }, { "epoch": 2.4680318543799773, "grad_norm": 3.0243093650135178, "learning_rate": 6.376423504678354e-07, "loss": 0.0525, "step": 10847 }, { "epoch": 2.468259385665529, "grad_norm": 1.9088213266639864, "learning_rate": 6.375530172935203e-07, "loss": 0.0791, "step": 10848 }, { "epoch": 2.468486916951081, "grad_norm": 0.9999765427526403, "learning_rate": 6.374636838626444e-07, "loss": 0.0554, "step": 10849 }, { "epoch": 2.4687144482366326, "grad_norm": 1.3301437955659614, "learning_rate": 6.373743501770335e-07, "loss": 0.0396, "step": 10850 }, { "epoch": 2.4689419795221843, "grad_norm": 1.3990404816959723, "learning_rate": 6.372850162385139e-07, "loss": 0.0358, "step": 10851 }, { "epoch": 2.469169510807736, "grad_norm": 1.4934607337372596, "learning_rate": 6.371956820489107e-07, "loss": 0.0678, "step": 10852 }, { "epoch": 2.469397042093288, "grad_norm": 1.9751544493176358, "learning_rate": 6.371063476100501e-07, "loss": 0.0439, "step": 10853 }, { "epoch": 2.4696245733788396, "grad_norm": 1.9053408951195272, "learning_rate": 6.370170129237582e-07, "loss": 0.0944, "step": 10854 }, { "epoch": 2.4698521046643913, "grad_norm": 0.7055216183332879, "learning_rate": 6.369276779918604e-07, "loss": 0.0146, "step": 10855 }, { "epoch": 2.470079635949943, "grad_norm": 1.332404056281277, "learning_rate": 6.368383428161829e-07, "loss": 0.0499, "step": 10856 }, { "epoch": 2.470307167235495, "grad_norm": 1.5552345749572618, "learning_rate": 6.36749007398551e-07, "loss": 0.0394, "step": 10857 }, { "epoch": 2.4705346985210466, "grad_norm": 1.9573735317377305, "learning_rate": 6.366596717407912e-07, "loss": 0.0359, "step": 10858 }, { "epoch": 2.4707622298065983, "grad_norm": 2.142830137816076, "learning_rate": 6.36570335844729e-07, "loss": 0.0571, "step": 10859 }, { "epoch": 2.47098976109215, "grad_norm": 1.9239905053259085, "learning_rate": 6.364809997121901e-07, "loss": 0.067, "step": 10860 }, { "epoch": 2.471217292377702, "grad_norm": 1.8631922090993536, "learning_rate": 6.363916633450009e-07, "loss": 0.0458, "step": 10861 }, { "epoch": 2.4714448236632536, "grad_norm": 1.437313273971417, "learning_rate": 6.363023267449868e-07, "loss": 0.0345, "step": 10862 }, { "epoch": 2.4716723549488053, "grad_norm": 2.2565291195120363, "learning_rate": 6.362129899139739e-07, "loss": 0.091, "step": 10863 }, { "epoch": 2.471899886234357, "grad_norm": 0.8965433620198081, "learning_rate": 6.36123652853788e-07, "loss": 0.0101, "step": 10864 }, { "epoch": 2.472127417519909, "grad_norm": 1.563430697175068, "learning_rate": 6.360343155662551e-07, "loss": 0.1114, "step": 10865 }, { "epoch": 2.4723549488054606, "grad_norm": 1.6041844994917043, "learning_rate": 6.359449780532008e-07, "loss": 0.0993, "step": 10866 }, { "epoch": 2.4725824800910123, "grad_norm": 1.9128389674410409, "learning_rate": 6.358556403164513e-07, "loss": 0.1061, "step": 10867 }, { "epoch": 2.472810011376564, "grad_norm": 1.9540243726959918, "learning_rate": 6.357663023578324e-07, "loss": 0.1383, "step": 10868 }, { "epoch": 2.473037542662116, "grad_norm": 1.3820552685882872, "learning_rate": 6.3567696417917e-07, "loss": 0.0701, "step": 10869 }, { "epoch": 2.473265073947668, "grad_norm": 1.8594423898077386, "learning_rate": 6.3558762578229e-07, "loss": 0.1291, "step": 10870 }, { "epoch": 2.4734926052332193, "grad_norm": 2.1688244377617814, "learning_rate": 6.354982871690184e-07, "loss": 0.0756, "step": 10871 }, { "epoch": 2.4737201365187715, "grad_norm": 1.567508337160604, "learning_rate": 6.35408948341181e-07, "loss": 0.0662, "step": 10872 }, { "epoch": 2.473947667804323, "grad_norm": 1.3056905061664779, "learning_rate": 6.353196093006035e-07, "loss": 0.0383, "step": 10873 }, { "epoch": 2.474175199089875, "grad_norm": 1.3423014901373584, "learning_rate": 6.352302700491124e-07, "loss": 0.0144, "step": 10874 }, { "epoch": 2.474402730375427, "grad_norm": 1.9806297606684156, "learning_rate": 6.351409305885332e-07, "loss": 0.0388, "step": 10875 }, { "epoch": 2.4746302616609785, "grad_norm": 1.987514408078129, "learning_rate": 6.35051590920692e-07, "loss": 0.0446, "step": 10876 }, { "epoch": 2.4748577929465303, "grad_norm": 1.466224620623108, "learning_rate": 6.349622510474146e-07, "loss": 0.0676, "step": 10877 }, { "epoch": 2.475085324232082, "grad_norm": 1.7727006116703508, "learning_rate": 6.348729109705272e-07, "loss": 0.1048, "step": 10878 }, { "epoch": 2.475312855517634, "grad_norm": 1.0548915177972773, "learning_rate": 6.347835706918555e-07, "loss": 0.0135, "step": 10879 }, { "epoch": 2.4755403868031856, "grad_norm": 1.2244378985072317, "learning_rate": 6.346942302132253e-07, "loss": 0.0216, "step": 10880 }, { "epoch": 2.4757679180887373, "grad_norm": 1.5448357180995995, "learning_rate": 6.34604889536463e-07, "loss": 0.0784, "step": 10881 }, { "epoch": 2.475995449374289, "grad_norm": 2.3915536157049977, "learning_rate": 6.345155486633946e-07, "loss": 0.0529, "step": 10882 }, { "epoch": 2.476222980659841, "grad_norm": 5.283236051471409, "learning_rate": 6.344262075958454e-07, "loss": 0.1059, "step": 10883 }, { "epoch": 2.4764505119453926, "grad_norm": 2.9812241962005013, "learning_rate": 6.343368663356419e-07, "loss": 0.1676, "step": 10884 }, { "epoch": 2.4766780432309443, "grad_norm": 1.7392568804038129, "learning_rate": 6.3424752488461e-07, "loss": 0.1147, "step": 10885 }, { "epoch": 2.476905574516496, "grad_norm": 1.7778725744664248, "learning_rate": 6.341581832445757e-07, "loss": 0.0638, "step": 10886 }, { "epoch": 2.477133105802048, "grad_norm": 2.1746736380009914, "learning_rate": 6.340688414173647e-07, "loss": 0.0356, "step": 10887 }, { "epoch": 2.4773606370875996, "grad_norm": 1.75293729766791, "learning_rate": 6.339794994048035e-07, "loss": 0.0609, "step": 10888 }, { "epoch": 2.4775881683731513, "grad_norm": 2.3019638704234717, "learning_rate": 6.338901572087177e-07, "loss": 0.0762, "step": 10889 }, { "epoch": 2.477815699658703, "grad_norm": 1.7872490771912772, "learning_rate": 6.338008148309329e-07, "loss": 0.0384, "step": 10890 }, { "epoch": 2.478043230944255, "grad_norm": 1.5527157273794523, "learning_rate": 6.337114722732761e-07, "loss": 0.0206, "step": 10891 }, { "epoch": 2.4782707622298066, "grad_norm": 2.471283722500934, "learning_rate": 6.336221295375726e-07, "loss": 0.0387, "step": 10892 }, { "epoch": 2.4784982935153583, "grad_norm": 2.1810401201668084, "learning_rate": 6.335327866256486e-07, "loss": 0.0407, "step": 10893 }, { "epoch": 2.47872582480091, "grad_norm": 1.2269915474957105, "learning_rate": 6.334434435393298e-07, "loss": 0.0837, "step": 10894 }, { "epoch": 2.478953356086462, "grad_norm": 1.5026685542699592, "learning_rate": 6.333541002804429e-07, "loss": 0.0519, "step": 10895 }, { "epoch": 2.4791808873720136, "grad_norm": 1.4966079948366215, "learning_rate": 6.332647568508132e-07, "loss": 0.0354, "step": 10896 }, { "epoch": 2.4794084186575653, "grad_norm": 1.3340971483650939, "learning_rate": 6.33175413252267e-07, "loss": 0.0632, "step": 10897 }, { "epoch": 2.479635949943117, "grad_norm": 1.364128001145437, "learning_rate": 6.330860694866305e-07, "loss": 0.0884, "step": 10898 }, { "epoch": 2.479863481228669, "grad_norm": 1.4279572290887816, "learning_rate": 6.329967255557294e-07, "loss": 0.0211, "step": 10899 }, { "epoch": 2.4800910125142206, "grad_norm": 1.1514963524259667, "learning_rate": 6.329073814613899e-07, "loss": 0.0317, "step": 10900 }, { "epoch": 2.4803185437997723, "grad_norm": 1.2091953209581847, "learning_rate": 6.328180372054382e-07, "loss": 0.0967, "step": 10901 }, { "epoch": 2.480546075085324, "grad_norm": 1.7577632697097967, "learning_rate": 6.327286927897e-07, "loss": 0.0314, "step": 10902 }, { "epoch": 2.480773606370876, "grad_norm": 1.621642503371384, "learning_rate": 6.326393482160013e-07, "loss": 0.0417, "step": 10903 }, { "epoch": 2.4810011376564276, "grad_norm": 1.2998934288929127, "learning_rate": 6.325500034861684e-07, "loss": 0.023, "step": 10904 }, { "epoch": 2.4812286689419794, "grad_norm": 0.950097751804634, "learning_rate": 6.324606586020274e-07, "loss": 0.0219, "step": 10905 }, { "epoch": 2.481456200227531, "grad_norm": 1.2672036485273066, "learning_rate": 6.323713135654041e-07, "loss": 0.0761, "step": 10906 }, { "epoch": 2.481683731513083, "grad_norm": 1.6457683504653216, "learning_rate": 6.322819683781248e-07, "loss": 0.0964, "step": 10907 }, { "epoch": 2.4819112627986346, "grad_norm": 1.1300112488301512, "learning_rate": 6.321926230420153e-07, "loss": 0.063, "step": 10908 }, { "epoch": 2.482138794084187, "grad_norm": 1.7676597566346592, "learning_rate": 6.321032775589018e-07, "loss": 0.1228, "step": 10909 }, { "epoch": 2.482366325369738, "grad_norm": 1.3995083312884067, "learning_rate": 6.3201393193061e-07, "loss": 0.0316, "step": 10910 }, { "epoch": 2.4825938566552903, "grad_norm": 1.6485337964218663, "learning_rate": 6.319245861589666e-07, "loss": 0.0685, "step": 10911 }, { "epoch": 2.4828213879408416, "grad_norm": 1.8539161700493796, "learning_rate": 6.318352402457973e-07, "loss": 0.0371, "step": 10912 }, { "epoch": 2.483048919226394, "grad_norm": 2.07337472637515, "learning_rate": 6.317458941929281e-07, "loss": 0.0618, "step": 10913 }, { "epoch": 2.4832764505119456, "grad_norm": 2.0706240220245706, "learning_rate": 6.316565480021854e-07, "loss": 0.0853, "step": 10914 }, { "epoch": 2.4835039817974973, "grad_norm": 1.5878454449522577, "learning_rate": 6.315672016753949e-07, "loss": 0.0775, "step": 10915 }, { "epoch": 2.483731513083049, "grad_norm": 1.284417858069104, "learning_rate": 6.314778552143827e-07, "loss": 0.0382, "step": 10916 }, { "epoch": 2.483959044368601, "grad_norm": 1.4501725974250523, "learning_rate": 6.31388508620975e-07, "loss": 0.0689, "step": 10917 }, { "epoch": 2.4841865756541526, "grad_norm": 1.8863046952116718, "learning_rate": 6.312991618969981e-07, "loss": 0.051, "step": 10918 }, { "epoch": 2.4844141069397043, "grad_norm": 2.1319011616217893, "learning_rate": 6.312098150442777e-07, "loss": 0.0827, "step": 10919 }, { "epoch": 2.484641638225256, "grad_norm": 1.5243712806420018, "learning_rate": 6.311204680646403e-07, "loss": 0.0254, "step": 10920 }, { "epoch": 2.484869169510808, "grad_norm": 1.6283093342850699, "learning_rate": 6.310311209599115e-07, "loss": 0.0401, "step": 10921 }, { "epoch": 2.4850967007963596, "grad_norm": 1.5825092963348164, "learning_rate": 6.309417737319178e-07, "loss": 0.0759, "step": 10922 }, { "epoch": 2.4853242320819113, "grad_norm": 1.3152219871589137, "learning_rate": 6.30852426382485e-07, "loss": 0.0655, "step": 10923 }, { "epoch": 2.485551763367463, "grad_norm": 1.4591531408229925, "learning_rate": 6.307630789134393e-07, "loss": 0.0527, "step": 10924 }, { "epoch": 2.485779294653015, "grad_norm": 1.8254513711273366, "learning_rate": 6.306737313266069e-07, "loss": 0.0831, "step": 10925 }, { "epoch": 2.4860068259385666, "grad_norm": 1.552719127965666, "learning_rate": 6.305843836238139e-07, "loss": 0.0803, "step": 10926 }, { "epoch": 2.4862343572241183, "grad_norm": 1.5200975335161506, "learning_rate": 6.304950358068862e-07, "loss": 0.0346, "step": 10927 }, { "epoch": 2.48646188850967, "grad_norm": 1.589587217277866, "learning_rate": 6.3040568787765e-07, "loss": 0.1075, "step": 10928 }, { "epoch": 2.486689419795222, "grad_norm": 1.5307377554485049, "learning_rate": 6.303163398379316e-07, "loss": 0.1117, "step": 10929 }, { "epoch": 2.4869169510807736, "grad_norm": 1.829675622819192, "learning_rate": 6.302269916895566e-07, "loss": 0.0528, "step": 10930 }, { "epoch": 2.4871444823663253, "grad_norm": 1.0612215711711004, "learning_rate": 6.301376434343517e-07, "loss": 0.0203, "step": 10931 }, { "epoch": 2.487372013651877, "grad_norm": 1.7034236852627376, "learning_rate": 6.300482950741431e-07, "loss": 0.0446, "step": 10932 }, { "epoch": 2.487599544937429, "grad_norm": 0.8577980679304394, "learning_rate": 6.299589466107561e-07, "loss": 0.0459, "step": 10933 }, { "epoch": 2.4878270762229806, "grad_norm": 1.3297674705334859, "learning_rate": 6.298695980460174e-07, "loss": 0.0834, "step": 10934 }, { "epoch": 2.4880546075085324, "grad_norm": 1.0911005408658943, "learning_rate": 6.297802493817533e-07, "loss": 0.0273, "step": 10935 }, { "epoch": 2.488282138794084, "grad_norm": 1.5025784372982494, "learning_rate": 6.296909006197895e-07, "loss": 0.0378, "step": 10936 }, { "epoch": 2.488509670079636, "grad_norm": 1.3372925029531704, "learning_rate": 6.296015517619522e-07, "loss": 0.0306, "step": 10937 }, { "epoch": 2.4887372013651876, "grad_norm": 1.2720465753095207, "learning_rate": 6.295122028100677e-07, "loss": 0.0563, "step": 10938 }, { "epoch": 2.4889647326507394, "grad_norm": 1.1184442752655528, "learning_rate": 6.294228537659622e-07, "loss": 0.075, "step": 10939 }, { "epoch": 2.489192263936291, "grad_norm": 2.7369299494537453, "learning_rate": 6.293335046314612e-07, "loss": 0.0516, "step": 10940 }, { "epoch": 2.489419795221843, "grad_norm": 1.3520238857070834, "learning_rate": 6.292441554083917e-07, "loss": 0.031, "step": 10941 }, { "epoch": 2.4896473265073946, "grad_norm": 1.3526759700850182, "learning_rate": 6.291548060985793e-07, "loss": 0.0347, "step": 10942 }, { "epoch": 2.4898748577929464, "grad_norm": 1.9408977059615136, "learning_rate": 6.290654567038504e-07, "loss": 0.0943, "step": 10943 }, { "epoch": 2.490102389078498, "grad_norm": 3.2539159300907556, "learning_rate": 6.289761072260307e-07, "loss": 0.1325, "step": 10944 }, { "epoch": 2.49032992036405, "grad_norm": 1.813088070705479, "learning_rate": 6.28886757666947e-07, "loss": 0.0787, "step": 10945 }, { "epoch": 2.4905574516496016, "grad_norm": 1.3320876525299923, "learning_rate": 6.287974080284251e-07, "loss": 0.0338, "step": 10946 }, { "epoch": 2.4907849829351534, "grad_norm": 2.2645943301286873, "learning_rate": 6.287080583122908e-07, "loss": 0.0898, "step": 10947 }, { "epoch": 2.4910125142207056, "grad_norm": 1.5394760949896429, "learning_rate": 6.286187085203707e-07, "loss": 0.0369, "step": 10948 }, { "epoch": 2.491240045506257, "grad_norm": 1.1452440568877147, "learning_rate": 6.28529358654491e-07, "loss": 0.0302, "step": 10949 }, { "epoch": 2.491467576791809, "grad_norm": 2.0395924077660297, "learning_rate": 6.284400087164776e-07, "loss": 0.0699, "step": 10950 }, { "epoch": 2.4916951080773604, "grad_norm": 1.262492283073084, "learning_rate": 6.283506587081568e-07, "loss": 0.0341, "step": 10951 }, { "epoch": 2.4919226393629126, "grad_norm": 2.1215436901960634, "learning_rate": 6.282613086313546e-07, "loss": 0.0458, "step": 10952 }, { "epoch": 2.4921501706484643, "grad_norm": 0.7259583506962458, "learning_rate": 6.28171958487897e-07, "loss": 0.0074, "step": 10953 }, { "epoch": 2.492377701934016, "grad_norm": 1.9498742458947604, "learning_rate": 6.280826082796104e-07, "loss": 0.0792, "step": 10954 }, { "epoch": 2.492605233219568, "grad_norm": 1.221683181960274, "learning_rate": 6.279932580083212e-07, "loss": 0.0995, "step": 10955 }, { "epoch": 2.4928327645051196, "grad_norm": 1.5853967716985577, "learning_rate": 6.279039076758551e-07, "loss": 0.0601, "step": 10956 }, { "epoch": 2.4930602957906713, "grad_norm": 1.567758312242494, "learning_rate": 6.278145572840385e-07, "loss": 0.0474, "step": 10957 }, { "epoch": 2.493287827076223, "grad_norm": 1.6941242657058169, "learning_rate": 6.277252068346977e-07, "loss": 0.028, "step": 10958 }, { "epoch": 2.493515358361775, "grad_norm": 1.784161313779922, "learning_rate": 6.276358563296585e-07, "loss": 0.0373, "step": 10959 }, { "epoch": 2.4937428896473266, "grad_norm": 1.8811344626005924, "learning_rate": 6.275465057707471e-07, "loss": 0.0683, "step": 10960 }, { "epoch": 2.4939704209328784, "grad_norm": 2.1599851143667963, "learning_rate": 6.274571551597899e-07, "loss": 0.1277, "step": 10961 }, { "epoch": 2.49419795221843, "grad_norm": 2.0660868345046706, "learning_rate": 6.273678044986129e-07, "loss": 0.0356, "step": 10962 }, { "epoch": 2.494425483503982, "grad_norm": 2.311701878971303, "learning_rate": 6.272784537890425e-07, "loss": 0.1128, "step": 10963 }, { "epoch": 2.4946530147895336, "grad_norm": 1.487582948660883, "learning_rate": 6.271891030329046e-07, "loss": 0.0848, "step": 10964 }, { "epoch": 2.4948805460750854, "grad_norm": 1.5999148595393617, "learning_rate": 6.270997522320254e-07, "loss": 0.0484, "step": 10965 }, { "epoch": 2.495108077360637, "grad_norm": 1.436429889972859, "learning_rate": 6.270104013882311e-07, "loss": 0.1179, "step": 10966 }, { "epoch": 2.495335608646189, "grad_norm": 0.8154230969363158, "learning_rate": 6.269210505033476e-07, "loss": 0.0568, "step": 10967 }, { "epoch": 2.4955631399317406, "grad_norm": 1.7445634835749073, "learning_rate": 6.268316995792017e-07, "loss": 0.053, "step": 10968 }, { "epoch": 2.4957906712172924, "grad_norm": 2.3433361379413835, "learning_rate": 6.267423486176191e-07, "loss": 0.0349, "step": 10969 }, { "epoch": 2.496018202502844, "grad_norm": 1.1080018783863554, "learning_rate": 6.266529976204263e-07, "loss": 0.0295, "step": 10970 }, { "epoch": 2.496245733788396, "grad_norm": 1.093452768701886, "learning_rate": 6.26563646589449e-07, "loss": 0.0196, "step": 10971 }, { "epoch": 2.4964732650739476, "grad_norm": 1.0864518857444965, "learning_rate": 6.264742955265138e-07, "loss": 0.0117, "step": 10972 }, { "epoch": 2.4967007963594994, "grad_norm": 1.1478405442019546, "learning_rate": 6.263849444334464e-07, "loss": 0.0523, "step": 10973 }, { "epoch": 2.496928327645051, "grad_norm": 2.0314005977256975, "learning_rate": 6.262955933120735e-07, "loss": 0.1608, "step": 10974 }, { "epoch": 2.497155858930603, "grad_norm": 0.9983054994281034, "learning_rate": 6.262062421642211e-07, "loss": 0.0211, "step": 10975 }, { "epoch": 2.4973833902161546, "grad_norm": 1.2974449810419029, "learning_rate": 6.261168909917154e-07, "loss": 0.137, "step": 10976 }, { "epoch": 2.4976109215017064, "grad_norm": 1.2650681890286097, "learning_rate": 6.26027539796382e-07, "loss": 0.0311, "step": 10977 }, { "epoch": 2.497838452787258, "grad_norm": 1.1881429020101082, "learning_rate": 6.259381885800481e-07, "loss": 0.0151, "step": 10978 }, { "epoch": 2.49806598407281, "grad_norm": 1.4571595873025454, "learning_rate": 6.258488373445391e-07, "loss": 0.1114, "step": 10979 }, { "epoch": 2.4982935153583616, "grad_norm": 1.4081994726939846, "learning_rate": 6.257594860916815e-07, "loss": 0.0828, "step": 10980 }, { "epoch": 2.4985210466439134, "grad_norm": 1.3771773689730369, "learning_rate": 6.256701348233012e-07, "loss": 0.0478, "step": 10981 }, { "epoch": 2.498748577929465, "grad_norm": 2.5167708605910204, "learning_rate": 6.255807835412248e-07, "loss": 0.0338, "step": 10982 }, { "epoch": 2.498976109215017, "grad_norm": 1.389196305428987, "learning_rate": 6.254914322472783e-07, "loss": 0.0865, "step": 10983 }, { "epoch": 2.4992036405005686, "grad_norm": 2.1680229620137004, "learning_rate": 6.254020809432876e-07, "loss": 0.044, "step": 10984 }, { "epoch": 2.4994311717861204, "grad_norm": 2.0580800123487752, "learning_rate": 6.253127296310791e-07, "loss": 0.0313, "step": 10985 }, { "epoch": 2.499658703071672, "grad_norm": 1.7368747569550809, "learning_rate": 6.252233783124792e-07, "loss": 0.0316, "step": 10986 }, { "epoch": 2.4998862343572243, "grad_norm": 1.6387854826028714, "learning_rate": 6.251340269893138e-07, "loss": 0.0608, "step": 10987 }, { "epoch": 2.5001137656427757, "grad_norm": 1.9209892270794555, "learning_rate": 6.250446756634089e-07, "loss": 0.0994, "step": 10988 }, { "epoch": 2.500341296928328, "grad_norm": 2.557587086759991, "learning_rate": 6.249553243365913e-07, "loss": 0.0674, "step": 10989 }, { "epoch": 2.500568828213879, "grad_norm": 1.1622729488191976, "learning_rate": 6.248659730106863e-07, "loss": 0.0382, "step": 10990 }, { "epoch": 2.5007963594994314, "grad_norm": 1.1474335317567095, "learning_rate": 6.24776621687521e-07, "loss": 0.0347, "step": 10991 }, { "epoch": 2.5010238907849827, "grad_norm": 1.493008113130724, "learning_rate": 6.246872703689212e-07, "loss": 0.0793, "step": 10992 }, { "epoch": 2.501251422070535, "grad_norm": 1.3947752174446955, "learning_rate": 6.245979190567126e-07, "loss": 0.0981, "step": 10993 }, { "epoch": 2.5014789533560866, "grad_norm": 1.9942647265938565, "learning_rate": 6.245085677527219e-07, "loss": 0.1302, "step": 10994 }, { "epoch": 2.5017064846416384, "grad_norm": 1.4226297792199358, "learning_rate": 6.244192164587753e-07, "loss": 0.0287, "step": 10995 }, { "epoch": 2.50193401592719, "grad_norm": 0.9751331692519019, "learning_rate": 6.243298651766989e-07, "loss": 0.0485, "step": 10996 }, { "epoch": 2.502161547212742, "grad_norm": 2.659026624075969, "learning_rate": 6.242405139083186e-07, "loss": 0.1096, "step": 10997 }, { "epoch": 2.5023890784982936, "grad_norm": 1.7746402395082745, "learning_rate": 6.241511626554611e-07, "loss": 0.0366, "step": 10998 }, { "epoch": 2.5026166097838454, "grad_norm": 1.286081332148587, "learning_rate": 6.240618114199522e-07, "loss": 0.0455, "step": 10999 }, { "epoch": 2.502844141069397, "grad_norm": 0.8516668622858481, "learning_rate": 6.239724602036181e-07, "loss": 0.0411, "step": 11000 }, { "epoch": 2.503071672354949, "grad_norm": 1.2262068528065933, "learning_rate": 6.23883109008285e-07, "loss": 0.0322, "step": 11001 }, { "epoch": 2.5032992036405006, "grad_norm": 2.1785208442001096, "learning_rate": 6.23793757835779e-07, "loss": 0.0402, "step": 11002 }, { "epoch": 2.5035267349260524, "grad_norm": 2.379186611403098, "learning_rate": 6.237044066879268e-07, "loss": 0.0661, "step": 11003 }, { "epoch": 2.503754266211604, "grad_norm": 2.0080939224737073, "learning_rate": 6.236150555665537e-07, "loss": 0.0328, "step": 11004 }, { "epoch": 2.503981797497156, "grad_norm": 0.9759513029314878, "learning_rate": 6.235257044734864e-07, "loss": 0.0316, "step": 11005 }, { "epoch": 2.5042093287827076, "grad_norm": 1.534420311364177, "learning_rate": 6.234363534105513e-07, "loss": 0.0572, "step": 11006 }, { "epoch": 2.5044368600682594, "grad_norm": 1.2612808303628475, "learning_rate": 6.23347002379574e-07, "loss": 0.0407, "step": 11007 }, { "epoch": 2.504664391353811, "grad_norm": 0.8566965253614552, "learning_rate": 6.23257651382381e-07, "loss": 0.0561, "step": 11008 }, { "epoch": 2.504891922639363, "grad_norm": 2.268877775388729, "learning_rate": 6.231683004207984e-07, "loss": 0.1698, "step": 11009 }, { "epoch": 2.5051194539249146, "grad_norm": 2.887640350074492, "learning_rate": 6.230789494966525e-07, "loss": 0.109, "step": 11010 }, { "epoch": 2.5053469852104664, "grad_norm": 1.7785382762183093, "learning_rate": 6.229895986117693e-07, "loss": 0.0309, "step": 11011 }, { "epoch": 2.505574516496018, "grad_norm": 2.0159140124925257, "learning_rate": 6.229002477679749e-07, "loss": 0.1146, "step": 11012 }, { "epoch": 2.50580204778157, "grad_norm": 1.9841187845844523, "learning_rate": 6.228108969670959e-07, "loss": 0.0599, "step": 11013 }, { "epoch": 2.5060295790671216, "grad_norm": 1.5881943757830714, "learning_rate": 6.227215462109577e-07, "loss": 0.0476, "step": 11014 }, { "epoch": 2.5062571103526734, "grad_norm": 1.6265444658160035, "learning_rate": 6.226321955013872e-07, "loss": 0.0401, "step": 11015 }, { "epoch": 2.506484641638225, "grad_norm": 1.2011087711776314, "learning_rate": 6.225428448402102e-07, "loss": 0.0126, "step": 11016 }, { "epoch": 2.506712172923777, "grad_norm": 1.6469431396233152, "learning_rate": 6.224534942292531e-07, "loss": 0.0204, "step": 11017 }, { "epoch": 2.5069397042093287, "grad_norm": 1.6526004117766206, "learning_rate": 6.223641436703418e-07, "loss": 0.1061, "step": 11018 }, { "epoch": 2.5071672354948804, "grad_norm": 1.8611610896655753, "learning_rate": 6.222747931653025e-07, "loss": 0.0516, "step": 11019 }, { "epoch": 2.507394766780432, "grad_norm": 1.422459754183788, "learning_rate": 6.221854427159617e-07, "loss": 0.1223, "step": 11020 }, { "epoch": 2.507622298065984, "grad_norm": 1.5164008463616914, "learning_rate": 6.220960923241448e-07, "loss": 0.0451, "step": 11021 }, { "epoch": 2.507849829351536, "grad_norm": 2.9298653444322147, "learning_rate": 6.22006741991679e-07, "loss": 0.0487, "step": 11022 }, { "epoch": 2.5080773606370874, "grad_norm": 1.6393400485845175, "learning_rate": 6.219173917203896e-07, "loss": 0.0535, "step": 11023 }, { "epoch": 2.5083048919226396, "grad_norm": 2.476678108434716, "learning_rate": 6.218280415121032e-07, "loss": 0.042, "step": 11024 }, { "epoch": 2.508532423208191, "grad_norm": 1.7759330588507165, "learning_rate": 6.217386913686459e-07, "loss": 0.0368, "step": 11025 }, { "epoch": 2.508759954493743, "grad_norm": 2.0112445175558067, "learning_rate": 6.216493412918436e-07, "loss": 0.117, "step": 11026 }, { "epoch": 2.5089874857792944, "grad_norm": 2.0848083537206223, "learning_rate": 6.215599912835226e-07, "loss": 0.0397, "step": 11027 }, { "epoch": 2.5092150170648466, "grad_norm": 1.2578285990640168, "learning_rate": 6.214706413455091e-07, "loss": 0.0435, "step": 11028 }, { "epoch": 2.509442548350398, "grad_norm": 1.7198229490467627, "learning_rate": 6.213812914796294e-07, "loss": 0.0328, "step": 11029 }, { "epoch": 2.50967007963595, "grad_norm": 1.9867695825918112, "learning_rate": 6.212919416877094e-07, "loss": 0.0574, "step": 11030 }, { "epoch": 2.5098976109215014, "grad_norm": 2.7105225526160477, "learning_rate": 6.212025919715751e-07, "loss": 0.1359, "step": 11031 }, { "epoch": 2.5101251422070536, "grad_norm": 2.342574023145511, "learning_rate": 6.211132423330533e-07, "loss": 0.0358, "step": 11032 }, { "epoch": 2.5103526734926054, "grad_norm": 1.412072555498323, "learning_rate": 6.210238927739693e-07, "loss": 0.0506, "step": 11033 }, { "epoch": 2.510580204778157, "grad_norm": 1.7655034110485555, "learning_rate": 6.209345432961498e-07, "loss": 0.0708, "step": 11034 }, { "epoch": 2.510807736063709, "grad_norm": 2.1054380939463195, "learning_rate": 6.208451939014207e-07, "loss": 0.0433, "step": 11035 }, { "epoch": 2.5110352673492606, "grad_norm": 2.8617859785515947, "learning_rate": 6.207558445916085e-07, "loss": 0.1186, "step": 11036 }, { "epoch": 2.5112627986348124, "grad_norm": 2.1323967775729966, "learning_rate": 6.206664953685389e-07, "loss": 0.0782, "step": 11037 }, { "epoch": 2.511490329920364, "grad_norm": 1.5742435506765042, "learning_rate": 6.205771462340381e-07, "loss": 0.0468, "step": 11038 }, { "epoch": 2.511717861205916, "grad_norm": 1.3050726174092304, "learning_rate": 6.204877971899325e-07, "loss": 0.0569, "step": 11039 }, { "epoch": 2.5119453924914676, "grad_norm": 1.7443565804290286, "learning_rate": 6.203984482380479e-07, "loss": 0.0798, "step": 11040 }, { "epoch": 2.5121729237770194, "grad_norm": 2.5078676652413234, "learning_rate": 6.203090993802107e-07, "loss": 0.0677, "step": 11041 }, { "epoch": 2.512400455062571, "grad_norm": 1.9131801027057873, "learning_rate": 6.202197506182468e-07, "loss": 0.073, "step": 11042 }, { "epoch": 2.512627986348123, "grad_norm": 1.884257864387025, "learning_rate": 6.201304019539827e-07, "loss": 0.0436, "step": 11043 }, { "epoch": 2.5128555176336747, "grad_norm": 1.7798345635565676, "learning_rate": 6.200410533892441e-07, "loss": 0.0258, "step": 11044 }, { "epoch": 2.5130830489192264, "grad_norm": 2.4024235445276743, "learning_rate": 6.199517049258571e-07, "loss": 0.0535, "step": 11045 }, { "epoch": 2.513310580204778, "grad_norm": 2.033907517798713, "learning_rate": 6.198623565656484e-07, "loss": 0.0354, "step": 11046 }, { "epoch": 2.51353811149033, "grad_norm": 1.6898910861106502, "learning_rate": 6.197730083104433e-07, "loss": 0.0477, "step": 11047 }, { "epoch": 2.5137656427758817, "grad_norm": 1.1103854740403931, "learning_rate": 6.196836601620686e-07, "loss": 0.0162, "step": 11048 }, { "epoch": 2.5139931740614334, "grad_norm": 1.143845607126968, "learning_rate": 6.195943121223503e-07, "loss": 0.0227, "step": 11049 }, { "epoch": 2.514220705346985, "grad_norm": 1.6919805089429434, "learning_rate": 6.19504964193114e-07, "loss": 0.081, "step": 11050 }, { "epoch": 2.514448236632537, "grad_norm": 2.336081671533437, "learning_rate": 6.194156163761863e-07, "loss": 0.0382, "step": 11051 }, { "epoch": 2.5146757679180887, "grad_norm": 1.589334318516393, "learning_rate": 6.193262686733931e-07, "loss": 0.1608, "step": 11052 }, { "epoch": 2.5149032992036404, "grad_norm": 1.9718749666129884, "learning_rate": 6.192369210865609e-07, "loss": 0.018, "step": 11053 }, { "epoch": 2.515130830489192, "grad_norm": 0.9533565122642095, "learning_rate": 6.191475736175152e-07, "loss": 0.0134, "step": 11054 }, { "epoch": 2.515358361774744, "grad_norm": 1.2672994305420475, "learning_rate": 6.190582262680824e-07, "loss": 0.0425, "step": 11055 }, { "epoch": 2.5155858930602957, "grad_norm": 1.0244631647287852, "learning_rate": 6.189688790400888e-07, "loss": 0.0155, "step": 11056 }, { "epoch": 2.5158134243458474, "grad_norm": 2.049403246130174, "learning_rate": 6.188795319353599e-07, "loss": 0.0472, "step": 11057 }, { "epoch": 2.516040955631399, "grad_norm": 1.1568270284298823, "learning_rate": 6.187901849557224e-07, "loss": 0.011, "step": 11058 }, { "epoch": 2.516268486916951, "grad_norm": 2.041916449530491, "learning_rate": 6.187008381030019e-07, "loss": 0.075, "step": 11059 }, { "epoch": 2.5164960182025027, "grad_norm": 0.9417216816686345, "learning_rate": 6.186114913790251e-07, "loss": 0.0699, "step": 11060 }, { "epoch": 2.516723549488055, "grad_norm": 1.7052735078244157, "learning_rate": 6.185221447856175e-07, "loss": 0.0391, "step": 11061 }, { "epoch": 2.516951080773606, "grad_norm": 2.6500799341978363, "learning_rate": 6.184327983246054e-07, "loss": 0.0442, "step": 11062 }, { "epoch": 2.5171786120591584, "grad_norm": 1.5574220750337953, "learning_rate": 6.18343451997815e-07, "loss": 0.0255, "step": 11063 }, { "epoch": 2.5174061433447097, "grad_norm": 0.9291304127062001, "learning_rate": 6.182541058070721e-07, "loss": 0.0796, "step": 11064 }, { "epoch": 2.517633674630262, "grad_norm": 1.598773313826786, "learning_rate": 6.181647597542029e-07, "loss": 0.0455, "step": 11065 }, { "epoch": 2.517861205915813, "grad_norm": 1.5361754454204313, "learning_rate": 6.180754138410334e-07, "loss": 0.0425, "step": 11066 }, { "epoch": 2.5180887372013654, "grad_norm": 1.6102930226415857, "learning_rate": 6.179860680693902e-07, "loss": 0.1083, "step": 11067 }, { "epoch": 2.5183162684869167, "grad_norm": 1.3999155840529558, "learning_rate": 6.178967224410987e-07, "loss": 0.032, "step": 11068 }, { "epoch": 2.518543799772469, "grad_norm": 2.057394070295082, "learning_rate": 6.17807376957985e-07, "loss": 0.0937, "step": 11069 }, { "epoch": 2.51877133105802, "grad_norm": 1.6919586826668034, "learning_rate": 6.177180316218756e-07, "loss": 0.0917, "step": 11070 }, { "epoch": 2.5189988623435724, "grad_norm": 2.209998623716619, "learning_rate": 6.17628686434596e-07, "loss": 0.0893, "step": 11071 }, { "epoch": 2.519226393629124, "grad_norm": 2.143952188375939, "learning_rate": 6.175393413979728e-07, "loss": 0.0489, "step": 11072 }, { "epoch": 2.519453924914676, "grad_norm": 1.3893039312898101, "learning_rate": 6.174499965138316e-07, "loss": 0.0335, "step": 11073 }, { "epoch": 2.5196814562002277, "grad_norm": 1.5638689869103837, "learning_rate": 6.173606517839989e-07, "loss": 0.0725, "step": 11074 }, { "epoch": 2.5199089874857794, "grad_norm": 2.4618631078788065, "learning_rate": 6.172713072103004e-07, "loss": 0.0872, "step": 11075 }, { "epoch": 2.520136518771331, "grad_norm": 1.4188905367615328, "learning_rate": 6.17181962794562e-07, "loss": 0.0431, "step": 11076 }, { "epoch": 2.520364050056883, "grad_norm": 1.0415385112590472, "learning_rate": 6.170926185386102e-07, "loss": 0.0171, "step": 11077 }, { "epoch": 2.5205915813424347, "grad_norm": 1.948232303654507, "learning_rate": 6.170032744442706e-07, "loss": 0.1255, "step": 11078 }, { "epoch": 2.5208191126279864, "grad_norm": 0.7708876825095176, "learning_rate": 6.169139305133697e-07, "loss": 0.0683, "step": 11079 }, { "epoch": 2.521046643913538, "grad_norm": 1.5834734224473694, "learning_rate": 6.168245867477333e-07, "loss": 0.0281, "step": 11080 }, { "epoch": 2.52127417519909, "grad_norm": 2.088965532941998, "learning_rate": 6.16735243149187e-07, "loss": 0.0335, "step": 11081 }, { "epoch": 2.5215017064846417, "grad_norm": 1.3640471096775282, "learning_rate": 6.166458997195575e-07, "loss": 0.1846, "step": 11082 }, { "epoch": 2.5217292377701934, "grad_norm": 2.4088616521647324, "learning_rate": 6.165565564606702e-07, "loss": 0.0522, "step": 11083 }, { "epoch": 2.521956769055745, "grad_norm": 2.207807190861441, "learning_rate": 6.164672133743516e-07, "loss": 0.0704, "step": 11084 }, { "epoch": 2.522184300341297, "grad_norm": 1.6334687157525936, "learning_rate": 6.163778704624275e-07, "loss": 0.0884, "step": 11085 }, { "epoch": 2.5224118316268487, "grad_norm": 1.1711488252341609, "learning_rate": 6.162885277267241e-07, "loss": 0.0123, "step": 11086 }, { "epoch": 2.5226393629124004, "grad_norm": 1.4051759111261901, "learning_rate": 6.161991851690672e-07, "loss": 0.0564, "step": 11087 }, { "epoch": 2.522866894197952, "grad_norm": 1.8550063678903088, "learning_rate": 6.161098427912827e-07, "loss": 0.1002, "step": 11088 }, { "epoch": 2.523094425483504, "grad_norm": 2.5240498530430613, "learning_rate": 6.160205005951969e-07, "loss": 0.0563, "step": 11089 }, { "epoch": 2.5233219567690557, "grad_norm": 1.6237095838440487, "learning_rate": 6.159311585826353e-07, "loss": 0.1429, "step": 11090 }, { "epoch": 2.5235494880546074, "grad_norm": 2.0992388835763656, "learning_rate": 6.158418167554245e-07, "loss": 0.0927, "step": 11091 }, { "epoch": 2.523777019340159, "grad_norm": 1.7069591270863522, "learning_rate": 6.1575247511539e-07, "loss": 0.0321, "step": 11092 }, { "epoch": 2.524004550625711, "grad_norm": 1.3658787264675727, "learning_rate": 6.156631336643582e-07, "loss": 0.0368, "step": 11093 }, { "epoch": 2.5242320819112627, "grad_norm": 1.57359365698972, "learning_rate": 6.155737924041548e-07, "loss": 0.0888, "step": 11094 }, { "epoch": 2.5244596131968144, "grad_norm": 1.8229817642784567, "learning_rate": 6.154844513366056e-07, "loss": 0.0567, "step": 11095 }, { "epoch": 2.524687144482366, "grad_norm": 1.252903518572975, "learning_rate": 6.153951104635371e-07, "loss": 0.066, "step": 11096 }, { "epoch": 2.524914675767918, "grad_norm": 1.5752843940414576, "learning_rate": 6.153057697867746e-07, "loss": 0.0302, "step": 11097 }, { "epoch": 2.5251422070534697, "grad_norm": 0.8352223227098137, "learning_rate": 6.152164293081447e-07, "loss": 0.027, "step": 11098 }, { "epoch": 2.5253697383390215, "grad_norm": 1.2872165203116492, "learning_rate": 6.151270890294731e-07, "loss": 0.0266, "step": 11099 }, { "epoch": 2.5255972696245736, "grad_norm": 1.6736483992310536, "learning_rate": 6.150377489525855e-07, "loss": 0.0843, "step": 11100 }, { "epoch": 2.525824800910125, "grad_norm": 1.5283990713724713, "learning_rate": 6.149484090793082e-07, "loss": 0.0665, "step": 11101 }, { "epoch": 2.526052332195677, "grad_norm": 1.1029990214964833, "learning_rate": 6.148590694114669e-07, "loss": 0.058, "step": 11102 }, { "epoch": 2.5262798634812285, "grad_norm": 2.3808450948325235, "learning_rate": 6.147697299508878e-07, "loss": 0.0385, "step": 11103 }, { "epoch": 2.5265073947667807, "grad_norm": 1.7871770892632544, "learning_rate": 6.146803906993965e-07, "loss": 0.0966, "step": 11104 }, { "epoch": 2.526734926052332, "grad_norm": 1.6055374871882075, "learning_rate": 6.145910516588192e-07, "loss": 0.0752, "step": 11105 }, { "epoch": 2.526962457337884, "grad_norm": 1.4314569814657323, "learning_rate": 6.14501712830982e-07, "loss": 0.0251, "step": 11106 }, { "epoch": 2.5271899886234355, "grad_norm": 1.8763856309791092, "learning_rate": 6.144123742177101e-07, "loss": 0.0241, "step": 11107 }, { "epoch": 2.5274175199089877, "grad_norm": 1.1278200187346532, "learning_rate": 6.143230358208302e-07, "loss": 0.0195, "step": 11108 }, { "epoch": 2.527645051194539, "grad_norm": 1.0942087133806622, "learning_rate": 6.142336976421676e-07, "loss": 0.0334, "step": 11109 }, { "epoch": 2.527872582480091, "grad_norm": 1.0020504111705226, "learning_rate": 6.141443596835489e-07, "loss": 0.0147, "step": 11110 }, { "epoch": 2.528100113765643, "grad_norm": 2.978002399420471, "learning_rate": 6.140550219467993e-07, "loss": 0.0624, "step": 11111 }, { "epoch": 2.5283276450511947, "grad_norm": 1.2813170784229617, "learning_rate": 6.139656844337451e-07, "loss": 0.1234, "step": 11112 }, { "epoch": 2.5285551763367464, "grad_norm": 1.1020582218969623, "learning_rate": 6.138763471462122e-07, "loss": 0.038, "step": 11113 }, { "epoch": 2.528782707622298, "grad_norm": 3.1818779724938717, "learning_rate": 6.137870100860262e-07, "loss": 0.0684, "step": 11114 }, { "epoch": 2.52901023890785, "grad_norm": 1.176409295392125, "learning_rate": 6.136976732550134e-07, "loss": 0.0766, "step": 11115 }, { "epoch": 2.5292377701934017, "grad_norm": 1.257509496293317, "learning_rate": 6.136083366549992e-07, "loss": 0.0894, "step": 11116 }, { "epoch": 2.5294653014789534, "grad_norm": 1.7124880225206618, "learning_rate": 6.135190002878101e-07, "loss": 0.0753, "step": 11117 }, { "epoch": 2.529692832764505, "grad_norm": 1.370620808400998, "learning_rate": 6.134296641552713e-07, "loss": 0.0347, "step": 11118 }, { "epoch": 2.529920364050057, "grad_norm": 1.2490201983486138, "learning_rate": 6.13340328259209e-07, "loss": 0.0246, "step": 11119 }, { "epoch": 2.5301478953356087, "grad_norm": 1.0076937921353102, "learning_rate": 6.132509926014492e-07, "loss": 0.0355, "step": 11120 }, { "epoch": 2.5303754266211604, "grad_norm": 2.2114359382160518, "learning_rate": 6.131616571838174e-07, "loss": 0.2079, "step": 11121 }, { "epoch": 2.530602957906712, "grad_norm": 1.1025502518789578, "learning_rate": 6.130723220081398e-07, "loss": 0.0645, "step": 11122 }, { "epoch": 2.530830489192264, "grad_norm": 1.6620425530935523, "learning_rate": 6.129829870762419e-07, "loss": 0.052, "step": 11123 }, { "epoch": 2.5310580204778157, "grad_norm": 2.1385428750334627, "learning_rate": 6.1289365238995e-07, "loss": 0.0568, "step": 11124 }, { "epoch": 2.5312855517633674, "grad_norm": 2.035015516485445, "learning_rate": 6.128043179510895e-07, "loss": 0.1234, "step": 11125 }, { "epoch": 2.531513083048919, "grad_norm": 1.3288740151876715, "learning_rate": 6.127149837614864e-07, "loss": 0.0165, "step": 11126 }, { "epoch": 2.531740614334471, "grad_norm": 1.120114016185683, "learning_rate": 6.126256498229667e-07, "loss": 0.0425, "step": 11127 }, { "epoch": 2.5319681456200227, "grad_norm": 1.0568204576116342, "learning_rate": 6.125363161373558e-07, "loss": 0.0401, "step": 11128 }, { "epoch": 2.5321956769055745, "grad_norm": 1.4377060582265642, "learning_rate": 6.124469827064799e-07, "loss": 0.082, "step": 11129 }, { "epoch": 2.532423208191126, "grad_norm": 1.4617553134068448, "learning_rate": 6.123576495321646e-07, "loss": 0.1069, "step": 11130 }, { "epoch": 2.532650739476678, "grad_norm": 1.8413625840550265, "learning_rate": 6.12268316616236e-07, "loss": 0.1088, "step": 11131 }, { "epoch": 2.5328782707622297, "grad_norm": 0.8059310939928425, "learning_rate": 6.121789839605196e-07, "loss": 0.0293, "step": 11132 }, { "epoch": 2.5331058020477815, "grad_norm": 1.5190904120718345, "learning_rate": 6.120896515668412e-07, "loss": 0.0628, "step": 11133 }, { "epoch": 2.533333333333333, "grad_norm": 1.6665464784783472, "learning_rate": 6.120003194370269e-07, "loss": 0.0371, "step": 11134 }, { "epoch": 2.533560864618885, "grad_norm": 1.535152212089139, "learning_rate": 6.11910987572902e-07, "loss": 0.1216, "step": 11135 }, { "epoch": 2.5337883959044367, "grad_norm": 1.468380230969888, "learning_rate": 6.118216559762926e-07, "loss": 0.0755, "step": 11136 }, { "epoch": 2.5340159271899885, "grad_norm": 1.7295756206802766, "learning_rate": 6.117323246490246e-07, "loss": 0.121, "step": 11137 }, { "epoch": 2.5342434584755402, "grad_norm": 2.3246232877705992, "learning_rate": 6.116429935929234e-07, "loss": 0.0745, "step": 11138 }, { "epoch": 2.5344709897610924, "grad_norm": 1.3509874064077176, "learning_rate": 6.11553662809815e-07, "loss": 0.0912, "step": 11139 }, { "epoch": 2.5346985210466437, "grad_norm": 3.740761980786025, "learning_rate": 6.11464332301525e-07, "loss": 0.0926, "step": 11140 }, { "epoch": 2.534926052332196, "grad_norm": 2.163934727648125, "learning_rate": 6.113750020698795e-07, "loss": 0.0627, "step": 11141 }, { "epoch": 2.5351535836177472, "grad_norm": 2.0402896814179536, "learning_rate": 6.112856721167037e-07, "loss": 0.0457, "step": 11142 }, { "epoch": 2.5353811149032994, "grad_norm": 1.294271643984463, "learning_rate": 6.111963424438239e-07, "loss": 0.0784, "step": 11143 }, { "epoch": 2.5356086461888507, "grad_norm": 1.6785393568000202, "learning_rate": 6.111070130530655e-07, "loss": 0.029, "step": 11144 }, { "epoch": 2.535836177474403, "grad_norm": 1.2662489015916507, "learning_rate": 6.110176839462541e-07, "loss": 0.0799, "step": 11145 }, { "epoch": 2.5360637087599542, "grad_norm": 2.8083764682355525, "learning_rate": 6.10928355125216e-07, "loss": 0.0676, "step": 11146 }, { "epoch": 2.5362912400455064, "grad_norm": 1.8953956394238993, "learning_rate": 6.108390265917763e-07, "loss": 0.0616, "step": 11147 }, { "epoch": 2.536518771331058, "grad_norm": 3.4374902105000165, "learning_rate": 6.10749698347761e-07, "loss": 0.0342, "step": 11148 }, { "epoch": 2.53674630261661, "grad_norm": 1.3292654948166138, "learning_rate": 6.106603703949958e-07, "loss": 0.071, "step": 11149 }, { "epoch": 2.5369738339021617, "grad_norm": 2.969502208980304, "learning_rate": 6.105710427353065e-07, "loss": 0.1362, "step": 11150 }, { "epoch": 2.5372013651877134, "grad_norm": 1.9456005572080088, "learning_rate": 6.104817153705185e-07, "loss": 0.0514, "step": 11151 }, { "epoch": 2.537428896473265, "grad_norm": 1.9909423408953237, "learning_rate": 6.103923883024576e-07, "loss": 0.061, "step": 11152 }, { "epoch": 2.537656427758817, "grad_norm": 1.9868762496384462, "learning_rate": 6.103030615329497e-07, "loss": 0.0484, "step": 11153 }, { "epoch": 2.5378839590443687, "grad_norm": 1.1331781131784702, "learning_rate": 6.102137350638202e-07, "loss": 0.0527, "step": 11154 }, { "epoch": 2.5381114903299204, "grad_norm": 1.6922514275980938, "learning_rate": 6.101244088968948e-07, "loss": 0.0383, "step": 11155 }, { "epoch": 2.538339021615472, "grad_norm": 1.4977924461315157, "learning_rate": 6.100350830339996e-07, "loss": 0.0269, "step": 11156 }, { "epoch": 2.538566552901024, "grad_norm": 2.0005768903120176, "learning_rate": 6.099457574769595e-07, "loss": 0.067, "step": 11157 }, { "epoch": 2.5387940841865757, "grad_norm": 1.7160144850460914, "learning_rate": 6.098564322276007e-07, "loss": 0.0631, "step": 11158 }, { "epoch": 2.5390216154721275, "grad_norm": 2.3354400435552907, "learning_rate": 6.097671072877485e-07, "loss": 0.0668, "step": 11159 }, { "epoch": 2.539249146757679, "grad_norm": 2.918676499511788, "learning_rate": 6.096777826592292e-07, "loss": 0.0363, "step": 11160 }, { "epoch": 2.539476678043231, "grad_norm": 1.725176657012043, "learning_rate": 6.095884583438675e-07, "loss": 0.0488, "step": 11161 }, { "epoch": 2.5397042093287827, "grad_norm": 1.5021711289188824, "learning_rate": 6.094991343434896e-07, "loss": 0.053, "step": 11162 }, { "epoch": 2.5399317406143345, "grad_norm": 1.4917209791860586, "learning_rate": 6.094098106599212e-07, "loss": 0.0433, "step": 11163 }, { "epoch": 2.540159271899886, "grad_norm": 1.6526031962863332, "learning_rate": 6.093204872949875e-07, "loss": 0.0762, "step": 11164 }, { "epoch": 2.540386803185438, "grad_norm": 2.1831711804871006, "learning_rate": 6.092311642505144e-07, "loss": 0.0545, "step": 11165 }, { "epoch": 2.5406143344709897, "grad_norm": 1.3985208639254496, "learning_rate": 6.091418415283273e-07, "loss": 0.0858, "step": 11166 }, { "epoch": 2.5408418657565415, "grad_norm": 1.5384291997526152, "learning_rate": 6.090525191302522e-07, "loss": 0.0762, "step": 11167 }, { "epoch": 2.5410693970420932, "grad_norm": 1.818002368919848, "learning_rate": 6.089631970581142e-07, "loss": 0.1372, "step": 11168 }, { "epoch": 2.541296928327645, "grad_norm": 1.4159648206674065, "learning_rate": 6.088738753137391e-07, "loss": 0.0344, "step": 11169 }, { "epoch": 2.5415244596131967, "grad_norm": 1.506309200314262, "learning_rate": 6.087845538989525e-07, "loss": 0.0264, "step": 11170 }, { "epoch": 2.5417519908987485, "grad_norm": 1.303472337928869, "learning_rate": 6.086952328155798e-07, "loss": 0.0442, "step": 11171 }, { "epoch": 2.5419795221843002, "grad_norm": 2.2875817557963587, "learning_rate": 6.086059120654469e-07, "loss": 0.0774, "step": 11172 }, { "epoch": 2.542207053469852, "grad_norm": 0.8427789341457793, "learning_rate": 6.085165916503789e-07, "loss": 0.0241, "step": 11173 }, { "epoch": 2.5424345847554037, "grad_norm": 1.140083924064783, "learning_rate": 6.084272715722018e-07, "loss": 0.1004, "step": 11174 }, { "epoch": 2.5426621160409555, "grad_norm": 0.898781272703175, "learning_rate": 6.083379518327408e-07, "loss": 0.0233, "step": 11175 }, { "epoch": 2.5428896473265072, "grad_norm": 1.3394700246608933, "learning_rate": 6.082486324338214e-07, "loss": 0.085, "step": 11176 }, { "epoch": 2.543117178612059, "grad_norm": 1.838936691860532, "learning_rate": 6.081593133772697e-07, "loss": 0.0361, "step": 11177 }, { "epoch": 2.543344709897611, "grad_norm": 1.2648466434313177, "learning_rate": 6.080699946649104e-07, "loss": 0.0534, "step": 11178 }, { "epoch": 2.5435722411831625, "grad_norm": 1.7052089220622217, "learning_rate": 6.079806762985695e-07, "loss": 0.0259, "step": 11179 }, { "epoch": 2.5437997724687147, "grad_norm": 1.5701708247473452, "learning_rate": 6.078913582800723e-07, "loss": 0.1158, "step": 11180 }, { "epoch": 2.544027303754266, "grad_norm": 2.226223238696447, "learning_rate": 6.078020406112447e-07, "loss": 0.1436, "step": 11181 }, { "epoch": 2.544254835039818, "grad_norm": 1.3772541975560941, "learning_rate": 6.077127232939118e-07, "loss": 0.0427, "step": 11182 }, { "epoch": 2.5444823663253695, "grad_norm": 1.7001885499500589, "learning_rate": 6.07623406329899e-07, "loss": 0.0994, "step": 11183 }, { "epoch": 2.5447098976109217, "grad_norm": 1.6098879909569328, "learning_rate": 6.075340897210321e-07, "loss": 0.0364, "step": 11184 }, { "epoch": 2.544937428896473, "grad_norm": 1.6341793719592042, "learning_rate": 6.074447734691363e-07, "loss": 0.0223, "step": 11185 }, { "epoch": 2.545164960182025, "grad_norm": 1.461519498520087, "learning_rate": 6.073554575760373e-07, "loss": 0.0561, "step": 11186 }, { "epoch": 2.545392491467577, "grad_norm": 2.067650322066937, "learning_rate": 6.072661420435606e-07, "loss": 0.0222, "step": 11187 }, { "epoch": 2.5456200227531287, "grad_norm": 1.0473366212135327, "learning_rate": 6.071768268735311e-07, "loss": 0.1095, "step": 11188 }, { "epoch": 2.5458475540386805, "grad_norm": 1.7339459049147175, "learning_rate": 6.070875120677748e-07, "loss": 0.038, "step": 11189 }, { "epoch": 2.546075085324232, "grad_norm": 1.6804963177589742, "learning_rate": 6.069981976281169e-07, "loss": 0.0464, "step": 11190 }, { "epoch": 2.546302616609784, "grad_norm": 1.5361552580523827, "learning_rate": 6.06908883556383e-07, "loss": 0.0324, "step": 11191 }, { "epoch": 2.5465301478953357, "grad_norm": 1.739396037950091, "learning_rate": 6.068195698543981e-07, "loss": 0.1079, "step": 11192 }, { "epoch": 2.5467576791808875, "grad_norm": 1.5408638494094806, "learning_rate": 6.067302565239881e-07, "loss": 0.1114, "step": 11193 }, { "epoch": 2.546985210466439, "grad_norm": 1.3558161182466186, "learning_rate": 6.066409435669784e-07, "loss": 0.1032, "step": 11194 }, { "epoch": 2.547212741751991, "grad_norm": 1.3931008491041075, "learning_rate": 6.065516309851938e-07, "loss": 0.0335, "step": 11195 }, { "epoch": 2.5474402730375427, "grad_norm": 1.573720700136308, "learning_rate": 6.064623187804603e-07, "loss": 0.0235, "step": 11196 }, { "epoch": 2.5476678043230945, "grad_norm": 1.3597046242322741, "learning_rate": 6.063730069546029e-07, "loss": 0.0653, "step": 11197 }, { "epoch": 2.5478953356086462, "grad_norm": 1.0022506944594154, "learning_rate": 6.062836955094474e-07, "loss": 0.0648, "step": 11198 }, { "epoch": 2.548122866894198, "grad_norm": 1.730136927682986, "learning_rate": 6.061943844468184e-07, "loss": 0.0301, "step": 11199 }, { "epoch": 2.5483503981797497, "grad_norm": 1.7375347829804042, "learning_rate": 6.061050737685423e-07, "loss": 0.0283, "step": 11200 }, { "epoch": 2.5485779294653015, "grad_norm": 1.6378217710686713, "learning_rate": 6.060157634764438e-07, "loss": 0.0627, "step": 11201 }, { "epoch": 2.5488054607508532, "grad_norm": 1.2915602989107398, "learning_rate": 6.059264535723479e-07, "loss": 0.0389, "step": 11202 }, { "epoch": 2.549032992036405, "grad_norm": 1.6435300093033678, "learning_rate": 6.058371440580808e-07, "loss": 0.0907, "step": 11203 }, { "epoch": 2.5492605233219567, "grad_norm": 1.1418876403032299, "learning_rate": 6.057478349354671e-07, "loss": 0.0508, "step": 11204 }, { "epoch": 2.5494880546075085, "grad_norm": 1.3215334564457395, "learning_rate": 6.056585262063324e-07, "loss": 0.1, "step": 11205 }, { "epoch": 2.5497155858930602, "grad_norm": 0.9509572075881269, "learning_rate": 6.055692178725024e-07, "loss": 0.024, "step": 11206 }, { "epoch": 2.549943117178612, "grad_norm": 2.668091986434099, "learning_rate": 6.054799099358016e-07, "loss": 0.0665, "step": 11207 }, { "epoch": 2.5501706484641637, "grad_norm": 1.3746925017823068, "learning_rate": 6.053906023980558e-07, "loss": 0.0474, "step": 11208 }, { "epoch": 2.5503981797497155, "grad_norm": 1.3089953418206468, "learning_rate": 6.053012952610901e-07, "loss": 0.0187, "step": 11209 }, { "epoch": 2.5506257110352673, "grad_norm": 1.9380743841142436, "learning_rate": 6.0521198852673e-07, "loss": 0.1342, "step": 11210 }, { "epoch": 2.550853242320819, "grad_norm": 1.724579173166011, "learning_rate": 6.051226821968003e-07, "loss": 0.0457, "step": 11211 }, { "epoch": 2.5510807736063708, "grad_norm": 1.990887044731996, "learning_rate": 6.050333762731269e-07, "loss": 0.0327, "step": 11212 }, { "epoch": 2.5513083048919225, "grad_norm": 1.5371893457299248, "learning_rate": 6.049440707575348e-07, "loss": 0.0397, "step": 11213 }, { "epoch": 2.5515358361774743, "grad_norm": 1.0540755384345442, "learning_rate": 6.048547656518489e-07, "loss": 0.0226, "step": 11214 }, { "epoch": 2.551763367463026, "grad_norm": 2.3139294484793473, "learning_rate": 6.047654609578948e-07, "loss": 0.0331, "step": 11215 }, { "epoch": 2.5519908987485778, "grad_norm": 2.0154445753090315, "learning_rate": 6.046761566774975e-07, "loss": 0.1163, "step": 11216 }, { "epoch": 2.55221843003413, "grad_norm": 1.8528890614093967, "learning_rate": 6.045868528124825e-07, "loss": 0.1121, "step": 11217 }, { "epoch": 2.5524459613196813, "grad_norm": 1.408981298722494, "learning_rate": 6.044975493646748e-07, "loss": 0.0598, "step": 11218 }, { "epoch": 2.5526734926052335, "grad_norm": 1.5827864465665533, "learning_rate": 6.044082463358996e-07, "loss": 0.0541, "step": 11219 }, { "epoch": 2.5529010238907848, "grad_norm": 1.702804703889097, "learning_rate": 6.043189437279823e-07, "loss": 0.0268, "step": 11220 }, { "epoch": 2.553128555176337, "grad_norm": 1.723544014397141, "learning_rate": 6.042296415427477e-07, "loss": 0.0417, "step": 11221 }, { "epoch": 2.5533560864618883, "grad_norm": 1.3022960700128268, "learning_rate": 6.041403397820212e-07, "loss": 0.0302, "step": 11222 }, { "epoch": 2.5535836177474405, "grad_norm": 1.4099720099885529, "learning_rate": 6.040510384476279e-07, "loss": 0.0687, "step": 11223 }, { "epoch": 2.553811149032992, "grad_norm": 1.2813147885481115, "learning_rate": 6.039617375413934e-07, "loss": 0.0252, "step": 11224 }, { "epoch": 2.554038680318544, "grad_norm": 0.613852187453146, "learning_rate": 6.038724370651421e-07, "loss": 0.0211, "step": 11225 }, { "epoch": 2.5542662116040957, "grad_norm": 1.0559385454364212, "learning_rate": 6.037831370206995e-07, "loss": 0.0372, "step": 11226 }, { "epoch": 2.5544937428896475, "grad_norm": 1.3858142992309783, "learning_rate": 6.03693837409891e-07, "loss": 0.0514, "step": 11227 }, { "epoch": 2.5547212741751992, "grad_norm": 3.3935594528114206, "learning_rate": 6.036045382345411e-07, "loss": 0.0562, "step": 11228 }, { "epoch": 2.554948805460751, "grad_norm": 1.3487834302774637, "learning_rate": 6.035152394964755e-07, "loss": 0.0875, "step": 11229 }, { "epoch": 2.5551763367463027, "grad_norm": 1.4295767686947645, "learning_rate": 6.034259411975188e-07, "loss": 0.0297, "step": 11230 }, { "epoch": 2.5554038680318545, "grad_norm": 2.071234439356039, "learning_rate": 6.033366433394966e-07, "loss": 0.112, "step": 11231 }, { "epoch": 2.5556313993174062, "grad_norm": 1.2732717074004825, "learning_rate": 6.032473459242337e-07, "loss": 0.0595, "step": 11232 }, { "epoch": 2.555858930602958, "grad_norm": 1.8863298070653354, "learning_rate": 6.031580489535549e-07, "loss": 0.0705, "step": 11233 }, { "epoch": 2.5560864618885097, "grad_norm": 1.6502983748708067, "learning_rate": 6.030687524292861e-07, "loss": 0.0521, "step": 11234 }, { "epoch": 2.5563139931740615, "grad_norm": 1.8536028442403494, "learning_rate": 6.029794563532514e-07, "loss": 0.0149, "step": 11235 }, { "epoch": 2.5565415244596132, "grad_norm": 1.7373035037596962, "learning_rate": 6.028901607272765e-07, "loss": 0.0405, "step": 11236 }, { "epoch": 2.556769055745165, "grad_norm": 1.1436743730563816, "learning_rate": 6.02800865553186e-07, "loss": 0.0378, "step": 11237 }, { "epoch": 2.5569965870307167, "grad_norm": 1.0553905569489144, "learning_rate": 6.027115708328056e-07, "loss": 0.0449, "step": 11238 }, { "epoch": 2.5572241183162685, "grad_norm": 1.2471317235356165, "learning_rate": 6.026222765679595e-07, "loss": 0.0646, "step": 11239 }, { "epoch": 2.5574516496018203, "grad_norm": 1.3221778840331808, "learning_rate": 6.02532982760473e-07, "loss": 0.0177, "step": 11240 }, { "epoch": 2.557679180887372, "grad_norm": 1.4584719038388434, "learning_rate": 6.024436894121716e-07, "loss": 0.0263, "step": 11241 }, { "epoch": 2.5579067121729238, "grad_norm": 1.1950703189789076, "learning_rate": 6.023543965248795e-07, "loss": 0.0527, "step": 11242 }, { "epoch": 2.5581342434584755, "grad_norm": 1.4105155731362724, "learning_rate": 6.022651041004222e-07, "loss": 0.0737, "step": 11243 }, { "epoch": 2.5583617747440273, "grad_norm": 2.9447630201774313, "learning_rate": 6.021758121406247e-07, "loss": 0.0622, "step": 11244 }, { "epoch": 2.558589306029579, "grad_norm": 1.9829913566350865, "learning_rate": 6.020865206473116e-07, "loss": 0.1078, "step": 11245 }, { "epoch": 2.5588168373151308, "grad_norm": 2.359801845797294, "learning_rate": 6.01997229622308e-07, "loss": 0.0883, "step": 11246 }, { "epoch": 2.5590443686006825, "grad_norm": 1.7896523589808002, "learning_rate": 6.01907939067439e-07, "loss": 0.0259, "step": 11247 }, { "epoch": 2.5592718998862343, "grad_norm": 2.258265290813045, "learning_rate": 6.018186489845295e-07, "loss": 0.0464, "step": 11248 }, { "epoch": 2.559499431171786, "grad_norm": 1.7611166236301654, "learning_rate": 6.017293593754042e-07, "loss": 0.0496, "step": 11249 }, { "epoch": 2.5597269624573378, "grad_norm": 1.9261302488619283, "learning_rate": 6.016400702418883e-07, "loss": 0.0683, "step": 11250 }, { "epoch": 2.5599544937428895, "grad_norm": 0.7951118705271765, "learning_rate": 6.015507815858067e-07, "loss": 0.019, "step": 11251 }, { "epoch": 2.5601820250284413, "grad_norm": 1.7502094439361864, "learning_rate": 6.014614934089839e-07, "loss": 0.0978, "step": 11252 }, { "epoch": 2.560409556313993, "grad_norm": 1.9878120640072825, "learning_rate": 6.013722057132452e-07, "loss": 0.1062, "step": 11253 }, { "epoch": 2.560637087599545, "grad_norm": 2.600284752369147, "learning_rate": 6.012829185004153e-07, "loss": 0.155, "step": 11254 }, { "epoch": 2.5608646188850965, "grad_norm": 1.4657766238760555, "learning_rate": 6.011936317723192e-07, "loss": 0.0622, "step": 11255 }, { "epoch": 2.5610921501706487, "grad_norm": 1.8467265464457947, "learning_rate": 6.011043455307815e-07, "loss": 0.0833, "step": 11256 }, { "epoch": 2.5613196814562, "grad_norm": 1.9801222906465294, "learning_rate": 6.010150597776273e-07, "loss": 0.0684, "step": 11257 }, { "epoch": 2.5615472127417522, "grad_norm": 1.6662354142135198, "learning_rate": 6.009257745146815e-07, "loss": 0.0552, "step": 11258 }, { "epoch": 2.5617747440273035, "grad_norm": 2.8429211852251894, "learning_rate": 6.008364897437686e-07, "loss": 0.0624, "step": 11259 }, { "epoch": 2.5620022753128557, "grad_norm": 1.8338782144785741, "learning_rate": 6.007472054667136e-07, "loss": 0.0903, "step": 11260 }, { "epoch": 2.562229806598407, "grad_norm": 2.5157987143444847, "learning_rate": 6.006579216853413e-07, "loss": 0.0681, "step": 11261 }, { "epoch": 2.5624573378839592, "grad_norm": 2.1611233906380516, "learning_rate": 6.005686384014767e-07, "loss": 0.1041, "step": 11262 }, { "epoch": 2.5626848691695105, "grad_norm": 1.2573269589821006, "learning_rate": 6.004793556169443e-07, "loss": 0.059, "step": 11263 }, { "epoch": 2.5629124004550627, "grad_norm": 1.7718739621651667, "learning_rate": 6.003900733335688e-07, "loss": 0.0329, "step": 11264 }, { "epoch": 2.5631399317406145, "grad_norm": 1.2515821853119236, "learning_rate": 6.003007915531753e-07, "loss": 0.0606, "step": 11265 }, { "epoch": 2.5633674630261662, "grad_norm": 1.0029454035259728, "learning_rate": 6.002115102775881e-07, "loss": 0.0527, "step": 11266 }, { "epoch": 2.563594994311718, "grad_norm": 1.6349869327394029, "learning_rate": 6.001222295086326e-07, "loss": 0.0275, "step": 11267 }, { "epoch": 2.5638225255972698, "grad_norm": 2.138657399788485, "learning_rate": 6.00032949248133e-07, "loss": 0.0358, "step": 11268 }, { "epoch": 2.5640500568828215, "grad_norm": 1.2021590699775002, "learning_rate": 5.999436694979142e-07, "loss": 0.091, "step": 11269 }, { "epoch": 2.5642775881683733, "grad_norm": 1.8089694170611121, "learning_rate": 5.998543902598011e-07, "loss": 0.0344, "step": 11270 }, { "epoch": 2.564505119453925, "grad_norm": 1.5355789214525195, "learning_rate": 5.997651115356179e-07, "loss": 0.058, "step": 11271 }, { "epoch": 2.5647326507394768, "grad_norm": 3.1554965364943572, "learning_rate": 5.996758333271897e-07, "loss": 0.0401, "step": 11272 }, { "epoch": 2.5649601820250285, "grad_norm": 1.6512374030573962, "learning_rate": 5.99586555636341e-07, "loss": 0.0436, "step": 11273 }, { "epoch": 2.5651877133105803, "grad_norm": 1.4396578589001907, "learning_rate": 5.994972784648968e-07, "loss": 0.0457, "step": 11274 }, { "epoch": 2.565415244596132, "grad_norm": 1.4349358609130685, "learning_rate": 5.994080018146814e-07, "loss": 0.0482, "step": 11275 }, { "epoch": 2.5656427758816838, "grad_norm": 1.4820425865095184, "learning_rate": 5.993187256875196e-07, "loss": 0.0183, "step": 11276 }, { "epoch": 2.5658703071672355, "grad_norm": 1.81521590982157, "learning_rate": 5.992294500852361e-07, "loss": 0.0765, "step": 11277 }, { "epoch": 2.5660978384527873, "grad_norm": 1.3268441512975389, "learning_rate": 5.991401750096553e-07, "loss": 0.0534, "step": 11278 }, { "epoch": 2.566325369738339, "grad_norm": 1.2947162421066614, "learning_rate": 5.99050900462602e-07, "loss": 0.046, "step": 11279 }, { "epoch": 2.5665529010238908, "grad_norm": 1.1150498306280832, "learning_rate": 5.989616264459005e-07, "loss": 0.059, "step": 11280 }, { "epoch": 2.5667804323094425, "grad_norm": 1.6570278541285492, "learning_rate": 5.988723529613761e-07, "loss": 0.0513, "step": 11281 }, { "epoch": 2.5670079635949943, "grad_norm": 2.2974559117562627, "learning_rate": 5.987830800108528e-07, "loss": 0.1681, "step": 11282 }, { "epoch": 2.567235494880546, "grad_norm": 1.2747721803342356, "learning_rate": 5.986938075961553e-07, "loss": 0.0464, "step": 11283 }, { "epoch": 2.567463026166098, "grad_norm": 1.2936596822007398, "learning_rate": 5.986045357191083e-07, "loss": 0.0189, "step": 11284 }, { "epoch": 2.5676905574516495, "grad_norm": 2.4867954365427134, "learning_rate": 5.985152643815361e-07, "loss": 0.038, "step": 11285 }, { "epoch": 2.5679180887372013, "grad_norm": 1.5100472192684065, "learning_rate": 5.984259935852635e-07, "loss": 0.022, "step": 11286 }, { "epoch": 2.568145620022753, "grad_norm": 1.4452622114825642, "learning_rate": 5.983367233321148e-07, "loss": 0.1099, "step": 11287 }, { "epoch": 2.568373151308305, "grad_norm": 1.781898335123527, "learning_rate": 5.982474536239149e-07, "loss": 0.0487, "step": 11288 }, { "epoch": 2.5686006825938565, "grad_norm": 1.1181463213982776, "learning_rate": 5.981581844624878e-07, "loss": 0.0141, "step": 11289 }, { "epoch": 2.5688282138794083, "grad_norm": 1.9276018792094647, "learning_rate": 5.980689158496584e-07, "loss": 0.0861, "step": 11290 }, { "epoch": 2.56905574516496, "grad_norm": 1.562584495807189, "learning_rate": 5.97979647787251e-07, "loss": 0.1503, "step": 11291 }, { "epoch": 2.569283276450512, "grad_norm": 2.1008544074158677, "learning_rate": 5.9789038027709e-07, "loss": 0.0598, "step": 11292 }, { "epoch": 2.5695108077360636, "grad_norm": 1.6555749413839589, "learning_rate": 5.97801113321e-07, "loss": 0.1116, "step": 11293 }, { "epoch": 2.5697383390216153, "grad_norm": 1.8608235070300523, "learning_rate": 5.977118469208057e-07, "loss": 0.0499, "step": 11294 }, { "epoch": 2.5699658703071675, "grad_norm": 1.24004442036304, "learning_rate": 5.976225810783309e-07, "loss": 0.0353, "step": 11295 }, { "epoch": 2.570193401592719, "grad_norm": 1.083062415664442, "learning_rate": 5.975333157954007e-07, "loss": 0.0444, "step": 11296 }, { "epoch": 2.570420932878271, "grad_norm": 0.6632895289856584, "learning_rate": 5.974440510738389e-07, "loss": 0.0065, "step": 11297 }, { "epoch": 2.5706484641638223, "grad_norm": 1.5996805144185418, "learning_rate": 5.973547869154707e-07, "loss": 0.087, "step": 11298 }, { "epoch": 2.5708759954493745, "grad_norm": 1.141280245416342, "learning_rate": 5.972655233221195e-07, "loss": 0.0799, "step": 11299 }, { "epoch": 2.571103526734926, "grad_norm": 2.0134492043793606, "learning_rate": 5.971762602956105e-07, "loss": 0.0987, "step": 11300 }, { "epoch": 2.571331058020478, "grad_norm": 1.163702635154498, "learning_rate": 5.970869978377679e-07, "loss": 0.0516, "step": 11301 }, { "epoch": 2.5715585893060293, "grad_norm": 0.6879840825356797, "learning_rate": 5.969977359504156e-07, "loss": 0.0084, "step": 11302 }, { "epoch": 2.5717861205915815, "grad_norm": 1.9518604839921652, "learning_rate": 5.969084746353786e-07, "loss": 0.0428, "step": 11303 }, { "epoch": 2.5720136518771333, "grad_norm": 1.131824935185183, "learning_rate": 5.968192138944806e-07, "loss": 0.0392, "step": 11304 }, { "epoch": 2.572241183162685, "grad_norm": 1.7677743849287204, "learning_rate": 5.967299537295467e-07, "loss": 0.0755, "step": 11305 }, { "epoch": 2.5724687144482368, "grad_norm": 0.9939892636408904, "learning_rate": 5.966406941424004e-07, "loss": 0.0364, "step": 11306 }, { "epoch": 2.5726962457337885, "grad_norm": 2.3698253012203705, "learning_rate": 5.965514351348665e-07, "loss": 0.1033, "step": 11307 }, { "epoch": 2.5729237770193403, "grad_norm": 1.3898549639551678, "learning_rate": 5.964621767087694e-07, "loss": 0.1404, "step": 11308 }, { "epoch": 2.573151308304892, "grad_norm": 2.550913103016841, "learning_rate": 5.963729188659328e-07, "loss": 0.0527, "step": 11309 }, { "epoch": 2.573378839590444, "grad_norm": 1.2567634636141134, "learning_rate": 5.962836616081814e-07, "loss": 0.0476, "step": 11310 }, { "epoch": 2.5736063708759955, "grad_norm": 1.4171892988786112, "learning_rate": 5.961944049373393e-07, "loss": 0.0747, "step": 11311 }, { "epoch": 2.5738339021615473, "grad_norm": 1.4369918021660621, "learning_rate": 5.961051488552311e-07, "loss": 0.0781, "step": 11312 }, { "epoch": 2.574061433447099, "grad_norm": 1.0471632884145605, "learning_rate": 5.960158933636805e-07, "loss": 0.0155, "step": 11313 }, { "epoch": 2.574288964732651, "grad_norm": 1.9022568906010728, "learning_rate": 5.959266384645119e-07, "loss": 0.055, "step": 11314 }, { "epoch": 2.5745164960182025, "grad_norm": 1.2123664517165613, "learning_rate": 5.958373841595498e-07, "loss": 0.0908, "step": 11315 }, { "epoch": 2.5747440273037543, "grad_norm": 1.459239657055576, "learning_rate": 5.957481304506179e-07, "loss": 0.0844, "step": 11316 }, { "epoch": 2.574971558589306, "grad_norm": 0.9076599251054491, "learning_rate": 5.956588773395408e-07, "loss": 0.03, "step": 11317 }, { "epoch": 2.575199089874858, "grad_norm": 1.7636148695908769, "learning_rate": 5.955696248281424e-07, "loss": 0.0537, "step": 11318 }, { "epoch": 2.5754266211604095, "grad_norm": 1.356140148680869, "learning_rate": 5.954803729182471e-07, "loss": 0.1213, "step": 11319 }, { "epoch": 2.5756541524459613, "grad_norm": 2.0882012969427097, "learning_rate": 5.953911216116789e-07, "loss": 0.0286, "step": 11320 }, { "epoch": 2.575881683731513, "grad_norm": 1.6501503001700548, "learning_rate": 5.953018709102618e-07, "loss": 0.0473, "step": 11321 }, { "epoch": 2.576109215017065, "grad_norm": 1.4037530562586236, "learning_rate": 5.952126208158204e-07, "loss": 0.0677, "step": 11322 }, { "epoch": 2.5763367463026166, "grad_norm": 1.4550057940423853, "learning_rate": 5.95123371330178e-07, "loss": 0.0946, "step": 11323 }, { "epoch": 2.5765642775881683, "grad_norm": 1.8922167459266306, "learning_rate": 5.950341224551595e-07, "loss": 0.0521, "step": 11324 }, { "epoch": 2.57679180887372, "grad_norm": 1.7446320326862843, "learning_rate": 5.949448741925886e-07, "loss": 0.1393, "step": 11325 }, { "epoch": 2.577019340159272, "grad_norm": 1.7534406518414956, "learning_rate": 5.948556265442893e-07, "loss": 0.0517, "step": 11326 }, { "epoch": 2.5772468714448236, "grad_norm": 1.1730531701456794, "learning_rate": 5.947663795120861e-07, "loss": 0.034, "step": 11327 }, { "epoch": 2.5774744027303753, "grad_norm": 1.9953203113999998, "learning_rate": 5.946771330978024e-07, "loss": 0.1374, "step": 11328 }, { "epoch": 2.577701934015927, "grad_norm": 2.0038876573804822, "learning_rate": 5.945878873032628e-07, "loss": 0.0593, "step": 11329 }, { "epoch": 2.577929465301479, "grad_norm": 1.017496135636108, "learning_rate": 5.944986421302909e-07, "loss": 0.0516, "step": 11330 }, { "epoch": 2.5781569965870306, "grad_norm": 2.7825202509889513, "learning_rate": 5.94409397580711e-07, "loss": 0.0559, "step": 11331 }, { "epoch": 2.5783845278725823, "grad_norm": 1.8776605438533789, "learning_rate": 5.943201536563471e-07, "loss": 0.1624, "step": 11332 }, { "epoch": 2.578612059158134, "grad_norm": 2.2055086118314087, "learning_rate": 5.942309103590228e-07, "loss": 0.0563, "step": 11333 }, { "epoch": 2.5788395904436863, "grad_norm": 1.3663601481302576, "learning_rate": 5.941416676905626e-07, "loss": 0.0487, "step": 11334 }, { "epoch": 2.5790671217292376, "grad_norm": 1.160583795642819, "learning_rate": 5.940524256527899e-07, "loss": 0.0614, "step": 11335 }, { "epoch": 2.5792946530147898, "grad_norm": 1.7700759379429098, "learning_rate": 5.939631842475292e-07, "loss": 0.04, "step": 11336 }, { "epoch": 2.579522184300341, "grad_norm": 1.8656105125528721, "learning_rate": 5.938739434766039e-07, "loss": 0.1319, "step": 11337 }, { "epoch": 2.5797497155858933, "grad_norm": 1.6828860240999668, "learning_rate": 5.937847033418386e-07, "loss": 0.0322, "step": 11338 }, { "epoch": 2.5799772468714446, "grad_norm": 0.8176086430704234, "learning_rate": 5.936954638450566e-07, "loss": 0.0131, "step": 11339 }, { "epoch": 2.580204778156997, "grad_norm": 1.742386839764946, "learning_rate": 5.936062249880819e-07, "loss": 0.155, "step": 11340 }, { "epoch": 2.580432309442548, "grad_norm": 0.8508196564618963, "learning_rate": 5.935169867727386e-07, "loss": 0.0282, "step": 11341 }, { "epoch": 2.5806598407281003, "grad_norm": 1.6233925333468704, "learning_rate": 5.934277492008502e-07, "loss": 0.029, "step": 11342 }, { "epoch": 2.580887372013652, "grad_norm": 1.2477420326489652, "learning_rate": 5.93338512274241e-07, "loss": 0.0747, "step": 11343 }, { "epoch": 2.581114903299204, "grad_norm": 1.659391634588825, "learning_rate": 5.932492759947345e-07, "loss": 0.076, "step": 11344 }, { "epoch": 2.5813424345847555, "grad_norm": 1.6669349612130522, "learning_rate": 5.931600403641549e-07, "loss": 0.0719, "step": 11345 }, { "epoch": 2.5815699658703073, "grad_norm": 1.807855976624457, "learning_rate": 5.930708053843257e-07, "loss": 0.0271, "step": 11346 }, { "epoch": 2.581797497155859, "grad_norm": 1.6387252927000282, "learning_rate": 5.929815710570705e-07, "loss": 0.0889, "step": 11347 }, { "epoch": 2.582025028441411, "grad_norm": 1.9060817415537634, "learning_rate": 5.928923373842136e-07, "loss": 0.09, "step": 11348 }, { "epoch": 2.5822525597269625, "grad_norm": 1.5549027982988501, "learning_rate": 5.928031043675783e-07, "loss": 0.0809, "step": 11349 }, { "epoch": 2.5824800910125143, "grad_norm": 1.506400447434205, "learning_rate": 5.927138720089887e-07, "loss": 0.0286, "step": 11350 }, { "epoch": 2.582707622298066, "grad_norm": 1.029232525834201, "learning_rate": 5.926246403102686e-07, "loss": 0.044, "step": 11351 }, { "epoch": 2.582935153583618, "grad_norm": 2.2492115228378258, "learning_rate": 5.925354092732412e-07, "loss": 0.1162, "step": 11352 }, { "epoch": 2.5831626848691696, "grad_norm": 1.3669104613471, "learning_rate": 5.924461788997308e-07, "loss": 0.033, "step": 11353 }, { "epoch": 2.5833902161547213, "grad_norm": 1.3928135464989784, "learning_rate": 5.923569491915608e-07, "loss": 0.0682, "step": 11354 }, { "epoch": 2.583617747440273, "grad_norm": 1.3363366896052378, "learning_rate": 5.922677201505552e-07, "loss": 0.0634, "step": 11355 }, { "epoch": 2.583845278725825, "grad_norm": 1.6964221538196198, "learning_rate": 5.921784917785371e-07, "loss": 0.1063, "step": 11356 }, { "epoch": 2.5840728100113766, "grad_norm": 2.1269414818628314, "learning_rate": 5.920892640773308e-07, "loss": 0.0347, "step": 11357 }, { "epoch": 2.5843003412969283, "grad_norm": 1.5676360654566404, "learning_rate": 5.920000370487597e-07, "loss": 0.1457, "step": 11358 }, { "epoch": 2.58452787258248, "grad_norm": 1.3459490598492898, "learning_rate": 5.919108106946472e-07, "loss": 0.1102, "step": 11359 }, { "epoch": 2.584755403868032, "grad_norm": 2.656815385963225, "learning_rate": 5.918215850168171e-07, "loss": 0.0544, "step": 11360 }, { "epoch": 2.5849829351535836, "grad_norm": 1.3818013091958306, "learning_rate": 5.917323600170931e-07, "loss": 0.0519, "step": 11361 }, { "epoch": 2.5852104664391353, "grad_norm": 1.5470808780837668, "learning_rate": 5.916431356972989e-07, "loss": 0.0406, "step": 11362 }, { "epoch": 2.585437997724687, "grad_norm": 1.52865460806297, "learning_rate": 5.915539120592577e-07, "loss": 0.0242, "step": 11363 }, { "epoch": 2.585665529010239, "grad_norm": 1.8730621397725935, "learning_rate": 5.914646891047933e-07, "loss": 0.1017, "step": 11364 }, { "epoch": 2.5858930602957906, "grad_norm": 1.7696764633989794, "learning_rate": 5.913754668357295e-07, "loss": 0.0591, "step": 11365 }, { "epoch": 2.5861205915813423, "grad_norm": 1.9038730857776907, "learning_rate": 5.912862452538894e-07, "loss": 0.0377, "step": 11366 }, { "epoch": 2.586348122866894, "grad_norm": 1.2636276941178972, "learning_rate": 5.911970243610967e-07, "loss": 0.1067, "step": 11367 }, { "epoch": 2.586575654152446, "grad_norm": 1.3790285398145712, "learning_rate": 5.911078041591747e-07, "loss": 0.0677, "step": 11368 }, { "epoch": 2.5868031854379976, "grad_norm": 1.5137380342654783, "learning_rate": 5.910185846499474e-07, "loss": 0.0351, "step": 11369 }, { "epoch": 2.5870307167235493, "grad_norm": 1.5135980209606608, "learning_rate": 5.90929365835238e-07, "loss": 0.0292, "step": 11370 }, { "epoch": 2.587258248009101, "grad_norm": 1.846207268079374, "learning_rate": 5.908401477168698e-07, "loss": 0.0498, "step": 11371 }, { "epoch": 2.587485779294653, "grad_norm": 2.046580337918919, "learning_rate": 5.907509302966666e-07, "loss": 0.133, "step": 11372 }, { "epoch": 2.587713310580205, "grad_norm": 1.1271527297903718, "learning_rate": 5.906617135764515e-07, "loss": 0.0861, "step": 11373 }, { "epoch": 2.5879408418657563, "grad_norm": 2.2692743204236216, "learning_rate": 5.905724975580482e-07, "loss": 0.0286, "step": 11374 }, { "epoch": 2.5881683731513085, "grad_norm": 2.437681341834639, "learning_rate": 5.904832822432799e-07, "loss": 0.0551, "step": 11375 }, { "epoch": 2.58839590443686, "grad_norm": 1.080484784725643, "learning_rate": 5.903940676339702e-07, "loss": 0.0235, "step": 11376 }, { "epoch": 2.588623435722412, "grad_norm": 1.6227092182280989, "learning_rate": 5.903048537319424e-07, "loss": 0.0403, "step": 11377 }, { "epoch": 2.5888509670079634, "grad_norm": 1.601602936671207, "learning_rate": 5.902156405390196e-07, "loss": 0.0669, "step": 11378 }, { "epoch": 2.5890784982935156, "grad_norm": 2.1515301450195947, "learning_rate": 5.901264280570258e-07, "loss": 0.0372, "step": 11379 }, { "epoch": 2.589306029579067, "grad_norm": 1.0453066235648174, "learning_rate": 5.900372162877835e-07, "loss": 0.0108, "step": 11380 }, { "epoch": 2.589533560864619, "grad_norm": 2.3244576558421164, "learning_rate": 5.899480052331167e-07, "loss": 0.0546, "step": 11381 }, { "epoch": 2.589761092150171, "grad_norm": 1.0534270999872009, "learning_rate": 5.898587948948487e-07, "loss": 0.105, "step": 11382 }, { "epoch": 2.5899886234357226, "grad_norm": 1.501897824835638, "learning_rate": 5.897695852748022e-07, "loss": 0.0695, "step": 11383 }, { "epoch": 2.5902161547212743, "grad_norm": 1.2789626411278976, "learning_rate": 5.89680376374801e-07, "loss": 0.0728, "step": 11384 }, { "epoch": 2.590443686006826, "grad_norm": 2.242978707482051, "learning_rate": 5.895911681966681e-07, "loss": 0.0985, "step": 11385 }, { "epoch": 2.590671217292378, "grad_norm": 1.0986748450100736, "learning_rate": 5.895019607422272e-07, "loss": 0.0738, "step": 11386 }, { "epoch": 2.5908987485779296, "grad_norm": 1.7885961552044805, "learning_rate": 5.894127540133007e-07, "loss": 0.0394, "step": 11387 }, { "epoch": 2.5911262798634813, "grad_norm": 1.8402063927289658, "learning_rate": 5.893235480117128e-07, "loss": 0.0935, "step": 11388 }, { "epoch": 2.591353811149033, "grad_norm": 1.8183993658489153, "learning_rate": 5.892343427392862e-07, "loss": 0.0261, "step": 11389 }, { "epoch": 2.591581342434585, "grad_norm": 1.188127652124079, "learning_rate": 5.891451381978438e-07, "loss": 0.0256, "step": 11390 }, { "epoch": 2.5918088737201366, "grad_norm": 1.2192659809515038, "learning_rate": 5.890559343892094e-07, "loss": 0.0992, "step": 11391 }, { "epoch": 2.5920364050056883, "grad_norm": 2.021844078600756, "learning_rate": 5.889667313152057e-07, "loss": 0.0656, "step": 11392 }, { "epoch": 2.59226393629124, "grad_norm": 1.5353663274730058, "learning_rate": 5.888775289776561e-07, "loss": 0.0635, "step": 11393 }, { "epoch": 2.592491467576792, "grad_norm": 1.9442968070440463, "learning_rate": 5.887883273783836e-07, "loss": 0.0438, "step": 11394 }, { "epoch": 2.5927189988623436, "grad_norm": 1.6083538184056487, "learning_rate": 5.886991265192116e-07, "loss": 0.0392, "step": 11395 }, { "epoch": 2.5929465301478953, "grad_norm": 2.3703610460292683, "learning_rate": 5.886099264019627e-07, "loss": 0.0628, "step": 11396 }, { "epoch": 2.593174061433447, "grad_norm": 1.7429591504518207, "learning_rate": 5.885207270284604e-07, "loss": 0.0287, "step": 11397 }, { "epoch": 2.593401592718999, "grad_norm": 1.285607195532264, "learning_rate": 5.884315284005278e-07, "loss": 0.0721, "step": 11398 }, { "epoch": 2.5936291240045506, "grad_norm": 4.449596364904521, "learning_rate": 5.883423305199874e-07, "loss": 0.0793, "step": 11399 }, { "epoch": 2.5938566552901023, "grad_norm": 1.4552393353823274, "learning_rate": 5.882531333886627e-07, "loss": 0.0329, "step": 11400 }, { "epoch": 2.594084186575654, "grad_norm": 2.0782231514299907, "learning_rate": 5.88163937008377e-07, "loss": 0.0235, "step": 11401 }, { "epoch": 2.594311717861206, "grad_norm": 1.3223053258371353, "learning_rate": 5.880747413809526e-07, "loss": 0.0637, "step": 11402 }, { "epoch": 2.5945392491467576, "grad_norm": 1.9170290378227772, "learning_rate": 5.87985546508213e-07, "loss": 0.0509, "step": 11403 }, { "epoch": 2.5947667804323093, "grad_norm": 1.828104493945599, "learning_rate": 5.878963523919809e-07, "loss": 0.0471, "step": 11404 }, { "epoch": 2.594994311717861, "grad_norm": 1.2100714557746193, "learning_rate": 5.878071590340798e-07, "loss": 0.0736, "step": 11405 }, { "epoch": 2.595221843003413, "grad_norm": 3.1136369644946558, "learning_rate": 5.877179664363318e-07, "loss": 0.0463, "step": 11406 }, { "epoch": 2.5954493742889646, "grad_norm": 3.0797702783020946, "learning_rate": 5.876287746005605e-07, "loss": 0.1012, "step": 11407 }, { "epoch": 2.5956769055745164, "grad_norm": 1.66470689569564, "learning_rate": 5.875395835285887e-07, "loss": 0.0476, "step": 11408 }, { "epoch": 2.595904436860068, "grad_norm": 0.9846820212897227, "learning_rate": 5.87450393222239e-07, "loss": 0.0222, "step": 11409 }, { "epoch": 2.59613196814562, "grad_norm": 2.209076910407207, "learning_rate": 5.873612036833346e-07, "loss": 0.0512, "step": 11410 }, { "epoch": 2.5963594994311716, "grad_norm": 3.195113136686636, "learning_rate": 5.872720149136981e-07, "loss": 0.0452, "step": 11411 }, { "epoch": 2.596587030716724, "grad_norm": 1.3640809146025896, "learning_rate": 5.871828269151528e-07, "loss": 0.0877, "step": 11412 }, { "epoch": 2.596814562002275, "grad_norm": 3.0874948841156504, "learning_rate": 5.87093639689521e-07, "loss": 0.1533, "step": 11413 }, { "epoch": 2.5970420932878273, "grad_norm": 1.0780772089486457, "learning_rate": 5.87004453238626e-07, "loss": 0.0728, "step": 11414 }, { "epoch": 2.5972696245733786, "grad_norm": 1.6558589655078833, "learning_rate": 5.869152675642904e-07, "loss": 0.0364, "step": 11415 }, { "epoch": 2.597497155858931, "grad_norm": 1.7814591804392303, "learning_rate": 5.868260826683368e-07, "loss": 0.0328, "step": 11416 }, { "epoch": 2.597724687144482, "grad_norm": 1.4816846070171934, "learning_rate": 5.867368985525882e-07, "loss": 0.0298, "step": 11417 }, { "epoch": 2.5979522184300343, "grad_norm": 1.4840998929256903, "learning_rate": 5.866477152188673e-07, "loss": 0.0677, "step": 11418 }, { "epoch": 2.5981797497155856, "grad_norm": 1.2461090558669303, "learning_rate": 5.86558532668997e-07, "loss": 0.0199, "step": 11419 }, { "epoch": 2.598407281001138, "grad_norm": 2.4930997565544315, "learning_rate": 5.864693509048e-07, "loss": 0.0476, "step": 11420 }, { "epoch": 2.5986348122866896, "grad_norm": 1.4724510729171008, "learning_rate": 5.863801699280985e-07, "loss": 0.0854, "step": 11421 }, { "epoch": 2.5988623435722413, "grad_norm": 2.018385623273467, "learning_rate": 5.862909897407159e-07, "loss": 0.0614, "step": 11422 }, { "epoch": 2.599089874857793, "grad_norm": 1.36876262410324, "learning_rate": 5.862018103444744e-07, "loss": 0.0822, "step": 11423 }, { "epoch": 2.599317406143345, "grad_norm": 1.5740715558149074, "learning_rate": 5.86112631741197e-07, "loss": 0.0596, "step": 11424 }, { "epoch": 2.5995449374288966, "grad_norm": 1.273132243527267, "learning_rate": 5.86023453932706e-07, "loss": 0.0877, "step": 11425 }, { "epoch": 2.5997724687144483, "grad_norm": 0.9320552671256134, "learning_rate": 5.859342769208245e-07, "loss": 0.0106, "step": 11426 }, { "epoch": 2.6, "grad_norm": 1.6308814329826158, "learning_rate": 5.858451007073747e-07, "loss": 0.0417, "step": 11427 }, { "epoch": 2.600227531285552, "grad_norm": 1.092744682467741, "learning_rate": 5.857559252941792e-07, "loss": 0.0142, "step": 11428 }, { "epoch": 2.6004550625711036, "grad_norm": 1.3253285240275632, "learning_rate": 5.85666750683061e-07, "loss": 0.0544, "step": 11429 }, { "epoch": 2.6006825938566553, "grad_norm": 1.904289800580164, "learning_rate": 5.85577576875842e-07, "loss": 0.0807, "step": 11430 }, { "epoch": 2.600910125142207, "grad_norm": 0.5263336683173278, "learning_rate": 5.854884038743454e-07, "loss": 0.0101, "step": 11431 }, { "epoch": 2.601137656427759, "grad_norm": 1.9036372199134652, "learning_rate": 5.853992316803932e-07, "loss": 0.0311, "step": 11432 }, { "epoch": 2.6013651877133106, "grad_norm": 1.5803957322177937, "learning_rate": 5.853100602958086e-07, "loss": 0.0619, "step": 11433 }, { "epoch": 2.6015927189988624, "grad_norm": 2.1957196849769587, "learning_rate": 5.852208897224134e-07, "loss": 0.0501, "step": 11434 }, { "epoch": 2.601820250284414, "grad_norm": 1.3503135128203079, "learning_rate": 5.851317199620303e-07, "loss": 0.0466, "step": 11435 }, { "epoch": 2.602047781569966, "grad_norm": 2.25977183210748, "learning_rate": 5.850425510164821e-07, "loss": 0.0509, "step": 11436 }, { "epoch": 2.6022753128555176, "grad_norm": 1.0964298284260567, "learning_rate": 5.849533828875907e-07, "loss": 0.0278, "step": 11437 }, { "epoch": 2.6025028441410694, "grad_norm": 1.2520810785928222, "learning_rate": 5.84864215577179e-07, "loss": 0.0456, "step": 11438 }, { "epoch": 2.602730375426621, "grad_norm": 1.5811055072149085, "learning_rate": 5.847750490870694e-07, "loss": 0.0257, "step": 11439 }, { "epoch": 2.602957906712173, "grad_norm": 2.5398929281770584, "learning_rate": 5.846858834190837e-07, "loss": 0.118, "step": 11440 }, { "epoch": 2.6031854379977246, "grad_norm": 1.2938809186546272, "learning_rate": 5.845967185750449e-07, "loss": 0.0141, "step": 11441 }, { "epoch": 2.6034129692832764, "grad_norm": 1.496326309635037, "learning_rate": 5.84507554556775e-07, "loss": 0.0348, "step": 11442 }, { "epoch": 2.603640500568828, "grad_norm": 1.7553325258520773, "learning_rate": 5.844183913660969e-07, "loss": 0.0563, "step": 11443 }, { "epoch": 2.60386803185438, "grad_norm": 1.7381367129406298, "learning_rate": 5.843292290048321e-07, "loss": 0.054, "step": 11444 }, { "epoch": 2.6040955631399316, "grad_norm": 2.227660796003937, "learning_rate": 5.842400674748038e-07, "loss": 0.0666, "step": 11445 }, { "epoch": 2.6043230944254834, "grad_norm": 1.8308375313996825, "learning_rate": 5.841509067778339e-07, "loss": 0.036, "step": 11446 }, { "epoch": 2.604550625711035, "grad_norm": 1.311501307937401, "learning_rate": 5.840617469157441e-07, "loss": 0.0653, "step": 11447 }, { "epoch": 2.604778156996587, "grad_norm": 1.6270970155166686, "learning_rate": 5.839725878903578e-07, "loss": 0.0737, "step": 11448 }, { "epoch": 2.6050056882821386, "grad_norm": 2.2590341803261924, "learning_rate": 5.838834297034964e-07, "loss": 0.0838, "step": 11449 }, { "epoch": 2.6052332195676904, "grad_norm": 1.7167305697644528, "learning_rate": 5.837942723569825e-07, "loss": 0.0603, "step": 11450 }, { "epoch": 2.6054607508532426, "grad_norm": 1.0916686936809525, "learning_rate": 5.837051158526379e-07, "loss": 0.0865, "step": 11451 }, { "epoch": 2.605688282138794, "grad_norm": 1.882918747968751, "learning_rate": 5.836159601922856e-07, "loss": 0.032, "step": 11452 }, { "epoch": 2.605915813424346, "grad_norm": 1.4854069186560053, "learning_rate": 5.83526805377747e-07, "loss": 0.0331, "step": 11453 }, { "epoch": 2.6061433447098974, "grad_norm": 1.4497125053063291, "learning_rate": 5.834376514108444e-07, "loss": 0.044, "step": 11454 }, { "epoch": 2.6063708759954496, "grad_norm": 1.142292776264157, "learning_rate": 5.833484982934005e-07, "loss": 0.0231, "step": 11455 }, { "epoch": 2.606598407281001, "grad_norm": 1.3131337296556846, "learning_rate": 5.832593460272367e-07, "loss": 0.0509, "step": 11456 }, { "epoch": 2.606825938566553, "grad_norm": 3.2851810338242475, "learning_rate": 5.831701946141755e-07, "loss": 0.0806, "step": 11457 }, { "epoch": 2.6070534698521044, "grad_norm": 2.1558252541896645, "learning_rate": 5.83081044056039e-07, "loss": 0.0375, "step": 11458 }, { "epoch": 2.6072810011376566, "grad_norm": 1.7364183438482088, "learning_rate": 5.82991894354649e-07, "loss": 0.0357, "step": 11459 }, { "epoch": 2.6075085324232083, "grad_norm": 2.7580719979124173, "learning_rate": 5.829027455118279e-07, "loss": 0.0534, "step": 11460 }, { "epoch": 2.60773606370876, "grad_norm": 2.165448133448507, "learning_rate": 5.828135975293974e-07, "loss": 0.085, "step": 11461 }, { "epoch": 2.607963594994312, "grad_norm": 1.7769018183960847, "learning_rate": 5.8272445040918e-07, "loss": 0.1277, "step": 11462 }, { "epoch": 2.6081911262798636, "grad_norm": 1.7909399058483602, "learning_rate": 5.826353041529971e-07, "loss": 0.1079, "step": 11463 }, { "epoch": 2.6084186575654154, "grad_norm": 1.3049469065039678, "learning_rate": 5.825461587626712e-07, "loss": 0.0625, "step": 11464 }, { "epoch": 2.608646188850967, "grad_norm": 1.3801610379104876, "learning_rate": 5.824570142400242e-07, "loss": 0.0277, "step": 11465 }, { "epoch": 2.608873720136519, "grad_norm": 1.5876060303314063, "learning_rate": 5.823678705868775e-07, "loss": 0.076, "step": 11466 }, { "epoch": 2.6091012514220706, "grad_norm": 1.2134649588418256, "learning_rate": 5.822787278050537e-07, "loss": 0.0261, "step": 11467 }, { "epoch": 2.6093287827076224, "grad_norm": 1.6550042767202227, "learning_rate": 5.821895858963743e-07, "loss": 0.0991, "step": 11468 }, { "epoch": 2.609556313993174, "grad_norm": 1.3170422611205794, "learning_rate": 5.821004448626617e-07, "loss": 0.0348, "step": 11469 }, { "epoch": 2.609783845278726, "grad_norm": 1.8068538613549192, "learning_rate": 5.820113047057372e-07, "loss": 0.044, "step": 11470 }, { "epoch": 2.6100113765642776, "grad_norm": 1.4860893113930125, "learning_rate": 5.819221654274229e-07, "loss": 0.0477, "step": 11471 }, { "epoch": 2.6102389078498294, "grad_norm": 1.7903143791268474, "learning_rate": 5.818330270295408e-07, "loss": 0.0426, "step": 11472 }, { "epoch": 2.610466439135381, "grad_norm": 1.64442115855921, "learning_rate": 5.817438895139123e-07, "loss": 0.0397, "step": 11473 }, { "epoch": 2.610693970420933, "grad_norm": 2.1932172632972775, "learning_rate": 5.816547528823598e-07, "loss": 0.0602, "step": 11474 }, { "epoch": 2.6109215017064846, "grad_norm": 4.467042758897728, "learning_rate": 5.815656171367044e-07, "loss": 0.0215, "step": 11475 }, { "epoch": 2.6111490329920364, "grad_norm": 1.412366923025593, "learning_rate": 5.814764822787686e-07, "loss": 0.0306, "step": 11476 }, { "epoch": 2.611376564277588, "grad_norm": 1.6433722139256828, "learning_rate": 5.813873483103736e-07, "loss": 0.0609, "step": 11477 }, { "epoch": 2.61160409556314, "grad_norm": 1.5193543281913375, "learning_rate": 5.812982152333413e-07, "loss": 0.0861, "step": 11478 }, { "epoch": 2.6118316268486916, "grad_norm": 1.1301498463164104, "learning_rate": 5.812090830494937e-07, "loss": 0.0384, "step": 11479 }, { "epoch": 2.6120591581342434, "grad_norm": 1.2184884392451132, "learning_rate": 5.811199517606519e-07, "loss": 0.0332, "step": 11480 }, { "epoch": 2.612286689419795, "grad_norm": 2.2460416672398202, "learning_rate": 5.810308213686381e-07, "loss": 0.0674, "step": 11481 }, { "epoch": 2.612514220705347, "grad_norm": 1.1968385197349367, "learning_rate": 5.809416918752736e-07, "loss": 0.0325, "step": 11482 }, { "epoch": 2.6127417519908986, "grad_norm": 2.479618739661017, "learning_rate": 5.808525632823806e-07, "loss": 0.1423, "step": 11483 }, { "epoch": 2.6129692832764504, "grad_norm": 2.4146215623557197, "learning_rate": 5.807634355917801e-07, "loss": 0.0752, "step": 11484 }, { "epoch": 2.613196814562002, "grad_norm": 1.4441609192595954, "learning_rate": 5.806743088052939e-07, "loss": 0.0972, "step": 11485 }, { "epoch": 2.613424345847554, "grad_norm": 1.9935786227193832, "learning_rate": 5.805851829247439e-07, "loss": 0.0274, "step": 11486 }, { "epoch": 2.6136518771331056, "grad_norm": 3.151377122619521, "learning_rate": 5.804960579519512e-07, "loss": 0.0545, "step": 11487 }, { "epoch": 2.6138794084186574, "grad_norm": 1.9788253746723177, "learning_rate": 5.804069338887376e-07, "loss": 0.0939, "step": 11488 }, { "epoch": 2.614106939704209, "grad_norm": 1.525518587511123, "learning_rate": 5.803178107369248e-07, "loss": 0.043, "step": 11489 }, { "epoch": 2.6143344709897613, "grad_norm": 2.385915099136633, "learning_rate": 5.802286884983339e-07, "loss": 0.1275, "step": 11490 }, { "epoch": 2.6145620022753127, "grad_norm": 1.8214591753982048, "learning_rate": 5.801395671747866e-07, "loss": 0.0879, "step": 11491 }, { "epoch": 2.614789533560865, "grad_norm": 1.1088758645792947, "learning_rate": 5.800504467681044e-07, "loss": 0.0228, "step": 11492 }, { "epoch": 2.615017064846416, "grad_norm": 1.5419233123846983, "learning_rate": 5.79961327280109e-07, "loss": 0.0894, "step": 11493 }, { "epoch": 2.6152445961319684, "grad_norm": 1.9840541336214403, "learning_rate": 5.798722087126212e-07, "loss": 0.0446, "step": 11494 }, { "epoch": 2.6154721274175197, "grad_norm": 2.3753993769421125, "learning_rate": 5.797830910674631e-07, "loss": 0.0447, "step": 11495 }, { "epoch": 2.615699658703072, "grad_norm": 1.7904512895584102, "learning_rate": 5.796939743464558e-07, "loss": 0.0641, "step": 11496 }, { "epoch": 2.615927189988623, "grad_norm": 1.9708596736873678, "learning_rate": 5.796048585514205e-07, "loss": 0.0441, "step": 11497 }, { "epoch": 2.6161547212741754, "grad_norm": 1.527829145708746, "learning_rate": 5.795157436841789e-07, "loss": 0.0612, "step": 11498 }, { "epoch": 2.616382252559727, "grad_norm": 1.8821174407925256, "learning_rate": 5.794266297465521e-07, "loss": 0.07, "step": 11499 }, { "epoch": 2.616609783845279, "grad_norm": 1.878235067833539, "learning_rate": 5.793375167403617e-07, "loss": 0.0682, "step": 11500 }, { "epoch": 2.6168373151308306, "grad_norm": 1.8179728384625975, "learning_rate": 5.792484046674285e-07, "loss": 0.0318, "step": 11501 }, { "epoch": 2.6170648464163824, "grad_norm": 1.0308936523801775, "learning_rate": 5.791592935295745e-07, "loss": 0.0126, "step": 11502 }, { "epoch": 2.617292377701934, "grad_norm": 2.355963759844747, "learning_rate": 5.790701833286206e-07, "loss": 0.043, "step": 11503 }, { "epoch": 2.617519908987486, "grad_norm": 1.7904539118609144, "learning_rate": 5.789810740663876e-07, "loss": 0.0275, "step": 11504 }, { "epoch": 2.6177474402730376, "grad_norm": 1.0097797746732176, "learning_rate": 5.788919657446974e-07, "loss": 0.0441, "step": 11505 }, { "epoch": 2.6179749715585894, "grad_norm": 1.9079732770422995, "learning_rate": 5.788028583653709e-07, "loss": 0.0215, "step": 11506 }, { "epoch": 2.618202502844141, "grad_norm": 1.8762695085691972, "learning_rate": 5.787137519302297e-07, "loss": 0.0343, "step": 11507 }, { "epoch": 2.618430034129693, "grad_norm": 1.502574693487917, "learning_rate": 5.786246464410944e-07, "loss": 0.1015, "step": 11508 }, { "epoch": 2.6186575654152446, "grad_norm": 1.118618995592489, "learning_rate": 5.785355418997862e-07, "loss": 0.0304, "step": 11509 }, { "epoch": 2.6188850967007964, "grad_norm": 1.482321941512505, "learning_rate": 5.784464383081268e-07, "loss": 0.0688, "step": 11510 }, { "epoch": 2.619112627986348, "grad_norm": 1.380125892266162, "learning_rate": 5.783573356679365e-07, "loss": 0.0769, "step": 11511 }, { "epoch": 2.6193401592719, "grad_norm": 1.8578228944529016, "learning_rate": 5.782682339810374e-07, "loss": 0.0358, "step": 11512 }, { "epoch": 2.6195676905574516, "grad_norm": 2.0747027119568617, "learning_rate": 5.781791332492495e-07, "loss": 0.0527, "step": 11513 }, { "epoch": 2.6197952218430034, "grad_norm": 2.0677948256702834, "learning_rate": 5.780900334743946e-07, "loss": 0.0749, "step": 11514 }, { "epoch": 2.620022753128555, "grad_norm": 2.184950573609604, "learning_rate": 5.780009346582936e-07, "loss": 0.1071, "step": 11515 }, { "epoch": 2.620250284414107, "grad_norm": 1.0540904235840174, "learning_rate": 5.779118368027673e-07, "loss": 0.0161, "step": 11516 }, { "epoch": 2.6204778156996587, "grad_norm": 1.8647196150888898, "learning_rate": 5.778227399096368e-07, "loss": 0.0634, "step": 11517 }, { "epoch": 2.6207053469852104, "grad_norm": 1.5262834945794945, "learning_rate": 5.777336439807231e-07, "loss": 0.0403, "step": 11518 }, { "epoch": 2.620932878270762, "grad_norm": 1.6354648454191416, "learning_rate": 5.776445490178472e-07, "loss": 0.0597, "step": 11519 }, { "epoch": 2.621160409556314, "grad_norm": 1.0950436594909982, "learning_rate": 5.775554550228299e-07, "loss": 0.0206, "step": 11520 }, { "epoch": 2.6213879408418657, "grad_norm": 2.171081322453621, "learning_rate": 5.774663619974923e-07, "loss": 0.1351, "step": 11521 }, { "epoch": 2.6216154721274174, "grad_norm": 1.5698610137497837, "learning_rate": 5.773772699436553e-07, "loss": 0.1106, "step": 11522 }, { "epoch": 2.621843003412969, "grad_norm": 1.0987633715245881, "learning_rate": 5.772881788631394e-07, "loss": 0.0794, "step": 11523 }, { "epoch": 2.622070534698521, "grad_norm": 1.065644493995455, "learning_rate": 5.77199088757766e-07, "loss": 0.0331, "step": 11524 }, { "epoch": 2.6222980659840727, "grad_norm": 1.596943022266351, "learning_rate": 5.771099996293554e-07, "loss": 0.0184, "step": 11525 }, { "epoch": 2.6225255972696244, "grad_norm": 1.4352836286666624, "learning_rate": 5.770209114797292e-07, "loss": 0.1168, "step": 11526 }, { "epoch": 2.622753128555176, "grad_norm": 1.3862853118637561, "learning_rate": 5.769318243107073e-07, "loss": 0.089, "step": 11527 }, { "epoch": 2.622980659840728, "grad_norm": 1.0932091600159823, "learning_rate": 5.768427381241109e-07, "loss": 0.0314, "step": 11528 }, { "epoch": 2.62320819112628, "grad_norm": 1.9768579204656282, "learning_rate": 5.76753652921761e-07, "loss": 0.1968, "step": 11529 }, { "epoch": 2.6234357224118314, "grad_norm": 1.5208231205678655, "learning_rate": 5.766645687054778e-07, "loss": 0.0895, "step": 11530 }, { "epoch": 2.6236632536973836, "grad_norm": 0.8181164158415712, "learning_rate": 5.765754854770823e-07, "loss": 0.0522, "step": 11531 }, { "epoch": 2.623890784982935, "grad_norm": 1.07874122730216, "learning_rate": 5.764864032383951e-07, "loss": 0.0583, "step": 11532 }, { "epoch": 2.624118316268487, "grad_norm": 1.4875523397066885, "learning_rate": 5.763973219912374e-07, "loss": 0.0678, "step": 11533 }, { "epoch": 2.6243458475540384, "grad_norm": 0.8515640840028487, "learning_rate": 5.763082417374291e-07, "loss": 0.0236, "step": 11534 }, { "epoch": 2.6245733788395906, "grad_norm": 2.0274654553768543, "learning_rate": 5.762191624787912e-07, "loss": 0.0388, "step": 11535 }, { "epoch": 2.624800910125142, "grad_norm": 1.6517927059213418, "learning_rate": 5.761300842171445e-07, "loss": 0.0525, "step": 11536 }, { "epoch": 2.625028441410694, "grad_norm": 1.4480018028897568, "learning_rate": 5.76041006954309e-07, "loss": 0.0405, "step": 11537 }, { "epoch": 2.625255972696246, "grad_norm": 1.3845687398949031, "learning_rate": 5.759519306921059e-07, "loss": 0.0447, "step": 11538 }, { "epoch": 2.6254835039817976, "grad_norm": 2.733503027223945, "learning_rate": 5.758628554323553e-07, "loss": 0.0472, "step": 11539 }, { "epoch": 2.6257110352673494, "grad_norm": 1.5136794537042952, "learning_rate": 5.757737811768783e-07, "loss": 0.0398, "step": 11540 }, { "epoch": 2.625938566552901, "grad_norm": 1.5413252398295068, "learning_rate": 5.756847079274949e-07, "loss": 0.0684, "step": 11541 }, { "epoch": 2.626166097838453, "grad_norm": 1.3488638159560435, "learning_rate": 5.755956356860255e-07, "loss": 0.0315, "step": 11542 }, { "epoch": 2.6263936291240046, "grad_norm": 2.1256818387629357, "learning_rate": 5.755065644542912e-07, "loss": 0.1251, "step": 11543 }, { "epoch": 2.6266211604095564, "grad_norm": 1.6477062549724757, "learning_rate": 5.754174942341118e-07, "loss": 0.0445, "step": 11544 }, { "epoch": 2.626848691695108, "grad_norm": 1.1302583186738233, "learning_rate": 5.753284250273082e-07, "loss": 0.0403, "step": 11545 }, { "epoch": 2.62707622298066, "grad_norm": 1.8284292247091565, "learning_rate": 5.752393568357008e-07, "loss": 0.0594, "step": 11546 }, { "epoch": 2.6273037542662117, "grad_norm": 2.7818051482485897, "learning_rate": 5.751502896611093e-07, "loss": 0.0805, "step": 11547 }, { "epoch": 2.6275312855517634, "grad_norm": 1.316377814593299, "learning_rate": 5.750612235053548e-07, "loss": 0.0765, "step": 11548 }, { "epoch": 2.627758816837315, "grad_norm": 1.752616038015025, "learning_rate": 5.749721583702575e-07, "loss": 0.0633, "step": 11549 }, { "epoch": 2.627986348122867, "grad_norm": 1.2775723969211614, "learning_rate": 5.748830942576377e-07, "loss": 0.088, "step": 11550 }, { "epoch": 2.6282138794084187, "grad_norm": 2.248615576126813, "learning_rate": 5.747940311693156e-07, "loss": 0.1171, "step": 11551 }, { "epoch": 2.6284414106939704, "grad_norm": 1.779957174118643, "learning_rate": 5.747049691071116e-07, "loss": 0.0925, "step": 11552 }, { "epoch": 2.628668941979522, "grad_norm": 1.006010810744268, "learning_rate": 5.74615908072846e-07, "loss": 0.0183, "step": 11553 }, { "epoch": 2.628896473265074, "grad_norm": 1.5022572205460616, "learning_rate": 5.745268480683387e-07, "loss": 0.0446, "step": 11554 }, { "epoch": 2.6291240045506257, "grad_norm": 1.2440983850713727, "learning_rate": 5.744377890954103e-07, "loss": 0.0299, "step": 11555 }, { "epoch": 2.6293515358361774, "grad_norm": 1.2616031714935234, "learning_rate": 5.743487311558809e-07, "loss": 0.1015, "step": 11556 }, { "epoch": 2.629579067121729, "grad_norm": 3.5358494037930734, "learning_rate": 5.742596742515709e-07, "loss": 0.065, "step": 11557 }, { "epoch": 2.629806598407281, "grad_norm": 1.8546309932786083, "learning_rate": 5.741706183842999e-07, "loss": 0.1009, "step": 11558 }, { "epoch": 2.6300341296928327, "grad_norm": 2.869906669902615, "learning_rate": 5.740815635558885e-07, "loss": 0.0688, "step": 11559 }, { "epoch": 2.6302616609783844, "grad_norm": 1.3923121731357198, "learning_rate": 5.739925097681569e-07, "loss": 0.0308, "step": 11560 }, { "epoch": 2.630489192263936, "grad_norm": 2.4155053035574103, "learning_rate": 5.739034570229246e-07, "loss": 0.0614, "step": 11561 }, { "epoch": 2.630716723549488, "grad_norm": 1.1698776877180879, "learning_rate": 5.738144053220122e-07, "loss": 0.0491, "step": 11562 }, { "epoch": 2.6309442548350397, "grad_norm": 1.55110118671451, "learning_rate": 5.737253546672396e-07, "loss": 0.066, "step": 11563 }, { "epoch": 2.6311717861205914, "grad_norm": 1.5531296858945072, "learning_rate": 5.73636305060427e-07, "loss": 0.0551, "step": 11564 }, { "epoch": 2.631399317406143, "grad_norm": 2.376833208605661, "learning_rate": 5.735472565033942e-07, "loss": 0.1386, "step": 11565 }, { "epoch": 2.631626848691695, "grad_norm": 1.447943341721192, "learning_rate": 5.734582089979611e-07, "loss": 0.0914, "step": 11566 }, { "epoch": 2.6318543799772467, "grad_norm": 1.1336208710846614, "learning_rate": 5.733691625459481e-07, "loss": 0.0564, "step": 11567 }, { "epoch": 2.632081911262799, "grad_norm": 2.652675966988644, "learning_rate": 5.732801171491744e-07, "loss": 0.0767, "step": 11568 }, { "epoch": 2.63230944254835, "grad_norm": 1.4513473099714158, "learning_rate": 5.731910728094609e-07, "loss": 0.1008, "step": 11569 }, { "epoch": 2.6325369738339024, "grad_norm": 1.3344313631347113, "learning_rate": 5.731020295286265e-07, "loss": 0.0452, "step": 11570 }, { "epoch": 2.6327645051194537, "grad_norm": 1.18615380635411, "learning_rate": 5.730129873084919e-07, "loss": 0.033, "step": 11571 }, { "epoch": 2.632992036405006, "grad_norm": 1.2931124998852705, "learning_rate": 5.729239461508767e-07, "loss": 0.0363, "step": 11572 }, { "epoch": 2.633219567690557, "grad_norm": 1.3386538223653845, "learning_rate": 5.728349060576004e-07, "loss": 0.123, "step": 11573 }, { "epoch": 2.6334470989761094, "grad_norm": 1.4229631258549318, "learning_rate": 5.727458670304832e-07, "loss": 0.0765, "step": 11574 }, { "epoch": 2.6336746302616607, "grad_norm": 1.1734121568474887, "learning_rate": 5.726568290713447e-07, "loss": 0.1307, "step": 11575 }, { "epoch": 2.633902161547213, "grad_norm": 1.6707875980994444, "learning_rate": 5.725677921820049e-07, "loss": 0.0415, "step": 11576 }, { "epoch": 2.6341296928327647, "grad_norm": 1.6842812691163263, "learning_rate": 5.724787563642832e-07, "loss": 0.0679, "step": 11577 }, { "epoch": 2.6343572241183164, "grad_norm": 1.424499395238645, "learning_rate": 5.723897216199997e-07, "loss": 0.0507, "step": 11578 }, { "epoch": 2.634584755403868, "grad_norm": 0.8665211728932165, "learning_rate": 5.723006879509741e-07, "loss": 0.0142, "step": 11579 }, { "epoch": 2.63481228668942, "grad_norm": 1.9522105263871259, "learning_rate": 5.722116553590256e-07, "loss": 0.0371, "step": 11580 }, { "epoch": 2.6350398179749717, "grad_norm": 1.7669881170983022, "learning_rate": 5.721226238459744e-07, "loss": 0.1131, "step": 11581 }, { "epoch": 2.6352673492605234, "grad_norm": 0.8376610204138242, "learning_rate": 5.720335934136396e-07, "loss": 0.0178, "step": 11582 }, { "epoch": 2.635494880546075, "grad_norm": 1.1919065648084115, "learning_rate": 5.719445640638416e-07, "loss": 0.0995, "step": 11583 }, { "epoch": 2.635722411831627, "grad_norm": 1.9389414741795237, "learning_rate": 5.718555357983993e-07, "loss": 0.0513, "step": 11584 }, { "epoch": 2.6359499431171787, "grad_norm": 1.9179589221279794, "learning_rate": 5.717665086191325e-07, "loss": 0.0331, "step": 11585 }, { "epoch": 2.6361774744027304, "grad_norm": 2.1618180694370683, "learning_rate": 5.716774825278609e-07, "loss": 0.0859, "step": 11586 }, { "epoch": 2.636405005688282, "grad_norm": 2.26646842695997, "learning_rate": 5.715884575264038e-07, "loss": 0.0289, "step": 11587 }, { "epoch": 2.636632536973834, "grad_norm": 1.7761149359123354, "learning_rate": 5.714994336165808e-07, "loss": 0.058, "step": 11588 }, { "epoch": 2.6368600682593857, "grad_norm": 1.0950885159883987, "learning_rate": 5.714104108002113e-07, "loss": 0.0839, "step": 11589 }, { "epoch": 2.6370875995449374, "grad_norm": 1.2321115520228043, "learning_rate": 5.713213890791151e-07, "loss": 0.0562, "step": 11590 }, { "epoch": 2.637315130830489, "grad_norm": 1.3698389473105383, "learning_rate": 5.712323684551114e-07, "loss": 0.0413, "step": 11591 }, { "epoch": 2.637542662116041, "grad_norm": 1.4433928965909637, "learning_rate": 5.711433489300193e-07, "loss": 0.123, "step": 11592 }, { "epoch": 2.6377701934015927, "grad_norm": 1.363326300188744, "learning_rate": 5.710543305056589e-07, "loss": 0.0849, "step": 11593 }, { "epoch": 2.6379977246871444, "grad_norm": 1.3934997715785242, "learning_rate": 5.709653131838489e-07, "loss": 0.0796, "step": 11594 }, { "epoch": 2.638225255972696, "grad_norm": 1.9901379689420213, "learning_rate": 5.708762969664091e-07, "loss": 0.0824, "step": 11595 }, { "epoch": 2.638452787258248, "grad_norm": 2.7469364242934886, "learning_rate": 5.707872818551588e-07, "loss": 0.1202, "step": 11596 }, { "epoch": 2.6386803185437997, "grad_norm": 2.4106672486595917, "learning_rate": 5.706982678519169e-07, "loss": 0.0942, "step": 11597 }, { "epoch": 2.6389078498293514, "grad_norm": 1.4335862080600859, "learning_rate": 5.70609254958503e-07, "loss": 0.0417, "step": 11598 }, { "epoch": 2.639135381114903, "grad_norm": 1.5579006212335171, "learning_rate": 5.705202431767362e-07, "loss": 0.0542, "step": 11599 }, { "epoch": 2.639362912400455, "grad_norm": 1.1480325047585338, "learning_rate": 5.704312325084363e-07, "loss": 0.072, "step": 11600 }, { "epoch": 2.6395904436860067, "grad_norm": 1.031120446505473, "learning_rate": 5.703422229554215e-07, "loss": 0.0673, "step": 11601 }, { "epoch": 2.6398179749715585, "grad_norm": 1.3350024118950023, "learning_rate": 5.70253214519512e-07, "loss": 0.0232, "step": 11602 }, { "epoch": 2.64004550625711, "grad_norm": 1.9961265864492441, "learning_rate": 5.701642072025265e-07, "loss": 0.1141, "step": 11603 }, { "epoch": 2.640273037542662, "grad_norm": 1.0661131593310098, "learning_rate": 5.700752010062839e-07, "loss": 0.0212, "step": 11604 }, { "epoch": 2.640500568828214, "grad_norm": 1.5163594785065433, "learning_rate": 5.699861959326038e-07, "loss": 0.1174, "step": 11605 }, { "epoch": 2.6407281001137655, "grad_norm": 1.7853943572278428, "learning_rate": 5.698971919833049e-07, "loss": 0.1847, "step": 11606 }, { "epoch": 2.6409556313993177, "grad_norm": 2.3874174429111594, "learning_rate": 5.698081891602068e-07, "loss": 0.0876, "step": 11607 }, { "epoch": 2.641183162684869, "grad_norm": 1.164303484813605, "learning_rate": 5.69719187465128e-07, "loss": 0.0507, "step": 11608 }, { "epoch": 2.641410693970421, "grad_norm": 1.5307658664711812, "learning_rate": 5.696301868998878e-07, "loss": 0.0246, "step": 11609 }, { "epoch": 2.6416382252559725, "grad_norm": 1.9169909157769471, "learning_rate": 5.695411874663054e-07, "loss": 0.0577, "step": 11610 }, { "epoch": 2.6418657565415247, "grad_norm": 1.7535316188711025, "learning_rate": 5.694521891661992e-07, "loss": 0.057, "step": 11611 }, { "epoch": 2.642093287827076, "grad_norm": 1.299408674375057, "learning_rate": 5.693631920013887e-07, "loss": 0.0508, "step": 11612 }, { "epoch": 2.642320819112628, "grad_norm": 1.9962674093876664, "learning_rate": 5.692741959736925e-07, "loss": 0.0444, "step": 11613 }, { "epoch": 2.6425483503981795, "grad_norm": 1.0394017491390095, "learning_rate": 5.691852010849301e-07, "loss": 0.0457, "step": 11614 }, { "epoch": 2.6427758816837317, "grad_norm": 1.8059076556240647, "learning_rate": 5.690962073369196e-07, "loss": 0.0391, "step": 11615 }, { "epoch": 2.6430034129692834, "grad_norm": 1.2825761861812888, "learning_rate": 5.690072147314804e-07, "loss": 0.0358, "step": 11616 }, { "epoch": 2.643230944254835, "grad_norm": 1.506946550416108, "learning_rate": 5.689182232704313e-07, "loss": 0.0608, "step": 11617 }, { "epoch": 2.643458475540387, "grad_norm": 1.7637873564291744, "learning_rate": 5.688292329555906e-07, "loss": 0.09, "step": 11618 }, { "epoch": 2.6436860068259387, "grad_norm": 2.05612049017642, "learning_rate": 5.687402437887778e-07, "loss": 0.0613, "step": 11619 }, { "epoch": 2.6439135381114904, "grad_norm": 1.597632777783095, "learning_rate": 5.686512557718112e-07, "loss": 0.0704, "step": 11620 }, { "epoch": 2.644141069397042, "grad_norm": 1.8394207452155922, "learning_rate": 5.6856226890651e-07, "loss": 0.0852, "step": 11621 }, { "epoch": 2.644368600682594, "grad_norm": 1.504418649446496, "learning_rate": 5.684732831946925e-07, "loss": 0.046, "step": 11622 }, { "epoch": 2.6445961319681457, "grad_norm": 1.2002411288944344, "learning_rate": 5.683842986381775e-07, "loss": 0.0202, "step": 11623 }, { "epoch": 2.6448236632536974, "grad_norm": 1.7790495956186159, "learning_rate": 5.68295315238784e-07, "loss": 0.0465, "step": 11624 }, { "epoch": 2.645051194539249, "grad_norm": 2.364808595948646, "learning_rate": 5.682063329983301e-07, "loss": 0.118, "step": 11625 }, { "epoch": 2.645278725824801, "grad_norm": 1.8715020608862545, "learning_rate": 5.681173519186349e-07, "loss": 0.0923, "step": 11626 }, { "epoch": 2.6455062571103527, "grad_norm": 2.2086020050687027, "learning_rate": 5.680283720015167e-07, "loss": 0.055, "step": 11627 }, { "epoch": 2.6457337883959045, "grad_norm": 1.4535347093425, "learning_rate": 5.679393932487946e-07, "loss": 0.0197, "step": 11628 }, { "epoch": 2.645961319681456, "grad_norm": 1.5990966255423285, "learning_rate": 5.678504156622866e-07, "loss": 0.1183, "step": 11629 }, { "epoch": 2.646188850967008, "grad_norm": 1.1577474659584557, "learning_rate": 5.677614392438112e-07, "loss": 0.0402, "step": 11630 }, { "epoch": 2.6464163822525597, "grad_norm": 1.6169418696846196, "learning_rate": 5.676724639951876e-07, "loss": 0.1256, "step": 11631 }, { "epoch": 2.6466439135381115, "grad_norm": 1.3863158673913594, "learning_rate": 5.675834899182334e-07, "loss": 0.0574, "step": 11632 }, { "epoch": 2.646871444823663, "grad_norm": 1.970517842156483, "learning_rate": 5.674945170147678e-07, "loss": 0.0308, "step": 11633 }, { "epoch": 2.647098976109215, "grad_norm": 1.555892516536942, "learning_rate": 5.674055452866091e-07, "loss": 0.0219, "step": 11634 }, { "epoch": 2.6473265073947667, "grad_norm": 1.7589960938957299, "learning_rate": 5.673165747355751e-07, "loss": 0.0344, "step": 11635 }, { "epoch": 2.6475540386803185, "grad_norm": 1.5990474619785238, "learning_rate": 5.67227605363485e-07, "loss": 0.1423, "step": 11636 }, { "epoch": 2.64778156996587, "grad_norm": 1.1233759535386276, "learning_rate": 5.671386371721567e-07, "loss": 0.0253, "step": 11637 }, { "epoch": 2.648009101251422, "grad_norm": 1.7547948284589474, "learning_rate": 5.670496701634088e-07, "loss": 0.0719, "step": 11638 }, { "epoch": 2.6482366325369737, "grad_norm": 1.725698743827727, "learning_rate": 5.669607043390593e-07, "loss": 0.045, "step": 11639 }, { "epoch": 2.6484641638225255, "grad_norm": 2.1966256186191875, "learning_rate": 5.668717397009271e-07, "loss": 0.1278, "step": 11640 }, { "epoch": 2.6486916951080772, "grad_norm": 2.9812616033608026, "learning_rate": 5.667827762508299e-07, "loss": 0.0757, "step": 11641 }, { "epoch": 2.648919226393629, "grad_norm": 2.213543480779433, "learning_rate": 5.66693813990586e-07, "loss": 0.1386, "step": 11642 }, { "epoch": 2.6491467576791807, "grad_norm": 1.6004519843928147, "learning_rate": 5.666048529220139e-07, "loss": 0.0488, "step": 11643 }, { "epoch": 2.649374288964733, "grad_norm": 1.6124232938059613, "learning_rate": 5.665158930469315e-07, "loss": 0.0594, "step": 11644 }, { "epoch": 2.6496018202502842, "grad_norm": 1.555393348374662, "learning_rate": 5.664269343671573e-07, "loss": 0.0259, "step": 11645 }, { "epoch": 2.6498293515358364, "grad_norm": 2.611398633825476, "learning_rate": 5.663379768845091e-07, "loss": 0.0638, "step": 11646 }, { "epoch": 2.6500568828213877, "grad_norm": 1.2131017309645624, "learning_rate": 5.662490206008056e-07, "loss": 0.0942, "step": 11647 }, { "epoch": 2.65028441410694, "grad_norm": 1.316473334761673, "learning_rate": 5.661600655178643e-07, "loss": 0.0929, "step": 11648 }, { "epoch": 2.6505119453924912, "grad_norm": 3.2355567806746786, "learning_rate": 5.660711116375034e-07, "loss": 0.0529, "step": 11649 }, { "epoch": 2.6507394766780434, "grad_norm": 1.2874521202555396, "learning_rate": 5.659821589615412e-07, "loss": 0.0343, "step": 11650 }, { "epoch": 2.6509670079635947, "grad_norm": 1.032880666122527, "learning_rate": 5.658932074917955e-07, "loss": 0.0759, "step": 11651 }, { "epoch": 2.651194539249147, "grad_norm": 1.5027127706550056, "learning_rate": 5.658042572300844e-07, "loss": 0.0951, "step": 11652 }, { "epoch": 2.6514220705346982, "grad_norm": 1.0223805973481688, "learning_rate": 5.65715308178226e-07, "loss": 0.0727, "step": 11653 }, { "epoch": 2.6516496018202504, "grad_norm": 1.4683741071472305, "learning_rate": 5.656263603380379e-07, "loss": 0.0182, "step": 11654 }, { "epoch": 2.651877133105802, "grad_norm": 1.7456250715463058, "learning_rate": 5.655374137113384e-07, "loss": 0.0711, "step": 11655 }, { "epoch": 2.652104664391354, "grad_norm": 1.1883910640711308, "learning_rate": 5.65448468299945e-07, "loss": 0.0315, "step": 11656 }, { "epoch": 2.6523321956769057, "grad_norm": 1.079642840962677, "learning_rate": 5.653595241056763e-07, "loss": 0.0407, "step": 11657 }, { "epoch": 2.6525597269624575, "grad_norm": 1.6162767609094588, "learning_rate": 5.652705811303493e-07, "loss": 0.1201, "step": 11658 }, { "epoch": 2.652787258248009, "grad_norm": 1.1388868496178834, "learning_rate": 5.651816393757825e-07, "loss": 0.0212, "step": 11659 }, { "epoch": 2.653014789533561, "grad_norm": 0.8776488722110775, "learning_rate": 5.650926988437934e-07, "loss": 0.0375, "step": 11660 }, { "epoch": 2.6532423208191127, "grad_norm": 1.1571946859124045, "learning_rate": 5.650037595361997e-07, "loss": 0.0307, "step": 11661 }, { "epoch": 2.6534698521046645, "grad_norm": 1.6372974399278044, "learning_rate": 5.649148214548194e-07, "loss": 0.1101, "step": 11662 }, { "epoch": 2.653697383390216, "grad_norm": 2.310699423670982, "learning_rate": 5.648258846014699e-07, "loss": 0.0544, "step": 11663 }, { "epoch": 2.653924914675768, "grad_norm": 1.2178392297131135, "learning_rate": 5.647369489779695e-07, "loss": 0.0205, "step": 11664 }, { "epoch": 2.6541524459613197, "grad_norm": 1.0506728983325602, "learning_rate": 5.646480145861351e-07, "loss": 0.0494, "step": 11665 }, { "epoch": 2.6543799772468715, "grad_norm": 1.88424965354095, "learning_rate": 5.645590814277849e-07, "loss": 0.0654, "step": 11666 }, { "epoch": 2.654607508532423, "grad_norm": 1.381573160162818, "learning_rate": 5.644701495047365e-07, "loss": 0.0925, "step": 11667 }, { "epoch": 2.654835039817975, "grad_norm": 1.8520183903586709, "learning_rate": 5.643812188188072e-07, "loss": 0.081, "step": 11668 }, { "epoch": 2.6550625711035267, "grad_norm": 1.6449200616961712, "learning_rate": 5.642922893718149e-07, "loss": 0.0638, "step": 11669 }, { "epoch": 2.6552901023890785, "grad_norm": 1.5680640431629644, "learning_rate": 5.642033611655769e-07, "loss": 0.0482, "step": 11670 }, { "epoch": 2.6555176336746302, "grad_norm": 2.4167327869476445, "learning_rate": 5.64114434201911e-07, "loss": 0.106, "step": 11671 }, { "epoch": 2.655745164960182, "grad_norm": 1.4997338404060383, "learning_rate": 5.640255084826346e-07, "loss": 0.0586, "step": 11672 }, { "epoch": 2.6559726962457337, "grad_norm": 1.0634002101050146, "learning_rate": 5.639365840095649e-07, "loss": 0.0627, "step": 11673 }, { "epoch": 2.6562002275312855, "grad_norm": 1.5125888952021564, "learning_rate": 5.638476607845199e-07, "loss": 0.0265, "step": 11674 }, { "epoch": 2.6564277588168372, "grad_norm": 1.5017397276262865, "learning_rate": 5.637587388093164e-07, "loss": 0.0599, "step": 11675 }, { "epoch": 2.656655290102389, "grad_norm": 1.319788184764793, "learning_rate": 5.636698180857722e-07, "loss": 0.0492, "step": 11676 }, { "epoch": 2.6568828213879407, "grad_norm": 1.8641979973931786, "learning_rate": 5.635808986157046e-07, "loss": 0.0362, "step": 11677 }, { "epoch": 2.6571103526734925, "grad_norm": 0.8305884124028412, "learning_rate": 5.634919804009312e-07, "loss": 0.0477, "step": 11678 }, { "epoch": 2.6573378839590442, "grad_norm": 1.4483248481758633, "learning_rate": 5.634030634432688e-07, "loss": 0.0475, "step": 11679 }, { "epoch": 2.657565415244596, "grad_norm": 1.213953845973403, "learning_rate": 5.63314147744535e-07, "loss": 0.0239, "step": 11680 }, { "epoch": 2.6577929465301477, "grad_norm": 1.1720782335152122, "learning_rate": 5.632252333065473e-07, "loss": 0.0511, "step": 11681 }, { "epoch": 2.6580204778156995, "grad_norm": 1.193516848100127, "learning_rate": 5.631363201311224e-07, "loss": 0.0368, "step": 11682 }, { "epoch": 2.6582480091012517, "grad_norm": 1.4637223374215265, "learning_rate": 5.630474082200779e-07, "loss": 0.0539, "step": 11683 }, { "epoch": 2.658475540386803, "grad_norm": 1.0794261321348364, "learning_rate": 5.629584975752308e-07, "loss": 0.0538, "step": 11684 }, { "epoch": 2.658703071672355, "grad_norm": 1.6276538774179117, "learning_rate": 5.628695881983987e-07, "loss": 0.0583, "step": 11685 }, { "epoch": 2.6589306029579065, "grad_norm": 1.4962848959371438, "learning_rate": 5.627806800913982e-07, "loss": 0.0387, "step": 11686 }, { "epoch": 2.6591581342434587, "grad_norm": 1.4021895398279203, "learning_rate": 5.626917732560467e-07, "loss": 0.0157, "step": 11687 }, { "epoch": 2.65938566552901, "grad_norm": 1.3141420451533892, "learning_rate": 5.626028676941612e-07, "loss": 0.0938, "step": 11688 }, { "epoch": 2.659613196814562, "grad_norm": 1.9521971505651778, "learning_rate": 5.625139634075589e-07, "loss": 0.0656, "step": 11689 }, { "epoch": 2.6598407281001135, "grad_norm": 2.1045806253942625, "learning_rate": 5.624250603980566e-07, "loss": 0.079, "step": 11690 }, { "epoch": 2.6600682593856657, "grad_norm": 0.7800423458776545, "learning_rate": 5.623361586674718e-07, "loss": 0.0442, "step": 11691 }, { "epoch": 2.660295790671217, "grad_norm": 1.4615498984463113, "learning_rate": 5.622472582176207e-07, "loss": 0.0465, "step": 11692 }, { "epoch": 2.660523321956769, "grad_norm": 1.0914075663397564, "learning_rate": 5.62158359050321e-07, "loss": 0.03, "step": 11693 }, { "epoch": 2.660750853242321, "grad_norm": 1.4172944595539725, "learning_rate": 5.620694611673891e-07, "loss": 0.0407, "step": 11694 }, { "epoch": 2.6609783845278727, "grad_norm": 1.3162941074765209, "learning_rate": 5.619805645706424e-07, "loss": 0.0384, "step": 11695 }, { "epoch": 2.6612059158134245, "grad_norm": 1.8015432117189558, "learning_rate": 5.618916692618974e-07, "loss": 0.0488, "step": 11696 }, { "epoch": 2.6614334470989762, "grad_norm": 1.3590682116662758, "learning_rate": 5.618027752429714e-07, "loss": 0.0387, "step": 11697 }, { "epoch": 2.661660978384528, "grad_norm": 1.5373854276987424, "learning_rate": 5.617138825156808e-07, "loss": 0.0244, "step": 11698 }, { "epoch": 2.6618885096700797, "grad_norm": 2.0067333612350744, "learning_rate": 5.616249910818423e-07, "loss": 0.0283, "step": 11699 }, { "epoch": 2.6621160409556315, "grad_norm": 1.5968591934409682, "learning_rate": 5.615361009432732e-07, "loss": 0.0681, "step": 11700 }, { "epoch": 2.6623435722411832, "grad_norm": 1.6547621233189642, "learning_rate": 5.614472121017897e-07, "loss": 0.0375, "step": 11701 }, { "epoch": 2.662571103526735, "grad_norm": 1.7778323212415053, "learning_rate": 5.61358324559209e-07, "loss": 0.0734, "step": 11702 }, { "epoch": 2.6627986348122867, "grad_norm": 2.034588132290653, "learning_rate": 5.612694383173477e-07, "loss": 0.1214, "step": 11703 }, { "epoch": 2.6630261660978385, "grad_norm": 1.1198358779509605, "learning_rate": 5.611805533780221e-07, "loss": 0.0223, "step": 11704 }, { "epoch": 2.6632536973833902, "grad_norm": 1.138840721041643, "learning_rate": 5.610916697430492e-07, "loss": 0.1074, "step": 11705 }, { "epoch": 2.663481228668942, "grad_norm": 1.5810067301270923, "learning_rate": 5.610027874142454e-07, "loss": 0.0516, "step": 11706 }, { "epoch": 2.6637087599544937, "grad_norm": 1.7513244613907883, "learning_rate": 5.609139063934278e-07, "loss": 0.0365, "step": 11707 }, { "epoch": 2.6639362912400455, "grad_norm": 1.3665229059057786, "learning_rate": 5.608250266824121e-07, "loss": 0.1047, "step": 11708 }, { "epoch": 2.6641638225255972, "grad_norm": 1.5823274244961116, "learning_rate": 5.607361482830157e-07, "loss": 0.0572, "step": 11709 }, { "epoch": 2.664391353811149, "grad_norm": 1.425307478623604, "learning_rate": 5.606472711970547e-07, "loss": 0.0323, "step": 11710 }, { "epoch": 2.6646188850967008, "grad_norm": 0.9436976824299956, "learning_rate": 5.605583954263454e-07, "loss": 0.0402, "step": 11711 }, { "epoch": 2.6648464163822525, "grad_norm": 1.6749349593866905, "learning_rate": 5.604695209727046e-07, "loss": 0.0234, "step": 11712 }, { "epoch": 2.6650739476678043, "grad_norm": 1.5223163016690948, "learning_rate": 5.603806478379485e-07, "loss": 0.039, "step": 11713 }, { "epoch": 2.665301478953356, "grad_norm": 1.6930831823188222, "learning_rate": 5.602917760238939e-07, "loss": 0.0188, "step": 11714 }, { "epoch": 2.6655290102389078, "grad_norm": 1.6087819491912803, "learning_rate": 5.602029055323566e-07, "loss": 0.1283, "step": 11715 }, { "epoch": 2.6657565415244595, "grad_norm": 1.9474224973707586, "learning_rate": 5.601140363651534e-07, "loss": 0.0264, "step": 11716 }, { "epoch": 2.6659840728100113, "grad_norm": 2.5998183688030316, "learning_rate": 5.600251685241005e-07, "loss": 0.0478, "step": 11717 }, { "epoch": 2.666211604095563, "grad_norm": 2.1207175429715726, "learning_rate": 5.59936302011014e-07, "loss": 0.0464, "step": 11718 }, { "epoch": 2.6664391353811148, "grad_norm": 1.4114046172204144, "learning_rate": 5.598474368277105e-07, "loss": 0.1033, "step": 11719 }, { "epoch": 2.6666666666666665, "grad_norm": 1.8354887946032963, "learning_rate": 5.597585729760058e-07, "loss": 0.0848, "step": 11720 }, { "epoch": 2.6668941979522183, "grad_norm": 1.6375727281261445, "learning_rate": 5.596697104577167e-07, "loss": 0.0249, "step": 11721 }, { "epoch": 2.6671217292377705, "grad_norm": 1.7203491456932862, "learning_rate": 5.59580849274659e-07, "loss": 0.065, "step": 11722 }, { "epoch": 2.6673492605233218, "grad_norm": 3.1038730225047875, "learning_rate": 5.594919894286487e-07, "loss": 0.0711, "step": 11723 }, { "epoch": 2.667576791808874, "grad_norm": 2.0609650921048592, "learning_rate": 5.594031309215025e-07, "loss": 0.0752, "step": 11724 }, { "epoch": 2.6678043230944253, "grad_norm": 1.3682023802395702, "learning_rate": 5.593142737550359e-07, "loss": 0.0316, "step": 11725 }, { "epoch": 2.6680318543799775, "grad_norm": 1.1392943208691484, "learning_rate": 5.592254179310653e-07, "loss": 0.023, "step": 11726 }, { "epoch": 2.668259385665529, "grad_norm": 2.4155802435139764, "learning_rate": 5.591365634514067e-07, "loss": 0.0491, "step": 11727 }, { "epoch": 2.668486916951081, "grad_norm": 2.037179725892147, "learning_rate": 5.590477103178762e-07, "loss": 0.0473, "step": 11728 }, { "epoch": 2.6687144482366323, "grad_norm": 2.194875172632493, "learning_rate": 5.589588585322898e-07, "loss": 0.0948, "step": 11729 }, { "epoch": 2.6689419795221845, "grad_norm": 1.3269917042812034, "learning_rate": 5.588700080964631e-07, "loss": 0.0207, "step": 11730 }, { "epoch": 2.669169510807736, "grad_norm": 1.3141721118488432, "learning_rate": 5.587811590122126e-07, "loss": 0.0933, "step": 11731 }, { "epoch": 2.669397042093288, "grad_norm": 2.0672260186780966, "learning_rate": 5.586923112813537e-07, "loss": 0.0389, "step": 11732 }, { "epoch": 2.6696245733788397, "grad_norm": 1.6559539848446803, "learning_rate": 5.586034649057027e-07, "loss": 0.0988, "step": 11733 }, { "epoch": 2.6698521046643915, "grad_norm": 1.9821811116980137, "learning_rate": 5.585146198870751e-07, "loss": 0.1199, "step": 11734 }, { "epoch": 2.6700796359499432, "grad_norm": 2.0100932494785715, "learning_rate": 5.584257762272871e-07, "loss": 0.0609, "step": 11735 }, { "epoch": 2.670307167235495, "grad_norm": 1.9799422130986184, "learning_rate": 5.583369339281543e-07, "loss": 0.1375, "step": 11736 }, { "epoch": 2.6705346985210467, "grad_norm": 1.81160419691293, "learning_rate": 5.582480929914924e-07, "loss": 0.0252, "step": 11737 }, { "epoch": 2.6707622298065985, "grad_norm": 1.407557736012808, "learning_rate": 5.581592534191173e-07, "loss": 0.0801, "step": 11738 }, { "epoch": 2.6709897610921502, "grad_norm": 1.7179250450574088, "learning_rate": 5.580704152128445e-07, "loss": 0.0464, "step": 11739 }, { "epoch": 2.671217292377702, "grad_norm": 3.634713639593359, "learning_rate": 5.579815783744899e-07, "loss": 0.0943, "step": 11740 }, { "epoch": 2.6714448236632538, "grad_norm": 1.1916913424321092, "learning_rate": 5.578927429058694e-07, "loss": 0.0186, "step": 11741 }, { "epoch": 2.6716723549488055, "grad_norm": 2.3713609761926127, "learning_rate": 5.578039088087978e-07, "loss": 0.048, "step": 11742 }, { "epoch": 2.6718998862343573, "grad_norm": 1.0762299176822459, "learning_rate": 5.577150760850916e-07, "loss": 0.0254, "step": 11743 }, { "epoch": 2.672127417519909, "grad_norm": 2.3881054333423055, "learning_rate": 5.576262447365659e-07, "loss": 0.0493, "step": 11744 }, { "epoch": 2.6723549488054608, "grad_norm": 1.4478769635469395, "learning_rate": 5.575374147650364e-07, "loss": 0.0453, "step": 11745 }, { "epoch": 2.6725824800910125, "grad_norm": 1.9307890699315309, "learning_rate": 5.574485861723185e-07, "loss": 0.0601, "step": 11746 }, { "epoch": 2.6728100113765643, "grad_norm": 2.1515131911056735, "learning_rate": 5.573597589602279e-07, "loss": 0.06, "step": 11747 }, { "epoch": 2.673037542662116, "grad_norm": 1.1926774040188197, "learning_rate": 5.5727093313058e-07, "loss": 0.0492, "step": 11748 }, { "epoch": 2.6732650739476678, "grad_norm": 1.6561920901107503, "learning_rate": 5.5718210868519e-07, "loss": 0.0627, "step": 11749 }, { "epoch": 2.6734926052332195, "grad_norm": 1.4567909179482412, "learning_rate": 5.570932856258736e-07, "loss": 0.037, "step": 11750 }, { "epoch": 2.6737201365187713, "grad_norm": 1.187220924422061, "learning_rate": 5.57004463954446e-07, "loss": 0.0729, "step": 11751 }, { "epoch": 2.673947667804323, "grad_norm": 2.6286502380289325, "learning_rate": 5.569156436727229e-07, "loss": 0.1255, "step": 11752 }, { "epoch": 2.6741751990898748, "grad_norm": 1.985973892151708, "learning_rate": 5.568268247825188e-07, "loss": 0.07, "step": 11753 }, { "epoch": 2.6744027303754265, "grad_norm": 0.9718405983411343, "learning_rate": 5.5673800728565e-07, "loss": 0.0739, "step": 11754 }, { "epoch": 2.6746302616609783, "grad_norm": 1.1132465242471574, "learning_rate": 5.566491911839314e-07, "loss": 0.0284, "step": 11755 }, { "epoch": 2.67485779294653, "grad_norm": 1.755157548732689, "learning_rate": 5.565603764791778e-07, "loss": 0.0986, "step": 11756 }, { "epoch": 2.675085324232082, "grad_norm": 0.9931092749103487, "learning_rate": 5.564715631732051e-07, "loss": 0.0523, "step": 11757 }, { "epoch": 2.6753128555176335, "grad_norm": 1.4830974767179366, "learning_rate": 5.563827512678279e-07, "loss": 0.02, "step": 11758 }, { "epoch": 2.6755403868031853, "grad_norm": 1.6823605556632475, "learning_rate": 5.562939407648617e-07, "loss": 0.044, "step": 11759 }, { "epoch": 2.675767918088737, "grad_norm": 1.9335114683831598, "learning_rate": 5.562051316661216e-07, "loss": 0.0522, "step": 11760 }, { "epoch": 2.6759954493742892, "grad_norm": 1.5811237687505812, "learning_rate": 5.561163239734224e-07, "loss": 0.0809, "step": 11761 }, { "epoch": 2.6762229806598405, "grad_norm": 3.162420828073671, "learning_rate": 5.560275176885795e-07, "loss": 0.0864, "step": 11762 }, { "epoch": 2.6764505119453927, "grad_norm": 1.076437692159865, "learning_rate": 5.559387128134077e-07, "loss": 0.0428, "step": 11763 }, { "epoch": 2.676678043230944, "grad_norm": 2.0841596418658543, "learning_rate": 5.558499093497225e-07, "loss": 0.0577, "step": 11764 }, { "epoch": 2.6769055745164962, "grad_norm": 1.4695311966455085, "learning_rate": 5.55761107299338e-07, "loss": 0.0624, "step": 11765 }, { "epoch": 2.6771331058020476, "grad_norm": 2.6350278606877695, "learning_rate": 5.5567230666407e-07, "loss": 0.0647, "step": 11766 }, { "epoch": 2.6773606370875997, "grad_norm": 2.023767151308111, "learning_rate": 5.555835074457332e-07, "loss": 0.1324, "step": 11767 }, { "epoch": 2.677588168373151, "grad_norm": 1.5409152522318246, "learning_rate": 5.55494709646142e-07, "loss": 0.0224, "step": 11768 }, { "epoch": 2.6778156996587033, "grad_norm": 2.6819719175137817, "learning_rate": 5.554059132671118e-07, "loss": 0.0436, "step": 11769 }, { "epoch": 2.6780432309442546, "grad_norm": 1.382728770728853, "learning_rate": 5.553171183104572e-07, "loss": 0.0338, "step": 11770 }, { "epoch": 2.6782707622298068, "grad_norm": 3.842233580227688, "learning_rate": 5.552283247779934e-07, "loss": 0.064, "step": 11771 }, { "epoch": 2.6784982935153585, "grad_norm": 1.2360563790175114, "learning_rate": 5.551395326715345e-07, "loss": 0.0412, "step": 11772 }, { "epoch": 2.6787258248009103, "grad_norm": 1.749239188291256, "learning_rate": 5.550507419928958e-07, "loss": 0.1045, "step": 11773 }, { "epoch": 2.678953356086462, "grad_norm": 0.6676220373240995, "learning_rate": 5.54961952743892e-07, "loss": 0.0057, "step": 11774 }, { "epoch": 2.6791808873720138, "grad_norm": 1.253954369359576, "learning_rate": 5.548731649263372e-07, "loss": 0.0438, "step": 11775 }, { "epoch": 2.6794084186575655, "grad_norm": 1.384775810050929, "learning_rate": 5.547843785420467e-07, "loss": 0.0196, "step": 11776 }, { "epoch": 2.6796359499431173, "grad_norm": 1.7391742080675237, "learning_rate": 5.546955935928347e-07, "loss": 0.1354, "step": 11777 }, { "epoch": 2.679863481228669, "grad_norm": 1.477486030872569, "learning_rate": 5.546068100805165e-07, "loss": 0.0847, "step": 11778 }, { "epoch": 2.6800910125142208, "grad_norm": 2.217704326039074, "learning_rate": 5.545180280069059e-07, "loss": 0.0646, "step": 11779 }, { "epoch": 2.6803185437997725, "grad_norm": 1.3384608370214255, "learning_rate": 5.544292473738175e-07, "loss": 0.0987, "step": 11780 }, { "epoch": 2.6805460750853243, "grad_norm": 1.2502139013152773, "learning_rate": 5.543404681830665e-07, "loss": 0.0326, "step": 11781 }, { "epoch": 2.680773606370876, "grad_norm": 0.9970213689609355, "learning_rate": 5.542516904364665e-07, "loss": 0.0812, "step": 11782 }, { "epoch": 2.681001137656428, "grad_norm": 1.4856166321263977, "learning_rate": 5.541629141358326e-07, "loss": 0.0288, "step": 11783 }, { "epoch": 2.6812286689419795, "grad_norm": 1.3202804502505363, "learning_rate": 5.540741392829788e-07, "loss": 0.0234, "step": 11784 }, { "epoch": 2.6814562002275313, "grad_norm": 5.056806431438455, "learning_rate": 5.539853658797199e-07, "loss": 0.1007, "step": 11785 }, { "epoch": 2.681683731513083, "grad_norm": 1.772278465231209, "learning_rate": 5.538965939278701e-07, "loss": 0.0915, "step": 11786 }, { "epoch": 2.681911262798635, "grad_norm": 1.5420184165350868, "learning_rate": 5.538078234292435e-07, "loss": 0.0424, "step": 11787 }, { "epoch": 2.6821387940841865, "grad_norm": 1.4265085453733193, "learning_rate": 5.537190543856548e-07, "loss": 0.0388, "step": 11788 }, { "epoch": 2.6823663253697383, "grad_norm": 1.8059436730860365, "learning_rate": 5.536302867989179e-07, "loss": 0.1079, "step": 11789 }, { "epoch": 2.68259385665529, "grad_norm": 1.176567784044655, "learning_rate": 5.535415206708474e-07, "loss": 0.0521, "step": 11790 }, { "epoch": 2.682821387940842, "grad_norm": 1.8743584829881288, "learning_rate": 5.534527560032572e-07, "loss": 0.049, "step": 11791 }, { "epoch": 2.6830489192263935, "grad_norm": 2.087861869331885, "learning_rate": 5.533639927979619e-07, "loss": 0.0325, "step": 11792 }, { "epoch": 2.6832764505119453, "grad_norm": 1.364652237252089, "learning_rate": 5.532752310567751e-07, "loss": 0.1297, "step": 11793 }, { "epoch": 2.683503981797497, "grad_norm": 1.174355790898864, "learning_rate": 5.531864707815112e-07, "loss": 0.0452, "step": 11794 }, { "epoch": 2.683731513083049, "grad_norm": 2.002880403376161, "learning_rate": 5.530977119739847e-07, "loss": 0.0332, "step": 11795 }, { "epoch": 2.6839590443686006, "grad_norm": 2.5885591335178706, "learning_rate": 5.530089546360089e-07, "loss": 0.06, "step": 11796 }, { "epoch": 2.6841865756541523, "grad_norm": 1.3806794458399267, "learning_rate": 5.529201987693984e-07, "loss": 0.0396, "step": 11797 }, { "epoch": 2.684414106939704, "grad_norm": 1.1814668485382223, "learning_rate": 5.528314443759672e-07, "loss": 0.0122, "step": 11798 }, { "epoch": 2.684641638225256, "grad_norm": 1.6465664633234642, "learning_rate": 5.527426914575286e-07, "loss": 0.0798, "step": 11799 }, { "epoch": 2.684869169510808, "grad_norm": 1.2031886393304445, "learning_rate": 5.526539400158974e-07, "loss": 0.0491, "step": 11800 }, { "epoch": 2.6850967007963593, "grad_norm": 0.9665560164019251, "learning_rate": 5.52565190052887e-07, "loss": 0.0819, "step": 11801 }, { "epoch": 2.6853242320819115, "grad_norm": 1.6337234501027558, "learning_rate": 5.524764415703117e-07, "loss": 0.0531, "step": 11802 }, { "epoch": 2.685551763367463, "grad_norm": 1.64683983859262, "learning_rate": 5.523876945699849e-07, "loss": 0.0806, "step": 11803 }, { "epoch": 2.685779294653015, "grad_norm": 2.287904432819332, "learning_rate": 5.522989490537207e-07, "loss": 0.089, "step": 11804 }, { "epoch": 2.6860068259385663, "grad_norm": 2.106646428239462, "learning_rate": 5.522102050233331e-07, "loss": 0.0683, "step": 11805 }, { "epoch": 2.6862343572241185, "grad_norm": 1.4294940256330542, "learning_rate": 5.521214624806352e-07, "loss": 0.0539, "step": 11806 }, { "epoch": 2.68646188850967, "grad_norm": 1.0650732195476404, "learning_rate": 5.520327214274413e-07, "loss": 0.0326, "step": 11807 }, { "epoch": 2.686689419795222, "grad_norm": 1.849197206468674, "learning_rate": 5.519439818655648e-07, "loss": 0.0635, "step": 11808 }, { "epoch": 2.6869169510807733, "grad_norm": 2.223635388006909, "learning_rate": 5.518552437968198e-07, "loss": 0.036, "step": 11809 }, { "epoch": 2.6871444823663255, "grad_norm": 1.2157225962936766, "learning_rate": 5.517665072230195e-07, "loss": 0.0302, "step": 11810 }, { "epoch": 2.6873720136518773, "grad_norm": 2.8383056167235536, "learning_rate": 5.516777721459777e-07, "loss": 0.1448, "step": 11811 }, { "epoch": 2.687599544937429, "grad_norm": 2.889419698448168, "learning_rate": 5.51589038567508e-07, "loss": 0.0367, "step": 11812 }, { "epoch": 2.687827076222981, "grad_norm": 3.39746799721321, "learning_rate": 5.515003064894236e-07, "loss": 0.1269, "step": 11813 }, { "epoch": 2.6880546075085325, "grad_norm": 1.860260107679134, "learning_rate": 5.514115759135387e-07, "loss": 0.0839, "step": 11814 }, { "epoch": 2.6882821387940843, "grad_norm": 1.3475110010320432, "learning_rate": 5.513228468416662e-07, "loss": 0.0869, "step": 11815 }, { "epoch": 2.688509670079636, "grad_norm": 1.5311941646283858, "learning_rate": 5.512341192756199e-07, "loss": 0.0688, "step": 11816 }, { "epoch": 2.688737201365188, "grad_norm": 2.5872977768844527, "learning_rate": 5.511453932172132e-07, "loss": 0.0613, "step": 11817 }, { "epoch": 2.6889647326507395, "grad_norm": 1.952849149655829, "learning_rate": 5.510566686682592e-07, "loss": 0.0524, "step": 11818 }, { "epoch": 2.6891922639362913, "grad_norm": 1.0845865514477446, "learning_rate": 5.509679456305715e-07, "loss": 0.0865, "step": 11819 }, { "epoch": 2.689419795221843, "grad_norm": 1.5044128730448498, "learning_rate": 5.508792241059634e-07, "loss": 0.0651, "step": 11820 }, { "epoch": 2.689647326507395, "grad_norm": 1.788243808518128, "learning_rate": 5.507905040962484e-07, "loss": 0.067, "step": 11821 }, { "epoch": 2.6898748577929465, "grad_norm": 1.7261226239984977, "learning_rate": 5.507017856032393e-07, "loss": 0.0547, "step": 11822 }, { "epoch": 2.6901023890784983, "grad_norm": 1.315491041414622, "learning_rate": 5.506130686287498e-07, "loss": 0.0429, "step": 11823 }, { "epoch": 2.69032992036405, "grad_norm": 2.1614138294313565, "learning_rate": 5.505243531745931e-07, "loss": 0.0588, "step": 11824 }, { "epoch": 2.690557451649602, "grad_norm": 1.96590548126249, "learning_rate": 5.504356392425819e-07, "loss": 0.1138, "step": 11825 }, { "epoch": 2.6907849829351536, "grad_norm": 1.714233479929623, "learning_rate": 5.503469268345299e-07, "loss": 0.1261, "step": 11826 }, { "epoch": 2.6910125142207053, "grad_norm": 1.7281250567457982, "learning_rate": 5.502582159522498e-07, "loss": 0.0634, "step": 11827 }, { "epoch": 2.691240045506257, "grad_norm": 1.933922997598378, "learning_rate": 5.501695065975551e-07, "loss": 0.0421, "step": 11828 }, { "epoch": 2.691467576791809, "grad_norm": 1.9073604224711052, "learning_rate": 5.500807987722586e-07, "loss": 0.0993, "step": 11829 }, { "epoch": 2.6916951080773606, "grad_norm": 1.4511796935190264, "learning_rate": 5.499920924781732e-07, "loss": 0.0924, "step": 11830 }, { "epoch": 2.6919226393629123, "grad_norm": 0.8706757087986516, "learning_rate": 5.499033877171123e-07, "loss": 0.0208, "step": 11831 }, { "epoch": 2.692150170648464, "grad_norm": 1.6317381255078467, "learning_rate": 5.498146844908884e-07, "loss": 0.0506, "step": 11832 }, { "epoch": 2.692377701934016, "grad_norm": 1.6509841361142679, "learning_rate": 5.497259828013148e-07, "loss": 0.0544, "step": 11833 }, { "epoch": 2.6926052332195676, "grad_norm": 1.0283704927577855, "learning_rate": 5.49637282650204e-07, "loss": 0.0632, "step": 11834 }, { "epoch": 2.6928327645051193, "grad_norm": 1.5786461913687733, "learning_rate": 5.495485840393695e-07, "loss": 0.0646, "step": 11835 }, { "epoch": 2.693060295790671, "grad_norm": 1.374399949714469, "learning_rate": 5.494598869706237e-07, "loss": 0.042, "step": 11836 }, { "epoch": 2.693287827076223, "grad_norm": 1.816819301168456, "learning_rate": 5.493711914457791e-07, "loss": 0.0241, "step": 11837 }, { "epoch": 2.6935153583617746, "grad_norm": 1.3196168315099288, "learning_rate": 5.492824974666493e-07, "loss": 0.0363, "step": 11838 }, { "epoch": 2.6937428896473268, "grad_norm": 1.476798271409761, "learning_rate": 5.491938050350462e-07, "loss": 0.0161, "step": 11839 }, { "epoch": 2.693970420932878, "grad_norm": 1.8389668728760857, "learning_rate": 5.491051141527831e-07, "loss": 0.0199, "step": 11840 }, { "epoch": 2.6941979522184303, "grad_norm": 1.277987918321707, "learning_rate": 5.490164248216724e-07, "loss": 0.0409, "step": 11841 }, { "epoch": 2.6944254835039816, "grad_norm": 2.066630631059339, "learning_rate": 5.48927737043527e-07, "loss": 0.094, "step": 11842 }, { "epoch": 2.694653014789534, "grad_norm": 2.083658266917196, "learning_rate": 5.488390508201592e-07, "loss": 0.0561, "step": 11843 }, { "epoch": 2.694880546075085, "grad_norm": 3.3326749287617337, "learning_rate": 5.487503661533816e-07, "loss": 0.0794, "step": 11844 }, { "epoch": 2.6951080773606373, "grad_norm": 1.1691905434492917, "learning_rate": 5.486616830450072e-07, "loss": 0.0275, "step": 11845 }, { "epoch": 2.6953356086461886, "grad_norm": 1.0638356779155267, "learning_rate": 5.485730014968477e-07, "loss": 0.0733, "step": 11846 }, { "epoch": 2.695563139931741, "grad_norm": 1.855751157070228, "learning_rate": 5.484843215107164e-07, "loss": 0.0735, "step": 11847 }, { "epoch": 2.695790671217292, "grad_norm": 1.7591128024540792, "learning_rate": 5.483956430884256e-07, "loss": 0.0507, "step": 11848 }, { "epoch": 2.6960182025028443, "grad_norm": 1.8712565890831652, "learning_rate": 5.483069662317871e-07, "loss": 0.04, "step": 11849 }, { "epoch": 2.696245733788396, "grad_norm": 3.1684472451305368, "learning_rate": 5.482182909426141e-07, "loss": 0.0961, "step": 11850 }, { "epoch": 2.696473265073948, "grad_norm": 2.0981350089307966, "learning_rate": 5.481296172227184e-07, "loss": 0.0128, "step": 11851 }, { "epoch": 2.6967007963594996, "grad_norm": 1.7988212069087945, "learning_rate": 5.480409450739128e-07, "loss": 0.1256, "step": 11852 }, { "epoch": 2.6969283276450513, "grad_norm": 1.2702432605150293, "learning_rate": 5.479522744980091e-07, "loss": 0.0539, "step": 11853 }, { "epoch": 2.697155858930603, "grad_norm": 2.0296829813094153, "learning_rate": 5.4786360549682e-07, "loss": 0.0283, "step": 11854 }, { "epoch": 2.697383390216155, "grad_norm": 1.5113733105062064, "learning_rate": 5.477749380721576e-07, "loss": 0.1406, "step": 11855 }, { "epoch": 2.6976109215017066, "grad_norm": 1.7542245715424944, "learning_rate": 5.476862722258336e-07, "loss": 0.0443, "step": 11856 }, { "epoch": 2.6978384527872583, "grad_norm": 1.6286243231540676, "learning_rate": 5.47597607959661e-07, "loss": 0.0286, "step": 11857 }, { "epoch": 2.69806598407281, "grad_norm": 1.2794349511587502, "learning_rate": 5.475089452754513e-07, "loss": 0.0653, "step": 11858 }, { "epoch": 2.698293515358362, "grad_norm": 2.8694326465268767, "learning_rate": 5.474202841750171e-07, "loss": 0.0677, "step": 11859 }, { "epoch": 2.6985210466439136, "grad_norm": 1.1940049065053966, "learning_rate": 5.473316246601698e-07, "loss": 0.0951, "step": 11860 }, { "epoch": 2.6987485779294653, "grad_norm": 1.7143920120868188, "learning_rate": 5.472429667327222e-07, "loss": 0.0751, "step": 11861 }, { "epoch": 2.698976109215017, "grad_norm": 1.2680659503139282, "learning_rate": 5.47154310394486e-07, "loss": 0.0283, "step": 11862 }, { "epoch": 2.699203640500569, "grad_norm": 1.8535320314172157, "learning_rate": 5.470656556472729e-07, "loss": 0.1156, "step": 11863 }, { "epoch": 2.6994311717861206, "grad_norm": 1.1562794197080446, "learning_rate": 5.469770024928952e-07, "loss": 0.0811, "step": 11864 }, { "epoch": 2.6996587030716723, "grad_norm": 2.1233771954803053, "learning_rate": 5.468883509331644e-07, "loss": 0.0689, "step": 11865 }, { "epoch": 2.699886234357224, "grad_norm": 1.1714824994529924, "learning_rate": 5.467997009698931e-07, "loss": 0.0453, "step": 11866 }, { "epoch": 2.700113765642776, "grad_norm": 1.6498604688515068, "learning_rate": 5.467110526048925e-07, "loss": 0.0317, "step": 11867 }, { "epoch": 2.7003412969283276, "grad_norm": 1.7065813863296837, "learning_rate": 5.466224058399746e-07, "loss": 0.1055, "step": 11868 }, { "epoch": 2.7005688282138793, "grad_norm": 2.10210402224429, "learning_rate": 5.465337606769512e-07, "loss": 0.0279, "step": 11869 }, { "epoch": 2.700796359499431, "grad_norm": 1.07115118179692, "learning_rate": 5.46445117117634e-07, "loss": 0.0445, "step": 11870 }, { "epoch": 2.701023890784983, "grad_norm": 1.5467993248523797, "learning_rate": 5.463564751638348e-07, "loss": 0.0936, "step": 11871 }, { "epoch": 2.7012514220705346, "grad_norm": 1.8108953171116609, "learning_rate": 5.462678348173649e-07, "loss": 0.1958, "step": 11872 }, { "epoch": 2.7014789533560863, "grad_norm": 1.5402536067284982, "learning_rate": 5.461791960800367e-07, "loss": 0.0307, "step": 11873 }, { "epoch": 2.701706484641638, "grad_norm": 2.4361735731874212, "learning_rate": 5.460905589536613e-07, "loss": 0.0447, "step": 11874 }, { "epoch": 2.70193401592719, "grad_norm": 2.3196509816105384, "learning_rate": 5.460019234400501e-07, "loss": 0.0792, "step": 11875 }, { "epoch": 2.7021615472127416, "grad_norm": 1.3532065413462293, "learning_rate": 5.459132895410152e-07, "loss": 0.0259, "step": 11876 }, { "epoch": 2.7023890784982934, "grad_norm": 1.7379484900179318, "learning_rate": 5.458246572583674e-07, "loss": 0.0275, "step": 11877 }, { "epoch": 2.7026166097838455, "grad_norm": 2.0267588314163754, "learning_rate": 5.45736026593919e-07, "loss": 0.0373, "step": 11878 }, { "epoch": 2.702844141069397, "grad_norm": 1.4124248043928485, "learning_rate": 5.456473975494809e-07, "loss": 0.0471, "step": 11879 }, { "epoch": 2.703071672354949, "grad_norm": 2.080831524616176, "learning_rate": 5.455587701268647e-07, "loss": 0.0477, "step": 11880 }, { "epoch": 2.7032992036405004, "grad_norm": 1.995612856811124, "learning_rate": 5.454701443278819e-07, "loss": 0.0421, "step": 11881 }, { "epoch": 2.7035267349260526, "grad_norm": 1.1155482917369066, "learning_rate": 5.453815201543435e-07, "loss": 0.0704, "step": 11882 }, { "epoch": 2.703754266211604, "grad_norm": 1.5296127600017855, "learning_rate": 5.452928976080611e-07, "loss": 0.0761, "step": 11883 }, { "epoch": 2.703981797497156, "grad_norm": 1.782566197232046, "learning_rate": 5.452042766908457e-07, "loss": 0.0386, "step": 11884 }, { "epoch": 2.7042093287827074, "grad_norm": 1.5207989279217229, "learning_rate": 5.451156574045091e-07, "loss": 0.0371, "step": 11885 }, { "epoch": 2.7044368600682596, "grad_norm": 1.5848306816488844, "learning_rate": 5.45027039750862e-07, "loss": 0.0328, "step": 11886 }, { "epoch": 2.7046643913538113, "grad_norm": 1.251817332451052, "learning_rate": 5.449384237317156e-07, "loss": 0.0637, "step": 11887 }, { "epoch": 2.704891922639363, "grad_norm": 1.9442438667362727, "learning_rate": 5.448498093488814e-07, "loss": 0.0245, "step": 11888 }, { "epoch": 2.705119453924915, "grad_norm": 1.1388586685640363, "learning_rate": 5.447611966041701e-07, "loss": 0.0469, "step": 11889 }, { "epoch": 2.7053469852104666, "grad_norm": 2.2572469289515045, "learning_rate": 5.446725854993932e-07, "loss": 0.0997, "step": 11890 }, { "epoch": 2.7055745164960183, "grad_norm": 0.9671952527790956, "learning_rate": 5.445839760363613e-07, "loss": 0.0187, "step": 11891 }, { "epoch": 2.70580204778157, "grad_norm": 1.815343214336181, "learning_rate": 5.444953682168859e-07, "loss": 0.0813, "step": 11892 }, { "epoch": 2.706029579067122, "grad_norm": 2.1570782012286625, "learning_rate": 5.444067620427777e-07, "loss": 0.1268, "step": 11893 }, { "epoch": 2.7062571103526736, "grad_norm": 4.061901856591347, "learning_rate": 5.443181575158475e-07, "loss": 0.0887, "step": 11894 }, { "epoch": 2.7064846416382253, "grad_norm": 2.3209066405393846, "learning_rate": 5.442295546379067e-07, "loss": 0.1287, "step": 11895 }, { "epoch": 2.706712172923777, "grad_norm": 1.4101243887910333, "learning_rate": 5.441409534107657e-07, "loss": 0.0509, "step": 11896 }, { "epoch": 2.706939704209329, "grad_norm": 1.3207395413068512, "learning_rate": 5.440523538362355e-07, "loss": 0.0393, "step": 11897 }, { "epoch": 2.7071672354948806, "grad_norm": 2.7259186408216953, "learning_rate": 5.439637559161267e-07, "loss": 0.0516, "step": 11898 }, { "epoch": 2.7073947667804323, "grad_norm": 2.559588114207646, "learning_rate": 5.438751596522508e-07, "loss": 0.0679, "step": 11899 }, { "epoch": 2.707622298065984, "grad_norm": 3.3410659435587813, "learning_rate": 5.437865650464179e-07, "loss": 0.043, "step": 11900 }, { "epoch": 2.707849829351536, "grad_norm": 1.7945677890449874, "learning_rate": 5.436979721004388e-07, "loss": 0.0439, "step": 11901 }, { "epoch": 2.7080773606370876, "grad_norm": 1.7620048157089552, "learning_rate": 5.436093808161243e-07, "loss": 0.1006, "step": 11902 }, { "epoch": 2.7083048919226393, "grad_norm": 2.469626544188443, "learning_rate": 5.435207911952849e-07, "loss": 0.0456, "step": 11903 }, { "epoch": 2.708532423208191, "grad_norm": 1.6339302305451646, "learning_rate": 5.434322032397314e-07, "loss": 0.1002, "step": 11904 }, { "epoch": 2.708759954493743, "grad_norm": 1.7262040844830686, "learning_rate": 5.433436169512744e-07, "loss": 0.0445, "step": 11905 }, { "epoch": 2.7089874857792946, "grad_norm": 1.004382052514516, "learning_rate": 5.432550323317241e-07, "loss": 0.0173, "step": 11906 }, { "epoch": 2.7092150170648464, "grad_norm": 0.9321011985052823, "learning_rate": 5.431664493828914e-07, "loss": 0.0335, "step": 11907 }, { "epoch": 2.709442548350398, "grad_norm": 1.3250133144495255, "learning_rate": 5.430778681065863e-07, "loss": 0.0773, "step": 11908 }, { "epoch": 2.70967007963595, "grad_norm": 1.718539886239479, "learning_rate": 5.429892885046199e-07, "loss": 0.1263, "step": 11909 }, { "epoch": 2.7098976109215016, "grad_norm": 1.246387829916377, "learning_rate": 5.42900710578802e-07, "loss": 0.0692, "step": 11910 }, { "epoch": 2.7101251422070534, "grad_norm": 1.7035549789550128, "learning_rate": 5.428121343309434e-07, "loss": 0.0639, "step": 11911 }, { "epoch": 2.710352673492605, "grad_norm": 1.58349100101737, "learning_rate": 5.427235597628543e-07, "loss": 0.0719, "step": 11912 }, { "epoch": 2.710580204778157, "grad_norm": 1.8820203131580948, "learning_rate": 5.426349868763447e-07, "loss": 0.0779, "step": 11913 }, { "epoch": 2.7108077360637086, "grad_norm": 1.6620828914597363, "learning_rate": 5.425464156732253e-07, "loss": 0.0974, "step": 11914 }, { "epoch": 2.7110352673492604, "grad_norm": 2.9883622394917344, "learning_rate": 5.42457846155306e-07, "loss": 0.0891, "step": 11915 }, { "epoch": 2.711262798634812, "grad_norm": 1.2445721358418917, "learning_rate": 5.423692783243975e-07, "loss": 0.0449, "step": 11916 }, { "epoch": 2.7114903299203643, "grad_norm": 1.1271557074703387, "learning_rate": 5.422807121823093e-07, "loss": 0.0157, "step": 11917 }, { "epoch": 2.7117178612059156, "grad_norm": 2.2452663511443993, "learning_rate": 5.421921477308519e-07, "loss": 0.0281, "step": 11918 }, { "epoch": 2.711945392491468, "grad_norm": 1.9233989634986368, "learning_rate": 5.421035849718355e-07, "loss": 0.0713, "step": 11919 }, { "epoch": 2.712172923777019, "grad_norm": 2.4676561609299807, "learning_rate": 5.420150239070698e-07, "loss": 0.0728, "step": 11920 }, { "epoch": 2.7124004550625713, "grad_norm": 2.35774555804965, "learning_rate": 5.419264645383652e-07, "loss": 0.0578, "step": 11921 }, { "epoch": 2.7126279863481226, "grad_norm": 1.7079615584169825, "learning_rate": 5.418379068675313e-07, "loss": 0.1004, "step": 11922 }, { "epoch": 2.712855517633675, "grad_norm": 1.824334888738836, "learning_rate": 5.417493508963786e-07, "loss": 0.069, "step": 11923 }, { "epoch": 2.713083048919226, "grad_norm": 1.666869511066109, "learning_rate": 5.416607966267165e-07, "loss": 0.0341, "step": 11924 }, { "epoch": 2.7133105802047783, "grad_norm": 2.610959178214001, "learning_rate": 5.415722440603551e-07, "loss": 0.0982, "step": 11925 }, { "epoch": 2.71353811149033, "grad_norm": 1.6272202190724667, "learning_rate": 5.414836931991043e-07, "loss": 0.1161, "step": 11926 }, { "epoch": 2.713765642775882, "grad_norm": 1.9188500133058655, "learning_rate": 5.413951440447737e-07, "loss": 0.0608, "step": 11927 }, { "epoch": 2.7139931740614336, "grad_norm": 1.3974801768993048, "learning_rate": 5.413065965991734e-07, "loss": 0.1082, "step": 11928 }, { "epoch": 2.7142207053469853, "grad_norm": 1.5559716735257492, "learning_rate": 5.412180508641128e-07, "loss": 0.1114, "step": 11929 }, { "epoch": 2.714448236632537, "grad_norm": 7.506534140412298, "learning_rate": 5.411295068414022e-07, "loss": 0.0698, "step": 11930 }, { "epoch": 2.714675767918089, "grad_norm": 1.1510426151112145, "learning_rate": 5.410409645328506e-07, "loss": 0.0669, "step": 11931 }, { "epoch": 2.7149032992036406, "grad_norm": 1.3289668470140186, "learning_rate": 5.409524239402678e-07, "loss": 0.0428, "step": 11932 }, { "epoch": 2.7151308304891923, "grad_norm": 2.2392720785459495, "learning_rate": 5.408638850654638e-07, "loss": 0.031, "step": 11933 }, { "epoch": 2.715358361774744, "grad_norm": 1.537751477929988, "learning_rate": 5.407753479102477e-07, "loss": 0.0625, "step": 11934 }, { "epoch": 2.715585893060296, "grad_norm": 1.0707416371630227, "learning_rate": 5.406868124764293e-07, "loss": 0.0541, "step": 11935 }, { "epoch": 2.7158134243458476, "grad_norm": 1.1013596056987573, "learning_rate": 5.405982787658182e-07, "loss": 0.0365, "step": 11936 }, { "epoch": 2.7160409556313994, "grad_norm": 1.2114636525542721, "learning_rate": 5.405097467802233e-07, "loss": 0.063, "step": 11937 }, { "epoch": 2.716268486916951, "grad_norm": 1.4275731130476288, "learning_rate": 5.404212165214549e-07, "loss": 0.0296, "step": 11938 }, { "epoch": 2.716496018202503, "grad_norm": 1.5276696247872468, "learning_rate": 5.403326879913216e-07, "loss": 0.1831, "step": 11939 }, { "epoch": 2.7167235494880546, "grad_norm": 1.839522360642865, "learning_rate": 5.402441611916333e-07, "loss": 0.0801, "step": 11940 }, { "epoch": 2.7169510807736064, "grad_norm": 1.3077599568616678, "learning_rate": 5.401556361241989e-07, "loss": 0.1072, "step": 11941 }, { "epoch": 2.717178612059158, "grad_norm": 1.707277118126188, "learning_rate": 5.400671127908282e-07, "loss": 0.0434, "step": 11942 }, { "epoch": 2.71740614334471, "grad_norm": 2.0126881802580097, "learning_rate": 5.3997859119333e-07, "loss": 0.0388, "step": 11943 }, { "epoch": 2.7176336746302616, "grad_norm": 1.9864463031805064, "learning_rate": 5.398900713335137e-07, "loss": 0.0581, "step": 11944 }, { "epoch": 2.7178612059158134, "grad_norm": 1.395595330972357, "learning_rate": 5.398015532131887e-07, "loss": 0.0444, "step": 11945 }, { "epoch": 2.718088737201365, "grad_norm": 1.5080880016542877, "learning_rate": 5.397130368341635e-07, "loss": 0.0108, "step": 11946 }, { "epoch": 2.718316268486917, "grad_norm": 1.4049208968395963, "learning_rate": 5.396245221982479e-07, "loss": 0.0356, "step": 11947 }, { "epoch": 2.7185437997724686, "grad_norm": 1.8467650236284787, "learning_rate": 5.395360093072506e-07, "loss": 0.0607, "step": 11948 }, { "epoch": 2.7187713310580204, "grad_norm": 1.5831790258622491, "learning_rate": 5.394474981629809e-07, "loss": 0.0795, "step": 11949 }, { "epoch": 2.718998862343572, "grad_norm": 0.9567986773452148, "learning_rate": 5.393589887672476e-07, "loss": 0.0455, "step": 11950 }, { "epoch": 2.719226393629124, "grad_norm": 2.2658610580874403, "learning_rate": 5.392704811218595e-07, "loss": 0.0478, "step": 11951 }, { "epoch": 2.7194539249146756, "grad_norm": 2.6620891054853093, "learning_rate": 5.391819752286262e-07, "loss": 0.1191, "step": 11952 }, { "epoch": 2.7196814562002274, "grad_norm": 0.8759829940574254, "learning_rate": 5.390934710893557e-07, "loss": 0.0375, "step": 11953 }, { "epoch": 2.719908987485779, "grad_norm": 1.7058656882821797, "learning_rate": 5.390049687058575e-07, "loss": 0.0278, "step": 11954 }, { "epoch": 2.720136518771331, "grad_norm": 1.5966380689517803, "learning_rate": 5.389164680799405e-07, "loss": 0.0632, "step": 11955 }, { "epoch": 2.720364050056883, "grad_norm": 1.6219261768251225, "learning_rate": 5.388279692134129e-07, "loss": 0.0232, "step": 11956 }, { "epoch": 2.7205915813424344, "grad_norm": 1.6449759655794796, "learning_rate": 5.387394721080839e-07, "loss": 0.0998, "step": 11957 }, { "epoch": 2.7208191126279866, "grad_norm": 1.071749117799125, "learning_rate": 5.38650976765762e-07, "loss": 0.0414, "step": 11958 }, { "epoch": 2.721046643913538, "grad_norm": 1.732255569732357, "learning_rate": 5.385624831882562e-07, "loss": 0.0666, "step": 11959 }, { "epoch": 2.72127417519909, "grad_norm": 0.9837741979234907, "learning_rate": 5.384739913773748e-07, "loss": 0.0774, "step": 11960 }, { "epoch": 2.7215017064846414, "grad_norm": 1.5326174775241819, "learning_rate": 5.383855013349266e-07, "loss": 0.0796, "step": 11961 }, { "epoch": 2.7217292377701936, "grad_norm": 1.3626377203821411, "learning_rate": 5.382970130627203e-07, "loss": 0.0568, "step": 11962 }, { "epoch": 2.721956769055745, "grad_norm": 0.9277313032021542, "learning_rate": 5.382085265625639e-07, "loss": 0.0195, "step": 11963 }, { "epoch": 2.722184300341297, "grad_norm": 1.4178726085171167, "learning_rate": 5.381200418362665e-07, "loss": 0.0455, "step": 11964 }, { "epoch": 2.722411831626849, "grad_norm": 2.233223494003987, "learning_rate": 5.380315588856362e-07, "loss": 0.0475, "step": 11965 }, { "epoch": 2.7226393629124006, "grad_norm": 1.2394048352871927, "learning_rate": 5.379430777124817e-07, "loss": 0.0228, "step": 11966 }, { "epoch": 2.7228668941979524, "grad_norm": 1.3216368635187385, "learning_rate": 5.378545983186111e-07, "loss": 0.0473, "step": 11967 }, { "epoch": 2.723094425483504, "grad_norm": 1.0914467300121513, "learning_rate": 5.377661207058331e-07, "loss": 0.0414, "step": 11968 }, { "epoch": 2.723321956769056, "grad_norm": 1.3575038949125307, "learning_rate": 5.376776448759559e-07, "loss": 0.0445, "step": 11969 }, { "epoch": 2.7235494880546076, "grad_norm": 1.9089098546254997, "learning_rate": 5.375891708307874e-07, "loss": 0.0344, "step": 11970 }, { "epoch": 2.7237770193401594, "grad_norm": 1.6072476995637888, "learning_rate": 5.375006985721364e-07, "loss": 0.0617, "step": 11971 }, { "epoch": 2.724004550625711, "grad_norm": 1.6362168667558934, "learning_rate": 5.374122281018106e-07, "loss": 0.1131, "step": 11972 }, { "epoch": 2.724232081911263, "grad_norm": 0.8878730714733957, "learning_rate": 5.373237594216188e-07, "loss": 0.0147, "step": 11973 }, { "epoch": 2.7244596131968146, "grad_norm": 1.5332138657365089, "learning_rate": 5.372352925333687e-07, "loss": 0.0481, "step": 11974 }, { "epoch": 2.7246871444823664, "grad_norm": 1.0236549544151765, "learning_rate": 5.371468274388683e-07, "loss": 0.0675, "step": 11975 }, { "epoch": 2.724914675767918, "grad_norm": 1.822552470402765, "learning_rate": 5.370583641399261e-07, "loss": 0.0204, "step": 11976 }, { "epoch": 2.72514220705347, "grad_norm": 1.2381903059958899, "learning_rate": 5.369699026383495e-07, "loss": 0.0228, "step": 11977 }, { "epoch": 2.7253697383390216, "grad_norm": 1.4535383600514227, "learning_rate": 5.368814429359472e-07, "loss": 0.0423, "step": 11978 }, { "epoch": 2.7255972696245734, "grad_norm": 2.4754904371379785, "learning_rate": 5.367929850345267e-07, "loss": 0.0392, "step": 11979 }, { "epoch": 2.725824800910125, "grad_norm": 1.7805584630384452, "learning_rate": 5.367045289358962e-07, "loss": 0.0566, "step": 11980 }, { "epoch": 2.726052332195677, "grad_norm": 1.521682600435059, "learning_rate": 5.366160746418633e-07, "loss": 0.0205, "step": 11981 }, { "epoch": 2.7262798634812286, "grad_norm": 1.8855335269204343, "learning_rate": 5.365276221542359e-07, "loss": 0.103, "step": 11982 }, { "epoch": 2.7265073947667804, "grad_norm": 1.3894257517128838, "learning_rate": 5.364391714748221e-07, "loss": 0.0858, "step": 11983 }, { "epoch": 2.726734926052332, "grad_norm": 1.5437332327980855, "learning_rate": 5.363507226054292e-07, "loss": 0.0513, "step": 11984 }, { "epoch": 2.726962457337884, "grad_norm": 1.5629133356979934, "learning_rate": 5.362622755478653e-07, "loss": 0.1593, "step": 11985 }, { "epoch": 2.7271899886234356, "grad_norm": 1.9379088989672153, "learning_rate": 5.361738303039377e-07, "loss": 0.0566, "step": 11986 }, { "epoch": 2.7274175199089874, "grad_norm": 1.8850490394134047, "learning_rate": 5.360853868754548e-07, "loss": 0.1321, "step": 11987 }, { "epoch": 2.727645051194539, "grad_norm": 1.5506870104739108, "learning_rate": 5.359969452642235e-07, "loss": 0.0697, "step": 11988 }, { "epoch": 2.727872582480091, "grad_norm": 2.1569771813641574, "learning_rate": 5.359085054720515e-07, "loss": 0.0905, "step": 11989 }, { "epoch": 2.7281001137656427, "grad_norm": 1.3125343758894652, "learning_rate": 5.358200675007469e-07, "loss": 0.0846, "step": 11990 }, { "epoch": 2.7283276450511944, "grad_norm": 1.3996158235927127, "learning_rate": 5.357316313521164e-07, "loss": 0.0838, "step": 11991 }, { "epoch": 2.728555176336746, "grad_norm": 1.998394286460402, "learning_rate": 5.356431970279681e-07, "loss": 0.063, "step": 11992 }, { "epoch": 2.728782707622298, "grad_norm": 1.0329090268783114, "learning_rate": 5.355547645301092e-07, "loss": 0.0268, "step": 11993 }, { "epoch": 2.7290102389078497, "grad_norm": 2.185067809948094, "learning_rate": 5.354663338603469e-07, "loss": 0.1071, "step": 11994 }, { "epoch": 2.729237770193402, "grad_norm": 1.48538744935273, "learning_rate": 5.35377905020489e-07, "loss": 0.0792, "step": 11995 }, { "epoch": 2.729465301478953, "grad_norm": 1.3588374039045485, "learning_rate": 5.352894780123423e-07, "loss": 0.0267, "step": 11996 }, { "epoch": 2.7296928327645054, "grad_norm": 2.414581390598143, "learning_rate": 5.352010528377147e-07, "loss": 0.083, "step": 11997 }, { "epoch": 2.7299203640500567, "grad_norm": 1.227252321919242, "learning_rate": 5.351126294984126e-07, "loss": 0.0423, "step": 11998 }, { "epoch": 2.730147895335609, "grad_norm": 1.4861252924144936, "learning_rate": 5.350242079962443e-07, "loss": 0.0838, "step": 11999 }, { "epoch": 2.73037542662116, "grad_norm": 1.4644978483587472, "learning_rate": 5.349357883330164e-07, "loss": 0.0316, "step": 12000 }, { "epoch": 2.7306029579067124, "grad_norm": 2.745425668258535, "learning_rate": 5.348473705105355e-07, "loss": 0.0797, "step": 12001 }, { "epoch": 2.7308304891922637, "grad_norm": 1.311819359886042, "learning_rate": 5.347589545306097e-07, "loss": 0.0252, "step": 12002 }, { "epoch": 2.731058020477816, "grad_norm": 1.9222774549410553, "learning_rate": 5.346705403950454e-07, "loss": 0.104, "step": 12003 }, { "epoch": 2.7312855517633676, "grad_norm": 1.4420094223403623, "learning_rate": 5.345821281056499e-07, "loss": 0.0213, "step": 12004 }, { "epoch": 2.7315130830489194, "grad_norm": 1.9043719559624546, "learning_rate": 5.3449371766423e-07, "loss": 0.0218, "step": 12005 }, { "epoch": 2.731740614334471, "grad_norm": 1.6608665437390424, "learning_rate": 5.344053090725931e-07, "loss": 0.0363, "step": 12006 }, { "epoch": 2.731968145620023, "grad_norm": 1.5718965507876674, "learning_rate": 5.343169023325455e-07, "loss": 0.0597, "step": 12007 }, { "epoch": 2.7321956769055746, "grad_norm": 1.2788169741980175, "learning_rate": 5.342284974458943e-07, "loss": 0.0758, "step": 12008 }, { "epoch": 2.7324232081911264, "grad_norm": 1.8696821728159088, "learning_rate": 5.341400944144465e-07, "loss": 0.1146, "step": 12009 }, { "epoch": 2.732650739476678, "grad_norm": 1.4647292758479213, "learning_rate": 5.340516932400086e-07, "loss": 0.0631, "step": 12010 }, { "epoch": 2.73287827076223, "grad_norm": 1.475344816283562, "learning_rate": 5.339632939243877e-07, "loss": 0.0328, "step": 12011 }, { "epoch": 2.7331058020477816, "grad_norm": 1.3782301235539511, "learning_rate": 5.338748964693905e-07, "loss": 0.0387, "step": 12012 }, { "epoch": 2.7333333333333334, "grad_norm": 1.653617931554678, "learning_rate": 5.337865008768231e-07, "loss": 0.0816, "step": 12013 }, { "epoch": 2.733560864618885, "grad_norm": 1.7774823387954766, "learning_rate": 5.336981071484928e-07, "loss": 0.0392, "step": 12014 }, { "epoch": 2.733788395904437, "grad_norm": 2.2772696589527115, "learning_rate": 5.336097152862059e-07, "loss": 0.1749, "step": 12015 }, { "epoch": 2.7340159271899886, "grad_norm": 1.974702062977347, "learning_rate": 5.335213252917693e-07, "loss": 0.0781, "step": 12016 }, { "epoch": 2.7342434584755404, "grad_norm": 2.24698042322318, "learning_rate": 5.334329371669889e-07, "loss": 0.0291, "step": 12017 }, { "epoch": 2.734470989761092, "grad_norm": 1.35666071832003, "learning_rate": 5.333445509136718e-07, "loss": 0.0317, "step": 12018 }, { "epoch": 2.734698521046644, "grad_norm": 0.6914820779899206, "learning_rate": 5.332561665336243e-07, "loss": 0.0537, "step": 12019 }, { "epoch": 2.7349260523321957, "grad_norm": 1.090556972575461, "learning_rate": 5.331677840286524e-07, "loss": 0.0159, "step": 12020 }, { "epoch": 2.7351535836177474, "grad_norm": 2.4959047639801226, "learning_rate": 5.330794034005631e-07, "loss": 0.0485, "step": 12021 }, { "epoch": 2.735381114903299, "grad_norm": 2.333885243789422, "learning_rate": 5.329910246511623e-07, "loss": 0.0689, "step": 12022 }, { "epoch": 2.735608646188851, "grad_norm": 1.5655934394258946, "learning_rate": 5.329026477822566e-07, "loss": 0.037, "step": 12023 }, { "epoch": 2.7358361774744027, "grad_norm": 1.4822423138752874, "learning_rate": 5.328142727956521e-07, "loss": 0.0665, "step": 12024 }, { "epoch": 2.7360637087599544, "grad_norm": 0.8334656072883646, "learning_rate": 5.327258996931548e-07, "loss": 0.0103, "step": 12025 }, { "epoch": 2.736291240045506, "grad_norm": 1.519090420519734, "learning_rate": 5.326375284765715e-07, "loss": 0.073, "step": 12026 }, { "epoch": 2.736518771331058, "grad_norm": 2.172602028063574, "learning_rate": 5.325491591477076e-07, "loss": 0.0558, "step": 12027 }, { "epoch": 2.7367463026166097, "grad_norm": 1.1466681279704964, "learning_rate": 5.324607917083698e-07, "loss": 0.03, "step": 12028 }, { "epoch": 2.7369738339021614, "grad_norm": 1.8348500094169855, "learning_rate": 5.323724261603637e-07, "loss": 0.0923, "step": 12029 }, { "epoch": 2.737201365187713, "grad_norm": 1.4887043561389637, "learning_rate": 5.322840625054959e-07, "loss": 0.0587, "step": 12030 }, { "epoch": 2.737428896473265, "grad_norm": 2.7655850345628017, "learning_rate": 5.321957007455719e-07, "loss": 0.0762, "step": 12031 }, { "epoch": 2.7376564277588167, "grad_norm": 1.3270253110955967, "learning_rate": 5.321073408823976e-07, "loss": 0.03, "step": 12032 }, { "epoch": 2.7378839590443684, "grad_norm": 2.2152539049368256, "learning_rate": 5.320189829177796e-07, "loss": 0.0378, "step": 12033 }, { "epoch": 2.7381114903299206, "grad_norm": 1.2065296082167056, "learning_rate": 5.319306268535229e-07, "loss": 0.0509, "step": 12034 }, { "epoch": 2.738339021615472, "grad_norm": 1.268648387578703, "learning_rate": 5.318422726914339e-07, "loss": 0.0685, "step": 12035 }, { "epoch": 2.738566552901024, "grad_norm": 0.972255043342071, "learning_rate": 5.31753920433318e-07, "loss": 0.0201, "step": 12036 }, { "epoch": 2.7387940841865754, "grad_norm": 2.1321544910767605, "learning_rate": 5.316655700809816e-07, "loss": 0.0634, "step": 12037 }, { "epoch": 2.7390216154721276, "grad_norm": 1.6190289520282, "learning_rate": 5.315772216362298e-07, "loss": 0.065, "step": 12038 }, { "epoch": 2.739249146757679, "grad_norm": 0.8068243697343968, "learning_rate": 5.314888751008684e-07, "loss": 0.0261, "step": 12039 }, { "epoch": 2.739476678043231, "grad_norm": 1.5018557877505967, "learning_rate": 5.314005304767034e-07, "loss": 0.1098, "step": 12040 }, { "epoch": 2.7397042093287824, "grad_norm": 1.79551765721882, "learning_rate": 5.313121877655399e-07, "loss": 0.0684, "step": 12041 }, { "epoch": 2.7399317406143346, "grad_norm": 3.5258924390112947, "learning_rate": 5.312238469691838e-07, "loss": 0.0691, "step": 12042 }, { "epoch": 2.7401592718998864, "grad_norm": 1.291053026666248, "learning_rate": 5.311355080894407e-07, "loss": 0.023, "step": 12043 }, { "epoch": 2.740386803185438, "grad_norm": 2.356708040316074, "learning_rate": 5.310471711281155e-07, "loss": 0.0542, "step": 12044 }, { "epoch": 2.74061433447099, "grad_norm": 2.461869341796495, "learning_rate": 5.309588360870144e-07, "loss": 0.1396, "step": 12045 }, { "epoch": 2.7408418657565417, "grad_norm": 1.6564104037384781, "learning_rate": 5.308705029679421e-07, "loss": 0.1065, "step": 12046 }, { "epoch": 2.7410693970420934, "grad_norm": 1.366146256947657, "learning_rate": 5.307821717727047e-07, "loss": 0.0205, "step": 12047 }, { "epoch": 2.741296928327645, "grad_norm": 1.6942794586570478, "learning_rate": 5.306938425031069e-07, "loss": 0.0499, "step": 12048 }, { "epoch": 2.741524459613197, "grad_norm": 1.8497438650695501, "learning_rate": 5.306055151609544e-07, "loss": 0.0977, "step": 12049 }, { "epoch": 2.7417519908987487, "grad_norm": 1.8080388958781566, "learning_rate": 5.305171897480524e-07, "loss": 0.059, "step": 12050 }, { "epoch": 2.7419795221843004, "grad_norm": 1.8697345322768726, "learning_rate": 5.304288662662059e-07, "loss": 0.0476, "step": 12051 }, { "epoch": 2.742207053469852, "grad_norm": 1.625372617134515, "learning_rate": 5.303405447172201e-07, "loss": 0.0402, "step": 12052 }, { "epoch": 2.742434584755404, "grad_norm": 1.6669938486811062, "learning_rate": 5.302522251029002e-07, "loss": 0.1531, "step": 12053 }, { "epoch": 2.7426621160409557, "grad_norm": 1.915097654453064, "learning_rate": 5.301639074250514e-07, "loss": 0.0306, "step": 12054 }, { "epoch": 2.7428896473265074, "grad_norm": 1.8293734459266944, "learning_rate": 5.300755916854784e-07, "loss": 0.031, "step": 12055 }, { "epoch": 2.743117178612059, "grad_norm": 1.6543544214959198, "learning_rate": 5.299872778859867e-07, "loss": 0.1078, "step": 12056 }, { "epoch": 2.743344709897611, "grad_norm": 1.7407960084314187, "learning_rate": 5.298989660283812e-07, "loss": 0.0376, "step": 12057 }, { "epoch": 2.7435722411831627, "grad_norm": 3.2299777560484473, "learning_rate": 5.298106561144662e-07, "loss": 0.0582, "step": 12058 }, { "epoch": 2.7437997724687144, "grad_norm": 1.7608455221916188, "learning_rate": 5.297223481460474e-07, "loss": 0.0665, "step": 12059 }, { "epoch": 2.744027303754266, "grad_norm": 1.5379592651760254, "learning_rate": 5.296340421249291e-07, "loss": 0.0952, "step": 12060 }, { "epoch": 2.744254835039818, "grad_norm": 1.8940030152385763, "learning_rate": 5.295457380529164e-07, "loss": 0.0772, "step": 12061 }, { "epoch": 2.7444823663253697, "grad_norm": 2.025316107890232, "learning_rate": 5.294574359318143e-07, "loss": 0.1011, "step": 12062 }, { "epoch": 2.7447098976109214, "grad_norm": 2.2609865413067305, "learning_rate": 5.293691357634269e-07, "loss": 0.0651, "step": 12063 }, { "epoch": 2.744937428896473, "grad_norm": 2.647025383596616, "learning_rate": 5.292808375495593e-07, "loss": 0.0581, "step": 12064 }, { "epoch": 2.745164960182025, "grad_norm": 1.7829344102816183, "learning_rate": 5.291925412920159e-07, "loss": 0.0395, "step": 12065 }, { "epoch": 2.7453924914675767, "grad_norm": 1.5344497432318467, "learning_rate": 5.291042469926019e-07, "loss": 0.0483, "step": 12066 }, { "epoch": 2.7456200227531284, "grad_norm": 1.6910034150680022, "learning_rate": 5.290159546531211e-07, "loss": 0.0208, "step": 12067 }, { "epoch": 2.74584755403868, "grad_norm": 1.1579826866066742, "learning_rate": 5.289276642753785e-07, "loss": 0.0312, "step": 12068 }, { "epoch": 2.746075085324232, "grad_norm": 1.7161366161251554, "learning_rate": 5.288393758611787e-07, "loss": 0.1082, "step": 12069 }, { "epoch": 2.7463026166097837, "grad_norm": 2.3288241783913985, "learning_rate": 5.287510894123256e-07, "loss": 0.0503, "step": 12070 }, { "epoch": 2.7465301478953354, "grad_norm": 2.6842530235682376, "learning_rate": 5.286628049306243e-07, "loss": 0.0212, "step": 12071 }, { "epoch": 2.746757679180887, "grad_norm": 1.4823742650480323, "learning_rate": 5.285745224178785e-07, "loss": 0.0269, "step": 12072 }, { "epoch": 2.7469852104664394, "grad_norm": 0.6658058613817137, "learning_rate": 5.284862418758932e-07, "loss": 0.0282, "step": 12073 }, { "epoch": 2.7472127417519907, "grad_norm": 1.3770994565926036, "learning_rate": 5.28397963306472e-07, "loss": 0.0355, "step": 12074 }, { "epoch": 2.747440273037543, "grad_norm": 2.720295250201427, "learning_rate": 5.283096867114198e-07, "loss": 0.0677, "step": 12075 }, { "epoch": 2.747667804323094, "grad_norm": 1.741389045606541, "learning_rate": 5.282214120925406e-07, "loss": 0.0318, "step": 12076 }, { "epoch": 2.7478953356086464, "grad_norm": 2.148631506383997, "learning_rate": 5.281331394516382e-07, "loss": 0.0487, "step": 12077 }, { "epoch": 2.7481228668941977, "grad_norm": 1.4928955570576747, "learning_rate": 5.280448687905172e-07, "loss": 0.089, "step": 12078 }, { "epoch": 2.74835039817975, "grad_norm": 1.9610275706576938, "learning_rate": 5.279566001109813e-07, "loss": 0.0976, "step": 12079 }, { "epoch": 2.748577929465301, "grad_norm": 2.1847019960327065, "learning_rate": 5.278683334148351e-07, "loss": 0.0917, "step": 12080 }, { "epoch": 2.7488054607508534, "grad_norm": 1.6822855982801326, "learning_rate": 5.277800687038821e-07, "loss": 0.0922, "step": 12081 }, { "epoch": 2.749032992036405, "grad_norm": 1.3818114654517608, "learning_rate": 5.276918059799263e-07, "loss": 0.0539, "step": 12082 }, { "epoch": 2.749260523321957, "grad_norm": 1.3588365365688355, "learning_rate": 5.276035452447722e-07, "loss": 0.0317, "step": 12083 }, { "epoch": 2.7494880546075087, "grad_norm": 1.550896347301944, "learning_rate": 5.275152865002228e-07, "loss": 0.0364, "step": 12084 }, { "epoch": 2.7497155858930604, "grad_norm": 1.0535961970969192, "learning_rate": 5.274270297480827e-07, "loss": 0.0214, "step": 12085 }, { "epoch": 2.749943117178612, "grad_norm": 2.4968258270873056, "learning_rate": 5.273387749901552e-07, "loss": 0.1109, "step": 12086 }, { "epoch": 2.750170648464164, "grad_norm": 2.226580436184395, "learning_rate": 5.272505222282446e-07, "loss": 0.068, "step": 12087 }, { "epoch": 2.7503981797497157, "grad_norm": 1.685666357090485, "learning_rate": 5.271622714641541e-07, "loss": 0.0947, "step": 12088 }, { "epoch": 2.7506257110352674, "grad_norm": 1.5041321132938328, "learning_rate": 5.270740226996874e-07, "loss": 0.1065, "step": 12089 }, { "epoch": 2.750853242320819, "grad_norm": 1.4312680797575197, "learning_rate": 5.269857759366488e-07, "loss": 0.1154, "step": 12090 }, { "epoch": 2.751080773606371, "grad_norm": 1.416334933932847, "learning_rate": 5.26897531176841e-07, "loss": 0.0552, "step": 12091 }, { "epoch": 2.7513083048919227, "grad_norm": 1.400960466183693, "learning_rate": 5.268092884220682e-07, "loss": 0.0957, "step": 12092 }, { "epoch": 2.7515358361774744, "grad_norm": 1.1789639048643648, "learning_rate": 5.267210476741336e-07, "loss": 0.0155, "step": 12093 }, { "epoch": 2.751763367463026, "grad_norm": 2.6251396306245707, "learning_rate": 5.26632808934841e-07, "loss": 0.0732, "step": 12094 }, { "epoch": 2.751990898748578, "grad_norm": 2.209277515804109, "learning_rate": 5.265445722059935e-07, "loss": 0.0631, "step": 12095 }, { "epoch": 2.7522184300341297, "grad_norm": 1.489526006641341, "learning_rate": 5.264563374893945e-07, "loss": 0.0287, "step": 12096 }, { "epoch": 2.7524459613196814, "grad_norm": 1.9001502229248588, "learning_rate": 5.263681047868478e-07, "loss": 0.1407, "step": 12097 }, { "epoch": 2.752673492605233, "grad_norm": 2.404591338004736, "learning_rate": 5.262798741001561e-07, "loss": 0.0351, "step": 12098 }, { "epoch": 2.752901023890785, "grad_norm": 1.6992903691119874, "learning_rate": 5.261916454311232e-07, "loss": 0.0489, "step": 12099 }, { "epoch": 2.7531285551763367, "grad_norm": 1.2720886804726144, "learning_rate": 5.261034187815522e-07, "loss": 0.0771, "step": 12100 }, { "epoch": 2.7533560864618885, "grad_norm": 1.3674314055740948, "learning_rate": 5.260151941532458e-07, "loss": 0.0829, "step": 12101 }, { "epoch": 2.75358361774744, "grad_norm": 1.2068682376391768, "learning_rate": 5.259269715480078e-07, "loss": 0.0557, "step": 12102 }, { "epoch": 2.753811149032992, "grad_norm": 2.936689423972923, "learning_rate": 5.25838750967641e-07, "loss": 0.038, "step": 12103 }, { "epoch": 2.7540386803185437, "grad_norm": 1.9105014296836387, "learning_rate": 5.257505324139486e-07, "loss": 0.0408, "step": 12104 }, { "epoch": 2.7542662116040955, "grad_norm": 1.6624953282397827, "learning_rate": 5.256623158887334e-07, "loss": 0.0186, "step": 12105 }, { "epoch": 2.754493742889647, "grad_norm": 0.9077475462924169, "learning_rate": 5.255741013937987e-07, "loss": 0.0157, "step": 12106 }, { "epoch": 2.754721274175199, "grad_norm": 1.3096276603728436, "learning_rate": 5.254858889309474e-07, "loss": 0.0223, "step": 12107 }, { "epoch": 2.7549488054607507, "grad_norm": 1.4060098039664108, "learning_rate": 5.253976785019819e-07, "loss": 0.084, "step": 12108 }, { "epoch": 2.7551763367463025, "grad_norm": 2.110372668760082, "learning_rate": 5.253094701087057e-07, "loss": 0.0749, "step": 12109 }, { "epoch": 2.755403868031854, "grad_norm": 1.1410185084373619, "learning_rate": 5.252212637529211e-07, "loss": 0.0944, "step": 12110 }, { "epoch": 2.755631399317406, "grad_norm": 1.4070454260225131, "learning_rate": 5.251330594364315e-07, "loss": 0.0908, "step": 12111 }, { "epoch": 2.755858930602958, "grad_norm": 2.094845528241903, "learning_rate": 5.25044857161039e-07, "loss": 0.0858, "step": 12112 }, { "epoch": 2.7560864618885095, "grad_norm": 1.7148031244715007, "learning_rate": 5.249566569285467e-07, "loss": 0.0226, "step": 12113 }, { "epoch": 2.7563139931740617, "grad_norm": 1.5019111426088385, "learning_rate": 5.248684587407572e-07, "loss": 0.078, "step": 12114 }, { "epoch": 2.756541524459613, "grad_norm": 2.829903177953358, "learning_rate": 5.247802625994728e-07, "loss": 0.054, "step": 12115 }, { "epoch": 2.756769055745165, "grad_norm": 1.5646008682581514, "learning_rate": 5.246920685064963e-07, "loss": 0.0869, "step": 12116 }, { "epoch": 2.7569965870307165, "grad_norm": 2.0097133134052876, "learning_rate": 5.246038764636302e-07, "loss": 0.0797, "step": 12117 }, { "epoch": 2.7572241183162687, "grad_norm": 1.7291655514161406, "learning_rate": 5.245156864726772e-07, "loss": 0.0587, "step": 12118 }, { "epoch": 2.75745164960182, "grad_norm": 2.4362982192817926, "learning_rate": 5.244274985354394e-07, "loss": 0.1013, "step": 12119 }, { "epoch": 2.757679180887372, "grad_norm": 1.7353110668053062, "learning_rate": 5.243393126537194e-07, "loss": 0.1022, "step": 12120 }, { "epoch": 2.757906712172924, "grad_norm": 1.8190009698340397, "learning_rate": 5.242511288293195e-07, "loss": 0.0563, "step": 12121 }, { "epoch": 2.7581342434584757, "grad_norm": 1.6045598612022116, "learning_rate": 5.241629470640418e-07, "loss": 0.0988, "step": 12122 }, { "epoch": 2.7583617747440274, "grad_norm": 1.269048590347046, "learning_rate": 5.240747673596891e-07, "loss": 0.0431, "step": 12123 }, { "epoch": 2.758589306029579, "grad_norm": 1.5659722205962732, "learning_rate": 5.23986589718063e-07, "loss": 0.0775, "step": 12124 }, { "epoch": 2.758816837315131, "grad_norm": 2.5101791033110277, "learning_rate": 5.238984141409662e-07, "loss": 0.0652, "step": 12125 }, { "epoch": 2.7590443686006827, "grad_norm": 2.2206592217384746, "learning_rate": 5.238102406302008e-07, "loss": 0.1464, "step": 12126 }, { "epoch": 2.7592718998862344, "grad_norm": 1.1999519037779047, "learning_rate": 5.237220691875685e-07, "loss": 0.0418, "step": 12127 }, { "epoch": 2.759499431171786, "grad_norm": 1.5572707703451205, "learning_rate": 5.236338998148717e-07, "loss": 0.027, "step": 12128 }, { "epoch": 2.759726962457338, "grad_norm": 2.161324468758101, "learning_rate": 5.235457325139123e-07, "loss": 0.0519, "step": 12129 }, { "epoch": 2.7599544937428897, "grad_norm": 2.31080024167947, "learning_rate": 5.234575672864926e-07, "loss": 0.0737, "step": 12130 }, { "epoch": 2.7601820250284415, "grad_norm": 1.5978935473644693, "learning_rate": 5.233694041344141e-07, "loss": 0.119, "step": 12131 }, { "epoch": 2.760409556313993, "grad_norm": 2.0925011904882376, "learning_rate": 5.232812430594786e-07, "loss": 0.0876, "step": 12132 }, { "epoch": 2.760637087599545, "grad_norm": 0.8010434645413718, "learning_rate": 5.231930840634886e-07, "loss": 0.0127, "step": 12133 }, { "epoch": 2.7608646188850967, "grad_norm": 1.826437570266488, "learning_rate": 5.231049271482453e-07, "loss": 0.0242, "step": 12134 }, { "epoch": 2.7610921501706485, "grad_norm": 1.2778350333573862, "learning_rate": 5.230167723155507e-07, "loss": 0.0979, "step": 12135 }, { "epoch": 2.7613196814562, "grad_norm": 1.410990414720308, "learning_rate": 5.229286195672064e-07, "loss": 0.0583, "step": 12136 }, { "epoch": 2.761547212741752, "grad_norm": 2.4595869924570373, "learning_rate": 5.228404689050143e-07, "loss": 0.0587, "step": 12137 }, { "epoch": 2.7617747440273037, "grad_norm": 2.024205527952812, "learning_rate": 5.227523203307759e-07, "loss": 0.0517, "step": 12138 }, { "epoch": 2.7620022753128555, "grad_norm": 2.7879332932790755, "learning_rate": 5.226641738462928e-07, "loss": 0.0774, "step": 12139 }, { "epoch": 2.7622298065984072, "grad_norm": 2.2992094476984666, "learning_rate": 5.225760294533667e-07, "loss": 0.0756, "step": 12140 }, { "epoch": 2.762457337883959, "grad_norm": 1.412991446443035, "learning_rate": 5.224878871537987e-07, "loss": 0.0313, "step": 12141 }, { "epoch": 2.7626848691695107, "grad_norm": 1.159642190020476, "learning_rate": 5.223997469493907e-07, "loss": 0.0722, "step": 12142 }, { "epoch": 2.7629124004550625, "grad_norm": 1.5661839061880365, "learning_rate": 5.223116088419439e-07, "loss": 0.0678, "step": 12143 }, { "epoch": 2.7631399317406142, "grad_norm": 1.3623613179044811, "learning_rate": 5.222234728332601e-07, "loss": 0.0261, "step": 12144 }, { "epoch": 2.763367463026166, "grad_norm": 1.9192638282265198, "learning_rate": 5.221353389251399e-07, "loss": 0.0594, "step": 12145 }, { "epoch": 2.7635949943117177, "grad_norm": 1.9691628606028848, "learning_rate": 5.22047207119385e-07, "loss": 0.0338, "step": 12146 }, { "epoch": 2.7638225255972695, "grad_norm": 1.3920378601317596, "learning_rate": 5.219590774177969e-07, "loss": 0.0721, "step": 12147 }, { "epoch": 2.7640500568828212, "grad_norm": 3.172767046611389, "learning_rate": 5.218709498221762e-07, "loss": 0.0575, "step": 12148 }, { "epoch": 2.764277588168373, "grad_norm": 1.3911258924800964, "learning_rate": 5.217828243343246e-07, "loss": 0.0371, "step": 12149 }, { "epoch": 2.7645051194539247, "grad_norm": 1.9464647521375762, "learning_rate": 5.216947009560433e-07, "loss": 0.0295, "step": 12150 }, { "epoch": 2.764732650739477, "grad_norm": 1.422155614440322, "learning_rate": 5.216065796891327e-07, "loss": 0.0312, "step": 12151 }, { "epoch": 2.7649601820250282, "grad_norm": 1.3415724347333529, "learning_rate": 5.215184605353944e-07, "loss": 0.1001, "step": 12152 }, { "epoch": 2.7651877133105804, "grad_norm": 1.453026328782809, "learning_rate": 5.214303434966292e-07, "loss": 0.0235, "step": 12153 }, { "epoch": 2.7654152445961317, "grad_norm": 1.7174975025426644, "learning_rate": 5.213422285746382e-07, "loss": 0.1372, "step": 12154 }, { "epoch": 2.765642775881684, "grad_norm": 1.402588947128672, "learning_rate": 5.212541157712221e-07, "loss": 0.0229, "step": 12155 }, { "epoch": 2.7658703071672353, "grad_norm": 1.5959647329509061, "learning_rate": 5.21166005088182e-07, "loss": 0.1051, "step": 12156 }, { "epoch": 2.7660978384527874, "grad_norm": 1.4667626541589496, "learning_rate": 5.210778965273187e-07, "loss": 0.083, "step": 12157 }, { "epoch": 2.7663253697383388, "grad_norm": 1.2453385412502096, "learning_rate": 5.209897900904325e-07, "loss": 0.0607, "step": 12158 }, { "epoch": 2.766552901023891, "grad_norm": 1.4094894081388891, "learning_rate": 5.209016857793248e-07, "loss": 0.0743, "step": 12159 }, { "epoch": 2.7667804323094427, "grad_norm": 1.3773278897579952, "learning_rate": 5.208135835957958e-07, "loss": 0.0435, "step": 12160 }, { "epoch": 2.7670079635949945, "grad_norm": 1.2459073085886434, "learning_rate": 5.207254835416466e-07, "loss": 0.083, "step": 12161 }, { "epoch": 2.767235494880546, "grad_norm": 2.195303065503422, "learning_rate": 5.206373856186772e-07, "loss": 0.0331, "step": 12162 }, { "epoch": 2.767463026166098, "grad_norm": 1.6497747016331055, "learning_rate": 5.205492898286888e-07, "loss": 0.037, "step": 12163 }, { "epoch": 2.7676905574516497, "grad_norm": 0.9465214462930082, "learning_rate": 5.204611961734815e-07, "loss": 0.0726, "step": 12164 }, { "epoch": 2.7679180887372015, "grad_norm": 1.3696083029332753, "learning_rate": 5.203731046548559e-07, "loss": 0.0463, "step": 12165 }, { "epoch": 2.768145620022753, "grad_norm": 1.6307849803018264, "learning_rate": 5.202850152746124e-07, "loss": 0.0342, "step": 12166 }, { "epoch": 2.768373151308305, "grad_norm": 1.2837995931755728, "learning_rate": 5.201969280345514e-07, "loss": 0.0313, "step": 12167 }, { "epoch": 2.7686006825938567, "grad_norm": 1.6074497916504513, "learning_rate": 5.201088429364734e-07, "loss": 0.1015, "step": 12168 }, { "epoch": 2.7688282138794085, "grad_norm": 1.811173253174337, "learning_rate": 5.200207599821786e-07, "loss": 0.0388, "step": 12169 }, { "epoch": 2.7690557451649602, "grad_norm": 2.661325013645662, "learning_rate": 5.199326791734669e-07, "loss": 0.0439, "step": 12170 }, { "epoch": 2.769283276450512, "grad_norm": 1.3987337693570199, "learning_rate": 5.198446005121391e-07, "loss": 0.083, "step": 12171 }, { "epoch": 2.7695108077360637, "grad_norm": 1.7880136763493284, "learning_rate": 5.197565239999948e-07, "loss": 0.0392, "step": 12172 }, { "epoch": 2.7697383390216155, "grad_norm": 1.6604869525657775, "learning_rate": 5.196684496388347e-07, "loss": 0.0235, "step": 12173 }, { "epoch": 2.7699658703071672, "grad_norm": 1.5676680683618782, "learning_rate": 5.195803774304583e-07, "loss": 0.0323, "step": 12174 }, { "epoch": 2.770193401592719, "grad_norm": 0.902630672432121, "learning_rate": 5.194923073766661e-07, "loss": 0.045, "step": 12175 }, { "epoch": 2.7704209328782707, "grad_norm": 1.643476401514589, "learning_rate": 5.19404239479258e-07, "loss": 0.053, "step": 12176 }, { "epoch": 2.7706484641638225, "grad_norm": 1.3782031422655932, "learning_rate": 5.193161737400336e-07, "loss": 0.0981, "step": 12177 }, { "epoch": 2.7708759954493742, "grad_norm": 1.4175520771738424, "learning_rate": 5.192281101607934e-07, "loss": 0.1065, "step": 12178 }, { "epoch": 2.771103526734926, "grad_norm": 2.222691424474468, "learning_rate": 5.191400487433365e-07, "loss": 0.0601, "step": 12179 }, { "epoch": 2.7713310580204777, "grad_norm": 1.7563829118863181, "learning_rate": 5.190519894894633e-07, "loss": 0.0655, "step": 12180 }, { "epoch": 2.7715585893060295, "grad_norm": 2.501059340185149, "learning_rate": 5.189639324009734e-07, "loss": 0.0212, "step": 12181 }, { "epoch": 2.7717861205915812, "grad_norm": 1.7318724834749595, "learning_rate": 5.188758774796666e-07, "loss": 0.1064, "step": 12182 }, { "epoch": 2.772013651877133, "grad_norm": 2.249818106174672, "learning_rate": 5.187878247273425e-07, "loss": 0.1558, "step": 12183 }, { "epoch": 2.7722411831626848, "grad_norm": 1.5118256436121829, "learning_rate": 5.186997741458007e-07, "loss": 0.0421, "step": 12184 }, { "epoch": 2.7724687144482365, "grad_norm": 1.4534382932361514, "learning_rate": 5.186117257368409e-07, "loss": 0.0152, "step": 12185 }, { "epoch": 2.7726962457337883, "grad_norm": 1.7942634189922417, "learning_rate": 5.185236795022624e-07, "loss": 0.0309, "step": 12186 }, { "epoch": 2.77292377701934, "grad_norm": 2.007085409045853, "learning_rate": 5.184356354438651e-07, "loss": 0.0445, "step": 12187 }, { "epoch": 2.7731513083048918, "grad_norm": 1.3398724063346026, "learning_rate": 5.183475935634483e-07, "loss": 0.0253, "step": 12188 }, { "epoch": 2.7733788395904435, "grad_norm": 1.2310317761070702, "learning_rate": 5.182595538628111e-07, "loss": 0.0621, "step": 12189 }, { "epoch": 2.7736063708759957, "grad_norm": 1.6304094234701139, "learning_rate": 5.181715163437534e-07, "loss": 0.0231, "step": 12190 }, { "epoch": 2.773833902161547, "grad_norm": 1.531162888612821, "learning_rate": 5.18083481008074e-07, "loss": 0.0581, "step": 12191 }, { "epoch": 2.774061433447099, "grad_norm": 1.4031524119607783, "learning_rate": 5.179954478575725e-07, "loss": 0.0209, "step": 12192 }, { "epoch": 2.7742889647326505, "grad_norm": 1.5420581091070023, "learning_rate": 5.179074168940481e-07, "loss": 0.0233, "step": 12193 }, { "epoch": 2.7745164960182027, "grad_norm": 1.565794948766029, "learning_rate": 5.178193881193002e-07, "loss": 0.0266, "step": 12194 }, { "epoch": 2.774744027303754, "grad_norm": 1.9736828179935382, "learning_rate": 5.177313615351275e-07, "loss": 0.0412, "step": 12195 }, { "epoch": 2.774971558589306, "grad_norm": 1.5529832227257394, "learning_rate": 5.176433371433293e-07, "loss": 0.0422, "step": 12196 }, { "epoch": 2.7751990898748575, "grad_norm": 1.350521932815335, "learning_rate": 5.175553149457048e-07, "loss": 0.074, "step": 12197 }, { "epoch": 2.7754266211604097, "grad_norm": 1.3648981932144715, "learning_rate": 5.174672949440527e-07, "loss": 0.115, "step": 12198 }, { "epoch": 2.7756541524459615, "grad_norm": 1.669018837757576, "learning_rate": 5.173792771401723e-07, "loss": 0.0549, "step": 12199 }, { "epoch": 2.7758816837315132, "grad_norm": 1.4047994723088844, "learning_rate": 5.172912615358622e-07, "loss": 0.0555, "step": 12200 }, { "epoch": 2.776109215017065, "grad_norm": 1.7328512200268817, "learning_rate": 5.172032481329217e-07, "loss": 0.1626, "step": 12201 }, { "epoch": 2.7763367463026167, "grad_norm": 1.5251369740609149, "learning_rate": 5.171152369331493e-07, "loss": 0.1153, "step": 12202 }, { "epoch": 2.7765642775881685, "grad_norm": 1.8340899578826177, "learning_rate": 5.170272279383438e-07, "loss": 0.044, "step": 12203 }, { "epoch": 2.7767918088737202, "grad_norm": 5.630225218263727, "learning_rate": 5.169392211503043e-07, "loss": 0.0553, "step": 12204 }, { "epoch": 2.777019340159272, "grad_norm": 0.8289924964611158, "learning_rate": 5.168512165708288e-07, "loss": 0.0254, "step": 12205 }, { "epoch": 2.7772468714448237, "grad_norm": 1.7494650795976268, "learning_rate": 5.167632142017166e-07, "loss": 0.1079, "step": 12206 }, { "epoch": 2.7774744027303755, "grad_norm": 1.888284786770326, "learning_rate": 5.166752140447662e-07, "loss": 0.0532, "step": 12207 }, { "epoch": 2.7777019340159272, "grad_norm": 2.181221837373535, "learning_rate": 5.165872161017757e-07, "loss": 0.105, "step": 12208 }, { "epoch": 2.777929465301479, "grad_norm": 1.518042486660619, "learning_rate": 5.164992203745441e-07, "loss": 0.0413, "step": 12209 }, { "epoch": 2.7781569965870307, "grad_norm": 1.7427457424320367, "learning_rate": 5.164112268648697e-07, "loss": 0.1157, "step": 12210 }, { "epoch": 2.7783845278725825, "grad_norm": 0.9454533154025608, "learning_rate": 5.163232355745512e-07, "loss": 0.0494, "step": 12211 }, { "epoch": 2.7786120591581343, "grad_norm": 2.6409240983443354, "learning_rate": 5.162352465053863e-07, "loss": 0.0831, "step": 12212 }, { "epoch": 2.778839590443686, "grad_norm": 1.5741247383308825, "learning_rate": 5.161472596591741e-07, "loss": 0.0531, "step": 12213 }, { "epoch": 2.7790671217292378, "grad_norm": 0.7465636426829159, "learning_rate": 5.160592750377127e-07, "loss": 0.0085, "step": 12214 }, { "epoch": 2.7792946530147895, "grad_norm": 1.3491098610631436, "learning_rate": 5.159712926427997e-07, "loss": 0.056, "step": 12215 }, { "epoch": 2.7795221843003413, "grad_norm": 1.8463257498515748, "learning_rate": 5.158833124762341e-07, "loss": 0.0538, "step": 12216 }, { "epoch": 2.779749715585893, "grad_norm": 2.3046154431717807, "learning_rate": 5.157953345398136e-07, "loss": 0.0496, "step": 12217 }, { "epoch": 2.7799772468714448, "grad_norm": 1.5500816613229578, "learning_rate": 5.157073588353367e-07, "loss": 0.025, "step": 12218 }, { "epoch": 2.7802047781569965, "grad_norm": 2.7523155132413923, "learning_rate": 5.15619385364601e-07, "loss": 0.094, "step": 12219 }, { "epoch": 2.7804323094425483, "grad_norm": 1.741042215108274, "learning_rate": 5.155314141294049e-07, "loss": 0.0537, "step": 12220 }, { "epoch": 2.7806598407281, "grad_norm": 1.3687480431208716, "learning_rate": 5.154434451315464e-07, "loss": 0.0321, "step": 12221 }, { "epoch": 2.7808873720136518, "grad_norm": 1.4893823113143054, "learning_rate": 5.153554783728229e-07, "loss": 0.1082, "step": 12222 }, { "epoch": 2.7811149032992035, "grad_norm": 1.4957410259752777, "learning_rate": 5.152675138550327e-07, "loss": 0.0442, "step": 12223 }, { "epoch": 2.7813424345847553, "grad_norm": 1.9627156435559217, "learning_rate": 5.151795515799734e-07, "loss": 0.1194, "step": 12224 }, { "epoch": 2.781569965870307, "grad_norm": 1.222455640326145, "learning_rate": 5.150915915494432e-07, "loss": 0.0301, "step": 12225 }, { "epoch": 2.781797497155859, "grad_norm": 1.6327498170848338, "learning_rate": 5.150036337652396e-07, "loss": 0.075, "step": 12226 }, { "epoch": 2.7820250284414105, "grad_norm": 2.057655937308811, "learning_rate": 5.1491567822916e-07, "loss": 0.0332, "step": 12227 }, { "epoch": 2.7822525597269623, "grad_norm": 1.360403921647894, "learning_rate": 5.148277249430026e-07, "loss": 0.0649, "step": 12228 }, { "epoch": 2.7824800910125145, "grad_norm": 1.6975688169831502, "learning_rate": 5.147397739085646e-07, "loss": 0.0594, "step": 12229 }, { "epoch": 2.782707622298066, "grad_norm": 1.48364301153908, "learning_rate": 5.146518251276437e-07, "loss": 0.1389, "step": 12230 }, { "epoch": 2.782935153583618, "grad_norm": 1.22522332645999, "learning_rate": 5.145638786020373e-07, "loss": 0.0807, "step": 12231 }, { "epoch": 2.7831626848691693, "grad_norm": 1.674198819716318, "learning_rate": 5.144759343335433e-07, "loss": 0.0311, "step": 12232 }, { "epoch": 2.7833902161547215, "grad_norm": 2.0939032982029238, "learning_rate": 5.143879923239586e-07, "loss": 0.0566, "step": 12233 }, { "epoch": 2.783617747440273, "grad_norm": 2.439692930044025, "learning_rate": 5.143000525750805e-07, "loss": 0.0359, "step": 12234 }, { "epoch": 2.783845278725825, "grad_norm": 1.2813505781849315, "learning_rate": 5.142121150887071e-07, "loss": 0.096, "step": 12235 }, { "epoch": 2.7840728100113763, "grad_norm": 0.9818323011138255, "learning_rate": 5.141241798666347e-07, "loss": 0.0524, "step": 12236 }, { "epoch": 2.7843003412969285, "grad_norm": 1.1476763506129066, "learning_rate": 5.140362469106612e-07, "loss": 0.0411, "step": 12237 }, { "epoch": 2.7845278725824802, "grad_norm": 1.7478285453387674, "learning_rate": 5.139483162225835e-07, "loss": 0.1031, "step": 12238 }, { "epoch": 2.784755403868032, "grad_norm": 1.0411658571547224, "learning_rate": 5.138603878041991e-07, "loss": 0.0917, "step": 12239 }, { "epoch": 2.7849829351535837, "grad_norm": 1.4709236238988233, "learning_rate": 5.137724616573047e-07, "loss": 0.0749, "step": 12240 }, { "epoch": 2.7852104664391355, "grad_norm": 1.805661626702292, "learning_rate": 5.136845377836973e-07, "loss": 0.052, "step": 12241 }, { "epoch": 2.7854379977246873, "grad_norm": 1.090631697775416, "learning_rate": 5.135966161851743e-07, "loss": 0.0641, "step": 12242 }, { "epoch": 2.785665529010239, "grad_norm": 2.8578666100975285, "learning_rate": 5.135086968635321e-07, "loss": 0.0413, "step": 12243 }, { "epoch": 2.7858930602957908, "grad_norm": 1.190915535505636, "learning_rate": 5.134207798205684e-07, "loss": 0.0383, "step": 12244 }, { "epoch": 2.7861205915813425, "grad_norm": 1.2228693602059364, "learning_rate": 5.133328650580796e-07, "loss": 0.0331, "step": 12245 }, { "epoch": 2.7863481228668943, "grad_norm": 1.4527863515875543, "learning_rate": 5.13244952577862e-07, "loss": 0.1236, "step": 12246 }, { "epoch": 2.786575654152446, "grad_norm": 1.830690124565898, "learning_rate": 5.131570423817134e-07, "loss": 0.0357, "step": 12247 }, { "epoch": 2.7868031854379978, "grad_norm": 1.3110813322012624, "learning_rate": 5.130691344714298e-07, "loss": 0.0804, "step": 12248 }, { "epoch": 2.7870307167235495, "grad_norm": 1.567034408439154, "learning_rate": 5.129812288488081e-07, "loss": 0.0427, "step": 12249 }, { "epoch": 2.7872582480091013, "grad_norm": 2.0913174217797237, "learning_rate": 5.12893325515645e-07, "loss": 0.0445, "step": 12250 }, { "epoch": 2.787485779294653, "grad_norm": 1.556094869248945, "learning_rate": 5.128054244737371e-07, "loss": 0.0309, "step": 12251 }, { "epoch": 2.7877133105802048, "grad_norm": 1.608796810619742, "learning_rate": 5.127175257248808e-07, "loss": 0.0409, "step": 12252 }, { "epoch": 2.7879408418657565, "grad_norm": 1.7382306697212497, "learning_rate": 5.126296292708724e-07, "loss": 0.1665, "step": 12253 }, { "epoch": 2.7881683731513083, "grad_norm": 2.6884884287099156, "learning_rate": 5.12541735113509e-07, "loss": 0.0423, "step": 12254 }, { "epoch": 2.78839590443686, "grad_norm": 1.5737141436643975, "learning_rate": 5.124538432545863e-07, "loss": 0.062, "step": 12255 }, { "epoch": 2.788623435722412, "grad_norm": 1.9289931058952763, "learning_rate": 5.12365953695901e-07, "loss": 0.0549, "step": 12256 }, { "epoch": 2.7888509670079635, "grad_norm": 1.9945520978029396, "learning_rate": 5.122780664392494e-07, "loss": 0.0821, "step": 12257 }, { "epoch": 2.7890784982935153, "grad_norm": 0.6067548461888984, "learning_rate": 5.121901814864274e-07, "loss": 0.0312, "step": 12258 }, { "epoch": 2.789306029579067, "grad_norm": 2.319292829326023, "learning_rate": 5.121022988392318e-07, "loss": 0.0486, "step": 12259 }, { "epoch": 2.789533560864619, "grad_norm": 2.236301027470577, "learning_rate": 5.120144184994582e-07, "loss": 0.0848, "step": 12260 }, { "epoch": 2.7897610921501705, "grad_norm": 1.2192934605821484, "learning_rate": 5.119265404689032e-07, "loss": 0.0299, "step": 12261 }, { "epoch": 2.7899886234357223, "grad_norm": 1.1353894461463763, "learning_rate": 5.118386647493624e-07, "loss": 0.0806, "step": 12262 }, { "epoch": 2.790216154721274, "grad_norm": 1.6119174618138534, "learning_rate": 5.117507913426323e-07, "loss": 0.0735, "step": 12263 }, { "epoch": 2.790443686006826, "grad_norm": 2.1746787658775846, "learning_rate": 5.116629202505086e-07, "loss": 0.0979, "step": 12264 }, { "epoch": 2.7906712172923775, "grad_norm": 1.3004128727011313, "learning_rate": 5.115750514747869e-07, "loss": 0.0239, "step": 12265 }, { "epoch": 2.7908987485779293, "grad_norm": 0.9005980023916212, "learning_rate": 5.114871850172637e-07, "loss": 0.0113, "step": 12266 }, { "epoch": 2.791126279863481, "grad_norm": 18.99094773180802, "learning_rate": 5.113993208797344e-07, "loss": 0.087, "step": 12267 }, { "epoch": 2.7913538111490332, "grad_norm": 1.8926843955212302, "learning_rate": 5.113114590639952e-07, "loss": 0.0329, "step": 12268 }, { "epoch": 2.7915813424345846, "grad_norm": 2.194519609303125, "learning_rate": 5.112235995718413e-07, "loss": 0.1198, "step": 12269 }, { "epoch": 2.7918088737201368, "grad_norm": 0.9777184224853189, "learning_rate": 5.111357424050688e-07, "loss": 0.0129, "step": 12270 }, { "epoch": 2.792036405005688, "grad_norm": 1.3708293366675572, "learning_rate": 5.110478875654733e-07, "loss": 0.0289, "step": 12271 }, { "epoch": 2.7922639362912403, "grad_norm": 1.4849906419129426, "learning_rate": 5.1096003505485e-07, "loss": 0.0362, "step": 12272 }, { "epoch": 2.7924914675767916, "grad_norm": 1.242713912752343, "learning_rate": 5.108721848749948e-07, "loss": 0.0329, "step": 12273 }, { "epoch": 2.7927189988623438, "grad_norm": 1.5522042050573246, "learning_rate": 5.107843370277031e-07, "loss": 0.0481, "step": 12274 }, { "epoch": 2.792946530147895, "grad_norm": 1.032190530458261, "learning_rate": 5.106964915147706e-07, "loss": 0.0195, "step": 12275 }, { "epoch": 2.7931740614334473, "grad_norm": 2.416258344074278, "learning_rate": 5.106086483379924e-07, "loss": 0.0503, "step": 12276 }, { "epoch": 2.793401592718999, "grad_norm": 1.7949214895279773, "learning_rate": 5.105208074991637e-07, "loss": 0.0858, "step": 12277 }, { "epoch": 2.7936291240045508, "grad_norm": 1.961449951231605, "learning_rate": 5.104329690000803e-07, "loss": 0.0755, "step": 12278 }, { "epoch": 2.7938566552901025, "grad_norm": 1.0633626248506123, "learning_rate": 5.103451328425369e-07, "loss": 0.0608, "step": 12279 }, { "epoch": 2.7940841865756543, "grad_norm": 1.545190454911096, "learning_rate": 5.102572990283292e-07, "loss": 0.0257, "step": 12280 }, { "epoch": 2.794311717861206, "grad_norm": 2.335654261650082, "learning_rate": 5.101694675592521e-07, "loss": 0.0811, "step": 12281 }, { "epoch": 2.7945392491467578, "grad_norm": 1.4614868365467397, "learning_rate": 5.100816384371011e-07, "loss": 0.065, "step": 12282 }, { "epoch": 2.7947667804323095, "grad_norm": 1.584286920665836, "learning_rate": 5.099938116636706e-07, "loss": 0.04, "step": 12283 }, { "epoch": 2.7949943117178613, "grad_norm": 1.2770199174465084, "learning_rate": 5.09905987240756e-07, "loss": 0.058, "step": 12284 }, { "epoch": 2.795221843003413, "grad_norm": 1.7691017143070138, "learning_rate": 5.098181651701525e-07, "loss": 0.1022, "step": 12285 }, { "epoch": 2.795449374288965, "grad_norm": 3.0215419660678133, "learning_rate": 5.097303454536544e-07, "loss": 0.0576, "step": 12286 }, { "epoch": 2.7956769055745165, "grad_norm": 1.9792832847166897, "learning_rate": 5.096425280930571e-07, "loss": 0.0505, "step": 12287 }, { "epoch": 2.7959044368600683, "grad_norm": 1.3808414150890638, "learning_rate": 5.095547130901551e-07, "loss": 0.0261, "step": 12288 }, { "epoch": 2.79613196814562, "grad_norm": 1.4593764489085268, "learning_rate": 5.094669004467437e-07, "loss": 0.0624, "step": 12289 }, { "epoch": 2.796359499431172, "grad_norm": 2.1029388086629637, "learning_rate": 5.09379090164617e-07, "loss": 0.0504, "step": 12290 }, { "epoch": 2.7965870307167235, "grad_norm": 1.4481784527310362, "learning_rate": 5.0929128224557e-07, "loss": 0.0487, "step": 12291 }, { "epoch": 2.7968145620022753, "grad_norm": 1.5298884466690477, "learning_rate": 5.092034766913974e-07, "loss": 0.1035, "step": 12292 }, { "epoch": 2.797042093287827, "grad_norm": 1.525209612971406, "learning_rate": 5.091156735038934e-07, "loss": 0.1107, "step": 12293 }, { "epoch": 2.797269624573379, "grad_norm": 2.715117428578005, "learning_rate": 5.090278726848528e-07, "loss": 0.0583, "step": 12294 }, { "epoch": 2.7974971558589306, "grad_norm": 1.4403052323024323, "learning_rate": 5.089400742360705e-07, "loss": 0.0575, "step": 12295 }, { "epoch": 2.7977246871444823, "grad_norm": 1.5261036385848765, "learning_rate": 5.088522781593401e-07, "loss": 0.072, "step": 12296 }, { "epoch": 2.797952218430034, "grad_norm": 1.95312188190803, "learning_rate": 5.087644844564567e-07, "loss": 0.073, "step": 12297 }, { "epoch": 2.798179749715586, "grad_norm": 1.7189803240960893, "learning_rate": 5.086766931292141e-07, "loss": 0.0502, "step": 12298 }, { "epoch": 2.7984072810011376, "grad_norm": 1.55591359959273, "learning_rate": 5.085889041794071e-07, "loss": 0.0393, "step": 12299 }, { "epoch": 2.7986348122866893, "grad_norm": 1.6906082363119674, "learning_rate": 5.085011176088295e-07, "loss": 0.0519, "step": 12300 }, { "epoch": 2.798862343572241, "grad_norm": 1.0406236961342687, "learning_rate": 5.084133334192758e-07, "loss": 0.062, "step": 12301 }, { "epoch": 2.799089874857793, "grad_norm": 1.2480498527819928, "learning_rate": 5.083255516125401e-07, "loss": 0.1554, "step": 12302 }, { "epoch": 2.7993174061433446, "grad_norm": 3.1122617552322165, "learning_rate": 5.082377721904164e-07, "loss": 0.0717, "step": 12303 }, { "epoch": 2.7995449374288963, "grad_norm": 1.875193112214049, "learning_rate": 5.081499951546988e-07, "loss": 0.0533, "step": 12304 }, { "epoch": 2.799772468714448, "grad_norm": 1.4200797850927744, "learning_rate": 5.080622205071811e-07, "loss": 0.0436, "step": 12305 }, { "epoch": 2.8, "grad_norm": 1.5303712877601887, "learning_rate": 5.079744482496577e-07, "loss": 0.021, "step": 12306 }, { "epoch": 2.800227531285552, "grad_norm": 1.090907423072394, "learning_rate": 5.07886678383922e-07, "loss": 0.0315, "step": 12307 }, { "epoch": 2.8004550625711033, "grad_norm": 2.603375864881838, "learning_rate": 5.077989109117685e-07, "loss": 0.051, "step": 12308 }, { "epoch": 2.8006825938566555, "grad_norm": 1.3820861273633438, "learning_rate": 5.077111458349905e-07, "loss": 0.0342, "step": 12309 }, { "epoch": 2.800910125142207, "grad_norm": 2.0111015163906893, "learning_rate": 5.076233831553816e-07, "loss": 0.1224, "step": 12310 }, { "epoch": 2.801137656427759, "grad_norm": 1.8310550849658265, "learning_rate": 5.075356228747362e-07, "loss": 0.1004, "step": 12311 }, { "epoch": 2.8013651877133103, "grad_norm": 2.1878506871022023, "learning_rate": 5.074478649948472e-07, "loss": 0.0382, "step": 12312 }, { "epoch": 2.8015927189988625, "grad_norm": 1.7343527862128205, "learning_rate": 5.073601095175086e-07, "loss": 0.0394, "step": 12313 }, { "epoch": 2.801820250284414, "grad_norm": 2.108713484511094, "learning_rate": 5.072723564445142e-07, "loss": 0.1806, "step": 12314 }, { "epoch": 2.802047781569966, "grad_norm": 0.799988946070974, "learning_rate": 5.071846057776569e-07, "loss": 0.0536, "step": 12315 }, { "epoch": 2.802275312855518, "grad_norm": 1.849283255062893, "learning_rate": 5.070968575187306e-07, "loss": 0.0368, "step": 12316 }, { "epoch": 2.8025028441410695, "grad_norm": 1.328453329791788, "learning_rate": 5.070091116695285e-07, "loss": 0.0985, "step": 12317 }, { "epoch": 2.8027303754266213, "grad_norm": 1.8229984490635203, "learning_rate": 5.069213682318442e-07, "loss": 0.0615, "step": 12318 }, { "epoch": 2.802957906712173, "grad_norm": 1.9429033161011857, "learning_rate": 5.068336272074708e-07, "loss": 0.0303, "step": 12319 }, { "epoch": 2.803185437997725, "grad_norm": 1.3601610043888734, "learning_rate": 5.067458885982017e-07, "loss": 0.0546, "step": 12320 }, { "epoch": 2.8034129692832765, "grad_norm": 2.2319115839978547, "learning_rate": 5.066581524058303e-07, "loss": 0.0497, "step": 12321 }, { "epoch": 2.8036405005688283, "grad_norm": 2.265321347064858, "learning_rate": 5.065704186321492e-07, "loss": 0.1447, "step": 12322 }, { "epoch": 2.80386803185438, "grad_norm": 6.67774357390038, "learning_rate": 5.064826872789519e-07, "loss": 0.0775, "step": 12323 }, { "epoch": 2.804095563139932, "grad_norm": 1.7247136886583931, "learning_rate": 5.063949583480314e-07, "loss": 0.0367, "step": 12324 }, { "epoch": 2.8043230944254836, "grad_norm": 2.4404389427006032, "learning_rate": 5.063072318411809e-07, "loss": 0.042, "step": 12325 }, { "epoch": 2.8045506257110353, "grad_norm": 1.368772928396641, "learning_rate": 5.06219507760193e-07, "loss": 0.1057, "step": 12326 }, { "epoch": 2.804778156996587, "grad_norm": 2.07756756561191, "learning_rate": 5.06131786106861e-07, "loss": 0.0707, "step": 12327 }, { "epoch": 2.805005688282139, "grad_norm": 2.536406473232437, "learning_rate": 5.060440668829776e-07, "loss": 0.1778, "step": 12328 }, { "epoch": 2.8052332195676906, "grad_norm": 1.8570614955347176, "learning_rate": 5.059563500903355e-07, "loss": 0.032, "step": 12329 }, { "epoch": 2.8054607508532423, "grad_norm": 1.3382996299070236, "learning_rate": 5.058686357307276e-07, "loss": 0.0332, "step": 12330 }, { "epoch": 2.805688282138794, "grad_norm": 0.9029123312352025, "learning_rate": 5.057809238059466e-07, "loss": 0.0258, "step": 12331 }, { "epoch": 2.805915813424346, "grad_norm": 1.4780629308306903, "learning_rate": 5.056932143177853e-07, "loss": 0.0445, "step": 12332 }, { "epoch": 2.8061433447098976, "grad_norm": 3.0605760868742786, "learning_rate": 5.056055072680362e-07, "loss": 0.0439, "step": 12333 }, { "epoch": 2.8063708759954493, "grad_norm": 2.165851844052367, "learning_rate": 5.055178026584915e-07, "loss": 0.0695, "step": 12334 }, { "epoch": 2.806598407281001, "grad_norm": 1.5834304702808875, "learning_rate": 5.054301004909447e-07, "loss": 0.0776, "step": 12335 }, { "epoch": 2.806825938566553, "grad_norm": 1.378617594086561, "learning_rate": 5.053424007671871e-07, "loss": 0.0547, "step": 12336 }, { "epoch": 2.8070534698521046, "grad_norm": 1.311156171740467, "learning_rate": 5.05254703489012e-07, "loss": 0.0461, "step": 12337 }, { "epoch": 2.8072810011376563, "grad_norm": 1.4335481881514982, "learning_rate": 5.051670086582112e-07, "loss": 0.091, "step": 12338 }, { "epoch": 2.807508532423208, "grad_norm": 1.0909550145050309, "learning_rate": 5.050793162765777e-07, "loss": 0.018, "step": 12339 }, { "epoch": 2.80773606370876, "grad_norm": 0.902858956854319, "learning_rate": 5.049916263459031e-07, "loss": 0.0132, "step": 12340 }, { "epoch": 2.8079635949943116, "grad_norm": 3.730893013634994, "learning_rate": 5.049039388679798e-07, "loss": 0.0478, "step": 12341 }, { "epoch": 2.8081911262798633, "grad_norm": 1.8555483208308425, "learning_rate": 5.048162538446003e-07, "loss": 0.0244, "step": 12342 }, { "epoch": 2.808418657565415, "grad_norm": 0.9668178866569637, "learning_rate": 5.047285712775562e-07, "loss": 0.0595, "step": 12343 }, { "epoch": 2.8086461888509673, "grad_norm": 1.416517512122083, "learning_rate": 5.046408911686399e-07, "loss": 0.0177, "step": 12344 }, { "epoch": 2.8088737201365186, "grad_norm": 1.4573013712544156, "learning_rate": 5.045532135196433e-07, "loss": 0.027, "step": 12345 }, { "epoch": 2.809101251422071, "grad_norm": 1.656284987404014, "learning_rate": 5.044655383323586e-07, "loss": 0.1011, "step": 12346 }, { "epoch": 2.809328782707622, "grad_norm": 1.7799090778571245, "learning_rate": 5.043778656085776e-07, "loss": 0.0968, "step": 12347 }, { "epoch": 2.8095563139931743, "grad_norm": 2.4250683428661026, "learning_rate": 5.042901953500918e-07, "loss": 0.0746, "step": 12348 }, { "epoch": 2.8097838452787256, "grad_norm": 2.101879157416822, "learning_rate": 5.042025275586937e-07, "loss": 0.0368, "step": 12349 }, { "epoch": 2.810011376564278, "grad_norm": 1.7216620058852863, "learning_rate": 5.041148622361744e-07, "loss": 0.0295, "step": 12350 }, { "epoch": 2.810238907849829, "grad_norm": 1.7536268990970127, "learning_rate": 5.040271993843261e-07, "loss": 0.0505, "step": 12351 }, { "epoch": 2.8104664391353813, "grad_norm": 1.5099504555454073, "learning_rate": 5.039395390049403e-07, "loss": 0.0966, "step": 12352 }, { "epoch": 2.8106939704209326, "grad_norm": 0.8946909277068094, "learning_rate": 5.038518810998085e-07, "loss": 0.0327, "step": 12353 }, { "epoch": 2.810921501706485, "grad_norm": 1.301586573295254, "learning_rate": 5.037642256707225e-07, "loss": 0.083, "step": 12354 }, { "epoch": 2.8111490329920366, "grad_norm": 2.2651479595471113, "learning_rate": 5.036765727194735e-07, "loss": 0.0477, "step": 12355 }, { "epoch": 2.8113765642775883, "grad_norm": 2.5046704801364013, "learning_rate": 5.035889222478535e-07, "loss": 0.0372, "step": 12356 }, { "epoch": 2.81160409556314, "grad_norm": 2.946512602659083, "learning_rate": 5.035012742576532e-07, "loss": 0.0677, "step": 12357 }, { "epoch": 2.811831626848692, "grad_norm": 1.2816788794585832, "learning_rate": 5.034136287506645e-07, "loss": 0.0108, "step": 12358 }, { "epoch": 2.8120591581342436, "grad_norm": 1.3098601617100452, "learning_rate": 5.033259857286788e-07, "loss": 0.0475, "step": 12359 }, { "epoch": 2.8122866894197953, "grad_norm": 0.933190066896572, "learning_rate": 5.032383451934867e-07, "loss": 0.0233, "step": 12360 }, { "epoch": 2.812514220705347, "grad_norm": 1.434791660575219, "learning_rate": 5.0315070714688e-07, "loss": 0.0217, "step": 12361 }, { "epoch": 2.812741751990899, "grad_norm": 1.0944855461459193, "learning_rate": 5.030630715906495e-07, "loss": 0.0462, "step": 12362 }, { "epoch": 2.8129692832764506, "grad_norm": 1.1506433660613526, "learning_rate": 5.029754385265869e-07, "loss": 0.0635, "step": 12363 }, { "epoch": 2.8131968145620023, "grad_norm": 1.2440057909699478, "learning_rate": 5.028878079564827e-07, "loss": 0.0235, "step": 12364 }, { "epoch": 2.813424345847554, "grad_norm": 1.0554225504087387, "learning_rate": 5.02800179882128e-07, "loss": 0.0348, "step": 12365 }, { "epoch": 2.813651877133106, "grad_norm": 1.52473881043437, "learning_rate": 5.02712554305314e-07, "loss": 0.0521, "step": 12366 }, { "epoch": 2.8138794084186576, "grad_norm": 1.350709057613405, "learning_rate": 5.026249312278309e-07, "loss": 0.0733, "step": 12367 }, { "epoch": 2.8141069397042093, "grad_norm": 0.6906786338014513, "learning_rate": 5.025373106514707e-07, "loss": 0.0069, "step": 12368 }, { "epoch": 2.814334470989761, "grad_norm": 1.3120873599817202, "learning_rate": 5.024496925780232e-07, "loss": 0.0141, "step": 12369 }, { "epoch": 2.814562002275313, "grad_norm": 2.590022700351837, "learning_rate": 5.023620770092797e-07, "loss": 0.0661, "step": 12370 }, { "epoch": 2.8147895335608646, "grad_norm": 10.52815560853418, "learning_rate": 5.022744639470309e-07, "loss": 0.0905, "step": 12371 }, { "epoch": 2.8150170648464163, "grad_norm": 1.9209195462772353, "learning_rate": 5.02186853393067e-07, "loss": 0.1279, "step": 12372 }, { "epoch": 2.815244596131968, "grad_norm": 1.9938591227210936, "learning_rate": 5.020992453491791e-07, "loss": 0.1966, "step": 12373 }, { "epoch": 2.81547212741752, "grad_norm": 2.1642354679618654, "learning_rate": 5.020116398171574e-07, "loss": 0.0454, "step": 12374 }, { "epoch": 2.8156996587030716, "grad_norm": 1.1145364785562817, "learning_rate": 5.019240367987927e-07, "loss": 0.0206, "step": 12375 }, { "epoch": 2.8159271899886233, "grad_norm": 1.796379629083275, "learning_rate": 5.01836436295875e-07, "loss": 0.053, "step": 12376 }, { "epoch": 2.816154721274175, "grad_norm": 1.7575800155381038, "learning_rate": 5.017488383101952e-07, "loss": 0.0403, "step": 12377 }, { "epoch": 2.816382252559727, "grad_norm": 1.0044034705776776, "learning_rate": 5.016612428435436e-07, "loss": 0.0364, "step": 12378 }, { "epoch": 2.8166097838452786, "grad_norm": 1.4626956773592394, "learning_rate": 5.0157364989771e-07, "loss": 0.0445, "step": 12379 }, { "epoch": 2.8168373151308304, "grad_norm": 2.027906395203137, "learning_rate": 5.014860594744851e-07, "loss": 0.0636, "step": 12380 }, { "epoch": 2.817064846416382, "grad_norm": 2.4455879116155828, "learning_rate": 5.013984715756588e-07, "loss": 0.0654, "step": 12381 }, { "epoch": 2.817292377701934, "grad_norm": 2.6542700799081054, "learning_rate": 5.013108862030216e-07, "loss": 0.0631, "step": 12382 }, { "epoch": 2.817519908987486, "grad_norm": 1.7161997124367638, "learning_rate": 5.012233033583632e-07, "loss": 0.0736, "step": 12383 }, { "epoch": 2.8177474402730374, "grad_norm": 2.211421659913044, "learning_rate": 5.011357230434738e-07, "loss": 0.0727, "step": 12384 }, { "epoch": 2.8179749715585896, "grad_norm": 1.4247401735108203, "learning_rate": 5.010481452601435e-07, "loss": 0.0373, "step": 12385 }, { "epoch": 2.818202502844141, "grad_norm": 1.1452134944997772, "learning_rate": 5.009605700101619e-07, "loss": 0.0707, "step": 12386 }, { "epoch": 2.818430034129693, "grad_norm": 1.380693592758634, "learning_rate": 5.008729972953192e-07, "loss": 0.1073, "step": 12387 }, { "epoch": 2.8186575654152444, "grad_norm": 2.1387931691851407, "learning_rate": 5.00785427117405e-07, "loss": 0.1084, "step": 12388 }, { "epoch": 2.8188850967007966, "grad_norm": 1.2921025625816434, "learning_rate": 5.006978594782094e-07, "loss": 0.053, "step": 12389 }, { "epoch": 2.819112627986348, "grad_norm": 1.4151858621198081, "learning_rate": 5.006102943795219e-07, "loss": 0.0189, "step": 12390 }, { "epoch": 2.8193401592719, "grad_norm": 1.628649833678102, "learning_rate": 5.005227318231319e-07, "loss": 0.1038, "step": 12391 }, { "epoch": 2.8195676905574514, "grad_norm": 1.386198387188597, "learning_rate": 5.004351718108296e-07, "loss": 0.0417, "step": 12392 }, { "epoch": 2.8197952218430036, "grad_norm": 1.6737364223001112, "learning_rate": 5.00347614344404e-07, "loss": 0.074, "step": 12393 }, { "epoch": 2.8200227531285553, "grad_norm": 2.1568060555847044, "learning_rate": 5.00260059425645e-07, "loss": 0.0486, "step": 12394 }, { "epoch": 2.820250284414107, "grad_norm": 1.7200032626066988, "learning_rate": 5.001725070563418e-07, "loss": 0.0862, "step": 12395 }, { "epoch": 2.820477815699659, "grad_norm": 1.225124004048231, "learning_rate": 5.000849572382842e-07, "loss": 0.079, "step": 12396 }, { "epoch": 2.8207053469852106, "grad_norm": 1.3521269123422093, "learning_rate": 4.999974099732612e-07, "loss": 0.1092, "step": 12397 }, { "epoch": 2.8209328782707623, "grad_norm": 1.042889632658906, "learning_rate": 4.999098652630619e-07, "loss": 0.0246, "step": 12398 }, { "epoch": 2.821160409556314, "grad_norm": 2.036624641232839, "learning_rate": 4.998223231094764e-07, "loss": 0.0495, "step": 12399 }, { "epoch": 2.821387940841866, "grad_norm": 0.9420741878629776, "learning_rate": 4.99734783514293e-07, "loss": 0.0458, "step": 12400 }, { "epoch": 2.8216154721274176, "grad_norm": 2.115538251624953, "learning_rate": 4.996472464793013e-07, "loss": 0.0457, "step": 12401 }, { "epoch": 2.8218430034129693, "grad_norm": 2.807142092411353, "learning_rate": 4.995597120062905e-07, "loss": 0.0648, "step": 12402 }, { "epoch": 2.822070534698521, "grad_norm": 1.7437297991641658, "learning_rate": 4.994721800970491e-07, "loss": 0.079, "step": 12403 }, { "epoch": 2.822298065984073, "grad_norm": 2.3437914296327467, "learning_rate": 4.993846507533666e-07, "loss": 0.0517, "step": 12404 }, { "epoch": 2.8225255972696246, "grad_norm": 2.931123900419578, "learning_rate": 4.992971239770318e-07, "loss": 0.093, "step": 12405 }, { "epoch": 2.8227531285551763, "grad_norm": 1.4773190593128183, "learning_rate": 4.992095997698337e-07, "loss": 0.0407, "step": 12406 }, { "epoch": 2.822980659840728, "grad_norm": 2.233166280634999, "learning_rate": 4.991220781335607e-07, "loss": 0.059, "step": 12407 }, { "epoch": 2.82320819112628, "grad_norm": 1.601186240847427, "learning_rate": 4.990345590700021e-07, "loss": 0.075, "step": 12408 }, { "epoch": 2.8234357224118316, "grad_norm": 1.3803137973565383, "learning_rate": 4.989470425809466e-07, "loss": 0.0315, "step": 12409 }, { "epoch": 2.8236632536973834, "grad_norm": 0.8379629741409756, "learning_rate": 4.988595286681824e-07, "loss": 0.0231, "step": 12410 }, { "epoch": 2.823890784982935, "grad_norm": 1.0538785838570583, "learning_rate": 4.987720173334985e-07, "loss": 0.0827, "step": 12411 }, { "epoch": 2.824118316268487, "grad_norm": 2.255714681390018, "learning_rate": 4.986845085786833e-07, "loss": 0.0449, "step": 12412 }, { "epoch": 2.8243458475540386, "grad_norm": 1.5073567419853724, "learning_rate": 4.985970024055256e-07, "loss": 0.0306, "step": 12413 }, { "epoch": 2.8245733788395904, "grad_norm": 1.1259786377023226, "learning_rate": 4.985094988158134e-07, "loss": 0.0248, "step": 12414 }, { "epoch": 2.824800910125142, "grad_norm": 1.8341996588862899, "learning_rate": 4.984219978113357e-07, "loss": 0.061, "step": 12415 }, { "epoch": 2.825028441410694, "grad_norm": 1.0190895430591282, "learning_rate": 4.983344993938805e-07, "loss": 0.0578, "step": 12416 }, { "epoch": 2.8252559726962456, "grad_norm": 0.8797473161172562, "learning_rate": 4.982470035652359e-07, "loss": 0.025, "step": 12417 }, { "epoch": 2.8254835039817974, "grad_norm": 1.3428069637069182, "learning_rate": 4.981595103271906e-07, "loss": 0.0316, "step": 12418 }, { "epoch": 2.825711035267349, "grad_norm": 1.607853870578795, "learning_rate": 4.980720196815325e-07, "loss": 0.0756, "step": 12419 }, { "epoch": 2.825938566552901, "grad_norm": 1.6626289748630316, "learning_rate": 4.9798453163005e-07, "loss": 0.0847, "step": 12420 }, { "epoch": 2.8261660978384526, "grad_norm": 1.0929744289021452, "learning_rate": 4.978970461745311e-07, "loss": 0.0282, "step": 12421 }, { "epoch": 2.826393629124005, "grad_norm": 2.05792960565893, "learning_rate": 4.978095633167636e-07, "loss": 0.0325, "step": 12422 }, { "epoch": 2.826621160409556, "grad_norm": 1.9386374653830654, "learning_rate": 4.97722083058536e-07, "loss": 0.0467, "step": 12423 }, { "epoch": 2.8268486916951083, "grad_norm": 1.7560068136438203, "learning_rate": 4.976346054016356e-07, "loss": 0.0509, "step": 12424 }, { "epoch": 2.8270762229806596, "grad_norm": 1.4320104044389566, "learning_rate": 4.975471303478508e-07, "loss": 0.098, "step": 12425 }, { "epoch": 2.827303754266212, "grad_norm": 1.9420060847623346, "learning_rate": 4.974596578989689e-07, "loss": 0.0659, "step": 12426 }, { "epoch": 2.827531285551763, "grad_norm": 1.8496108469552397, "learning_rate": 4.973721880567783e-07, "loss": 0.0795, "step": 12427 }, { "epoch": 2.8277588168373153, "grad_norm": 1.3683229843279332, "learning_rate": 4.972847208230666e-07, "loss": 0.0554, "step": 12428 }, { "epoch": 2.8279863481228666, "grad_norm": 1.6526307031324836, "learning_rate": 4.97197256199621e-07, "loss": 0.0272, "step": 12429 }, { "epoch": 2.828213879408419, "grad_norm": 2.3115329508105127, "learning_rate": 4.971097941882296e-07, "loss": 0.0359, "step": 12430 }, { "epoch": 2.82844141069397, "grad_norm": 1.170928089173839, "learning_rate": 4.970223347906795e-07, "loss": 0.0307, "step": 12431 }, { "epoch": 2.8286689419795223, "grad_norm": 1.4136738324940674, "learning_rate": 4.969348780087589e-07, "loss": 0.0361, "step": 12432 }, { "epoch": 2.828896473265074, "grad_norm": 1.118121855056525, "learning_rate": 4.968474238442546e-07, "loss": 0.0389, "step": 12433 }, { "epoch": 2.829124004550626, "grad_norm": 1.544821777095836, "learning_rate": 4.967599722989544e-07, "loss": 0.0722, "step": 12434 }, { "epoch": 2.8293515358361776, "grad_norm": 2.5071756044145843, "learning_rate": 4.966725233746455e-07, "loss": 0.1573, "step": 12435 }, { "epoch": 2.8295790671217294, "grad_norm": 2.578108338658041, "learning_rate": 4.96585077073115e-07, "loss": 0.0751, "step": 12436 }, { "epoch": 2.829806598407281, "grad_norm": 1.4601562107101644, "learning_rate": 4.964976333961506e-07, "loss": 0.073, "step": 12437 }, { "epoch": 2.830034129692833, "grad_norm": 1.4847759936130727, "learning_rate": 4.96410192345539e-07, "loss": 0.013, "step": 12438 }, { "epoch": 2.8302616609783846, "grad_norm": 2.2342727518478, "learning_rate": 4.963227539230678e-07, "loss": 0.0488, "step": 12439 }, { "epoch": 2.8304891922639364, "grad_norm": 1.174231581989984, "learning_rate": 4.962353181305237e-07, "loss": 0.0231, "step": 12440 }, { "epoch": 2.830716723549488, "grad_norm": 1.1751789603833867, "learning_rate": 4.961478849696938e-07, "loss": 0.0872, "step": 12441 }, { "epoch": 2.83094425483504, "grad_norm": 1.3364615916937814, "learning_rate": 4.960604544423654e-07, "loss": 0.0306, "step": 12442 }, { "epoch": 2.8311717861205916, "grad_norm": 1.1249344857362928, "learning_rate": 4.959730265503249e-07, "loss": 0.0777, "step": 12443 }, { "epoch": 2.8313993174061434, "grad_norm": 1.6986931895438662, "learning_rate": 4.958856012953596e-07, "loss": 0.0578, "step": 12444 }, { "epoch": 2.831626848691695, "grad_norm": 0.9970419688606582, "learning_rate": 4.95798178679256e-07, "loss": 0.0422, "step": 12445 }, { "epoch": 2.831854379977247, "grad_norm": 1.5247661954017562, "learning_rate": 4.957107587038013e-07, "loss": 0.0608, "step": 12446 }, { "epoch": 2.8320819112627986, "grad_norm": 2.4425720740000982, "learning_rate": 4.956233413707817e-07, "loss": 0.0485, "step": 12447 }, { "epoch": 2.8323094425483504, "grad_norm": 1.5972903820714925, "learning_rate": 4.95535926681984e-07, "loss": 0.0605, "step": 12448 }, { "epoch": 2.832536973833902, "grad_norm": 2.085517259322412, "learning_rate": 4.954485146391951e-07, "loss": 0.1081, "step": 12449 }, { "epoch": 2.832764505119454, "grad_norm": 1.2375228799904452, "learning_rate": 4.953611052442011e-07, "loss": 0.0625, "step": 12450 }, { "epoch": 2.8329920364050056, "grad_norm": 1.6445396866206146, "learning_rate": 4.952736984987887e-07, "loss": 0.0338, "step": 12451 }, { "epoch": 2.8332195676905574, "grad_norm": 1.554154923194692, "learning_rate": 4.951862944047442e-07, "loss": 0.0502, "step": 12452 }, { "epoch": 2.833447098976109, "grad_norm": 1.6024401496865197, "learning_rate": 4.950988929638544e-07, "loss": 0.0382, "step": 12453 }, { "epoch": 2.833674630261661, "grad_norm": 1.1842567625833045, "learning_rate": 4.950114941779052e-07, "loss": 0.0766, "step": 12454 }, { "epoch": 2.8339021615472126, "grad_norm": 1.0563102861529128, "learning_rate": 4.949240980486828e-07, "loss": 0.0383, "step": 12455 }, { "epoch": 2.8341296928327644, "grad_norm": 1.1683120880190938, "learning_rate": 4.94836704577974e-07, "loss": 0.0311, "step": 12456 }, { "epoch": 2.834357224118316, "grad_norm": 2.830395967005674, "learning_rate": 4.947493137675642e-07, "loss": 0.1042, "step": 12457 }, { "epoch": 2.834584755403868, "grad_norm": 2.458211855747049, "learning_rate": 4.9466192561924e-07, "loss": 0.0556, "step": 12458 }, { "epoch": 2.8348122866894196, "grad_norm": 1.930389230672516, "learning_rate": 4.945745401347876e-07, "loss": 0.0255, "step": 12459 }, { "epoch": 2.8350398179749714, "grad_norm": 2.283301943533622, "learning_rate": 4.944871573159923e-07, "loss": 0.0615, "step": 12460 }, { "epoch": 2.8352673492605236, "grad_norm": 2.0380321244088764, "learning_rate": 4.943997771646408e-07, "loss": 0.1752, "step": 12461 }, { "epoch": 2.835494880546075, "grad_norm": 3.0866118266745413, "learning_rate": 4.943123996825185e-07, "loss": 0.0587, "step": 12462 }, { "epoch": 2.835722411831627, "grad_norm": 2.624868180539448, "learning_rate": 4.942250248714116e-07, "loss": 0.0787, "step": 12463 }, { "epoch": 2.8359499431171784, "grad_norm": 1.6560239980630487, "learning_rate": 4.941376527331054e-07, "loss": 0.0504, "step": 12464 }, { "epoch": 2.8361774744027306, "grad_norm": 1.4196794661854686, "learning_rate": 4.94050283269386e-07, "loss": 0.0702, "step": 12465 }, { "epoch": 2.836405005688282, "grad_norm": 1.9385184986334292, "learning_rate": 4.939629164820394e-07, "loss": 0.0659, "step": 12466 }, { "epoch": 2.836632536973834, "grad_norm": 1.3865877556678674, "learning_rate": 4.938755523728503e-07, "loss": 0.0682, "step": 12467 }, { "epoch": 2.8368600682593854, "grad_norm": 1.6229639256048995, "learning_rate": 4.93788190943605e-07, "loss": 0.0251, "step": 12468 }, { "epoch": 2.8370875995449376, "grad_norm": 1.3993812953601412, "learning_rate": 4.937008321960885e-07, "loss": 0.0462, "step": 12469 }, { "epoch": 2.837315130830489, "grad_norm": 1.4859110068688026, "learning_rate": 4.936134761320868e-07, "loss": 0.1248, "step": 12470 }, { "epoch": 2.837542662116041, "grad_norm": 1.0740348348851891, "learning_rate": 4.935261227533851e-07, "loss": 0.067, "step": 12471 }, { "epoch": 2.837770193401593, "grad_norm": 0.6179816459287018, "learning_rate": 4.934387720617683e-07, "loss": 0.0064, "step": 12472 }, { "epoch": 2.8379977246871446, "grad_norm": 1.446650861577762, "learning_rate": 4.933514240590225e-07, "loss": 0.0868, "step": 12473 }, { "epoch": 2.8382252559726964, "grad_norm": 1.1525854562135789, "learning_rate": 4.932640787469322e-07, "loss": 0.058, "step": 12474 }, { "epoch": 2.838452787258248, "grad_norm": 1.6034250298939894, "learning_rate": 4.93176736127283e-07, "loss": 0.0371, "step": 12475 }, { "epoch": 2.8386803185438, "grad_norm": 0.9115341880475033, "learning_rate": 4.930893962018597e-07, "loss": 0.0424, "step": 12476 }, { "epoch": 2.8389078498293516, "grad_norm": 2.2704012236006363, "learning_rate": 4.930020589724479e-07, "loss": 0.0538, "step": 12477 }, { "epoch": 2.8391353811149034, "grad_norm": 1.0332988077803422, "learning_rate": 4.92914724440832e-07, "loss": 0.1159, "step": 12478 }, { "epoch": 2.839362912400455, "grad_norm": 4.128713174031813, "learning_rate": 4.928273926087972e-07, "loss": 0.052, "step": 12479 }, { "epoch": 2.839590443686007, "grad_norm": 2.0982653574983217, "learning_rate": 4.927400634781286e-07, "loss": 0.1183, "step": 12480 }, { "epoch": 2.8398179749715586, "grad_norm": 1.7147176516929452, "learning_rate": 4.926527370506108e-07, "loss": 0.1227, "step": 12481 }, { "epoch": 2.8400455062571104, "grad_norm": 2.25299819115799, "learning_rate": 4.925654133280286e-07, "loss": 0.0604, "step": 12482 }, { "epoch": 2.840273037542662, "grad_norm": 2.116792509858124, "learning_rate": 4.924780923121668e-07, "loss": 0.0276, "step": 12483 }, { "epoch": 2.840500568828214, "grad_norm": 1.9584183054801894, "learning_rate": 4.923907740048103e-07, "loss": 0.0455, "step": 12484 }, { "epoch": 2.8407281001137656, "grad_norm": 1.5176945675534574, "learning_rate": 4.923034584077434e-07, "loss": 0.0289, "step": 12485 }, { "epoch": 2.8409556313993174, "grad_norm": 1.8341395574567008, "learning_rate": 4.922161455227508e-07, "loss": 0.045, "step": 12486 }, { "epoch": 2.841183162684869, "grad_norm": 1.7755663837064675, "learning_rate": 4.92128835351617e-07, "loss": 0.0254, "step": 12487 }, { "epoch": 2.841410693970421, "grad_norm": 2.6952995202562122, "learning_rate": 4.920415278961262e-07, "loss": 0.0309, "step": 12488 }, { "epoch": 2.8416382252559726, "grad_norm": 1.6705219810954337, "learning_rate": 4.919542231580634e-07, "loss": 0.0546, "step": 12489 }, { "epoch": 2.8418657565415244, "grad_norm": 1.4394545368430924, "learning_rate": 4.918669211392128e-07, "loss": 0.0713, "step": 12490 }, { "epoch": 2.842093287827076, "grad_norm": 1.0551270126541177, "learning_rate": 4.91779621841358e-07, "loss": 0.0601, "step": 12491 }, { "epoch": 2.842320819112628, "grad_norm": 1.2022961929326805, "learning_rate": 4.916923252662841e-07, "loss": 0.0365, "step": 12492 }, { "epoch": 2.8425483503981797, "grad_norm": 1.523051054036723, "learning_rate": 4.916050314157747e-07, "loss": 0.0514, "step": 12493 }, { "epoch": 2.8427758816837314, "grad_norm": 1.6356909146524539, "learning_rate": 4.915177402916143e-07, "loss": 0.039, "step": 12494 }, { "epoch": 2.843003412969283, "grad_norm": 2.5636367618487603, "learning_rate": 4.914304518955868e-07, "loss": 0.0372, "step": 12495 }, { "epoch": 2.843230944254835, "grad_norm": 1.6442222298759772, "learning_rate": 4.913431662294763e-07, "loss": 0.0332, "step": 12496 }, { "epoch": 2.8434584755403867, "grad_norm": 1.0014747689936507, "learning_rate": 4.912558832950667e-07, "loss": 0.0222, "step": 12497 }, { "epoch": 2.8436860068259384, "grad_norm": 1.3187374153451428, "learning_rate": 4.911686030941417e-07, "loss": 0.102, "step": 12498 }, { "epoch": 2.84391353811149, "grad_norm": 1.1173613504412683, "learning_rate": 4.910813256284857e-07, "loss": 0.0129, "step": 12499 }, { "epoch": 2.8441410693970424, "grad_norm": 1.4313469988717813, "learning_rate": 4.909940508998818e-07, "loss": 0.0258, "step": 12500 }, { "epoch": 2.8443686006825937, "grad_norm": 2.038137141081842, "learning_rate": 4.909067789101143e-07, "loss": 0.0648, "step": 12501 }, { "epoch": 2.844596131968146, "grad_norm": 2.6182235496547444, "learning_rate": 4.908195096609665e-07, "loss": 0.0391, "step": 12502 }, { "epoch": 2.844823663253697, "grad_norm": 1.5162196901586191, "learning_rate": 4.907322431542223e-07, "loss": 0.0755, "step": 12503 }, { "epoch": 2.8450511945392494, "grad_norm": 1.7354018741575523, "learning_rate": 4.906449793916652e-07, "loss": 0.1635, "step": 12504 }, { "epoch": 2.8452787258248007, "grad_norm": 1.5589326520056508, "learning_rate": 4.905577183750784e-07, "loss": 0.0821, "step": 12505 }, { "epoch": 2.845506257110353, "grad_norm": 1.2681736769162273, "learning_rate": 4.90470460106246e-07, "loss": 0.0388, "step": 12506 }, { "epoch": 2.845733788395904, "grad_norm": 1.2047239815568809, "learning_rate": 4.903832045869507e-07, "loss": 0.0913, "step": 12507 }, { "epoch": 2.8459613196814564, "grad_norm": 1.9531347096392215, "learning_rate": 4.902959518189763e-07, "loss": 0.038, "step": 12508 }, { "epoch": 2.8461888509670077, "grad_norm": 1.9148464534969405, "learning_rate": 4.90208701804106e-07, "loss": 0.0793, "step": 12509 }, { "epoch": 2.84641638225256, "grad_norm": 2.1121389175873357, "learning_rate": 4.901214545441228e-07, "loss": 0.0318, "step": 12510 }, { "epoch": 2.8466439135381116, "grad_norm": 1.9176665387243284, "learning_rate": 4.900342100408102e-07, "loss": 0.1241, "step": 12511 }, { "epoch": 2.8468714448236634, "grad_norm": 1.2699001664268361, "learning_rate": 4.899469682959511e-07, "loss": 0.1088, "step": 12512 }, { "epoch": 2.847098976109215, "grad_norm": 1.6285298500790093, "learning_rate": 4.898597293113287e-07, "loss": 0.0792, "step": 12513 }, { "epoch": 2.847326507394767, "grad_norm": 1.0695642368261296, "learning_rate": 4.897724930887258e-07, "loss": 0.0125, "step": 12514 }, { "epoch": 2.8475540386803186, "grad_norm": 1.1167959265025191, "learning_rate": 4.896852596299255e-07, "loss": 0.02, "step": 12515 }, { "epoch": 2.8477815699658704, "grad_norm": 1.6200045982292401, "learning_rate": 4.89598028936711e-07, "loss": 0.0269, "step": 12516 }, { "epoch": 2.848009101251422, "grad_norm": 1.405325258042828, "learning_rate": 4.895108010108644e-07, "loss": 0.0264, "step": 12517 }, { "epoch": 2.848236632536974, "grad_norm": 1.6915781007251292, "learning_rate": 4.894235758541691e-07, "loss": 0.1138, "step": 12518 }, { "epoch": 2.8484641638225257, "grad_norm": 1.3771031127697424, "learning_rate": 4.893363534684074e-07, "loss": 0.0886, "step": 12519 }, { "epoch": 2.8486916951080774, "grad_norm": 2.34194436849635, "learning_rate": 4.892491338553625e-07, "loss": 0.0409, "step": 12520 }, { "epoch": 2.848919226393629, "grad_norm": 0.9829797210759005, "learning_rate": 4.891619170168164e-07, "loss": 0.052, "step": 12521 }, { "epoch": 2.849146757679181, "grad_norm": 8.180933606149656, "learning_rate": 4.890747029545521e-07, "loss": 0.0863, "step": 12522 }, { "epoch": 2.8493742889647327, "grad_norm": 1.3843357543913206, "learning_rate": 4.88987491670352e-07, "loss": 0.0401, "step": 12523 }, { "epoch": 2.8496018202502844, "grad_norm": 1.3694321846971835, "learning_rate": 4.889002831659983e-07, "loss": 0.0271, "step": 12524 }, { "epoch": 2.849829351535836, "grad_norm": 2.170977405724725, "learning_rate": 4.888130774432737e-07, "loss": 0.0592, "step": 12525 }, { "epoch": 2.850056882821388, "grad_norm": 1.4263138495299816, "learning_rate": 4.887258745039601e-07, "loss": 0.1062, "step": 12526 }, { "epoch": 2.8502844141069397, "grad_norm": 1.4072739869493203, "learning_rate": 4.886386743498405e-07, "loss": 0.069, "step": 12527 }, { "epoch": 2.8505119453924914, "grad_norm": 2.268657280091602, "learning_rate": 4.885514769826964e-07, "loss": 0.0482, "step": 12528 }, { "epoch": 2.850739476678043, "grad_norm": 1.4394420404429884, "learning_rate": 4.884642824043101e-07, "loss": 0.0829, "step": 12529 }, { "epoch": 2.850967007963595, "grad_norm": 1.516225391819904, "learning_rate": 4.883770906164642e-07, "loss": 0.084, "step": 12530 }, { "epoch": 2.8511945392491467, "grad_norm": 1.3007718604792957, "learning_rate": 4.882899016209399e-07, "loss": 0.0456, "step": 12531 }, { "epoch": 2.8514220705346984, "grad_norm": 1.5421239261422242, "learning_rate": 4.882027154195199e-07, "loss": 0.0256, "step": 12532 }, { "epoch": 2.85164960182025, "grad_norm": 1.9355491194521617, "learning_rate": 4.881155320139857e-07, "loss": 0.1231, "step": 12533 }, { "epoch": 2.851877133105802, "grad_norm": 2.2741863109941503, "learning_rate": 4.880283514061196e-07, "loss": 0.0463, "step": 12534 }, { "epoch": 2.8521046643913537, "grad_norm": 0.9869142298504915, "learning_rate": 4.87941173597703e-07, "loss": 0.0773, "step": 12535 }, { "epoch": 2.8523321956769054, "grad_norm": 2.100740902672935, "learning_rate": 4.878539985905177e-07, "loss": 0.0412, "step": 12536 }, { "epoch": 2.852559726962457, "grad_norm": 2.118391255102143, "learning_rate": 4.877668263863458e-07, "loss": 0.0762, "step": 12537 }, { "epoch": 2.852787258248009, "grad_norm": 1.346442629184984, "learning_rate": 4.876796569869682e-07, "loss": 0.0169, "step": 12538 }, { "epoch": 2.853014789533561, "grad_norm": 1.8603294892286648, "learning_rate": 4.875924903941672e-07, "loss": 0.0619, "step": 12539 }, { "epoch": 2.8532423208191124, "grad_norm": 1.837471305334368, "learning_rate": 4.875053266097239e-07, "loss": 0.0538, "step": 12540 }, { "epoch": 2.8534698521046646, "grad_norm": 1.78923224274223, "learning_rate": 4.874181656354202e-07, "loss": 0.1908, "step": 12541 }, { "epoch": 2.853697383390216, "grad_norm": 2.033176238581486, "learning_rate": 4.87331007473037e-07, "loss": 0.1685, "step": 12542 }, { "epoch": 2.853924914675768, "grad_norm": 1.6609665564683296, "learning_rate": 4.872438521243558e-07, "loss": 0.0365, "step": 12543 }, { "epoch": 2.8541524459613195, "grad_norm": 1.6243114307382713, "learning_rate": 4.871566995911583e-07, "loss": 0.0538, "step": 12544 }, { "epoch": 2.8543799772468716, "grad_norm": 2.481014612464673, "learning_rate": 4.870695498752251e-07, "loss": 0.0506, "step": 12545 }, { "epoch": 2.854607508532423, "grad_norm": 1.2695097588009217, "learning_rate": 4.869824029783378e-07, "loss": 0.1172, "step": 12546 }, { "epoch": 2.854835039817975, "grad_norm": 1.5093454832200837, "learning_rate": 4.868952589022775e-07, "loss": 0.1249, "step": 12547 }, { "epoch": 2.8550625711035265, "grad_norm": 1.234324599012991, "learning_rate": 4.86808117648825e-07, "loss": 0.0529, "step": 12548 }, { "epoch": 2.8552901023890787, "grad_norm": 0.8899343893246875, "learning_rate": 4.867209792197617e-07, "loss": 0.0616, "step": 12549 }, { "epoch": 2.8555176336746304, "grad_norm": 0.9879236303698783, "learning_rate": 4.86633843616868e-07, "loss": 0.0597, "step": 12550 }, { "epoch": 2.855745164960182, "grad_norm": 2.3451247958185912, "learning_rate": 4.865467108419254e-07, "loss": 0.0431, "step": 12551 }, { "epoch": 2.855972696245734, "grad_norm": 1.859627070587405, "learning_rate": 4.864595808967143e-07, "loss": 0.0843, "step": 12552 }, { "epoch": 2.8562002275312857, "grad_norm": 1.198432837288742, "learning_rate": 4.863724537830159e-07, "loss": 0.0451, "step": 12553 }, { "epoch": 2.8564277588168374, "grad_norm": 1.5616247441242452, "learning_rate": 4.862853295026105e-07, "loss": 0.0413, "step": 12554 }, { "epoch": 2.856655290102389, "grad_norm": 1.5791378095049908, "learning_rate": 4.861982080572789e-07, "loss": 0.0631, "step": 12555 }, { "epoch": 2.856882821387941, "grad_norm": 1.4788911395852724, "learning_rate": 4.861110894488019e-07, "loss": 0.0359, "step": 12556 }, { "epoch": 2.8571103526734927, "grad_norm": 1.463157689810381, "learning_rate": 4.860239736789596e-07, "loss": 0.0323, "step": 12557 }, { "epoch": 2.8573378839590444, "grad_norm": 1.8911309040488467, "learning_rate": 4.859368607495329e-07, "loss": 0.1005, "step": 12558 }, { "epoch": 2.857565415244596, "grad_norm": 1.9179825115510722, "learning_rate": 4.85849750662302e-07, "loss": 0.0789, "step": 12559 }, { "epoch": 2.857792946530148, "grad_norm": 1.0991658514300273, "learning_rate": 4.857626434190475e-07, "loss": 0.0841, "step": 12560 }, { "epoch": 2.8580204778156997, "grad_norm": 1.380362922066705, "learning_rate": 4.856755390215495e-07, "loss": 0.0606, "step": 12561 }, { "epoch": 2.8582480091012514, "grad_norm": 1.0940161094951113, "learning_rate": 4.855884374715882e-07, "loss": 0.0587, "step": 12562 }, { "epoch": 2.858475540386803, "grad_norm": 1.3666608153941782, "learning_rate": 4.855013387709442e-07, "loss": 0.0352, "step": 12563 }, { "epoch": 2.858703071672355, "grad_norm": 2.56885883528296, "learning_rate": 4.854142429213971e-07, "loss": 0.0597, "step": 12564 }, { "epoch": 2.8589306029579067, "grad_norm": 1.3149473133084926, "learning_rate": 4.853271499247274e-07, "loss": 0.0232, "step": 12565 }, { "epoch": 2.8591581342434584, "grad_norm": 2.3127367563621735, "learning_rate": 4.85240059782715e-07, "loss": 0.0464, "step": 12566 }, { "epoch": 2.85938566552901, "grad_norm": 1.576361025651664, "learning_rate": 4.851529724971395e-07, "loss": 0.0172, "step": 12567 }, { "epoch": 2.859613196814562, "grad_norm": 2.022712336240276, "learning_rate": 4.850658880697814e-07, "loss": 0.0494, "step": 12568 }, { "epoch": 2.8598407281001137, "grad_norm": 2.3743581326878433, "learning_rate": 4.849788065024201e-07, "loss": 0.0489, "step": 12569 }, { "epoch": 2.8600682593856654, "grad_norm": 1.9198793733935373, "learning_rate": 4.848917277968358e-07, "loss": 0.0527, "step": 12570 }, { "epoch": 2.860295790671217, "grad_norm": 0.8867596588245615, "learning_rate": 4.848046519548078e-07, "loss": 0.0577, "step": 12571 }, { "epoch": 2.860523321956769, "grad_norm": 2.4413600777999234, "learning_rate": 4.84717578978116e-07, "loss": 0.0373, "step": 12572 }, { "epoch": 2.8607508532423207, "grad_norm": 1.0667938351185733, "learning_rate": 4.846305088685401e-07, "loss": 0.0674, "step": 12573 }, { "epoch": 2.8609783845278725, "grad_norm": 1.4652841713265647, "learning_rate": 4.845434416278593e-07, "loss": 0.0441, "step": 12574 }, { "epoch": 2.861205915813424, "grad_norm": 1.6611654165272123, "learning_rate": 4.844563772578534e-07, "loss": 0.0596, "step": 12575 }, { "epoch": 2.861433447098976, "grad_norm": 2.146127058215351, "learning_rate": 4.843693157603016e-07, "loss": 0.0419, "step": 12576 }, { "epoch": 2.8616609783845277, "grad_norm": 2.1368431049752608, "learning_rate": 4.842822571369837e-07, "loss": 0.0575, "step": 12577 }, { "epoch": 2.86188850967008, "grad_norm": 1.2279267897076829, "learning_rate": 4.841952013896788e-07, "loss": 0.0286, "step": 12578 }, { "epoch": 2.862116040955631, "grad_norm": 1.2010914062278637, "learning_rate": 4.841081485201659e-07, "loss": 0.1257, "step": 12579 }, { "epoch": 2.8623435722411834, "grad_norm": 0.997076251753237, "learning_rate": 4.840210985302245e-07, "loss": 0.032, "step": 12580 }, { "epoch": 2.8625711035267347, "grad_norm": 1.512080253534714, "learning_rate": 4.839340514216335e-07, "loss": 0.0788, "step": 12581 }, { "epoch": 2.862798634812287, "grad_norm": 1.6405898326829564, "learning_rate": 4.838470071961724e-07, "loss": 0.0441, "step": 12582 }, { "epoch": 2.863026166097838, "grad_norm": 1.3863686820792251, "learning_rate": 4.837599658556197e-07, "loss": 0.1159, "step": 12583 }, { "epoch": 2.8632536973833904, "grad_norm": 1.5439727230632452, "learning_rate": 4.836729274017549e-07, "loss": 0.0394, "step": 12584 }, { "epoch": 2.8634812286689417, "grad_norm": 1.4164398262655695, "learning_rate": 4.835858918363565e-07, "loss": 0.0361, "step": 12585 }, { "epoch": 2.863708759954494, "grad_norm": 2.242609547675428, "learning_rate": 4.834988591612034e-07, "loss": 0.1037, "step": 12586 }, { "epoch": 2.8639362912400452, "grad_norm": 0.9931996921083485, "learning_rate": 4.834118293780747e-07, "loss": 0.0588, "step": 12587 }, { "epoch": 2.8641638225255974, "grad_norm": 1.884978572801475, "learning_rate": 4.833248024887486e-07, "loss": 0.03, "step": 12588 }, { "epoch": 2.864391353811149, "grad_norm": 1.3607665011146834, "learning_rate": 4.832377784950043e-07, "loss": 0.0503, "step": 12589 }, { "epoch": 2.864618885096701, "grad_norm": 1.7079019643526105, "learning_rate": 4.831507573986199e-07, "loss": 0.0394, "step": 12590 }, { "epoch": 2.8648464163822527, "grad_norm": 2.1428398842910905, "learning_rate": 4.830637392013746e-07, "loss": 0.0643, "step": 12591 }, { "epoch": 2.8650739476678044, "grad_norm": 1.5659493233492159, "learning_rate": 4.829767239050465e-07, "loss": 0.0788, "step": 12592 }, { "epoch": 2.865301478953356, "grad_norm": 2.564073096204237, "learning_rate": 4.828897115114137e-07, "loss": 0.1263, "step": 12593 }, { "epoch": 2.865529010238908, "grad_norm": 1.5203745477871993, "learning_rate": 4.828027020222554e-07, "loss": 0.0998, "step": 12594 }, { "epoch": 2.8657565415244597, "grad_norm": 1.329607473285561, "learning_rate": 4.827156954393491e-07, "loss": 0.0163, "step": 12595 }, { "epoch": 2.8659840728100114, "grad_norm": 1.4373647950556965, "learning_rate": 4.826286917644734e-07, "loss": 0.0245, "step": 12596 }, { "epoch": 2.866211604095563, "grad_norm": 2.326427209417108, "learning_rate": 4.825416909994068e-07, "loss": 0.058, "step": 12597 }, { "epoch": 2.866439135381115, "grad_norm": 1.2666065320185766, "learning_rate": 4.824546931459268e-07, "loss": 0.0217, "step": 12598 }, { "epoch": 2.8666666666666667, "grad_norm": 1.5609225913177167, "learning_rate": 4.823676982058121e-07, "loss": 0.0728, "step": 12599 }, { "epoch": 2.8668941979522184, "grad_norm": 2.471124598114997, "learning_rate": 4.822807061808402e-07, "loss": 0.0564, "step": 12600 }, { "epoch": 2.86712172923777, "grad_norm": 0.9531753219643102, "learning_rate": 4.821937170727896e-07, "loss": 0.0751, "step": 12601 }, { "epoch": 2.867349260523322, "grad_norm": 1.2229857917089708, "learning_rate": 4.821067308834374e-07, "loss": 0.1104, "step": 12602 }, { "epoch": 2.8675767918088737, "grad_norm": 1.7547903280145958, "learning_rate": 4.820197476145623e-07, "loss": 0.1389, "step": 12603 }, { "epoch": 2.8678043230944255, "grad_norm": 4.352816513368197, "learning_rate": 4.819327672679418e-07, "loss": 0.1913, "step": 12604 }, { "epoch": 2.868031854379977, "grad_norm": 1.542021402901834, "learning_rate": 4.818457898453531e-07, "loss": 0.0817, "step": 12605 }, { "epoch": 2.868259385665529, "grad_norm": 1.4841457951611283, "learning_rate": 4.817588153485746e-07, "loss": 0.0308, "step": 12606 }, { "epoch": 2.8684869169510807, "grad_norm": 1.9657735183141922, "learning_rate": 4.816718437793833e-07, "loss": 0.2115, "step": 12607 }, { "epoch": 2.8687144482366325, "grad_norm": 0.9925899960879077, "learning_rate": 4.815848751395573e-07, "loss": 0.0469, "step": 12608 }, { "epoch": 2.868941979522184, "grad_norm": 1.6512964937695886, "learning_rate": 4.814979094308735e-07, "loss": 0.0328, "step": 12609 }, { "epoch": 2.869169510807736, "grad_norm": 1.7540422248196759, "learning_rate": 4.8141094665511e-07, "loss": 0.0807, "step": 12610 }, { "epoch": 2.8693970420932877, "grad_norm": 1.7011156090577977, "learning_rate": 4.813239868140437e-07, "loss": 0.0474, "step": 12611 }, { "epoch": 2.8696245733788395, "grad_norm": 1.6772828052412847, "learning_rate": 4.812370299094517e-07, "loss": 0.1117, "step": 12612 }, { "epoch": 2.8698521046643912, "grad_norm": 1.121757482600758, "learning_rate": 4.811500759431118e-07, "loss": 0.0362, "step": 12613 }, { "epoch": 2.870079635949943, "grad_norm": 1.3166850379619883, "learning_rate": 4.810631249168007e-07, "loss": 0.0736, "step": 12614 }, { "epoch": 2.8703071672354947, "grad_norm": 0.9431960853324578, "learning_rate": 4.809761768322959e-07, "loss": 0.011, "step": 12615 }, { "epoch": 2.8705346985210465, "grad_norm": 1.786601315502751, "learning_rate": 4.808892316913743e-07, "loss": 0.0702, "step": 12616 }, { "epoch": 2.8707622298065987, "grad_norm": 1.8295142306329464, "learning_rate": 4.808022894958126e-07, "loss": 0.0711, "step": 12617 }, { "epoch": 2.87098976109215, "grad_norm": 1.5984357401222524, "learning_rate": 4.807153502473883e-07, "loss": 0.0343, "step": 12618 }, { "epoch": 2.871217292377702, "grad_norm": 1.310871862467835, "learning_rate": 4.806284139478777e-07, "loss": 0.0851, "step": 12619 }, { "epoch": 2.8714448236632535, "grad_norm": 1.637312784222254, "learning_rate": 4.805414805990582e-07, "loss": 0.0584, "step": 12620 }, { "epoch": 2.8716723549488057, "grad_norm": 1.5014833886650167, "learning_rate": 4.804545502027061e-07, "loss": 0.0729, "step": 12621 }, { "epoch": 2.871899886234357, "grad_norm": 1.1326473423568553, "learning_rate": 4.803676227605984e-07, "loss": 0.0956, "step": 12622 }, { "epoch": 2.872127417519909, "grad_norm": 1.351481301281316, "learning_rate": 4.802806982745117e-07, "loss": 0.1354, "step": 12623 }, { "epoch": 2.8723549488054605, "grad_norm": 1.7228851479821903, "learning_rate": 4.801937767462222e-07, "loss": 0.0853, "step": 12624 }, { "epoch": 2.8725824800910127, "grad_norm": 1.21785740773013, "learning_rate": 4.801068581775068e-07, "loss": 0.0325, "step": 12625 }, { "epoch": 2.8728100113765644, "grad_norm": 2.2258791097080386, "learning_rate": 4.800199425701419e-07, "loss": 0.171, "step": 12626 }, { "epoch": 2.873037542662116, "grad_norm": 2.524220395214853, "learning_rate": 4.79933029925904e-07, "loss": 0.0419, "step": 12627 }, { "epoch": 2.873265073947668, "grad_norm": 0.7608139381180326, "learning_rate": 4.79846120246569e-07, "loss": 0.0156, "step": 12628 }, { "epoch": 2.8734926052332197, "grad_norm": 1.1377897550017428, "learning_rate": 4.797592135339136e-07, "loss": 0.1254, "step": 12629 }, { "epoch": 2.8737201365187715, "grad_norm": 1.4332971872490274, "learning_rate": 4.796723097897141e-07, "loss": 0.1074, "step": 12630 }, { "epoch": 2.873947667804323, "grad_norm": 3.3820811389698533, "learning_rate": 4.795854090157461e-07, "loss": 0.0981, "step": 12631 }, { "epoch": 2.874175199089875, "grad_norm": 2.4402619171544235, "learning_rate": 4.794985112137862e-07, "loss": 0.0888, "step": 12632 }, { "epoch": 2.8744027303754267, "grad_norm": 2.8335069331710776, "learning_rate": 4.794116163856101e-07, "loss": 0.0734, "step": 12633 }, { "epoch": 2.8746302616609785, "grad_norm": 1.6292403264676645, "learning_rate": 4.793247245329941e-07, "loss": 0.0874, "step": 12634 }, { "epoch": 2.87485779294653, "grad_norm": 2.4789131066468553, "learning_rate": 4.792378356577139e-07, "loss": 0.0533, "step": 12635 }, { "epoch": 2.875085324232082, "grad_norm": 1.7601814307097419, "learning_rate": 4.791509497615452e-07, "loss": 0.2023, "step": 12636 }, { "epoch": 2.8753128555176337, "grad_norm": 2.0916308238529697, "learning_rate": 4.790640668462644e-07, "loss": 0.0465, "step": 12637 }, { "epoch": 2.8755403868031855, "grad_norm": 2.173515659688752, "learning_rate": 4.789771869136464e-07, "loss": 0.058, "step": 12638 }, { "epoch": 2.875767918088737, "grad_norm": 1.4514675159393737, "learning_rate": 4.788903099654674e-07, "loss": 0.0292, "step": 12639 }, { "epoch": 2.875995449374289, "grad_norm": 1.2142755111521664, "learning_rate": 4.788034360035027e-07, "loss": 0.0412, "step": 12640 }, { "epoch": 2.8762229806598407, "grad_norm": 1.0524667251805684, "learning_rate": 4.787165650295284e-07, "loss": 0.0584, "step": 12641 }, { "epoch": 2.8764505119453925, "grad_norm": 1.5099300621326792, "learning_rate": 4.786296970453195e-07, "loss": 0.0225, "step": 12642 }, { "epoch": 2.8766780432309442, "grad_norm": 2.310984480702775, "learning_rate": 4.785428320526514e-07, "loss": 0.1343, "step": 12643 }, { "epoch": 2.876905574516496, "grad_norm": 1.8070839998169317, "learning_rate": 4.784559700532998e-07, "loss": 0.0313, "step": 12644 }, { "epoch": 2.8771331058020477, "grad_norm": 1.847202936698943, "learning_rate": 4.783691110490394e-07, "loss": 0.0308, "step": 12645 }, { "epoch": 2.8773606370875995, "grad_norm": 1.7991250369253637, "learning_rate": 4.782822550416461e-07, "loss": 0.0495, "step": 12646 }, { "epoch": 2.8775881683731512, "grad_norm": 1.0141459169277287, "learning_rate": 4.781954020328947e-07, "loss": 0.068, "step": 12647 }, { "epoch": 2.877815699658703, "grad_norm": 2.110063510489659, "learning_rate": 4.781085520245606e-07, "loss": 0.0375, "step": 12648 }, { "epoch": 2.8780432309442547, "grad_norm": 1.3739910255560714, "learning_rate": 4.780217050184185e-07, "loss": 0.1095, "step": 12649 }, { "epoch": 2.8782707622298065, "grad_norm": 1.4123602544759832, "learning_rate": 4.779348610162436e-07, "loss": 0.0371, "step": 12650 }, { "epoch": 2.8784982935153582, "grad_norm": 1.5787837302202425, "learning_rate": 4.77848020019811e-07, "loss": 0.072, "step": 12651 }, { "epoch": 2.87872582480091, "grad_norm": 1.6611165073812784, "learning_rate": 4.777611820308951e-07, "loss": 0.0852, "step": 12652 }, { "epoch": 2.8789533560864617, "grad_norm": 1.2677954519405399, "learning_rate": 4.776743470512711e-07, "loss": 0.0509, "step": 12653 }, { "epoch": 2.8791808873720135, "grad_norm": 1.662448624335218, "learning_rate": 4.775875150827137e-07, "loss": 0.0414, "step": 12654 }, { "epoch": 2.8794084186575652, "grad_norm": 1.4416654128242175, "learning_rate": 4.775006861269974e-07, "loss": 0.1093, "step": 12655 }, { "epoch": 2.8796359499431174, "grad_norm": 1.001465802320601, "learning_rate": 4.77413860185897e-07, "loss": 0.0227, "step": 12656 }, { "epoch": 2.8798634812286688, "grad_norm": 1.2998800925175038, "learning_rate": 4.77327037261187e-07, "loss": 0.0724, "step": 12657 }, { "epoch": 2.880091012514221, "grad_norm": 1.5538708070174234, "learning_rate": 4.772402173546419e-07, "loss": 0.1071, "step": 12658 }, { "epoch": 2.8803185437997723, "grad_norm": 1.6872468369081506, "learning_rate": 4.771534004680361e-07, "loss": 0.0384, "step": 12659 }, { "epoch": 2.8805460750853245, "grad_norm": 1.054390568040993, "learning_rate": 4.770665866031441e-07, "loss": 0.0788, "step": 12660 }, { "epoch": 2.8807736063708758, "grad_norm": 1.7375134508993193, "learning_rate": 4.769797757617403e-07, "loss": 0.0998, "step": 12661 }, { "epoch": 2.881001137656428, "grad_norm": 1.8201425765709196, "learning_rate": 4.768929679455984e-07, "loss": 0.0685, "step": 12662 }, { "epoch": 2.8812286689419793, "grad_norm": 1.2539693322152716, "learning_rate": 4.768061631564933e-07, "loss": 0.0204, "step": 12663 }, { "epoch": 2.8814562002275315, "grad_norm": 1.1739000520347551, "learning_rate": 4.767193613961986e-07, "loss": 0.0218, "step": 12664 }, { "epoch": 2.881683731513083, "grad_norm": 1.9750641914940097, "learning_rate": 4.766325626664889e-07, "loss": 0.0573, "step": 12665 }, { "epoch": 2.881911262798635, "grad_norm": 1.5737626894400853, "learning_rate": 4.7654576696913757e-07, "loss": 0.0532, "step": 12666 }, { "epoch": 2.8821387940841867, "grad_norm": 1.346668554792961, "learning_rate": 4.764589743059191e-07, "loss": 0.1448, "step": 12667 }, { "epoch": 2.8823663253697385, "grad_norm": 1.9647162897566983, "learning_rate": 4.7637218467860723e-07, "loss": 0.0732, "step": 12668 }, { "epoch": 2.88259385665529, "grad_norm": 1.2399223042234155, "learning_rate": 4.7628539808897543e-07, "loss": 0.1094, "step": 12669 }, { "epoch": 2.882821387940842, "grad_norm": 1.2279925116023995, "learning_rate": 4.7619861453879786e-07, "loss": 0.0133, "step": 12670 }, { "epoch": 2.8830489192263937, "grad_norm": 2.2499375483307635, "learning_rate": 4.7611183402984804e-07, "loss": 0.0766, "step": 12671 }, { "epoch": 2.8832764505119455, "grad_norm": 1.4752854972702385, "learning_rate": 4.760250565638998e-07, "loss": 0.0385, "step": 12672 }, { "epoch": 2.8835039817974972, "grad_norm": 1.9475460262474602, "learning_rate": 4.759382821427265e-07, "loss": 0.0807, "step": 12673 }, { "epoch": 2.883731513083049, "grad_norm": 3.0256458180929746, "learning_rate": 4.758515107681016e-07, "loss": 0.0784, "step": 12674 }, { "epoch": 2.8839590443686007, "grad_norm": 1.5366981847553096, "learning_rate": 4.757647424417988e-07, "loss": 0.0757, "step": 12675 }, { "epoch": 2.8841865756541525, "grad_norm": 2.621100057342978, "learning_rate": 4.756779771655912e-07, "loss": 0.0565, "step": 12676 }, { "epoch": 2.8844141069397042, "grad_norm": 1.9085724874020673, "learning_rate": 4.7559121494125255e-07, "loss": 0.0409, "step": 12677 }, { "epoch": 2.884641638225256, "grad_norm": 1.2980612219526557, "learning_rate": 4.7550445577055556e-07, "loss": 0.0595, "step": 12678 }, { "epoch": 2.8848691695108077, "grad_norm": 2.4713557779672946, "learning_rate": 4.7541769965527387e-07, "loss": 0.0417, "step": 12679 }, { "epoch": 2.8850967007963595, "grad_norm": 1.485957018260315, "learning_rate": 4.753309465971806e-07, "loss": 0.0414, "step": 12680 }, { "epoch": 2.8853242320819112, "grad_norm": 2.1950948718590073, "learning_rate": 4.752441965980483e-07, "loss": 0.0551, "step": 12681 }, { "epoch": 2.885551763367463, "grad_norm": 3.063514274683956, "learning_rate": 4.751574496596506e-07, "loss": 0.029, "step": 12682 }, { "epoch": 2.8857792946530147, "grad_norm": 1.1732145283000142, "learning_rate": 4.7507070578376e-07, "loss": 0.0262, "step": 12683 }, { "epoch": 2.8860068259385665, "grad_norm": 2.1100044625015326, "learning_rate": 4.7498396497214995e-07, "loss": 0.1165, "step": 12684 }, { "epoch": 2.8862343572241183, "grad_norm": 1.3530343463913168, "learning_rate": 4.748972272265927e-07, "loss": 0.0651, "step": 12685 }, { "epoch": 2.88646188850967, "grad_norm": 1.4383754418296333, "learning_rate": 4.7481049254886114e-07, "loss": 0.0687, "step": 12686 }, { "epoch": 2.8866894197952218, "grad_norm": 2.050069301803148, "learning_rate": 4.747237609407283e-07, "loss": 0.0796, "step": 12687 }, { "epoch": 2.8869169510807735, "grad_norm": 2.005883302724801, "learning_rate": 4.7463703240396627e-07, "loss": 0.097, "step": 12688 }, { "epoch": 2.8871444823663253, "grad_norm": 1.741654868338223, "learning_rate": 4.745503069403481e-07, "loss": 0.0283, "step": 12689 }, { "epoch": 2.887372013651877, "grad_norm": 1.7118983040423796, "learning_rate": 4.74463584551646e-07, "loss": 0.0662, "step": 12690 }, { "epoch": 2.8875995449374288, "grad_norm": 1.6226855554344366, "learning_rate": 4.743768652396327e-07, "loss": 0.0615, "step": 12691 }, { "epoch": 2.8878270762229805, "grad_norm": 1.596561306016025, "learning_rate": 4.7429014900608043e-07, "loss": 0.039, "step": 12692 }, { "epoch": 2.8880546075085323, "grad_norm": 1.3840429328709019, "learning_rate": 4.742034358527613e-07, "loss": 0.099, "step": 12693 }, { "epoch": 2.888282138794084, "grad_norm": 1.2160584145964988, "learning_rate": 4.74116725781448e-07, "loss": 0.0708, "step": 12694 }, { "epoch": 2.888509670079636, "grad_norm": 1.4347457588869192, "learning_rate": 4.740300187939123e-07, "loss": 0.0972, "step": 12695 }, { "epoch": 2.8887372013651875, "grad_norm": 0.9091464220534279, "learning_rate": 4.739433148919266e-07, "loss": 0.0083, "step": 12696 }, { "epoch": 2.8889647326507397, "grad_norm": 1.6838939434212756, "learning_rate": 4.7385661407726283e-07, "loss": 0.0225, "step": 12697 }, { "epoch": 2.889192263936291, "grad_norm": 4.480416185177638, "learning_rate": 4.737699163516932e-07, "loss": 0.1178, "step": 12698 }, { "epoch": 2.8894197952218432, "grad_norm": 1.0519356233051547, "learning_rate": 4.7368322171698954e-07, "loss": 0.0133, "step": 12699 }, { "epoch": 2.8896473265073945, "grad_norm": 1.4977935350797296, "learning_rate": 4.7359653017492344e-07, "loss": 0.1252, "step": 12700 }, { "epoch": 2.8898748577929467, "grad_norm": 2.607103083968053, "learning_rate": 4.735098417272674e-07, "loss": 0.0653, "step": 12701 }, { "epoch": 2.890102389078498, "grad_norm": 1.9397158384400859, "learning_rate": 4.734231563757924e-07, "loss": 0.0641, "step": 12702 }, { "epoch": 2.8903299203640502, "grad_norm": 1.6801156541101705, "learning_rate": 4.733364741222705e-07, "loss": 0.0847, "step": 12703 }, { "epoch": 2.890557451649602, "grad_norm": 1.4267174836852405, "learning_rate": 4.732497949684736e-07, "loss": 0.0368, "step": 12704 }, { "epoch": 2.8907849829351537, "grad_norm": 1.5112317998099394, "learning_rate": 4.731631189161727e-07, "loss": 0.0359, "step": 12705 }, { "epoch": 2.8910125142207055, "grad_norm": 2.161770319945882, "learning_rate": 4.730764459671397e-07, "loss": 0.0782, "step": 12706 }, { "epoch": 2.8912400455062572, "grad_norm": 1.6761927222187345, "learning_rate": 4.729897761231457e-07, "loss": 0.0653, "step": 12707 }, { "epoch": 2.891467576791809, "grad_norm": 1.8032820743773281, "learning_rate": 4.7290310938596264e-07, "loss": 0.0319, "step": 12708 }, { "epoch": 2.8916951080773607, "grad_norm": 1.4590808681382081, "learning_rate": 4.728164457573611e-07, "loss": 0.064, "step": 12709 }, { "epoch": 2.8919226393629125, "grad_norm": 1.897363873082315, "learning_rate": 4.7272978523911294e-07, "loss": 0.0959, "step": 12710 }, { "epoch": 2.8921501706484642, "grad_norm": 1.8342022127335944, "learning_rate": 4.726431278329892e-07, "loss": 0.0679, "step": 12711 }, { "epoch": 2.892377701934016, "grad_norm": 1.3158123655039042, "learning_rate": 4.725564735407606e-07, "loss": 0.0777, "step": 12712 }, { "epoch": 2.8926052332195678, "grad_norm": 2.3802220667914122, "learning_rate": 4.724698223641987e-07, "loss": 0.0444, "step": 12713 }, { "epoch": 2.8928327645051195, "grad_norm": 1.9342375647580567, "learning_rate": 4.723831743050741e-07, "loss": 0.0278, "step": 12714 }, { "epoch": 2.8930602957906713, "grad_norm": 1.044058711845359, "learning_rate": 4.722965293651581e-07, "loss": 0.0183, "step": 12715 }, { "epoch": 2.893287827076223, "grad_norm": 1.1927260350800157, "learning_rate": 4.7220988754622124e-07, "loss": 0.0306, "step": 12716 }, { "epoch": 2.8935153583617748, "grad_norm": 1.6408530472822564, "learning_rate": 4.721232488500345e-07, "loss": 0.0872, "step": 12717 }, { "epoch": 2.8937428896473265, "grad_norm": 1.8615083759362143, "learning_rate": 4.7203661327836873e-07, "loss": 0.0231, "step": 12718 }, { "epoch": 2.8939704209328783, "grad_norm": 1.5429479057947402, "learning_rate": 4.719499808329942e-07, "loss": 0.0642, "step": 12719 }, { "epoch": 2.89419795221843, "grad_norm": 2.1761302704355563, "learning_rate": 4.718633515156819e-07, "loss": 0.0403, "step": 12720 }, { "epoch": 2.8944254835039818, "grad_norm": 1.357756931505235, "learning_rate": 4.717767253282021e-07, "loss": 0.0187, "step": 12721 }, { "epoch": 2.8946530147895335, "grad_norm": 1.2671333346752245, "learning_rate": 4.7169010227232566e-07, "loss": 0.0291, "step": 12722 }, { "epoch": 2.8948805460750853, "grad_norm": 1.244409150540576, "learning_rate": 4.7160348234982264e-07, "loss": 0.1037, "step": 12723 }, { "epoch": 2.895108077360637, "grad_norm": 1.3009643685029213, "learning_rate": 4.7151686556246343e-07, "loss": 0.0566, "step": 12724 }, { "epoch": 2.8953356086461888, "grad_norm": 1.3847183794340217, "learning_rate": 4.714302519120185e-07, "loss": 0.0199, "step": 12725 }, { "epoch": 2.8955631399317405, "grad_norm": 1.2199273797824735, "learning_rate": 4.7134364140025786e-07, "loss": 0.0598, "step": 12726 }, { "epoch": 2.8957906712172923, "grad_norm": 3.4493151699233833, "learning_rate": 4.712570340289519e-07, "loss": 0.1, "step": 12727 }, { "epoch": 2.896018202502844, "grad_norm": 1.3690778729951745, "learning_rate": 4.7117042979987044e-07, "loss": 0.0641, "step": 12728 }, { "epoch": 2.896245733788396, "grad_norm": 1.8785863311484237, "learning_rate": 4.710838287147839e-07, "loss": 0.042, "step": 12729 }, { "epoch": 2.8964732650739475, "grad_norm": 1.4555343813191344, "learning_rate": 4.709972307754619e-07, "loss": 0.0476, "step": 12730 }, { "epoch": 2.8967007963594993, "grad_norm": 1.554822940806239, "learning_rate": 4.709106359836744e-07, "loss": 0.1003, "step": 12731 }, { "epoch": 2.896928327645051, "grad_norm": 2.621636766020505, "learning_rate": 4.7082404434119147e-07, "loss": 0.0901, "step": 12732 }, { "epoch": 2.897155858930603, "grad_norm": 1.5411444541113128, "learning_rate": 4.707374558497824e-07, "loss": 0.0176, "step": 12733 }, { "epoch": 2.897383390216155, "grad_norm": 1.390499915258167, "learning_rate": 4.7065087051121755e-07, "loss": 0.0233, "step": 12734 }, { "epoch": 2.8976109215017063, "grad_norm": 1.0607494087970601, "learning_rate": 4.70564288327266e-07, "loss": 0.0746, "step": 12735 }, { "epoch": 2.8978384527872585, "grad_norm": 1.7238952011304474, "learning_rate": 4.704777092996976e-07, "loss": 0.0338, "step": 12736 }, { "epoch": 2.89806598407281, "grad_norm": 2.163212330418796, "learning_rate": 4.703911334302821e-07, "loss": 0.0611, "step": 12737 }, { "epoch": 2.898293515358362, "grad_norm": 1.4517742771474698, "learning_rate": 4.703045607207883e-07, "loss": 0.0195, "step": 12738 }, { "epoch": 2.8985210466439133, "grad_norm": 1.4135370432183096, "learning_rate": 4.7021799117298615e-07, "loss": 0.0715, "step": 12739 }, { "epoch": 2.8987485779294655, "grad_norm": 3.4154653904517276, "learning_rate": 4.7013142478864466e-07, "loss": 0.0726, "step": 12740 }, { "epoch": 2.898976109215017, "grad_norm": 1.4003971826609685, "learning_rate": 4.7004486156953346e-07, "loss": 0.0948, "step": 12741 }, { "epoch": 2.899203640500569, "grad_norm": 1.347808369233036, "learning_rate": 4.699583015174214e-07, "loss": 0.0185, "step": 12742 }, { "epoch": 2.8994311717861208, "grad_norm": 1.3364249494100695, "learning_rate": 4.698717446340775e-07, "loss": 0.0278, "step": 12743 }, { "epoch": 2.8996587030716725, "grad_norm": 1.3514141501859118, "learning_rate": 4.6978519092127146e-07, "loss": 0.0373, "step": 12744 }, { "epoch": 2.8998862343572243, "grad_norm": 1.1563120243328868, "learning_rate": 4.696986403807715e-07, "loss": 0.0577, "step": 12745 }, { "epoch": 2.900113765642776, "grad_norm": 2.2956373121340743, "learning_rate": 4.6961209301434705e-07, "loss": 0.0282, "step": 12746 }, { "epoch": 2.9003412969283278, "grad_norm": 2.5460168092858555, "learning_rate": 4.695255488237667e-07, "loss": 0.0434, "step": 12747 }, { "epoch": 2.9005688282138795, "grad_norm": 1.9945026221261124, "learning_rate": 4.6943900781079963e-07, "loss": 0.1452, "step": 12748 }, { "epoch": 2.9007963594994313, "grad_norm": 1.6659584753865473, "learning_rate": 4.6935246997721425e-07, "loss": 0.0685, "step": 12749 }, { "epoch": 2.901023890784983, "grad_norm": 2.2300838003534103, "learning_rate": 4.6926593532477916e-07, "loss": 0.0422, "step": 12750 }, { "epoch": 2.9012514220705348, "grad_norm": 1.609660116849706, "learning_rate": 4.6917940385526344e-07, "loss": 0.0138, "step": 12751 }, { "epoch": 2.9014789533560865, "grad_norm": 1.950490638053339, "learning_rate": 4.6909287557043505e-07, "loss": 0.0533, "step": 12752 }, { "epoch": 2.9017064846416383, "grad_norm": 1.2128728332167655, "learning_rate": 4.690063504720629e-07, "loss": 0.0491, "step": 12753 }, { "epoch": 2.90193401592719, "grad_norm": 1.6782765470831773, "learning_rate": 4.689198285619151e-07, "loss": 0.0845, "step": 12754 }, { "epoch": 2.9021615472127418, "grad_norm": 1.8283505369036845, "learning_rate": 4.688333098417604e-07, "loss": 0.1136, "step": 12755 }, { "epoch": 2.9023890784982935, "grad_norm": 3.1524633240902302, "learning_rate": 4.687467943133668e-07, "loss": 0.0563, "step": 12756 }, { "epoch": 2.9026166097838453, "grad_norm": 1.082703010824774, "learning_rate": 4.686602819785024e-07, "loss": 0.0727, "step": 12757 }, { "epoch": 2.902844141069397, "grad_norm": 2.689289951178903, "learning_rate": 4.6857377283893573e-07, "loss": 0.0722, "step": 12758 }, { "epoch": 2.903071672354949, "grad_norm": 3.015537315925803, "learning_rate": 4.6848726689643436e-07, "loss": 0.0741, "step": 12759 }, { "epoch": 2.9032992036405005, "grad_norm": 1.864297464360259, "learning_rate": 4.6840076415276684e-07, "loss": 0.0348, "step": 12760 }, { "epoch": 2.9035267349260523, "grad_norm": 1.9910765959652617, "learning_rate": 4.6831426460970104e-07, "loss": 0.0613, "step": 12761 }, { "epoch": 2.903754266211604, "grad_norm": 2.1325871660522604, "learning_rate": 4.682277682690044e-07, "loss": 0.084, "step": 12762 }, { "epoch": 2.903981797497156, "grad_norm": 1.599218928105052, "learning_rate": 4.6814127513244517e-07, "loss": 0.0438, "step": 12763 }, { "epoch": 2.9042093287827075, "grad_norm": 1.261672335101324, "learning_rate": 4.6805478520179094e-07, "loss": 0.0727, "step": 12764 }, { "epoch": 2.9044368600682593, "grad_norm": 1.8986532369004119, "learning_rate": 4.6796829847880967e-07, "loss": 0.0342, "step": 12765 }, { "epoch": 2.904664391353811, "grad_norm": 1.4686328582881418, "learning_rate": 4.678818149652686e-07, "loss": 0.0994, "step": 12766 }, { "epoch": 2.904891922639363, "grad_norm": 1.593574668951673, "learning_rate": 4.6779533466293553e-07, "loss": 0.086, "step": 12767 }, { "epoch": 2.9051194539249146, "grad_norm": 3.410461792743678, "learning_rate": 4.677088575735781e-07, "loss": 0.1481, "step": 12768 }, { "epoch": 2.9053469852104663, "grad_norm": 2.555665130751702, "learning_rate": 4.6762238369896324e-07, "loss": 0.0489, "step": 12769 }, { "epoch": 2.905574516496018, "grad_norm": 1.3189271151913657, "learning_rate": 4.675359130408588e-07, "loss": 0.033, "step": 12770 }, { "epoch": 2.90580204778157, "grad_norm": 1.0953586428149595, "learning_rate": 4.674494456010319e-07, "loss": 0.0472, "step": 12771 }, { "epoch": 2.9060295790671216, "grad_norm": 1.304243199270888, "learning_rate": 4.6736298138124983e-07, "loss": 0.0254, "step": 12772 }, { "epoch": 2.9062571103526738, "grad_norm": 2.1699589035285123, "learning_rate": 4.672765203832796e-07, "loss": 0.1198, "step": 12773 }, { "epoch": 2.906484641638225, "grad_norm": 1.4075777953250896, "learning_rate": 4.671900626088886e-07, "loss": 0.0736, "step": 12774 }, { "epoch": 2.9067121729237773, "grad_norm": 1.9063881430776435, "learning_rate": 4.6710360805984373e-07, "loss": 0.0775, "step": 12775 }, { "epoch": 2.9069397042093286, "grad_norm": 2.177653384255012, "learning_rate": 4.670171567379117e-07, "loss": 0.0518, "step": 12776 }, { "epoch": 2.9071672354948808, "grad_norm": 1.857894151932197, "learning_rate": 4.6693070864485983e-07, "loss": 0.0535, "step": 12777 }, { "epoch": 2.907394766780432, "grad_norm": 1.3426136417861327, "learning_rate": 4.668442637824547e-07, "loss": 0.0714, "step": 12778 }, { "epoch": 2.9076222980659843, "grad_norm": 2.150949646150847, "learning_rate": 4.667578221524633e-07, "loss": 0.0453, "step": 12779 }, { "epoch": 2.9078498293515356, "grad_norm": 1.6365414174765631, "learning_rate": 4.6667138375665217e-07, "loss": 0.0263, "step": 12780 }, { "epoch": 2.9080773606370878, "grad_norm": 1.9342441548931641, "learning_rate": 4.6658494859678785e-07, "loss": 0.0414, "step": 12781 }, { "epoch": 2.9083048919226395, "grad_norm": 0.6756405211541169, "learning_rate": 4.6649851667463725e-07, "loss": 0.0497, "step": 12782 }, { "epoch": 2.9085324232081913, "grad_norm": 1.5633360715046478, "learning_rate": 4.664120879919665e-07, "loss": 0.059, "step": 12783 }, { "epoch": 2.908759954493743, "grad_norm": 1.8994784190124296, "learning_rate": 4.663256625505423e-07, "loss": 0.106, "step": 12784 }, { "epoch": 2.908987485779295, "grad_norm": 1.2940538976217306, "learning_rate": 4.6623924035213083e-07, "loss": 0.0453, "step": 12785 }, { "epoch": 2.9092150170648465, "grad_norm": 1.899826967879299, "learning_rate": 4.6615282139849887e-07, "loss": 0.0838, "step": 12786 }, { "epoch": 2.9094425483503983, "grad_norm": 2.619971550786549, "learning_rate": 4.6606640569141216e-07, "loss": 0.0891, "step": 12787 }, { "epoch": 2.90967007963595, "grad_norm": 1.4505788254071135, "learning_rate": 4.6597999323263693e-07, "loss": 0.0601, "step": 12788 }, { "epoch": 2.909897610921502, "grad_norm": 1.629653864470163, "learning_rate": 4.6589358402393967e-07, "loss": 0.0484, "step": 12789 }, { "epoch": 2.9101251422070535, "grad_norm": 1.8447630991483261, "learning_rate": 4.6580717806708585e-07, "loss": 0.102, "step": 12790 }, { "epoch": 2.9103526734926053, "grad_norm": 1.329840394262191, "learning_rate": 4.6572077536384197e-07, "loss": 0.0212, "step": 12791 }, { "epoch": 2.910580204778157, "grad_norm": 1.1245191849147695, "learning_rate": 4.656343759159737e-07, "loss": 0.0534, "step": 12792 }, { "epoch": 2.910807736063709, "grad_norm": 4.140171582508129, "learning_rate": 4.655479797252468e-07, "loss": 0.0324, "step": 12793 }, { "epoch": 2.9110352673492605, "grad_norm": 1.799756246975818, "learning_rate": 4.6546158679342727e-07, "loss": 0.0365, "step": 12794 }, { "epoch": 2.9112627986348123, "grad_norm": 1.8376032497848223, "learning_rate": 4.653751971222806e-07, "loss": 0.0442, "step": 12795 }, { "epoch": 2.911490329920364, "grad_norm": 1.9583172758232328, "learning_rate": 4.652888107135727e-07, "loss": 0.0508, "step": 12796 }, { "epoch": 2.911717861205916, "grad_norm": 2.036072657999068, "learning_rate": 4.652024275690689e-07, "loss": 0.036, "step": 12797 }, { "epoch": 2.9119453924914676, "grad_norm": 0.670168229588655, "learning_rate": 4.65116047690535e-07, "loss": 0.006, "step": 12798 }, { "epoch": 2.9121729237770193, "grad_norm": 1.9031972529358832, "learning_rate": 4.6502967107973624e-07, "loss": 0.0382, "step": 12799 }, { "epoch": 2.912400455062571, "grad_norm": 1.3312751117637498, "learning_rate": 4.6494329773843785e-07, "loss": 0.0304, "step": 12800 }, { "epoch": 2.912627986348123, "grad_norm": 1.7963455053635997, "learning_rate": 4.6485692766840563e-07, "loss": 0.0957, "step": 12801 }, { "epoch": 2.9128555176336746, "grad_norm": 0.9803640281675318, "learning_rate": 4.647705608714043e-07, "loss": 0.0345, "step": 12802 }, { "epoch": 2.9130830489192263, "grad_norm": 1.8199375992120261, "learning_rate": 4.6468419734919927e-07, "loss": 0.0275, "step": 12803 }, { "epoch": 2.913310580204778, "grad_norm": 1.2825183273422727, "learning_rate": 4.6459783710355555e-07, "loss": 0.0269, "step": 12804 }, { "epoch": 2.91353811149033, "grad_norm": 1.302344163841928, "learning_rate": 4.645114801362385e-07, "loss": 0.0283, "step": 12805 }, { "epoch": 2.9137656427758816, "grad_norm": 1.7635075012539043, "learning_rate": 4.644251264490128e-07, "loss": 0.0551, "step": 12806 }, { "epoch": 2.9139931740614333, "grad_norm": 2.624260801120413, "learning_rate": 4.643387760436433e-07, "loss": 0.0647, "step": 12807 }, { "epoch": 2.914220705346985, "grad_norm": 0.8822830489248923, "learning_rate": 4.6425242892189527e-07, "loss": 0.0417, "step": 12808 }, { "epoch": 2.914448236632537, "grad_norm": 2.0454222652472343, "learning_rate": 4.641660850855329e-07, "loss": 0.1308, "step": 12809 }, { "epoch": 2.9146757679180886, "grad_norm": 2.062280321057891, "learning_rate": 4.6407974453632134e-07, "loss": 0.0828, "step": 12810 }, { "epoch": 2.9149032992036403, "grad_norm": 2.27112860532164, "learning_rate": 4.639934072760252e-07, "loss": 0.033, "step": 12811 }, { "epoch": 2.9151308304891925, "grad_norm": 1.322087111692614, "learning_rate": 4.639070733064087e-07, "loss": 0.1109, "step": 12812 }, { "epoch": 2.915358361774744, "grad_norm": 1.8871760408137657, "learning_rate": 4.6382074262923677e-07, "loss": 0.1068, "step": 12813 }, { "epoch": 2.915585893060296, "grad_norm": 1.2704184566449461, "learning_rate": 4.6373441524627346e-07, "loss": 0.0283, "step": 12814 }, { "epoch": 2.9158134243458473, "grad_norm": 1.220072967946355, "learning_rate": 4.6364809115928366e-07, "loss": 0.0308, "step": 12815 }, { "epoch": 2.9160409556313995, "grad_norm": 2.178314609946825, "learning_rate": 4.635617703700311e-07, "loss": 0.0404, "step": 12816 }, { "epoch": 2.916268486916951, "grad_norm": 1.7607129538990194, "learning_rate": 4.6347545288028046e-07, "loss": 0.121, "step": 12817 }, { "epoch": 2.916496018202503, "grad_norm": 1.6472809907419084, "learning_rate": 4.6338913869179586e-07, "loss": 0.0306, "step": 12818 }, { "epoch": 2.9167235494880543, "grad_norm": 3.1715569464627977, "learning_rate": 4.63302827806341e-07, "loss": 0.0281, "step": 12819 }, { "epoch": 2.9169510807736065, "grad_norm": 1.139021549491681, "learning_rate": 4.632165202256804e-07, "loss": 0.0291, "step": 12820 }, { "epoch": 2.9171786120591583, "grad_norm": 1.317397477736393, "learning_rate": 4.6313021595157765e-07, "loss": 0.0281, "step": 12821 }, { "epoch": 2.91740614334471, "grad_norm": 2.087384159989203, "learning_rate": 4.630439149857971e-07, "loss": 0.067, "step": 12822 }, { "epoch": 2.917633674630262, "grad_norm": 1.217602722465485, "learning_rate": 4.62957617330102e-07, "loss": 0.0915, "step": 12823 }, { "epoch": 2.9178612059158135, "grad_norm": 1.4786989583089094, "learning_rate": 4.628713229862566e-07, "loss": 0.0532, "step": 12824 }, { "epoch": 2.9180887372013653, "grad_norm": 1.1793866064779464, "learning_rate": 4.6278503195602465e-07, "loss": 0.0583, "step": 12825 }, { "epoch": 2.918316268486917, "grad_norm": 1.1087768640939903, "learning_rate": 4.6269874424116926e-07, "loss": 0.0711, "step": 12826 }, { "epoch": 2.918543799772469, "grad_norm": 2.296161306426702, "learning_rate": 4.626124598434544e-07, "loss": 0.0472, "step": 12827 }, { "epoch": 2.9187713310580206, "grad_norm": 0.977898287603634, "learning_rate": 4.6252617876464333e-07, "loss": 0.0193, "step": 12828 }, { "epoch": 2.9189988623435723, "grad_norm": 1.4067017789568899, "learning_rate": 4.6243990100649993e-07, "loss": 0.026, "step": 12829 }, { "epoch": 2.919226393629124, "grad_norm": 1.1381498153218357, "learning_rate": 4.6235362657078705e-07, "loss": 0.0155, "step": 12830 }, { "epoch": 2.919453924914676, "grad_norm": 1.0910054559724054, "learning_rate": 4.6226735545926805e-07, "loss": 0.0524, "step": 12831 }, { "epoch": 2.9196814562002276, "grad_norm": 1.961685893301103, "learning_rate": 4.621810876737065e-07, "loss": 0.0743, "step": 12832 }, { "epoch": 2.9199089874857793, "grad_norm": 1.8424266879525575, "learning_rate": 4.6209482321586513e-07, "loss": 0.0362, "step": 12833 }, { "epoch": 2.920136518771331, "grad_norm": 1.6314664919916735, "learning_rate": 4.6200856208750736e-07, "loss": 0.0887, "step": 12834 }, { "epoch": 2.920364050056883, "grad_norm": 2.3113902597089324, "learning_rate": 4.6192230429039587e-07, "loss": 0.0526, "step": 12835 }, { "epoch": 2.9205915813424346, "grad_norm": 1.480320357530497, "learning_rate": 4.6183604982629417e-07, "loss": 0.0726, "step": 12836 }, { "epoch": 2.9208191126279863, "grad_norm": 2.0496834847004943, "learning_rate": 4.617497986969646e-07, "loss": 0.0342, "step": 12837 }, { "epoch": 2.921046643913538, "grad_norm": 0.863517751097571, "learning_rate": 4.6166355090417e-07, "loss": 0.0136, "step": 12838 }, { "epoch": 2.92127417519909, "grad_norm": 1.6908321507674433, "learning_rate": 4.615773064496737e-07, "loss": 0.028, "step": 12839 }, { "epoch": 2.9215017064846416, "grad_norm": 2.082896118371895, "learning_rate": 4.614910653352375e-07, "loss": 0.0432, "step": 12840 }, { "epoch": 2.9217292377701933, "grad_norm": 1.0883201332371846, "learning_rate": 4.614048275626248e-07, "loss": 0.0233, "step": 12841 }, { "epoch": 2.921956769055745, "grad_norm": 1.8408073192265604, "learning_rate": 4.6131859313359757e-07, "loss": 0.0796, "step": 12842 }, { "epoch": 2.922184300341297, "grad_norm": 1.0697305299111017, "learning_rate": 4.612323620499187e-07, "loss": 0.0229, "step": 12843 }, { "epoch": 2.9224118316268486, "grad_norm": 1.06778858957125, "learning_rate": 4.6114613431335044e-07, "loss": 0.0464, "step": 12844 }, { "epoch": 2.9226393629124003, "grad_norm": 1.5876524786452315, "learning_rate": 4.6105990992565493e-07, "loss": 0.0743, "step": 12845 }, { "epoch": 2.922866894197952, "grad_norm": 1.8379083967285146, "learning_rate": 4.609736888885949e-07, "loss": 0.0359, "step": 12846 }, { "epoch": 2.923094425483504, "grad_norm": 3.7135647918483032, "learning_rate": 4.60887471203932e-07, "loss": 0.1775, "step": 12847 }, { "epoch": 2.9233219567690556, "grad_norm": 1.5360899362565568, "learning_rate": 4.608012568734288e-07, "loss": 0.0454, "step": 12848 }, { "epoch": 2.9235494880546073, "grad_norm": 1.4224147255834727, "learning_rate": 4.6071504589884726e-07, "loss": 0.0612, "step": 12849 }, { "epoch": 2.923777019340159, "grad_norm": 1.3468419016926734, "learning_rate": 4.6062883828194903e-07, "loss": 0.1226, "step": 12850 }, { "epoch": 2.9240045506257113, "grad_norm": 1.1967217518053108, "learning_rate": 4.605426340244965e-07, "loss": 0.0272, "step": 12851 }, { "epoch": 2.9242320819112626, "grad_norm": 1.3589026150045593, "learning_rate": 4.6045643312825123e-07, "loss": 0.0743, "step": 12852 }, { "epoch": 2.924459613196815, "grad_norm": 1.575654533554006, "learning_rate": 4.603702355949753e-07, "loss": 0.0598, "step": 12853 }, { "epoch": 2.924687144482366, "grad_norm": 2.161298118905974, "learning_rate": 4.6028404142642993e-07, "loss": 0.0357, "step": 12854 }, { "epoch": 2.9249146757679183, "grad_norm": 1.8854897383551936, "learning_rate": 4.6019785062437746e-07, "loss": 0.0734, "step": 12855 }, { "epoch": 2.9251422070534696, "grad_norm": 1.0297694996152564, "learning_rate": 4.601116631905791e-07, "loss": 0.0938, "step": 12856 }, { "epoch": 2.925369738339022, "grad_norm": 3.9700083914531117, "learning_rate": 4.6002547912679594e-07, "loss": 0.0338, "step": 12857 }, { "epoch": 2.925597269624573, "grad_norm": 2.926209134878487, "learning_rate": 4.599392984347903e-07, "loss": 0.033, "step": 12858 }, { "epoch": 2.9258248009101253, "grad_norm": 1.057790246111934, "learning_rate": 4.598531211163228e-07, "loss": 0.076, "step": 12859 }, { "epoch": 2.926052332195677, "grad_norm": 1.718568871027464, "learning_rate": 4.5976694717315517e-07, "loss": 0.0228, "step": 12860 }, { "epoch": 2.926279863481229, "grad_norm": 1.634642903131896, "learning_rate": 4.596807766070484e-07, "loss": 0.0626, "step": 12861 }, { "epoch": 2.9265073947667806, "grad_norm": 3.690049276447525, "learning_rate": 4.595946094197641e-07, "loss": 0.1235, "step": 12862 }, { "epoch": 2.9267349260523323, "grad_norm": 1.284359511862899, "learning_rate": 4.595084456130629e-07, "loss": 0.0835, "step": 12863 }, { "epoch": 2.926962457337884, "grad_norm": 1.116402503912454, "learning_rate": 4.594222851887059e-07, "loss": 0.0591, "step": 12864 }, { "epoch": 2.927189988623436, "grad_norm": 2.450078357966045, "learning_rate": 4.593361281484543e-07, "loss": 0.0715, "step": 12865 }, { "epoch": 2.9274175199089876, "grad_norm": 1.3676339002960107, "learning_rate": 4.592499744940687e-07, "loss": 0.0239, "step": 12866 }, { "epoch": 2.9276450511945393, "grad_norm": 2.4057895053430447, "learning_rate": 4.591638242273101e-07, "loss": 0.0697, "step": 12867 }, { "epoch": 2.927872582480091, "grad_norm": 1.809703740671683, "learning_rate": 4.590776773499395e-07, "loss": 0.022, "step": 12868 }, { "epoch": 2.928100113765643, "grad_norm": 2.265294828868591, "learning_rate": 4.5899153386371686e-07, "loss": 0.0573, "step": 12869 }, { "epoch": 2.9283276450511946, "grad_norm": 2.3235130707765554, "learning_rate": 4.589053937704034e-07, "loss": 0.0332, "step": 12870 }, { "epoch": 2.9285551763367463, "grad_norm": 10.196266641020966, "learning_rate": 4.588192570717595e-07, "loss": 0.128, "step": 12871 }, { "epoch": 2.928782707622298, "grad_norm": 1.216887669558456, "learning_rate": 4.587331237695458e-07, "loss": 0.0391, "step": 12872 }, { "epoch": 2.92901023890785, "grad_norm": 3.6096309831138775, "learning_rate": 4.5864699386552234e-07, "loss": 0.0399, "step": 12873 }, { "epoch": 2.9292377701934016, "grad_norm": 1.2083109464940136, "learning_rate": 4.5856086736144973e-07, "loss": 0.0916, "step": 12874 }, { "epoch": 2.9294653014789533, "grad_norm": 1.3920445416534242, "learning_rate": 4.584747442590883e-07, "loss": 0.0666, "step": 12875 }, { "epoch": 2.929692832764505, "grad_norm": 0.6099483048403295, "learning_rate": 4.583886245601979e-07, "loss": 0.0263, "step": 12876 }, { "epoch": 2.929920364050057, "grad_norm": 2.559354026961083, "learning_rate": 4.5830250826653905e-07, "loss": 0.0692, "step": 12877 }, { "epoch": 2.9301478953356086, "grad_norm": 1.5403164858233045, "learning_rate": 4.5821639537987144e-07, "loss": 0.0739, "step": 12878 }, { "epoch": 2.9303754266211604, "grad_norm": 1.550175079590196, "learning_rate": 4.5813028590195553e-07, "loss": 0.0726, "step": 12879 }, { "epoch": 2.930602957906712, "grad_norm": 1.7741744131369481, "learning_rate": 4.580441798345507e-07, "loss": 0.0204, "step": 12880 }, { "epoch": 2.930830489192264, "grad_norm": 2.3895790819901186, "learning_rate": 4.5795807717941727e-07, "loss": 0.0509, "step": 12881 }, { "epoch": 2.9310580204778156, "grad_norm": 2.253850049690598, "learning_rate": 4.5787197793831486e-07, "loss": 0.1121, "step": 12882 }, { "epoch": 2.9312855517633674, "grad_norm": 1.3987928737268365, "learning_rate": 4.5778588211300295e-07, "loss": 0.0264, "step": 12883 }, { "epoch": 2.931513083048919, "grad_norm": 2.5138622046963786, "learning_rate": 4.576997897052414e-07, "loss": 0.0796, "step": 12884 }, { "epoch": 2.931740614334471, "grad_norm": 1.278850073383481, "learning_rate": 4.576137007167897e-07, "loss": 0.0149, "step": 12885 }, { "epoch": 2.9319681456200226, "grad_norm": 1.9437327270823188, "learning_rate": 4.5752761514940764e-07, "loss": 0.0368, "step": 12886 }, { "epoch": 2.9321956769055744, "grad_norm": 1.5953830529569124, "learning_rate": 4.5744153300485435e-07, "loss": 0.0399, "step": 12887 }, { "epoch": 2.932423208191126, "grad_norm": 1.6607685321532866, "learning_rate": 4.5735545428488904e-07, "loss": 0.0597, "step": 12888 }, { "epoch": 2.932650739476678, "grad_norm": 1.427471024676933, "learning_rate": 4.572693789912715e-07, "loss": 0.039, "step": 12889 }, { "epoch": 2.93287827076223, "grad_norm": 0.9530431730936352, "learning_rate": 4.5718330712576046e-07, "loss": 0.0241, "step": 12890 }, { "epoch": 2.9331058020477814, "grad_norm": 1.4920716038243778, "learning_rate": 4.570972386901154e-07, "loss": 0.0448, "step": 12891 }, { "epoch": 2.9333333333333336, "grad_norm": 1.4101932674128812, "learning_rate": 4.5701117368609505e-07, "loss": 0.1017, "step": 12892 }, { "epoch": 2.933560864618885, "grad_norm": 0.8647671590959068, "learning_rate": 4.56925112115459e-07, "loss": 0.0628, "step": 12893 }, { "epoch": 2.933788395904437, "grad_norm": 1.9827476473829184, "learning_rate": 4.5683905397996573e-07, "loss": 0.0673, "step": 12894 }, { "epoch": 2.9340159271899884, "grad_norm": 1.2236282807780798, "learning_rate": 4.5675299928137406e-07, "loss": 0.0852, "step": 12895 }, { "epoch": 2.9342434584755406, "grad_norm": 1.857094077477277, "learning_rate": 4.566669480214432e-07, "loss": 0.0582, "step": 12896 }, { "epoch": 2.934470989761092, "grad_norm": 0.9324416731529783, "learning_rate": 4.565809002019314e-07, "loss": 0.0589, "step": 12897 }, { "epoch": 2.934698521046644, "grad_norm": 2.096760615772248, "learning_rate": 4.564948558245977e-07, "loss": 0.0651, "step": 12898 }, { "epoch": 2.934926052332196, "grad_norm": 1.5337754059549098, "learning_rate": 4.5640881489120067e-07, "loss": 0.0329, "step": 12899 }, { "epoch": 2.9351535836177476, "grad_norm": 2.3474374391981487, "learning_rate": 4.5632277740349845e-07, "loss": 0.0442, "step": 12900 }, { "epoch": 2.9353811149032993, "grad_norm": 1.5507429693963757, "learning_rate": 4.5623674336324987e-07, "loss": 0.0682, "step": 12901 }, { "epoch": 2.935608646188851, "grad_norm": 1.1892555799548736, "learning_rate": 4.56150712772213e-07, "loss": 0.0276, "step": 12902 }, { "epoch": 2.935836177474403, "grad_norm": 1.4420319750687873, "learning_rate": 4.5606468563214664e-07, "loss": 0.0972, "step": 12903 }, { "epoch": 2.9360637087599546, "grad_norm": 1.2041598821907613, "learning_rate": 4.559786619448084e-07, "loss": 0.0705, "step": 12904 }, { "epoch": 2.9362912400455063, "grad_norm": 2.4393418948306236, "learning_rate": 4.558926417119569e-07, "loss": 0.0558, "step": 12905 }, { "epoch": 2.936518771331058, "grad_norm": 1.5748698398647856, "learning_rate": 4.558066249353503e-07, "loss": 0.0539, "step": 12906 }, { "epoch": 2.93674630261661, "grad_norm": 2.0981669336383812, "learning_rate": 4.5572061161674613e-07, "loss": 0.0353, "step": 12907 }, { "epoch": 2.9369738339021616, "grad_norm": 1.994315238088218, "learning_rate": 4.5563460175790277e-07, "loss": 0.0404, "step": 12908 }, { "epoch": 2.9372013651877134, "grad_norm": 2.5086027501686545, "learning_rate": 4.555485953605778e-07, "loss": 0.1043, "step": 12909 }, { "epoch": 2.937428896473265, "grad_norm": 2.697083957091369, "learning_rate": 4.554625924265295e-07, "loss": 0.0672, "step": 12910 }, { "epoch": 2.937656427758817, "grad_norm": 1.0892977978099543, "learning_rate": 4.5537659295751507e-07, "loss": 0.019, "step": 12911 }, { "epoch": 2.9378839590443686, "grad_norm": 2.717171374676071, "learning_rate": 4.5529059695529253e-07, "loss": 0.0571, "step": 12912 }, { "epoch": 2.9381114903299204, "grad_norm": 2.031097727019057, "learning_rate": 4.5520460442161946e-07, "loss": 0.0965, "step": 12913 }, { "epoch": 2.938339021615472, "grad_norm": 1.9889410199723294, "learning_rate": 4.551186153582532e-07, "loss": 0.0856, "step": 12914 }, { "epoch": 2.938566552901024, "grad_norm": 2.5924952040143046, "learning_rate": 4.5503262976695136e-07, "loss": 0.0883, "step": 12915 }, { "epoch": 2.9387940841865756, "grad_norm": 0.8907969745429866, "learning_rate": 4.5494664764947114e-07, "loss": 0.0413, "step": 12916 }, { "epoch": 2.9390216154721274, "grad_norm": 1.5378970750995962, "learning_rate": 4.548606690075703e-07, "loss": 0.0211, "step": 12917 }, { "epoch": 2.939249146757679, "grad_norm": 1.1474631934906263, "learning_rate": 4.547746938430056e-07, "loss": 0.0245, "step": 12918 }, { "epoch": 2.939476678043231, "grad_norm": 1.2077042162892495, "learning_rate": 4.5468872215753434e-07, "loss": 0.1258, "step": 12919 }, { "epoch": 2.9397042093287826, "grad_norm": 2.056293003652253, "learning_rate": 4.546027539529138e-07, "loss": 0.1277, "step": 12920 }, { "epoch": 2.9399317406143344, "grad_norm": 1.6300199637430401, "learning_rate": 4.545167892309008e-07, "loss": 0.1278, "step": 12921 }, { "epoch": 2.940159271899886, "grad_norm": 2.3967167195874692, "learning_rate": 4.544308279932526e-07, "loss": 0.0522, "step": 12922 }, { "epoch": 2.940386803185438, "grad_norm": 2.1323197244394634, "learning_rate": 4.5434487024172564e-07, "loss": 0.0596, "step": 12923 }, { "epoch": 2.9406143344709896, "grad_norm": 2.6409427290106597, "learning_rate": 4.542589159780772e-07, "loss": 0.1003, "step": 12924 }, { "epoch": 2.9408418657565414, "grad_norm": 1.4174744035871953, "learning_rate": 4.5417296520406385e-07, "loss": 0.036, "step": 12925 }, { "epoch": 2.941069397042093, "grad_norm": 1.0543881398918011, "learning_rate": 4.5408701792144203e-07, "loss": 0.0455, "step": 12926 }, { "epoch": 2.941296928327645, "grad_norm": 0.8777770763565663, "learning_rate": 4.5400107413196863e-07, "loss": 0.0221, "step": 12927 }, { "epoch": 2.9415244596131966, "grad_norm": 2.098727839303417, "learning_rate": 4.539151338374e-07, "loss": 0.0723, "step": 12928 }, { "epoch": 2.941751990898749, "grad_norm": 1.8040713305895977, "learning_rate": 4.53829197039493e-07, "loss": 0.049, "step": 12929 }, { "epoch": 2.9419795221843, "grad_norm": 1.5554935797249336, "learning_rate": 4.5374326374000347e-07, "loss": 0.114, "step": 12930 }, { "epoch": 2.9422070534698523, "grad_norm": 0.7420063602751064, "learning_rate": 4.536573339406881e-07, "loss": 0.0079, "step": 12931 }, { "epoch": 2.9424345847554036, "grad_norm": 1.575324476859482, "learning_rate": 4.5357140764330313e-07, "loss": 0.0601, "step": 12932 }, { "epoch": 2.942662116040956, "grad_norm": 2.2841059269801156, "learning_rate": 4.534854848496044e-07, "loss": 0.0755, "step": 12933 }, { "epoch": 2.942889647326507, "grad_norm": 2.6612322414826615, "learning_rate": 4.5339956556134836e-07, "loss": 0.051, "step": 12934 }, { "epoch": 2.9431171786120593, "grad_norm": 2.448993275405843, "learning_rate": 4.533136497802909e-07, "loss": 0.0368, "step": 12935 }, { "epoch": 2.9433447098976107, "grad_norm": 2.5055430788420776, "learning_rate": 4.532277375081881e-07, "loss": 0.0672, "step": 12936 }, { "epoch": 2.943572241183163, "grad_norm": 3.046261592828582, "learning_rate": 4.5314182874679576e-07, "loss": 0.0848, "step": 12937 }, { "epoch": 2.9437997724687146, "grad_norm": 1.208791245221186, "learning_rate": 4.5305592349786954e-07, "loss": 0.0446, "step": 12938 }, { "epoch": 2.9440273037542664, "grad_norm": 1.710247840697119, "learning_rate": 4.529700217631655e-07, "loss": 0.1086, "step": 12939 }, { "epoch": 2.944254835039818, "grad_norm": 2.5475689014457856, "learning_rate": 4.52884123544439e-07, "loss": 0.0443, "step": 12940 }, { "epoch": 2.94448236632537, "grad_norm": 1.8839174472970492, "learning_rate": 4.5279822884344593e-07, "loss": 0.072, "step": 12941 }, { "epoch": 2.9447098976109216, "grad_norm": 1.328049509707944, "learning_rate": 4.527123376619415e-07, "loss": 0.1017, "step": 12942 }, { "epoch": 2.9449374288964734, "grad_norm": 0.9837298402537764, "learning_rate": 4.5262645000168166e-07, "loss": 0.0214, "step": 12943 }, { "epoch": 2.945164960182025, "grad_norm": 1.274024178244271, "learning_rate": 4.5254056586442135e-07, "loss": 0.0753, "step": 12944 }, { "epoch": 2.945392491467577, "grad_norm": 1.5263769632992228, "learning_rate": 4.5245468525191597e-07, "loss": 0.1084, "step": 12945 }, { "epoch": 2.9456200227531286, "grad_norm": 1.914364148657164, "learning_rate": 4.52368808165921e-07, "loss": 0.039, "step": 12946 }, { "epoch": 2.9458475540386804, "grad_norm": 1.5960103025496384, "learning_rate": 4.522829346081912e-07, "loss": 0.0771, "step": 12947 }, { "epoch": 2.946075085324232, "grad_norm": 1.8046410749051436, "learning_rate": 4.52197064580482e-07, "loss": 0.0566, "step": 12948 }, { "epoch": 2.946302616609784, "grad_norm": 1.2020359601214634, "learning_rate": 4.5211119808454823e-07, "loss": 0.0851, "step": 12949 }, { "epoch": 2.9465301478953356, "grad_norm": 1.05860259139918, "learning_rate": 4.5202533512214514e-07, "loss": 0.0233, "step": 12950 }, { "epoch": 2.9467576791808874, "grad_norm": 2.1335357225797047, "learning_rate": 4.519394756950274e-07, "loss": 0.1709, "step": 12951 }, { "epoch": 2.946985210466439, "grad_norm": 2.171434562260309, "learning_rate": 4.518536198049496e-07, "loss": 0.0535, "step": 12952 }, { "epoch": 2.947212741751991, "grad_norm": 2.6313380267826294, "learning_rate": 4.5176776745366706e-07, "loss": 0.1277, "step": 12953 }, { "epoch": 2.9474402730375426, "grad_norm": 2.012706515557103, "learning_rate": 4.5168191864293374e-07, "loss": 0.0907, "step": 12954 }, { "epoch": 2.9476678043230944, "grad_norm": 1.192718164428443, "learning_rate": 4.515960733745047e-07, "loss": 0.1101, "step": 12955 }, { "epoch": 2.947895335608646, "grad_norm": 2.726645801577556, "learning_rate": 4.5151023165013457e-07, "loss": 0.0626, "step": 12956 }, { "epoch": 2.948122866894198, "grad_norm": 1.745952189004999, "learning_rate": 4.514243934715773e-07, "loss": 0.0551, "step": 12957 }, { "epoch": 2.9483503981797496, "grad_norm": 1.4486214989252422, "learning_rate": 4.5133855884058764e-07, "loss": 0.061, "step": 12958 }, { "epoch": 2.9485779294653014, "grad_norm": 1.9133547089299645, "learning_rate": 4.512527277589196e-07, "loss": 0.0547, "step": 12959 }, { "epoch": 2.948805460750853, "grad_norm": 1.7383685859019582, "learning_rate": 4.5116690022832796e-07, "loss": 0.0732, "step": 12960 }, { "epoch": 2.949032992036405, "grad_norm": 1.1373285097918044, "learning_rate": 4.5108107625056614e-07, "loss": 0.0399, "step": 12961 }, { "epoch": 2.9492605233219567, "grad_norm": 1.306216962235557, "learning_rate": 4.509952558273889e-07, "loss": 0.0681, "step": 12962 }, { "epoch": 2.9494880546075084, "grad_norm": 1.7146531150483755, "learning_rate": 4.509094389605499e-07, "loss": 0.1782, "step": 12963 }, { "epoch": 2.94971558589306, "grad_norm": 1.9627734175414704, "learning_rate": 4.5082362565180305e-07, "loss": 0.043, "step": 12964 }, { "epoch": 2.949943117178612, "grad_norm": 1.7698083576605679, "learning_rate": 4.5073781590290233e-07, "loss": 0.0889, "step": 12965 }, { "epoch": 2.9501706484641637, "grad_norm": 2.0232093114801812, "learning_rate": 4.506520097156014e-07, "loss": 0.0635, "step": 12966 }, { "epoch": 2.9503981797497154, "grad_norm": 1.550831714344691, "learning_rate": 4.5056620709165436e-07, "loss": 0.0209, "step": 12967 }, { "epoch": 2.9506257110352676, "grad_norm": 1.4216658946107414, "learning_rate": 4.504804080328143e-07, "loss": 0.0215, "step": 12968 }, { "epoch": 2.950853242320819, "grad_norm": 1.7574854832340892, "learning_rate": 4.5039461254083525e-07, "loss": 0.1657, "step": 12969 }, { "epoch": 2.951080773606371, "grad_norm": 1.4202340825083504, "learning_rate": 4.5030882061747063e-07, "loss": 0.0659, "step": 12970 }, { "epoch": 2.9513083048919224, "grad_norm": 1.8271182207596968, "learning_rate": 4.5022303226447365e-07, "loss": 0.0351, "step": 12971 }, { "epoch": 2.9515358361774746, "grad_norm": 1.5320586484777226, "learning_rate": 4.501372474835978e-07, "loss": 0.0591, "step": 12972 }, { "epoch": 2.951763367463026, "grad_norm": 1.1603458374503832, "learning_rate": 4.5005146627659623e-07, "loss": 0.0277, "step": 12973 }, { "epoch": 2.951990898748578, "grad_norm": 1.3309953487632438, "learning_rate": 4.4996568864522266e-07, "loss": 0.0596, "step": 12974 }, { "epoch": 2.9522184300341294, "grad_norm": 2.442123698464722, "learning_rate": 4.498799145912296e-07, "loss": 0.0388, "step": 12975 }, { "epoch": 2.9524459613196816, "grad_norm": 1.9519351263085443, "learning_rate": 4.497941441163703e-07, "loss": 0.056, "step": 12976 }, { "epoch": 2.9526734926052334, "grad_norm": 2.4163639442503992, "learning_rate": 4.4970837722239804e-07, "loss": 0.0372, "step": 12977 }, { "epoch": 2.952901023890785, "grad_norm": 2.2577833444777164, "learning_rate": 4.4962261391106517e-07, "loss": 0.0592, "step": 12978 }, { "epoch": 2.953128555176337, "grad_norm": 2.000305707239237, "learning_rate": 4.4953685418412523e-07, "loss": 0.0508, "step": 12979 }, { "epoch": 2.9533560864618886, "grad_norm": 1.7380506422147517, "learning_rate": 4.4945109804333045e-07, "loss": 0.0246, "step": 12980 }, { "epoch": 2.9535836177474404, "grad_norm": 2.498621749976715, "learning_rate": 4.4936534549043383e-07, "loss": 0.034, "step": 12981 }, { "epoch": 2.953811149032992, "grad_norm": 0.7776472736697876, "learning_rate": 4.4927959652718804e-07, "loss": 0.0528, "step": 12982 }, { "epoch": 2.954038680318544, "grad_norm": 1.7330879874383434, "learning_rate": 4.491938511553451e-07, "loss": 0.1086, "step": 12983 }, { "epoch": 2.9542662116040956, "grad_norm": 1.4701323268776592, "learning_rate": 4.4910810937665813e-07, "loss": 0.0203, "step": 12984 }, { "epoch": 2.9544937428896474, "grad_norm": 1.6059729946571162, "learning_rate": 4.490223711928791e-07, "loss": 0.0449, "step": 12985 }, { "epoch": 2.954721274175199, "grad_norm": 1.1413846237916592, "learning_rate": 4.4893663660576077e-07, "loss": 0.018, "step": 12986 }, { "epoch": 2.954948805460751, "grad_norm": 1.3751966261878346, "learning_rate": 4.4885090561705487e-07, "loss": 0.035, "step": 12987 }, { "epoch": 2.9551763367463026, "grad_norm": 1.1588588602922085, "learning_rate": 4.4876517822851397e-07, "loss": 0.0225, "step": 12988 }, { "epoch": 2.9554038680318544, "grad_norm": 1.4184871269834545, "learning_rate": 4.486794544418903e-07, "loss": 0.0361, "step": 12989 }, { "epoch": 2.955631399317406, "grad_norm": 1.3960692632980387, "learning_rate": 4.485937342589353e-07, "loss": 0.0904, "step": 12990 }, { "epoch": 2.955858930602958, "grad_norm": 1.8689899772665568, "learning_rate": 4.4850801768140147e-07, "loss": 0.0638, "step": 12991 }, { "epoch": 2.9560864618885097, "grad_norm": 1.1348833540988987, "learning_rate": 4.4842230471104036e-07, "loss": 0.0888, "step": 12992 }, { "epoch": 2.9563139931740614, "grad_norm": 1.3574891097508517, "learning_rate": 4.4833659534960426e-07, "loss": 0.0551, "step": 12993 }, { "epoch": 2.956541524459613, "grad_norm": 1.5384705969040584, "learning_rate": 4.482508895988445e-07, "loss": 0.0992, "step": 12994 }, { "epoch": 2.956769055745165, "grad_norm": 1.502278815725139, "learning_rate": 4.481651874605127e-07, "loss": 0.0937, "step": 12995 }, { "epoch": 2.9569965870307167, "grad_norm": 1.3287528798037573, "learning_rate": 4.4807948893636085e-07, "loss": 0.1284, "step": 12996 }, { "epoch": 2.9572241183162684, "grad_norm": 1.0587432514906663, "learning_rate": 4.4799379402813995e-07, "loss": 0.0229, "step": 12997 }, { "epoch": 2.95745164960182, "grad_norm": 1.484017291101202, "learning_rate": 4.4790810273760194e-07, "loss": 0.1013, "step": 12998 }, { "epoch": 2.957679180887372, "grad_norm": 2.0741332557641274, "learning_rate": 4.478224150664978e-07, "loss": 0.1863, "step": 12999 }, { "epoch": 2.9579067121729237, "grad_norm": 2.019751406739628, "learning_rate": 4.477367310165793e-07, "loss": 0.0688, "step": 13000 }, { "epoch": 2.9581342434584754, "grad_norm": 2.3690821792608534, "learning_rate": 4.4765105058959715e-07, "loss": 0.1192, "step": 13001 }, { "epoch": 2.958361774744027, "grad_norm": 0.854579825208334, "learning_rate": 4.475653737873027e-07, "loss": 0.0263, "step": 13002 }, { "epoch": 2.958589306029579, "grad_norm": 1.590484659047713, "learning_rate": 4.474797006114473e-07, "loss": 0.0363, "step": 13003 }, { "epoch": 2.9588168373151307, "grad_norm": 1.6473852491958563, "learning_rate": 4.473940310637814e-07, "loss": 0.0703, "step": 13004 }, { "epoch": 2.9590443686006824, "grad_norm": 1.5889939990195963, "learning_rate": 4.4730836514605643e-07, "loss": 0.1164, "step": 13005 }, { "epoch": 2.959271899886234, "grad_norm": 1.7757623527197235, "learning_rate": 4.4722270286002316e-07, "loss": 0.0931, "step": 13006 }, { "epoch": 2.9594994311717864, "grad_norm": 0.9975038647780088, "learning_rate": 4.4713704420743195e-07, "loss": 0.1061, "step": 13007 }, { "epoch": 2.9597269624573377, "grad_norm": 1.2136057197796417, "learning_rate": 4.4705138919003394e-07, "loss": 0.0384, "step": 13008 }, { "epoch": 2.95995449374289, "grad_norm": 2.31606085219207, "learning_rate": 4.4696573780957956e-07, "loss": 0.0611, "step": 13009 }, { "epoch": 2.960182025028441, "grad_norm": 2.165830630552365, "learning_rate": 4.468800900678197e-07, "loss": 0.1054, "step": 13010 }, { "epoch": 2.9604095563139934, "grad_norm": 1.1676011106074795, "learning_rate": 4.467944459665043e-07, "loss": 0.073, "step": 13011 }, { "epoch": 2.9606370875995447, "grad_norm": 1.2399931013700907, "learning_rate": 4.467088055073843e-07, "loss": 0.0367, "step": 13012 }, { "epoch": 2.960864618885097, "grad_norm": 2.555014816987186, "learning_rate": 4.466231686922098e-07, "loss": 0.0773, "step": 13013 }, { "epoch": 2.961092150170648, "grad_norm": 0.9632545369917679, "learning_rate": 4.4653753552273085e-07, "loss": 0.0373, "step": 13014 }, { "epoch": 2.9613196814562004, "grad_norm": 1.0197757457856507, "learning_rate": 4.464519060006979e-07, "loss": 0.0176, "step": 13015 }, { "epoch": 2.961547212741752, "grad_norm": 1.5370303364857272, "learning_rate": 4.4636628012786084e-07, "loss": 0.0648, "step": 13016 }, { "epoch": 2.961774744027304, "grad_norm": 0.8276802198836554, "learning_rate": 4.462806579059702e-07, "loss": 0.0677, "step": 13017 }, { "epoch": 2.9620022753128556, "grad_norm": 0.8453003262844131, "learning_rate": 4.4619503933677534e-07, "loss": 0.0142, "step": 13018 }, { "epoch": 2.9622298065984074, "grad_norm": 1.5409038360551535, "learning_rate": 4.4610942442202653e-07, "loss": 0.0453, "step": 13019 }, { "epoch": 2.962457337883959, "grad_norm": 1.2482747977417479, "learning_rate": 4.4602381316347353e-07, "loss": 0.0533, "step": 13020 }, { "epoch": 2.962684869169511, "grad_norm": 1.9145250430607152, "learning_rate": 4.459382055628658e-07, "loss": 0.0429, "step": 13021 }, { "epoch": 2.9629124004550627, "grad_norm": 2.3020590512566392, "learning_rate": 4.4585260162195337e-07, "loss": 0.1335, "step": 13022 }, { "epoch": 2.9631399317406144, "grad_norm": 1.9941026743407337, "learning_rate": 4.4576700134248557e-07, "loss": 0.0592, "step": 13023 }, { "epoch": 2.963367463026166, "grad_norm": 1.6208707450362119, "learning_rate": 4.456814047262122e-07, "loss": 0.0387, "step": 13024 }, { "epoch": 2.963594994311718, "grad_norm": 1.6394154791809012, "learning_rate": 4.455958117748824e-07, "loss": 0.0573, "step": 13025 }, { "epoch": 2.9638225255972697, "grad_norm": 2.1055712436792455, "learning_rate": 4.455102224902455e-07, "loss": 0.1403, "step": 13026 }, { "epoch": 2.9640500568828214, "grad_norm": 1.7130665645974061, "learning_rate": 4.454246368740512e-07, "loss": 0.0376, "step": 13027 }, { "epoch": 2.964277588168373, "grad_norm": 1.3602281669732832, "learning_rate": 4.453390549280482e-07, "loss": 0.0951, "step": 13028 }, { "epoch": 2.964505119453925, "grad_norm": 1.3308830099218025, "learning_rate": 4.4525347665398594e-07, "loss": 0.096, "step": 13029 }, { "epoch": 2.9647326507394767, "grad_norm": 1.3128118319785187, "learning_rate": 4.4516790205361345e-07, "loss": 0.0193, "step": 13030 }, { "epoch": 2.9649601820250284, "grad_norm": 1.6723883896636351, "learning_rate": 4.450823311286798e-07, "loss": 0.0414, "step": 13031 }, { "epoch": 2.96518771331058, "grad_norm": 1.746300844421504, "learning_rate": 4.4499676388093373e-07, "loss": 0.0278, "step": 13032 }, { "epoch": 2.965415244596132, "grad_norm": 1.940747654314322, "learning_rate": 4.4491120031212406e-07, "loss": 0.0384, "step": 13033 }, { "epoch": 2.9656427758816837, "grad_norm": 1.9551824412464693, "learning_rate": 4.4482564042399987e-07, "loss": 0.0992, "step": 13034 }, { "epoch": 2.9658703071672354, "grad_norm": 2.050051545613756, "learning_rate": 4.447400842183093e-07, "loss": 0.0469, "step": 13035 }, { "epoch": 2.966097838452787, "grad_norm": 1.528342415215035, "learning_rate": 4.446545316968015e-07, "loss": 0.1387, "step": 13036 }, { "epoch": 2.966325369738339, "grad_norm": 3.4742237839515955, "learning_rate": 4.445689828612246e-07, "loss": 0.0972, "step": 13037 }, { "epoch": 2.9665529010238907, "grad_norm": 1.1742678963751452, "learning_rate": 4.444834377133275e-07, "loss": 0.0117, "step": 13038 }, { "epoch": 2.9667804323094424, "grad_norm": 2.16127275654834, "learning_rate": 4.4439789625485826e-07, "loss": 0.0588, "step": 13039 }, { "epoch": 2.967007963594994, "grad_norm": 1.094715849904871, "learning_rate": 4.4431235848756505e-07, "loss": 0.0387, "step": 13040 }, { "epoch": 2.967235494880546, "grad_norm": 2.0375271063276945, "learning_rate": 4.442268244131965e-07, "loss": 0.1173, "step": 13041 }, { "epoch": 2.9674630261660977, "grad_norm": 1.591504183630176, "learning_rate": 4.4414129403350046e-07, "loss": 0.0503, "step": 13042 }, { "epoch": 2.9676905574516494, "grad_norm": 2.0028668595133494, "learning_rate": 4.440557673502253e-07, "loss": 0.0508, "step": 13043 }, { "epoch": 2.967918088737201, "grad_norm": 2.349525308813936, "learning_rate": 4.439702443651189e-07, "loss": 0.0692, "step": 13044 }, { "epoch": 2.968145620022753, "grad_norm": 1.0638485997219274, "learning_rate": 4.43884725079929e-07, "loss": 0.0228, "step": 13045 }, { "epoch": 2.968373151308305, "grad_norm": 1.2004334314234137, "learning_rate": 4.4379920949640385e-07, "loss": 0.0383, "step": 13046 }, { "epoch": 2.9686006825938565, "grad_norm": 1.8968195469269895, "learning_rate": 4.4371369761629075e-07, "loss": 0.0473, "step": 13047 }, { "epoch": 2.9688282138794087, "grad_norm": 1.3231070428851897, "learning_rate": 4.4362818944133773e-07, "loss": 0.074, "step": 13048 }, { "epoch": 2.96905574516496, "grad_norm": 1.5089023219690527, "learning_rate": 4.435426849732923e-07, "loss": 0.124, "step": 13049 }, { "epoch": 2.969283276450512, "grad_norm": 0.9972034559547887, "learning_rate": 4.434571842139023e-07, "loss": 0.0224, "step": 13050 }, { "epoch": 2.9695108077360635, "grad_norm": 1.4482834961694113, "learning_rate": 4.433716871649149e-07, "loss": 0.0886, "step": 13051 }, { "epoch": 2.9697383390216157, "grad_norm": 1.8033820020574873, "learning_rate": 4.4328619382807736e-07, "loss": 0.1, "step": 13052 }, { "epoch": 2.969965870307167, "grad_norm": 1.1174202278475414, "learning_rate": 4.4320070420513757e-07, "loss": 0.0816, "step": 13053 }, { "epoch": 2.970193401592719, "grad_norm": 1.6168788179611644, "learning_rate": 4.431152182978421e-07, "loss": 0.044, "step": 13054 }, { "epoch": 2.970420932878271, "grad_norm": 1.703154128905703, "learning_rate": 4.430297361079386e-07, "loss": 0.1169, "step": 13055 }, { "epoch": 2.9706484641638227, "grad_norm": 1.4182095318027819, "learning_rate": 4.42944257637174e-07, "loss": 0.0422, "step": 13056 }, { "epoch": 2.9708759954493744, "grad_norm": 2.171894125920818, "learning_rate": 4.4285878288729545e-07, "loss": 0.0406, "step": 13057 }, { "epoch": 2.971103526734926, "grad_norm": 1.0574974312146397, "learning_rate": 4.4277331186004976e-07, "loss": 0.0425, "step": 13058 }, { "epoch": 2.971331058020478, "grad_norm": 1.1890340190620456, "learning_rate": 4.4268784455718374e-07, "loss": 0.0521, "step": 13059 }, { "epoch": 2.9715585893060297, "grad_norm": 1.6470383155884767, "learning_rate": 4.4260238098044446e-07, "loss": 0.0208, "step": 13060 }, { "epoch": 2.9717861205915814, "grad_norm": 1.549761849813651, "learning_rate": 4.425169211315783e-07, "loss": 0.1237, "step": 13061 }, { "epoch": 2.972013651877133, "grad_norm": 1.5210465151016412, "learning_rate": 4.4243146501233216e-07, "loss": 0.092, "step": 13062 }, { "epoch": 2.972241183162685, "grad_norm": 1.2134670049358136, "learning_rate": 4.423460126244526e-07, "loss": 0.0376, "step": 13063 }, { "epoch": 2.9724687144482367, "grad_norm": 1.9903771151317675, "learning_rate": 4.422605639696859e-07, "loss": 0.0611, "step": 13064 }, { "epoch": 2.9726962457337884, "grad_norm": 1.5439630118313374, "learning_rate": 4.421751190497786e-07, "loss": 0.129, "step": 13065 }, { "epoch": 2.97292377701934, "grad_norm": 2.127198121488106, "learning_rate": 4.4208967786647696e-07, "loss": 0.0539, "step": 13066 }, { "epoch": 2.973151308304892, "grad_norm": 2.1268633104500494, "learning_rate": 4.420042404215276e-07, "loss": 0.1636, "step": 13067 }, { "epoch": 2.9733788395904437, "grad_norm": 1.83972656941225, "learning_rate": 4.4191880671667615e-07, "loss": 0.0464, "step": 13068 }, { "epoch": 2.9736063708759954, "grad_norm": 1.068679862483366, "learning_rate": 4.4183337675366906e-07, "loss": 0.0792, "step": 13069 }, { "epoch": 2.973833902161547, "grad_norm": 1.72333870861239, "learning_rate": 4.4174795053425256e-07, "loss": 0.1616, "step": 13070 }, { "epoch": 2.974061433447099, "grad_norm": 2.384117752997077, "learning_rate": 4.4166252806017196e-07, "loss": 0.0672, "step": 13071 }, { "epoch": 2.9742889647326507, "grad_norm": 1.8050386307778457, "learning_rate": 4.4157710933317375e-07, "loss": 0.0273, "step": 13072 }, { "epoch": 2.9745164960182024, "grad_norm": 1.2571487006375468, "learning_rate": 4.4149169435500323e-07, "loss": 0.0785, "step": 13073 }, { "epoch": 2.974744027303754, "grad_norm": 1.948006002917549, "learning_rate": 4.414062831274068e-07, "loss": 0.0333, "step": 13074 }, { "epoch": 2.974971558589306, "grad_norm": 1.422083405148932, "learning_rate": 4.413208756521294e-07, "loss": 0.0316, "step": 13075 }, { "epoch": 2.9751990898748577, "grad_norm": 1.1032271593549776, "learning_rate": 4.41235471930917e-07, "loss": 0.0466, "step": 13076 }, { "epoch": 2.9754266211604095, "grad_norm": 1.9584843544509556, "learning_rate": 4.411500719655151e-07, "loss": 0.0331, "step": 13077 }, { "epoch": 2.975654152445961, "grad_norm": 1.7321791709533056, "learning_rate": 4.4106467575766893e-07, "loss": 0.0607, "step": 13078 }, { "epoch": 2.975881683731513, "grad_norm": 1.5006548317168842, "learning_rate": 4.4097928330912395e-07, "loss": 0.0204, "step": 13079 }, { "epoch": 2.9761092150170647, "grad_norm": 1.2510250999535137, "learning_rate": 4.4089389462162527e-07, "loss": 0.0143, "step": 13080 }, { "epoch": 2.9763367463026165, "grad_norm": 1.08750578632032, "learning_rate": 4.408085096969184e-07, "loss": 0.0297, "step": 13081 }, { "epoch": 2.976564277588168, "grad_norm": 1.3231002411432946, "learning_rate": 4.407231285367482e-07, "loss": 0.0701, "step": 13082 }, { "epoch": 2.9767918088737204, "grad_norm": 0.8312882692904098, "learning_rate": 4.4063775114285973e-07, "loss": 0.0372, "step": 13083 }, { "epoch": 2.9770193401592717, "grad_norm": 1.294259617224198, "learning_rate": 4.4055237751699816e-07, "loss": 0.0716, "step": 13084 }, { "epoch": 2.977246871444824, "grad_norm": 1.1445460862664487, "learning_rate": 4.4046700766090785e-07, "loss": 0.031, "step": 13085 }, { "epoch": 2.9774744027303752, "grad_norm": 1.9684770212304292, "learning_rate": 4.4038164157633413e-07, "loss": 0.0561, "step": 13086 }, { "epoch": 2.9777019340159274, "grad_norm": 1.0952660179552887, "learning_rate": 4.4029627926502146e-07, "loss": 0.0308, "step": 13087 }, { "epoch": 2.9779294653014787, "grad_norm": 1.89964842255561, "learning_rate": 4.402109207287149e-07, "loss": 0.0437, "step": 13088 }, { "epoch": 2.978156996587031, "grad_norm": 3.9141951054672077, "learning_rate": 4.401255659691584e-07, "loss": 0.0302, "step": 13089 }, { "epoch": 2.9783845278725822, "grad_norm": 1.3078013999261462, "learning_rate": 4.400402149880967e-07, "loss": 0.0603, "step": 13090 }, { "epoch": 2.9786120591581344, "grad_norm": 1.5177785902877297, "learning_rate": 4.3995486778727455e-07, "loss": 0.0224, "step": 13091 }, { "epoch": 2.9788395904436857, "grad_norm": 0.9554939106389385, "learning_rate": 4.398695243684357e-07, "loss": 0.0268, "step": 13092 }, { "epoch": 2.979067121729238, "grad_norm": 1.556700001058213, "learning_rate": 4.397841847333249e-07, "loss": 0.151, "step": 13093 }, { "epoch": 2.9792946530147897, "grad_norm": 1.4681538234562634, "learning_rate": 4.3969884888368597e-07, "loss": 0.0822, "step": 13094 }, { "epoch": 2.9795221843003414, "grad_norm": 2.4096462708510007, "learning_rate": 4.3961351682126356e-07, "loss": 0.0545, "step": 13095 }, { "epoch": 2.979749715585893, "grad_norm": 1.6890336429659296, "learning_rate": 4.395281885478011e-07, "loss": 0.0888, "step": 13096 }, { "epoch": 2.979977246871445, "grad_norm": 1.6240444601284993, "learning_rate": 4.394428640650427e-07, "loss": 0.0787, "step": 13097 }, { "epoch": 2.9802047781569967, "grad_norm": 1.8342225305252253, "learning_rate": 4.3935754337473264e-07, "loss": 0.0439, "step": 13098 }, { "epoch": 2.9804323094425484, "grad_norm": 1.8699744853863407, "learning_rate": 4.3927222647861397e-07, "loss": 0.0244, "step": 13099 }, { "epoch": 2.9806598407281, "grad_norm": 1.1792796835617207, "learning_rate": 4.391869133784312e-07, "loss": 0.0641, "step": 13100 }, { "epoch": 2.980887372013652, "grad_norm": 1.2162999662653942, "learning_rate": 4.391016040759277e-07, "loss": 0.0141, "step": 13101 }, { "epoch": 2.9811149032992037, "grad_norm": 1.17115181234477, "learning_rate": 4.3901629857284646e-07, "loss": 0.1333, "step": 13102 }, { "epoch": 2.9813424345847555, "grad_norm": 1.4512759613967483, "learning_rate": 4.3893099687093197e-07, "loss": 0.0882, "step": 13103 }, { "epoch": 2.981569965870307, "grad_norm": 1.5846952076419059, "learning_rate": 4.3884569897192684e-07, "loss": 0.0211, "step": 13104 }, { "epoch": 2.981797497155859, "grad_norm": 1.8470013625730508, "learning_rate": 4.387604048775748e-07, "loss": 0.033, "step": 13105 }, { "epoch": 2.9820250284414107, "grad_norm": 1.460325533254344, "learning_rate": 4.3867511458961886e-07, "loss": 0.0227, "step": 13106 }, { "epoch": 2.9822525597269625, "grad_norm": 1.358001916903992, "learning_rate": 4.3858982810980265e-07, "loss": 0.0399, "step": 13107 }, { "epoch": 2.982480091012514, "grad_norm": 1.5746044533633659, "learning_rate": 4.3850454543986885e-07, "loss": 0.0855, "step": 13108 }, { "epoch": 2.982707622298066, "grad_norm": 1.5487390396377023, "learning_rate": 4.3841926658156034e-07, "loss": 0.0638, "step": 13109 }, { "epoch": 2.9829351535836177, "grad_norm": 2.191163593082572, "learning_rate": 4.383339915366207e-07, "loss": 0.0808, "step": 13110 }, { "epoch": 2.9831626848691695, "grad_norm": 1.4608750514002833, "learning_rate": 4.382487203067921e-07, "loss": 0.0685, "step": 13111 }, { "epoch": 2.983390216154721, "grad_norm": 1.2878745868267485, "learning_rate": 4.381634528938178e-07, "loss": 0.0204, "step": 13112 }, { "epoch": 2.983617747440273, "grad_norm": 1.5658142385780882, "learning_rate": 4.380781892994404e-07, "loss": 0.0955, "step": 13113 }, { "epoch": 2.9838452787258247, "grad_norm": 1.441072140504372, "learning_rate": 4.3799292952540226e-07, "loss": 0.0507, "step": 13114 }, { "epoch": 2.9840728100113765, "grad_norm": 1.1067665472668105, "learning_rate": 4.379076735734463e-07, "loss": 0.0362, "step": 13115 }, { "epoch": 2.9843003412969282, "grad_norm": 1.1580834362079724, "learning_rate": 4.3782242144531474e-07, "loss": 0.0675, "step": 13116 }, { "epoch": 2.98452787258248, "grad_norm": 1.1152483511903757, "learning_rate": 4.377371731427503e-07, "loss": 0.0548, "step": 13117 }, { "epoch": 2.9847554038680317, "grad_norm": 2.2371836668650786, "learning_rate": 4.3765192866749485e-07, "loss": 0.1108, "step": 13118 }, { "epoch": 2.9849829351535835, "grad_norm": 1.3912133303799725, "learning_rate": 4.37566688021291e-07, "loss": 0.0605, "step": 13119 }, { "epoch": 2.9852104664391352, "grad_norm": 2.2893725690899682, "learning_rate": 4.374814512058809e-07, "loss": 0.1717, "step": 13120 }, { "epoch": 2.985437997724687, "grad_norm": 2.251907849792396, "learning_rate": 4.373962182230063e-07, "loss": 0.0256, "step": 13121 }, { "epoch": 2.985665529010239, "grad_norm": 2.41084502816802, "learning_rate": 4.3731098907440944e-07, "loss": 0.0372, "step": 13122 }, { "epoch": 2.9858930602957905, "grad_norm": 2.073962031445273, "learning_rate": 4.372257637618321e-07, "loss": 0.0343, "step": 13123 }, { "epoch": 2.9861205915813427, "grad_norm": 1.3578954599804085, "learning_rate": 4.371405422870166e-07, "loss": 0.0541, "step": 13124 }, { "epoch": 2.986348122866894, "grad_norm": 2.192498765811592, "learning_rate": 4.3705532465170413e-07, "loss": 0.099, "step": 13125 }, { "epoch": 2.986575654152446, "grad_norm": 1.2101100304828787, "learning_rate": 4.3697011085763664e-07, "loss": 0.069, "step": 13126 }, { "epoch": 2.9868031854379975, "grad_norm": 1.4583882733380809, "learning_rate": 4.3688490090655593e-07, "loss": 0.0307, "step": 13127 }, { "epoch": 2.9870307167235497, "grad_norm": 1.6377610457640064, "learning_rate": 4.3679969480020303e-07, "loss": 0.0405, "step": 13128 }, { "epoch": 2.987258248009101, "grad_norm": 1.6014997354796539, "learning_rate": 4.3671449254031987e-07, "loss": 0.0647, "step": 13129 }, { "epoch": 2.987485779294653, "grad_norm": 2.025768418061247, "learning_rate": 4.366292941286475e-07, "loss": 0.0531, "step": 13130 }, { "epoch": 2.9877133105802045, "grad_norm": 1.608389241813033, "learning_rate": 4.3654409956692763e-07, "loss": 0.0934, "step": 13131 }, { "epoch": 2.9879408418657567, "grad_norm": 2.0981944204627028, "learning_rate": 4.3645890885690113e-07, "loss": 0.0653, "step": 13132 }, { "epoch": 2.9881683731513085, "grad_norm": 1.1779574806524113, "learning_rate": 4.3637372200030905e-07, "loss": 0.0223, "step": 13133 }, { "epoch": 2.98839590443686, "grad_norm": 1.8861239736817723, "learning_rate": 4.362885389988929e-07, "loss": 0.0219, "step": 13134 }, { "epoch": 2.988623435722412, "grad_norm": 1.6427411211550882, "learning_rate": 4.362033598543932e-07, "loss": 0.0492, "step": 13135 }, { "epoch": 2.9888509670079637, "grad_norm": 1.44853574142033, "learning_rate": 4.3611818456855117e-07, "loss": 0.0115, "step": 13136 }, { "epoch": 2.9890784982935155, "grad_norm": 2.1638249338102273, "learning_rate": 4.3603301314310737e-07, "loss": 0.0644, "step": 13137 }, { "epoch": 2.989306029579067, "grad_norm": 1.3979682503597206, "learning_rate": 4.35947845579803e-07, "loss": 0.0119, "step": 13138 }, { "epoch": 2.989533560864619, "grad_norm": 1.9256730937436946, "learning_rate": 4.358626818803782e-07, "loss": 0.038, "step": 13139 }, { "epoch": 2.9897610921501707, "grad_norm": 1.344468821321357, "learning_rate": 4.357775220465738e-07, "loss": 0.0777, "step": 13140 }, { "epoch": 2.9899886234357225, "grad_norm": 2.0770525632342283, "learning_rate": 4.356923660801305e-07, "loss": 0.0471, "step": 13141 }, { "epoch": 2.9902161547212742, "grad_norm": 1.9078780339185013, "learning_rate": 4.3560721398278826e-07, "loss": 0.0943, "step": 13142 }, { "epoch": 2.990443686006826, "grad_norm": 1.3741579533048163, "learning_rate": 4.355220657562879e-07, "loss": 0.1262, "step": 13143 }, { "epoch": 2.9906712172923777, "grad_norm": 1.540745863516107, "learning_rate": 4.3543692140236933e-07, "loss": 0.0432, "step": 13144 }, { "epoch": 2.9908987485779295, "grad_norm": 1.3020562013264199, "learning_rate": 4.3535178092277325e-07, "loss": 0.0915, "step": 13145 }, { "epoch": 2.9911262798634812, "grad_norm": 1.2604347784205963, "learning_rate": 4.352666443192392e-07, "loss": 0.065, "step": 13146 }, { "epoch": 2.991353811149033, "grad_norm": 1.0927762520134534, "learning_rate": 4.3518151159350745e-07, "loss": 0.0262, "step": 13147 }, { "epoch": 2.9915813424345847, "grad_norm": 1.7483173309705438, "learning_rate": 4.3509638274731814e-07, "loss": 0.1818, "step": 13148 }, { "epoch": 2.9918088737201365, "grad_norm": 1.9605552754525961, "learning_rate": 4.350112577824107e-07, "loss": 0.0863, "step": 13149 }, { "epoch": 2.9920364050056882, "grad_norm": 1.4927439903941464, "learning_rate": 4.3492613670052537e-07, "loss": 0.0578, "step": 13150 }, { "epoch": 2.99226393629124, "grad_norm": 1.413604043126645, "learning_rate": 4.3484101950340183e-07, "loss": 0.0281, "step": 13151 }, { "epoch": 2.9924914675767917, "grad_norm": 2.329271068996593, "learning_rate": 4.3475590619277933e-07, "loss": 0.0359, "step": 13152 }, { "epoch": 2.9927189988623435, "grad_norm": 1.1394430507926712, "learning_rate": 4.346707967703978e-07, "loss": 0.0621, "step": 13153 }, { "epoch": 2.9929465301478952, "grad_norm": 2.0223079355671083, "learning_rate": 4.345856912379966e-07, "loss": 0.1317, "step": 13154 }, { "epoch": 2.993174061433447, "grad_norm": 1.2760916114113112, "learning_rate": 4.3450058959731525e-07, "loss": 0.0986, "step": 13155 }, { "epoch": 2.9934015927189987, "grad_norm": 2.0025222861052336, "learning_rate": 4.344154918500928e-07, "loss": 0.0509, "step": 13156 }, { "epoch": 2.9936291240045505, "grad_norm": 1.3952340137798345, "learning_rate": 4.3433039799806867e-07, "loss": 0.049, "step": 13157 }, { "epoch": 2.9938566552901023, "grad_norm": 2.102369043775291, "learning_rate": 4.342453080429823e-07, "loss": 0.1368, "step": 13158 }, { "epoch": 2.994084186575654, "grad_norm": 2.658980696589605, "learning_rate": 4.341602219865722e-07, "loss": 0.0558, "step": 13159 }, { "epoch": 2.9943117178612058, "grad_norm": 1.1006086556727832, "learning_rate": 4.3407513983057767e-07, "loss": 0.0294, "step": 13160 }, { "epoch": 2.994539249146758, "grad_norm": 2.269886074720511, "learning_rate": 4.339900615767376e-07, "loss": 0.0263, "step": 13161 }, { "epoch": 2.9947667804323093, "grad_norm": 1.4787335486630533, "learning_rate": 4.33904987226791e-07, "loss": 0.028, "step": 13162 }, { "epoch": 2.9949943117178615, "grad_norm": 2.650992402234237, "learning_rate": 4.3381991678247614e-07, "loss": 0.0539, "step": 13163 }, { "epoch": 2.9952218430034128, "grad_norm": 2.0047526608958073, "learning_rate": 4.337348502455325e-07, "loss": 0.0334, "step": 13164 }, { "epoch": 2.995449374288965, "grad_norm": 1.4551251338988713, "learning_rate": 4.33649787617698e-07, "loss": 0.0989, "step": 13165 }, { "epoch": 2.9956769055745163, "grad_norm": 1.9482920042613665, "learning_rate": 4.335647289007114e-07, "loss": 0.0278, "step": 13166 }, { "epoch": 2.9959044368600685, "grad_norm": 1.7102001121864232, "learning_rate": 4.334796740963114e-07, "loss": 0.0233, "step": 13167 }, { "epoch": 2.9961319681456198, "grad_norm": 2.0229936631975525, "learning_rate": 4.3339462320623567e-07, "loss": 0.0563, "step": 13168 }, { "epoch": 2.996359499431172, "grad_norm": 3.145249338112899, "learning_rate": 4.333095762322233e-07, "loss": 0.0979, "step": 13169 }, { "epoch": 2.9965870307167233, "grad_norm": 2.782206713750375, "learning_rate": 4.332245331760121e-07, "loss": 0.0714, "step": 13170 }, { "epoch": 2.9968145620022755, "grad_norm": 1.3790192642703236, "learning_rate": 4.3313949403933996e-07, "loss": 0.1014, "step": 13171 }, { "epoch": 2.9970420932878272, "grad_norm": 1.7878910217414477, "learning_rate": 4.330544588239454e-07, "loss": 0.0334, "step": 13172 }, { "epoch": 2.997269624573379, "grad_norm": 1.791608752284231, "learning_rate": 4.32969427531566e-07, "loss": 0.0752, "step": 13173 }, { "epoch": 2.9974971558589307, "grad_norm": 1.8003336467893771, "learning_rate": 4.3288440016394007e-07, "loss": 0.0881, "step": 13174 }, { "epoch": 2.9977246871444825, "grad_norm": 1.7256525129813558, "learning_rate": 4.327993767228049e-07, "loss": 0.1276, "step": 13175 }, { "epoch": 2.9979522184300342, "grad_norm": 1.820046588804461, "learning_rate": 4.3271435720989857e-07, "loss": 0.0385, "step": 13176 }, { "epoch": 2.998179749715586, "grad_norm": 1.8409870044764811, "learning_rate": 4.326293416269588e-07, "loss": 0.0254, "step": 13177 }, { "epoch": 2.9984072810011377, "grad_norm": 1.8872835245009851, "learning_rate": 4.3254432997572273e-07, "loss": 0.0442, "step": 13178 }, { "epoch": 2.9986348122866895, "grad_norm": 0.9614692200087656, "learning_rate": 4.324593222579282e-07, "loss": 0.0231, "step": 13179 }, { "epoch": 2.9988623435722412, "grad_norm": 1.112422088779659, "learning_rate": 4.3237431847531237e-07, "loss": 0.0117, "step": 13180 }, { "epoch": 2.999089874857793, "grad_norm": 2.4943756745319523, "learning_rate": 4.3228931862961285e-07, "loss": 0.0894, "step": 13181 }, { "epoch": 2.9993174061433447, "grad_norm": 1.4293179131008735, "learning_rate": 4.322043227225666e-07, "loss": 0.0828, "step": 13182 }, { "epoch": 2.9995449374288965, "grad_norm": 2.4767411132749615, "learning_rate": 4.32119330755911e-07, "loss": 0.0887, "step": 13183 }, { "epoch": 2.9997724687144482, "grad_norm": 1.554662574046274, "learning_rate": 4.320343427313832e-07, "loss": 0.0288, "step": 13184 }, { "epoch": 3.0, "grad_norm": 1.559025726192582, "learning_rate": 4.319493586507197e-07, "loss": 0.0664, "step": 13185 }, { "epoch": 3.0002275312855518, "grad_norm": 1.382271693324837, "learning_rate": 4.3186437851565794e-07, "loss": 0.0236, "step": 13186 }, { "epoch": 3.0004550625711035, "grad_norm": 0.834752493212597, "learning_rate": 4.317794023279344e-07, "loss": 0.0171, "step": 13187 }, { "epoch": 3.0006825938566553, "grad_norm": 1.1626874287403963, "learning_rate": 4.3169443008928627e-07, "loss": 0.0196, "step": 13188 }, { "epoch": 3.000910125142207, "grad_norm": 1.3277791106904522, "learning_rate": 4.316094618014499e-07, "loss": 0.0393, "step": 13189 }, { "epoch": 3.0011376564277588, "grad_norm": 0.8844782482018406, "learning_rate": 4.3152449746616177e-07, "loss": 0.0181, "step": 13190 }, { "epoch": 3.0013651877133105, "grad_norm": 0.9601461266016285, "learning_rate": 4.3143953708515886e-07, "loss": 0.0178, "step": 13191 }, { "epoch": 3.0015927189988623, "grad_norm": 0.9514808427694906, "learning_rate": 4.3135458066017705e-07, "loss": 0.0232, "step": 13192 }, { "epoch": 3.001820250284414, "grad_norm": 1.1025289722492302, "learning_rate": 4.3126962819295304e-07, "loss": 0.0634, "step": 13193 }, { "epoch": 3.0020477815699658, "grad_norm": 1.816810891174548, "learning_rate": 4.3118467968522303e-07, "loss": 0.086, "step": 13194 }, { "epoch": 3.0022753128555175, "grad_norm": 1.0416672967263496, "learning_rate": 4.3109973513872333e-07, "loss": 0.0169, "step": 13195 }, { "epoch": 3.0025028441410693, "grad_norm": 0.9182020911720622, "learning_rate": 4.3101479455518993e-07, "loss": 0.013, "step": 13196 }, { "epoch": 3.002730375426621, "grad_norm": 0.9928907068452957, "learning_rate": 4.309298579363587e-07, "loss": 0.0187, "step": 13197 }, { "epoch": 3.0029579067121728, "grad_norm": 1.6870621465222204, "learning_rate": 4.30844925283966e-07, "loss": 0.1285, "step": 13198 }, { "epoch": 3.0031854379977245, "grad_norm": 1.389511726435519, "learning_rate": 4.307599965997471e-07, "loss": 0.0246, "step": 13199 }, { "epoch": 3.0034129692832763, "grad_norm": 1.4593983867330063, "learning_rate": 4.306750718854383e-07, "loss": 0.0264, "step": 13200 }, { "epoch": 3.003640500568828, "grad_norm": 1.0481124629872234, "learning_rate": 4.3059015114277514e-07, "loss": 0.0857, "step": 13201 }, { "epoch": 3.00386803185438, "grad_norm": 1.1616814392067516, "learning_rate": 4.3050523437349333e-07, "loss": 0.0427, "step": 13202 }, { "epoch": 3.0040955631399315, "grad_norm": 1.379853895932092, "learning_rate": 4.304203215793283e-07, "loss": 0.0495, "step": 13203 }, { "epoch": 3.0043230944254833, "grad_norm": 1.0461745230221782, "learning_rate": 4.3033541276201546e-07, "loss": 0.0255, "step": 13204 }, { "epoch": 3.0045506257110355, "grad_norm": 1.2830005867561094, "learning_rate": 4.302505079232905e-07, "loss": 0.0375, "step": 13205 }, { "epoch": 3.0047781569965872, "grad_norm": 0.8187459081748271, "learning_rate": 4.301656070648881e-07, "loss": 0.0576, "step": 13206 }, { "epoch": 3.005005688282139, "grad_norm": 0.7700117342476106, "learning_rate": 4.3008071018854417e-07, "loss": 0.0076, "step": 13207 }, { "epoch": 3.0052332195676907, "grad_norm": 1.0520870510551201, "learning_rate": 4.299958172959935e-07, "loss": 0.0085, "step": 13208 }, { "epoch": 3.0054607508532425, "grad_norm": 1.5864578680877846, "learning_rate": 4.299109283889711e-07, "loss": 0.0464, "step": 13209 }, { "epoch": 3.0056882821387942, "grad_norm": 1.7049812797383903, "learning_rate": 4.298260434692121e-07, "loss": 0.0656, "step": 13210 }, { "epoch": 3.005915813424346, "grad_norm": 1.7648378330147547, "learning_rate": 4.2974116253845124e-07, "loss": 0.0444, "step": 13211 }, { "epoch": 3.0061433447098977, "grad_norm": 0.8464735961989552, "learning_rate": 4.296562855984236e-07, "loss": 0.015, "step": 13212 }, { "epoch": 3.0063708759954495, "grad_norm": 1.6393320440110775, "learning_rate": 4.295714126508635e-07, "loss": 0.0345, "step": 13213 }, { "epoch": 3.0065984072810013, "grad_norm": 0.8908343244940867, "learning_rate": 4.29486543697506e-07, "loss": 0.0069, "step": 13214 }, { "epoch": 3.006825938566553, "grad_norm": 1.2461776739693726, "learning_rate": 4.2940167874008553e-07, "loss": 0.027, "step": 13215 }, { "epoch": 3.0070534698521048, "grad_norm": 1.3238695615599454, "learning_rate": 4.293168177803363e-07, "loss": 0.0434, "step": 13216 }, { "epoch": 3.0072810011376565, "grad_norm": 1.397950889815398, "learning_rate": 4.29231960819993e-07, "loss": 0.0455, "step": 13217 }, { "epoch": 3.0075085324232083, "grad_norm": 1.5099079237230095, "learning_rate": 4.2914710786078986e-07, "loss": 0.0133, "step": 13218 }, { "epoch": 3.00773606370876, "grad_norm": 2.1871200668248694, "learning_rate": 4.2906225890446135e-07, "loss": 0.097, "step": 13219 }, { "epoch": 3.0079635949943118, "grad_norm": 0.9655577304636618, "learning_rate": 4.2897741395274134e-07, "loss": 0.0154, "step": 13220 }, { "epoch": 3.0081911262798635, "grad_norm": 1.5938777201628511, "learning_rate": 4.2889257300736383e-07, "loss": 0.09, "step": 13221 }, { "epoch": 3.0084186575654153, "grad_norm": 1.5592404871169523, "learning_rate": 4.288077360700632e-07, "loss": 0.1018, "step": 13222 }, { "epoch": 3.008646188850967, "grad_norm": 0.7447568212037099, "learning_rate": 4.2872290314257284e-07, "loss": 0.0078, "step": 13223 }, { "epoch": 3.0088737201365188, "grad_norm": 4.4568677862020145, "learning_rate": 4.2863807422662717e-07, "loss": 0.1144, "step": 13224 }, { "epoch": 3.0091012514220705, "grad_norm": 2.0898480659897776, "learning_rate": 4.2855324932395944e-07, "loss": 0.0426, "step": 13225 }, { "epoch": 3.0093287827076223, "grad_norm": 1.391809873752272, "learning_rate": 4.284684284363036e-07, "loss": 0.0224, "step": 13226 }, { "epoch": 3.009556313993174, "grad_norm": 1.2252285483871492, "learning_rate": 4.283836115653933e-07, "loss": 0.0504, "step": 13227 }, { "epoch": 3.0097838452787258, "grad_norm": 0.861253646944311, "learning_rate": 4.2829879871296163e-07, "loss": 0.0127, "step": 13228 }, { "epoch": 3.0100113765642775, "grad_norm": 1.1436644358717143, "learning_rate": 4.282139898807425e-07, "loss": 0.0163, "step": 13229 }, { "epoch": 3.0102389078498293, "grad_norm": 0.9152869919543974, "learning_rate": 4.281291850704687e-07, "loss": 0.009, "step": 13230 }, { "epoch": 3.010466439135381, "grad_norm": 0.9905869855884598, "learning_rate": 4.280443842838743e-07, "loss": 0.0078, "step": 13231 }, { "epoch": 3.010693970420933, "grad_norm": 1.124567726955188, "learning_rate": 4.279595875226915e-07, "loss": 0.0369, "step": 13232 }, { "epoch": 3.0109215017064845, "grad_norm": 0.9040886154061768, "learning_rate": 4.278747947886541e-07, "loss": 0.013, "step": 13233 }, { "epoch": 3.0111490329920363, "grad_norm": 0.9195504062316298, "learning_rate": 4.2779000608349497e-07, "loss": 0.0551, "step": 13234 }, { "epoch": 3.011376564277588, "grad_norm": 0.930984581019989, "learning_rate": 4.2770522140894675e-07, "loss": 0.0174, "step": 13235 }, { "epoch": 3.01160409556314, "grad_norm": 0.9141413419439177, "learning_rate": 4.276204407667425e-07, "loss": 0.0144, "step": 13236 }, { "epoch": 3.0118316268486915, "grad_norm": 1.0829111731283476, "learning_rate": 4.2753566415861494e-07, "loss": 0.0104, "step": 13237 }, { "epoch": 3.0120591581342433, "grad_norm": 1.3400083900852426, "learning_rate": 4.2745089158629697e-07, "loss": 0.0108, "step": 13238 }, { "epoch": 3.012286689419795, "grad_norm": 3.890470437292931, "learning_rate": 4.273661230515209e-07, "loss": 0.1038, "step": 13239 }, { "epoch": 3.012514220705347, "grad_norm": 0.9002069050000354, "learning_rate": 4.272813585560191e-07, "loss": 0.009, "step": 13240 }, { "epoch": 3.0127417519908986, "grad_norm": 1.5152537378047133, "learning_rate": 4.271965981015246e-07, "loss": 0.0384, "step": 13241 }, { "epoch": 3.0129692832764503, "grad_norm": 1.2393741033270267, "learning_rate": 4.27111841689769e-07, "loss": 0.0221, "step": 13242 }, { "epoch": 3.013196814562002, "grad_norm": 1.474692141221041, "learning_rate": 4.2702708932248514e-07, "loss": 0.1142, "step": 13243 }, { "epoch": 3.0134243458475543, "grad_norm": 2.4925041820073783, "learning_rate": 4.2694234100140486e-07, "loss": 0.069, "step": 13244 }, { "epoch": 3.013651877133106, "grad_norm": 1.7137464704741845, "learning_rate": 4.268575967282607e-07, "loss": 0.0526, "step": 13245 }, { "epoch": 3.0138794084186578, "grad_norm": 0.676444797575058, "learning_rate": 4.267728565047842e-07, "loss": 0.0053, "step": 13246 }, { "epoch": 3.0141069397042095, "grad_norm": 1.5263879738176018, "learning_rate": 4.2668812033270737e-07, "loss": 0.1112, "step": 13247 }, { "epoch": 3.0143344709897613, "grad_norm": 1.1841332357854044, "learning_rate": 4.266033882137624e-07, "loss": 0.0316, "step": 13248 }, { "epoch": 3.014562002275313, "grad_norm": 1.9703034435253326, "learning_rate": 4.265186601496806e-07, "loss": 0.049, "step": 13249 }, { "epoch": 3.0147895335608648, "grad_norm": 1.5499351609744219, "learning_rate": 4.2643393614219404e-07, "loss": 0.0253, "step": 13250 }, { "epoch": 3.0150170648464165, "grad_norm": 1.4468365854600398, "learning_rate": 4.2634921619303396e-07, "loss": 0.0174, "step": 13251 }, { "epoch": 3.0152445961319683, "grad_norm": 0.91805245026478, "learning_rate": 4.2626450030393244e-07, "loss": 0.0105, "step": 13252 }, { "epoch": 3.01547212741752, "grad_norm": 1.6209642714889567, "learning_rate": 4.261797884766204e-07, "loss": 0.074, "step": 13253 }, { "epoch": 3.0156996587030718, "grad_norm": 1.1112387531163945, "learning_rate": 4.260950807128292e-07, "loss": 0.0197, "step": 13254 }, { "epoch": 3.0159271899886235, "grad_norm": 1.3226134442516104, "learning_rate": 4.260103770142906e-07, "loss": 0.0154, "step": 13255 }, { "epoch": 3.0161547212741753, "grad_norm": 1.1828539561815132, "learning_rate": 4.259256773827351e-07, "loss": 0.0152, "step": 13256 }, { "epoch": 3.016382252559727, "grad_norm": 1.9318973266364718, "learning_rate": 4.258409818198944e-07, "loss": 0.0515, "step": 13257 }, { "epoch": 3.016609783845279, "grad_norm": 1.4287361502743618, "learning_rate": 4.2575629032749937e-07, "loss": 0.0475, "step": 13258 }, { "epoch": 3.0168373151308305, "grad_norm": 1.252243483473632, "learning_rate": 4.256716029072806e-07, "loss": 0.0235, "step": 13259 }, { "epoch": 3.0170648464163823, "grad_norm": 1.6961185361734714, "learning_rate": 4.2558691956096936e-07, "loss": 0.0371, "step": 13260 }, { "epoch": 3.017292377701934, "grad_norm": 1.2776339761735378, "learning_rate": 4.2550224029029616e-07, "loss": 0.1052, "step": 13261 }, { "epoch": 3.017519908987486, "grad_norm": 1.710988599864679, "learning_rate": 4.25417565096992e-07, "loss": 0.0562, "step": 13262 }, { "epoch": 3.0177474402730375, "grad_norm": 1.311806421167304, "learning_rate": 4.2533289398278715e-07, "loss": 0.0509, "step": 13263 }, { "epoch": 3.0179749715585893, "grad_norm": 1.3008408557301334, "learning_rate": 4.252482269494123e-07, "loss": 0.0659, "step": 13264 }, { "epoch": 3.018202502844141, "grad_norm": 1.1103077141452364, "learning_rate": 4.2516356399859804e-07, "loss": 0.0204, "step": 13265 }, { "epoch": 3.018430034129693, "grad_norm": 1.26787411352042, "learning_rate": 4.250789051320743e-07, "loss": 0.0321, "step": 13266 }, { "epoch": 3.0186575654152445, "grad_norm": 1.2641927421284511, "learning_rate": 4.2499425035157167e-07, "loss": 0.023, "step": 13267 }, { "epoch": 3.0188850967007963, "grad_norm": 1.163767129946361, "learning_rate": 4.249095996588202e-07, "loss": 0.0311, "step": 13268 }, { "epoch": 3.019112627986348, "grad_norm": 1.1879154355737191, "learning_rate": 4.248249530555503e-07, "loss": 0.0554, "step": 13269 }, { "epoch": 3.0193401592719, "grad_norm": 1.2513091162172165, "learning_rate": 4.247403105434915e-07, "loss": 0.0177, "step": 13270 }, { "epoch": 3.0195676905574516, "grad_norm": 1.44119144536477, "learning_rate": 4.2465567212437415e-07, "loss": 0.0469, "step": 13271 }, { "epoch": 3.0197952218430033, "grad_norm": 1.1476782058016675, "learning_rate": 4.2457103779992807e-07, "loss": 0.014, "step": 13272 }, { "epoch": 3.020022753128555, "grad_norm": 1.6065021801749597, "learning_rate": 4.2448640757188255e-07, "loss": 0.0251, "step": 13273 }, { "epoch": 3.020250284414107, "grad_norm": 1.142888010927926, "learning_rate": 4.2440178144196794e-07, "loss": 0.0116, "step": 13274 }, { "epoch": 3.0204778156996586, "grad_norm": 2.0953230989560523, "learning_rate": 4.2431715941191323e-07, "loss": 0.0125, "step": 13275 }, { "epoch": 3.0207053469852103, "grad_norm": 1.6570818092864317, "learning_rate": 4.242325414834486e-07, "loss": 0.0963, "step": 13276 }, { "epoch": 3.020932878270762, "grad_norm": 1.8834002768100588, "learning_rate": 4.24147927658303e-07, "loss": 0.0352, "step": 13277 }, { "epoch": 3.021160409556314, "grad_norm": 1.5613046461738147, "learning_rate": 4.240633179382058e-07, "loss": 0.06, "step": 13278 }, { "epoch": 3.0213879408418656, "grad_norm": 1.94400222644559, "learning_rate": 4.239787123248866e-07, "loss": 0.0338, "step": 13279 }, { "epoch": 3.0216154721274173, "grad_norm": 1.1004305081559707, "learning_rate": 4.238941108200742e-07, "loss": 0.0327, "step": 13280 }, { "epoch": 3.021843003412969, "grad_norm": 1.263128741337215, "learning_rate": 4.2380951342549786e-07, "loss": 0.0448, "step": 13281 }, { "epoch": 3.022070534698521, "grad_norm": 0.7980585858339172, "learning_rate": 4.2372492014288656e-07, "loss": 0.0089, "step": 13282 }, { "epoch": 3.022298065984073, "grad_norm": 1.0735412674935032, "learning_rate": 4.236403309739695e-07, "loss": 0.0163, "step": 13283 }, { "epoch": 3.0225255972696248, "grad_norm": 1.2248457006370426, "learning_rate": 4.235557459204752e-07, "loss": 0.1263, "step": 13284 }, { "epoch": 3.0227531285551765, "grad_norm": 1.4462979836681458, "learning_rate": 4.2347116498413233e-07, "loss": 0.0614, "step": 13285 }, { "epoch": 3.0229806598407283, "grad_norm": 1.898073570725312, "learning_rate": 4.233865881666702e-07, "loss": 0.0399, "step": 13286 }, { "epoch": 3.02320819112628, "grad_norm": 0.5819244473081424, "learning_rate": 4.233020154698164e-07, "loss": 0.0053, "step": 13287 }, { "epoch": 3.023435722411832, "grad_norm": 0.7332460177159091, "learning_rate": 4.2321744689530043e-07, "loss": 0.015, "step": 13288 }, { "epoch": 3.0236632536973835, "grad_norm": 0.6921180887290506, "learning_rate": 4.231328824448501e-07, "loss": 0.0079, "step": 13289 }, { "epoch": 3.0238907849829353, "grad_norm": 1.2601872719121663, "learning_rate": 4.2304832212019405e-07, "loss": 0.0674, "step": 13290 }, { "epoch": 3.024118316268487, "grad_norm": 1.2230345056157368, "learning_rate": 4.229637659230606e-07, "loss": 0.0343, "step": 13291 }, { "epoch": 3.024345847554039, "grad_norm": 1.5005402810903532, "learning_rate": 4.228792138551775e-07, "loss": 0.0867, "step": 13292 }, { "epoch": 3.0245733788395905, "grad_norm": 0.719921442750588, "learning_rate": 4.2279466591827315e-07, "loss": 0.0073, "step": 13293 }, { "epoch": 3.0248009101251423, "grad_norm": 1.881107102531448, "learning_rate": 4.2271012211407546e-07, "loss": 0.0174, "step": 13294 }, { "epoch": 3.025028441410694, "grad_norm": 1.127310652175649, "learning_rate": 4.226255824443126e-07, "loss": 0.0124, "step": 13295 }, { "epoch": 3.025255972696246, "grad_norm": 1.4164159334391395, "learning_rate": 4.225410469107121e-07, "loss": 0.095, "step": 13296 }, { "epoch": 3.0254835039817976, "grad_norm": 1.6065142456303274, "learning_rate": 4.224565155150017e-07, "loss": 0.0244, "step": 13297 }, { "epoch": 3.0257110352673493, "grad_norm": 1.7342480871419255, "learning_rate": 4.2237198825890944e-07, "loss": 0.0748, "step": 13298 }, { "epoch": 3.025938566552901, "grad_norm": 1.4752779592149676, "learning_rate": 4.2228746514416233e-07, "loss": 0.0172, "step": 13299 }, { "epoch": 3.026166097838453, "grad_norm": 3.3297758248447717, "learning_rate": 4.2220294617248836e-07, "loss": 0.0286, "step": 13300 }, { "epoch": 3.0263936291240046, "grad_norm": 1.5640224770012991, "learning_rate": 4.2211843134561464e-07, "loss": 0.0916, "step": 13301 }, { "epoch": 3.0266211604095563, "grad_norm": 1.1961727767095949, "learning_rate": 4.220339206652689e-07, "loss": 0.0655, "step": 13302 }, { "epoch": 3.026848691695108, "grad_norm": 0.6118574013456218, "learning_rate": 4.219494141331779e-07, "loss": 0.0074, "step": 13303 }, { "epoch": 3.02707622298066, "grad_norm": 1.1033534708515562, "learning_rate": 4.2186491175106895e-07, "loss": 0.0201, "step": 13304 }, { "epoch": 3.0273037542662116, "grad_norm": 1.0329086665693359, "learning_rate": 4.217804135206695e-07, "loss": 0.0243, "step": 13305 }, { "epoch": 3.0275312855517633, "grad_norm": 0.5609689584666137, "learning_rate": 4.2169591944370576e-07, "loss": 0.0307, "step": 13306 }, { "epoch": 3.027758816837315, "grad_norm": 0.9249692876538282, "learning_rate": 4.2161142952190536e-07, "loss": 0.0168, "step": 13307 }, { "epoch": 3.027986348122867, "grad_norm": 1.2663152254556087, "learning_rate": 4.2152694375699474e-07, "loss": 0.0167, "step": 13308 }, { "epoch": 3.0282138794084186, "grad_norm": 0.7192273247965933, "learning_rate": 4.2144246215070096e-07, "loss": 0.0109, "step": 13309 }, { "epoch": 3.0284414106939703, "grad_norm": 0.5224291152409135, "learning_rate": 4.213579847047503e-07, "loss": 0.0046, "step": 13310 }, { "epoch": 3.028668941979522, "grad_norm": 1.0322245682280937, "learning_rate": 4.212735114208694e-07, "loss": 0.013, "step": 13311 }, { "epoch": 3.028896473265074, "grad_norm": 2.6973094199773633, "learning_rate": 4.2118904230078505e-07, "loss": 0.0729, "step": 13312 }, { "epoch": 3.0291240045506256, "grad_norm": 1.0168727074963735, "learning_rate": 4.2110457734622314e-07, "loss": 0.0306, "step": 13313 }, { "epoch": 3.0293515358361773, "grad_norm": 1.503372605072573, "learning_rate": 4.210201165589105e-07, "loss": 0.057, "step": 13314 }, { "epoch": 3.029579067121729, "grad_norm": 1.1514452127693273, "learning_rate": 4.209356599405731e-07, "loss": 0.0461, "step": 13315 }, { "epoch": 3.029806598407281, "grad_norm": 1.0820487826132448, "learning_rate": 4.208512074929368e-07, "loss": 0.0368, "step": 13316 }, { "epoch": 3.0300341296928326, "grad_norm": 1.1633569186995, "learning_rate": 4.207667592177282e-07, "loss": 0.0571, "step": 13317 }, { "epoch": 3.0302616609783843, "grad_norm": 1.387975285703502, "learning_rate": 4.2068231511667277e-07, "loss": 0.0318, "step": 13318 }, { "epoch": 3.030489192263936, "grad_norm": 1.4745008021493997, "learning_rate": 4.205978751914969e-07, "loss": 0.1125, "step": 13319 }, { "epoch": 3.030716723549488, "grad_norm": 0.7642285339237449, "learning_rate": 4.205134394439259e-07, "loss": 0.0055, "step": 13320 }, { "epoch": 3.03094425483504, "grad_norm": 1.3300854177580277, "learning_rate": 4.2042900787568586e-07, "loss": 0.0717, "step": 13321 }, { "epoch": 3.031171786120592, "grad_norm": 1.0917864424972223, "learning_rate": 4.203445804885023e-07, "loss": 0.0135, "step": 13322 }, { "epoch": 3.0313993174061435, "grad_norm": 1.3921964328740548, "learning_rate": 4.2026015728410043e-07, "loss": 0.1132, "step": 13323 }, { "epoch": 3.0316268486916953, "grad_norm": 1.685828984343597, "learning_rate": 4.201757382642062e-07, "loss": 0.0373, "step": 13324 }, { "epoch": 3.031854379977247, "grad_norm": 0.8866408832431689, "learning_rate": 4.200913234305446e-07, "loss": 0.0263, "step": 13325 }, { "epoch": 3.032081911262799, "grad_norm": 0.8149788573715943, "learning_rate": 4.2000691278484134e-07, "loss": 0.021, "step": 13326 }, { "epoch": 3.0323094425483506, "grad_norm": 2.4134843776393082, "learning_rate": 4.1992250632882105e-07, "loss": 0.0148, "step": 13327 }, { "epoch": 3.0325369738339023, "grad_norm": 1.4089736195557434, "learning_rate": 4.198381040642093e-07, "loss": 0.0742, "step": 13328 }, { "epoch": 3.032764505119454, "grad_norm": 0.5333140931520381, "learning_rate": 4.197537059927311e-07, "loss": 0.0046, "step": 13329 }, { "epoch": 3.032992036405006, "grad_norm": 2.039980878400671, "learning_rate": 4.1966931211611106e-07, "loss": 0.034, "step": 13330 }, { "epoch": 3.0332195676905576, "grad_norm": 0.7920029305665055, "learning_rate": 4.195849224360743e-07, "loss": 0.0085, "step": 13331 }, { "epoch": 3.0334470989761093, "grad_norm": 1.6002054133132673, "learning_rate": 4.1950053695434535e-07, "loss": 0.0789, "step": 13332 }, { "epoch": 3.033674630261661, "grad_norm": 2.091328134968427, "learning_rate": 4.194161556726494e-07, "loss": 0.1056, "step": 13333 }, { "epoch": 3.033902161547213, "grad_norm": 1.3131635015290881, "learning_rate": 4.1933177859271064e-07, "loss": 0.0267, "step": 13334 }, { "epoch": 3.0341296928327646, "grad_norm": 1.6665476452246417, "learning_rate": 4.1924740571625345e-07, "loss": 0.0744, "step": 13335 }, { "epoch": 3.0343572241183163, "grad_norm": 1.094737796049103, "learning_rate": 4.191630370450027e-07, "loss": 0.0187, "step": 13336 }, { "epoch": 3.034584755403868, "grad_norm": 1.7711738165264865, "learning_rate": 4.190786725806823e-07, "loss": 0.0243, "step": 13337 }, { "epoch": 3.03481228668942, "grad_norm": 1.3955646734325438, "learning_rate": 4.189943123250168e-07, "loss": 0.039, "step": 13338 }, { "epoch": 3.0350398179749716, "grad_norm": 0.986233542613958, "learning_rate": 4.189099562797302e-07, "loss": 0.0347, "step": 13339 }, { "epoch": 3.0352673492605233, "grad_norm": 1.591928138120007, "learning_rate": 4.188256044465469e-07, "loss": 0.06, "step": 13340 }, { "epoch": 3.035494880546075, "grad_norm": 1.3223647144666897, "learning_rate": 4.187412568271905e-07, "loss": 0.0666, "step": 13341 }, { "epoch": 3.035722411831627, "grad_norm": 1.0934437462077036, "learning_rate": 4.186569134233849e-07, "loss": 0.0146, "step": 13342 }, { "epoch": 3.0359499431171786, "grad_norm": 0.7411176744442081, "learning_rate": 4.185725742368545e-07, "loss": 0.0078, "step": 13343 }, { "epoch": 3.0361774744027303, "grad_norm": 1.5290035079625857, "learning_rate": 4.1848823926932207e-07, "loss": 0.0967, "step": 13344 }, { "epoch": 3.036405005688282, "grad_norm": 1.3904766134726232, "learning_rate": 4.184039085225122e-07, "loss": 0.0237, "step": 13345 }, { "epoch": 3.036632536973834, "grad_norm": 2.400799064860784, "learning_rate": 4.1831958199814825e-07, "loss": 0.1077, "step": 13346 }, { "epoch": 3.0368600682593856, "grad_norm": 1.3565470245274505, "learning_rate": 4.1823525969795306e-07, "loss": 0.054, "step": 13347 }, { "epoch": 3.0370875995449373, "grad_norm": 2.1875234757273834, "learning_rate": 4.181509416236508e-07, "loss": 0.0222, "step": 13348 }, { "epoch": 3.037315130830489, "grad_norm": 1.1768474079893905, "learning_rate": 4.1806662777696424e-07, "loss": 0.0278, "step": 13349 }, { "epoch": 3.037542662116041, "grad_norm": 0.9270114335555383, "learning_rate": 4.1798231815961693e-07, "loss": 0.0091, "step": 13350 }, { "epoch": 3.0377701934015926, "grad_norm": 1.5616032267934, "learning_rate": 4.178980127733319e-07, "loss": 0.0802, "step": 13351 }, { "epoch": 3.0379977246871444, "grad_norm": 1.5824938575616478, "learning_rate": 4.178137116198323e-07, "loss": 0.0991, "step": 13352 }, { "epoch": 3.038225255972696, "grad_norm": 2.2836042560695367, "learning_rate": 4.17729414700841e-07, "loss": 0.0204, "step": 13353 }, { "epoch": 3.038452787258248, "grad_norm": 1.6407492669061328, "learning_rate": 4.1764512201808063e-07, "loss": 0.0933, "step": 13354 }, { "epoch": 3.0386803185437996, "grad_norm": 0.8839855451636057, "learning_rate": 4.1756083357327464e-07, "loss": 0.0114, "step": 13355 }, { "epoch": 3.0389078498293514, "grad_norm": 2.2836459005278207, "learning_rate": 4.1747654936814485e-07, "loss": 0.0767, "step": 13356 }, { "epoch": 3.039135381114903, "grad_norm": 1.0305424153148834, "learning_rate": 4.1739226940441454e-07, "loss": 0.02, "step": 13357 }, { "epoch": 3.039362912400455, "grad_norm": 1.264317448715382, "learning_rate": 4.1730799368380593e-07, "loss": 0.0507, "step": 13358 }, { "epoch": 3.0395904436860066, "grad_norm": 1.3438876602058312, "learning_rate": 4.172237222080418e-07, "loss": 0.0737, "step": 13359 }, { "epoch": 3.039817974971559, "grad_norm": 1.5429009173106505, "learning_rate": 4.1713945497884405e-07, "loss": 0.0318, "step": 13360 }, { "epoch": 3.0400455062571106, "grad_norm": 1.8135438849828422, "learning_rate": 4.170551919979351e-07, "loss": 0.0837, "step": 13361 }, { "epoch": 3.0402730375426623, "grad_norm": 1.0040180896228588, "learning_rate": 4.1697093326703746e-07, "loss": 0.0085, "step": 13362 }, { "epoch": 3.040500568828214, "grad_norm": 0.973331217118637, "learning_rate": 4.1688667878787266e-07, "loss": 0.0631, "step": 13363 }, { "epoch": 3.040728100113766, "grad_norm": 3.1958968232351292, "learning_rate": 4.1680242856216316e-07, "loss": 0.0494, "step": 13364 }, { "epoch": 3.0409556313993176, "grad_norm": 1.2518337336390213, "learning_rate": 4.1671818259163087e-07, "loss": 0.0353, "step": 13365 }, { "epoch": 3.0411831626848693, "grad_norm": 1.5597504103199884, "learning_rate": 4.1663394087799723e-07, "loss": 0.0475, "step": 13366 }, { "epoch": 3.041410693970421, "grad_norm": 0.7123425833854019, "learning_rate": 4.1654970342298437e-07, "loss": 0.0485, "step": 13367 }, { "epoch": 3.041638225255973, "grad_norm": 1.0335255243078498, "learning_rate": 4.1646547022831355e-07, "loss": 0.0418, "step": 13368 }, { "epoch": 3.0418657565415246, "grad_norm": 0.9633568096960885, "learning_rate": 4.16381241295707e-07, "loss": 0.062, "step": 13369 }, { "epoch": 3.0420932878270763, "grad_norm": 1.6890557050834747, "learning_rate": 4.162970166268855e-07, "loss": 0.0515, "step": 13370 }, { "epoch": 3.042320819112628, "grad_norm": 1.214327464568877, "learning_rate": 4.16212796223571e-07, "loss": 0.014, "step": 13371 }, { "epoch": 3.04254835039818, "grad_norm": 1.2745589501171823, "learning_rate": 4.161285800874845e-07, "loss": 0.0378, "step": 13372 }, { "epoch": 3.0427758816837316, "grad_norm": 1.0914763511777328, "learning_rate": 4.160443682203472e-07, "loss": 0.0423, "step": 13373 }, { "epoch": 3.0430034129692833, "grad_norm": 1.1425023897554973, "learning_rate": 4.159601606238804e-07, "loss": 0.0135, "step": 13374 }, { "epoch": 3.043230944254835, "grad_norm": 1.2706253745960434, "learning_rate": 4.158759572998049e-07, "loss": 0.079, "step": 13375 }, { "epoch": 3.043458475540387, "grad_norm": 0.2623364609033214, "learning_rate": 4.157917582498422e-07, "loss": 0.0025, "step": 13376 }, { "epoch": 3.0436860068259386, "grad_norm": 1.1036903536789457, "learning_rate": 4.1570756347571256e-07, "loss": 0.0774, "step": 13377 }, { "epoch": 3.0439135381114903, "grad_norm": 1.7791001115225482, "learning_rate": 4.15623372979137e-07, "loss": 0.0522, "step": 13378 }, { "epoch": 3.044141069397042, "grad_norm": 1.3524364951322463, "learning_rate": 4.155391867618365e-07, "loss": 0.0345, "step": 13379 }, { "epoch": 3.044368600682594, "grad_norm": 0.5466471167810906, "learning_rate": 4.154550048255311e-07, "loss": 0.0129, "step": 13380 }, { "epoch": 3.0445961319681456, "grad_norm": 1.9070680349986167, "learning_rate": 4.1537082717194184e-07, "loss": 0.0248, "step": 13381 }, { "epoch": 3.0448236632536974, "grad_norm": 1.2087986947797418, "learning_rate": 4.1528665380278875e-07, "loss": 0.0355, "step": 13382 }, { "epoch": 3.045051194539249, "grad_norm": 1.688718658688664, "learning_rate": 4.152024847197926e-07, "loss": 0.1213, "step": 13383 }, { "epoch": 3.045278725824801, "grad_norm": 5.178666121075372, "learning_rate": 4.151183199246735e-07, "loss": 0.0942, "step": 13384 }, { "epoch": 3.0455062571103526, "grad_norm": 1.325551493075684, "learning_rate": 4.150341594191512e-07, "loss": 0.0451, "step": 13385 }, { "epoch": 3.0457337883959044, "grad_norm": 1.2265878504582506, "learning_rate": 4.149500032049465e-07, "loss": 0.0444, "step": 13386 }, { "epoch": 3.045961319681456, "grad_norm": 2.06147047953494, "learning_rate": 4.148658512837789e-07, "loss": 0.035, "step": 13387 }, { "epoch": 3.046188850967008, "grad_norm": 1.0080294999041635, "learning_rate": 4.147817036573684e-07, "loss": 0.0072, "step": 13388 }, { "epoch": 3.0464163822525596, "grad_norm": 2.374050447773029, "learning_rate": 4.146975603274349e-07, "loss": 0.0088, "step": 13389 }, { "epoch": 3.0466439135381114, "grad_norm": 1.450348945085828, "learning_rate": 4.146134212956983e-07, "loss": 0.0293, "step": 13390 }, { "epoch": 3.046871444823663, "grad_norm": 7.4199155889202135, "learning_rate": 4.1452928656387793e-07, "loss": 0.065, "step": 13391 }, { "epoch": 3.047098976109215, "grad_norm": 0.86740290342997, "learning_rate": 4.144451561336933e-07, "loss": 0.0663, "step": 13392 }, { "epoch": 3.0473265073947666, "grad_norm": 0.998469176160924, "learning_rate": 4.143610300068644e-07, "loss": 0.0598, "step": 13393 }, { "epoch": 3.0475540386803184, "grad_norm": 1.5358023514811763, "learning_rate": 4.1427690818511003e-07, "loss": 0.056, "step": 13394 }, { "epoch": 3.04778156996587, "grad_norm": 1.890686724425367, "learning_rate": 4.1419279067014985e-07, "loss": 0.0245, "step": 13395 }, { "epoch": 3.048009101251422, "grad_norm": 1.7452111872537175, "learning_rate": 4.141086774637028e-07, "loss": 0.0706, "step": 13396 }, { "epoch": 3.0482366325369736, "grad_norm": 0.8691037631203364, "learning_rate": 4.1402456856748846e-07, "loss": 0.0361, "step": 13397 }, { "epoch": 3.0484641638225254, "grad_norm": 1.1178551612893326, "learning_rate": 4.1394046398322536e-07, "loss": 0.0543, "step": 13398 }, { "epoch": 3.0486916951080776, "grad_norm": 1.513749198039502, "learning_rate": 4.138563637126326e-07, "loss": 0.0436, "step": 13399 }, { "epoch": 3.0489192263936293, "grad_norm": 1.2534560832472401, "learning_rate": 4.137722677574293e-07, "loss": 0.0495, "step": 13400 }, { "epoch": 3.049146757679181, "grad_norm": 1.3697654689812881, "learning_rate": 4.136881761193337e-07, "loss": 0.0692, "step": 13401 }, { "epoch": 3.049374288964733, "grad_norm": 0.9069287581478572, "learning_rate": 4.1360408880006487e-07, "loss": 0.0126, "step": 13402 }, { "epoch": 3.0496018202502846, "grad_norm": 0.4105994141268159, "learning_rate": 4.1352000580134153e-07, "loss": 0.0023, "step": 13403 }, { "epoch": 3.0498293515358363, "grad_norm": 1.8873866540861508, "learning_rate": 4.134359271248817e-07, "loss": 0.0318, "step": 13404 }, { "epoch": 3.050056882821388, "grad_norm": 1.5313930316803053, "learning_rate": 4.133518527724042e-07, "loss": 0.1006, "step": 13405 }, { "epoch": 3.05028441410694, "grad_norm": 1.4837530196951467, "learning_rate": 4.1326778274562706e-07, "loss": 0.0243, "step": 13406 }, { "epoch": 3.0505119453924916, "grad_norm": 1.98096515607044, "learning_rate": 4.1318371704626894e-07, "loss": 0.0657, "step": 13407 }, { "epoch": 3.0507394766780433, "grad_norm": 1.0377556871905562, "learning_rate": 4.1309965567604726e-07, "loss": 0.0516, "step": 13408 }, { "epoch": 3.050967007963595, "grad_norm": 1.2342219558471834, "learning_rate": 4.13015598636681e-07, "loss": 0.0246, "step": 13409 }, { "epoch": 3.051194539249147, "grad_norm": 1.0429375517371202, "learning_rate": 4.1293154592988756e-07, "loss": 0.0162, "step": 13410 }, { "epoch": 3.0514220705346986, "grad_norm": 1.6072818486701812, "learning_rate": 4.128474975573847e-07, "loss": 0.0173, "step": 13411 }, { "epoch": 3.0516496018202504, "grad_norm": 2.1183086186221836, "learning_rate": 4.1276345352089083e-07, "loss": 0.0351, "step": 13412 }, { "epoch": 3.051877133105802, "grad_norm": 1.2877181681045269, "learning_rate": 4.12679413822123e-07, "loss": 0.1041, "step": 13413 }, { "epoch": 3.052104664391354, "grad_norm": 0.8670980792052393, "learning_rate": 4.1259537846279926e-07, "loss": 0.0077, "step": 13414 }, { "epoch": 3.0523321956769056, "grad_norm": 1.3728031274652417, "learning_rate": 4.125113474446367e-07, "loss": 0.0883, "step": 13415 }, { "epoch": 3.0525597269624574, "grad_norm": 1.0110578189520192, "learning_rate": 4.124273207693534e-07, "loss": 0.0071, "step": 13416 }, { "epoch": 3.052787258248009, "grad_norm": 1.4886501592278203, "learning_rate": 4.1234329843866624e-07, "loss": 0.0486, "step": 13417 }, { "epoch": 3.053014789533561, "grad_norm": 1.2826239487332893, "learning_rate": 4.122592804542925e-07, "loss": 0.0317, "step": 13418 }, { "epoch": 3.0532423208191126, "grad_norm": 0.8648315164936364, "learning_rate": 4.121752668179496e-07, "loss": 0.0061, "step": 13419 }, { "epoch": 3.0534698521046644, "grad_norm": 0.745421575988049, "learning_rate": 4.1209125753135434e-07, "loss": 0.0075, "step": 13420 }, { "epoch": 3.053697383390216, "grad_norm": 1.2388777137093925, "learning_rate": 4.120072525962239e-07, "loss": 0.0735, "step": 13421 }, { "epoch": 3.053924914675768, "grad_norm": 1.170249372157511, "learning_rate": 4.119232520142753e-07, "loss": 0.0393, "step": 13422 }, { "epoch": 3.0541524459613196, "grad_norm": 1.1353765161004992, "learning_rate": 4.11839255787225e-07, "loss": 0.04, "step": 13423 }, { "epoch": 3.0543799772468714, "grad_norm": 0.6884534394721324, "learning_rate": 4.1175526391678997e-07, "loss": 0.0106, "step": 13424 }, { "epoch": 3.054607508532423, "grad_norm": 1.4906769861235591, "learning_rate": 4.1167127640468667e-07, "loss": 0.0055, "step": 13425 }, { "epoch": 3.054835039817975, "grad_norm": 2.7452174754531855, "learning_rate": 4.1158729325263205e-07, "loss": 0.0339, "step": 13426 }, { "epoch": 3.0550625711035266, "grad_norm": 1.2940660153006962, "learning_rate": 4.115033144623421e-07, "loss": 0.0233, "step": 13427 }, { "epoch": 3.0552901023890784, "grad_norm": 1.4837145290704379, "learning_rate": 4.1141934003553346e-07, "loss": 0.0742, "step": 13428 }, { "epoch": 3.05551763367463, "grad_norm": 1.924274693733529, "learning_rate": 4.113353699739225e-07, "loss": 0.0073, "step": 13429 }, { "epoch": 3.055745164960182, "grad_norm": 1.289177340615115, "learning_rate": 4.112514042792251e-07, "loss": 0.0758, "step": 13430 }, { "epoch": 3.0559726962457336, "grad_norm": 1.937002691650164, "learning_rate": 4.111674429531576e-07, "loss": 0.0938, "step": 13431 }, { "epoch": 3.0562002275312854, "grad_norm": 1.8613270707621927, "learning_rate": 4.110834859974358e-07, "loss": 0.0544, "step": 13432 }, { "epoch": 3.056427758816837, "grad_norm": 1.4705015926152087, "learning_rate": 4.1099953341377603e-07, "loss": 0.0082, "step": 13433 }, { "epoch": 3.056655290102389, "grad_norm": 0.9355663974099242, "learning_rate": 4.1091558520389375e-07, "loss": 0.0317, "step": 13434 }, { "epoch": 3.0568828213879407, "grad_norm": 1.5590405263832086, "learning_rate": 4.108316413695048e-07, "loss": 0.0506, "step": 13435 }, { "epoch": 3.0571103526734924, "grad_norm": 3.0202523515447797, "learning_rate": 4.10747701912325e-07, "loss": 0.0282, "step": 13436 }, { "epoch": 3.057337883959044, "grad_norm": 0.9991731061689265, "learning_rate": 4.106637668340696e-07, "loss": 0.0569, "step": 13437 }, { "epoch": 3.0575654152445964, "grad_norm": 1.0584333034997588, "learning_rate": 4.105798361364544e-07, "loss": 0.0517, "step": 13438 }, { "epoch": 3.057792946530148, "grad_norm": 0.9866150836186816, "learning_rate": 4.1049590982119454e-07, "loss": 0.0226, "step": 13439 }, { "epoch": 3.0580204778157, "grad_norm": 1.8454025753251064, "learning_rate": 4.104119878900056e-07, "loss": 0.0882, "step": 13440 }, { "epoch": 3.0582480091012516, "grad_norm": 1.013504660462846, "learning_rate": 4.1032807034460263e-07, "loss": 0.0166, "step": 13441 }, { "epoch": 3.0584755403868034, "grad_norm": 1.0796220694673753, "learning_rate": 4.102441571867005e-07, "loss": 0.0106, "step": 13442 }, { "epoch": 3.058703071672355, "grad_norm": 1.5882377552401359, "learning_rate": 4.1016024841801485e-07, "loss": 0.0435, "step": 13443 }, { "epoch": 3.058930602957907, "grad_norm": 2.195275381557851, "learning_rate": 4.1007634404025996e-07, "loss": 0.0091, "step": 13444 }, { "epoch": 3.0591581342434586, "grad_norm": 1.8649239332790597, "learning_rate": 4.0999244405515117e-07, "loss": 0.0897, "step": 13445 }, { "epoch": 3.0593856655290104, "grad_norm": 1.6579118777445994, "learning_rate": 4.099085484644028e-07, "loss": 0.0726, "step": 13446 }, { "epoch": 3.059613196814562, "grad_norm": 1.0374266058516155, "learning_rate": 4.098246572697301e-07, "loss": 0.0197, "step": 13447 }, { "epoch": 3.059840728100114, "grad_norm": 1.2561720733452113, "learning_rate": 4.097407704728472e-07, "loss": 0.0581, "step": 13448 }, { "epoch": 3.0600682593856656, "grad_norm": 1.6746906032217599, "learning_rate": 4.096568880754686e-07, "loss": 0.063, "step": 13449 }, { "epoch": 3.0602957906712174, "grad_norm": 1.2208176189884394, "learning_rate": 4.095730100793091e-07, "loss": 0.0217, "step": 13450 }, { "epoch": 3.060523321956769, "grad_norm": 2.1190557987688163, "learning_rate": 4.0948913648608244e-07, "loss": 0.0332, "step": 13451 }, { "epoch": 3.060750853242321, "grad_norm": 1.0491572766019448, "learning_rate": 4.094052672975033e-07, "loss": 0.0071, "step": 13452 }, { "epoch": 3.0609783845278726, "grad_norm": 0.9574842304894022, "learning_rate": 4.093214025152858e-07, "loss": 0.0259, "step": 13453 }, { "epoch": 3.0612059158134244, "grad_norm": 1.4909161647319866, "learning_rate": 4.092375421411435e-07, "loss": 0.0525, "step": 13454 }, { "epoch": 3.061433447098976, "grad_norm": 0.7430136938562012, "learning_rate": 4.091536861767909e-07, "loss": 0.0288, "step": 13455 }, { "epoch": 3.061660978384528, "grad_norm": 1.5399161384655613, "learning_rate": 4.090698346239415e-07, "loss": 0.0142, "step": 13456 }, { "epoch": 3.0618885096700796, "grad_norm": 1.1001731047600767, "learning_rate": 4.089859874843094e-07, "loss": 0.0134, "step": 13457 }, { "epoch": 3.0621160409556314, "grad_norm": 1.8292967132120512, "learning_rate": 4.0890214475960793e-07, "loss": 0.0126, "step": 13458 }, { "epoch": 3.062343572241183, "grad_norm": 1.4288370200683915, "learning_rate": 4.08818306451551e-07, "loss": 0.0187, "step": 13459 }, { "epoch": 3.062571103526735, "grad_norm": 1.5470610281366866, "learning_rate": 4.087344725618521e-07, "loss": 0.0266, "step": 13460 }, { "epoch": 3.0627986348122866, "grad_norm": 0.6507191402425261, "learning_rate": 4.086506430922242e-07, "loss": 0.0067, "step": 13461 }, { "epoch": 3.0630261660978384, "grad_norm": 1.5922788974562063, "learning_rate": 4.085668180443811e-07, "loss": 0.1368, "step": 13462 }, { "epoch": 3.06325369738339, "grad_norm": 1.389819013458761, "learning_rate": 4.084829974200356e-07, "loss": 0.0131, "step": 13463 }, { "epoch": 3.063481228668942, "grad_norm": 1.2370499030688962, "learning_rate": 4.0839918122090156e-07, "loss": 0.0138, "step": 13464 }, { "epoch": 3.0637087599544937, "grad_norm": 1.150810868467212, "learning_rate": 4.0831536944869106e-07, "loss": 0.0309, "step": 13465 }, { "epoch": 3.0639362912400454, "grad_norm": 1.2385956487181562, "learning_rate": 4.0823156210511806e-07, "loss": 0.0238, "step": 13466 }, { "epoch": 3.064163822525597, "grad_norm": 1.9246925216642141, "learning_rate": 4.081477591918948e-07, "loss": 0.1411, "step": 13467 }, { "epoch": 3.064391353811149, "grad_norm": 1.8950729251092184, "learning_rate": 4.0806396071073395e-07, "loss": 0.0097, "step": 13468 }, { "epoch": 3.0646188850967007, "grad_norm": 1.403209208456404, "learning_rate": 4.079801666633487e-07, "loss": 0.0439, "step": 13469 }, { "epoch": 3.0648464163822524, "grad_norm": 1.2964554318811992, "learning_rate": 4.0789637705145114e-07, "loss": 0.0134, "step": 13470 }, { "epoch": 3.065073947667804, "grad_norm": 1.5324451809429704, "learning_rate": 4.078125918767543e-07, "loss": 0.0136, "step": 13471 }, { "epoch": 3.065301478953356, "grad_norm": 1.0115565879325228, "learning_rate": 4.077288111409703e-07, "loss": 0.01, "step": 13472 }, { "epoch": 3.0655290102389077, "grad_norm": 1.2374919644492437, "learning_rate": 4.076450348458113e-07, "loss": 0.0621, "step": 13473 }, { "epoch": 3.0657565415244594, "grad_norm": 0.979410923765276, "learning_rate": 4.075612629929898e-07, "loss": 0.0095, "step": 13474 }, { "epoch": 3.065984072810011, "grad_norm": 1.7613443240880429, "learning_rate": 4.0747749558421785e-07, "loss": 0.0603, "step": 13475 }, { "epoch": 3.066211604095563, "grad_norm": 1.5162125333616578, "learning_rate": 4.073937326212077e-07, "loss": 0.081, "step": 13476 }, { "epoch": 3.066439135381115, "grad_norm": 1.130395082326072, "learning_rate": 4.073099741056708e-07, "loss": 0.0245, "step": 13477 }, { "epoch": 3.066666666666667, "grad_norm": 1.3574672108572303, "learning_rate": 4.0722622003931956e-07, "loss": 0.069, "step": 13478 }, { "epoch": 3.0668941979522186, "grad_norm": 2.7548262999130144, "learning_rate": 4.0714247042386565e-07, "loss": 0.0643, "step": 13479 }, { "epoch": 3.0671217292377704, "grad_norm": 1.4067458213595894, "learning_rate": 4.0705872526102043e-07, "loss": 0.0546, "step": 13480 }, { "epoch": 3.067349260523322, "grad_norm": 1.163065008591204, "learning_rate": 4.0697498455249585e-07, "loss": 0.0349, "step": 13481 }, { "epoch": 3.067576791808874, "grad_norm": 1.554792095772025, "learning_rate": 4.068912483000032e-07, "loss": 0.1044, "step": 13482 }, { "epoch": 3.0678043230944256, "grad_norm": 2.455194010243393, "learning_rate": 4.0680751650525416e-07, "loss": 0.0393, "step": 13483 }, { "epoch": 3.0680318543799774, "grad_norm": 1.2038858065137428, "learning_rate": 4.0672378916995966e-07, "loss": 0.0686, "step": 13484 }, { "epoch": 3.068259385665529, "grad_norm": 1.687966183318232, "learning_rate": 4.0664006629583125e-07, "loss": 0.0268, "step": 13485 }, { "epoch": 3.068486916951081, "grad_norm": 1.0928487166753238, "learning_rate": 4.0655634788458006e-07, "loss": 0.0133, "step": 13486 }, { "epoch": 3.0687144482366326, "grad_norm": 1.2203987199554573, "learning_rate": 4.064726339379169e-07, "loss": 0.0465, "step": 13487 }, { "epoch": 3.0689419795221844, "grad_norm": 1.0149243496050784, "learning_rate": 4.0638892445755305e-07, "loss": 0.0185, "step": 13488 }, { "epoch": 3.069169510807736, "grad_norm": 0.6896405585172166, "learning_rate": 4.06305219445199e-07, "loss": 0.0052, "step": 13489 }, { "epoch": 3.069397042093288, "grad_norm": 1.18629353952902, "learning_rate": 4.06221518902566e-07, "loss": 0.0767, "step": 13490 }, { "epoch": 3.0696245733788396, "grad_norm": 0.8898289124723648, "learning_rate": 4.061378228313644e-07, "loss": 0.0079, "step": 13491 }, { "epoch": 3.0698521046643914, "grad_norm": 1.3754777315813482, "learning_rate": 4.0605413123330476e-07, "loss": 0.0224, "step": 13492 }, { "epoch": 3.070079635949943, "grad_norm": 1.9201497564246706, "learning_rate": 4.059704441100979e-07, "loss": 0.0275, "step": 13493 }, { "epoch": 3.070307167235495, "grad_norm": 2.1555084889666505, "learning_rate": 4.058867614634538e-07, "loss": 0.0466, "step": 13494 }, { "epoch": 3.0705346985210467, "grad_norm": 1.2457301443950424, "learning_rate": 4.0580308329508315e-07, "loss": 0.0334, "step": 13495 }, { "epoch": 3.0707622298065984, "grad_norm": 1.1068885214777247, "learning_rate": 4.057194096066959e-07, "loss": 0.0097, "step": 13496 }, { "epoch": 3.07098976109215, "grad_norm": 1.310389923389732, "learning_rate": 4.056357404000026e-07, "loss": 0.1162, "step": 13497 }, { "epoch": 3.071217292377702, "grad_norm": 2.099909634838453, "learning_rate": 4.055520756767128e-07, "loss": 0.0361, "step": 13498 }, { "epoch": 3.0714448236632537, "grad_norm": 0.9891137718587748, "learning_rate": 4.054684154385366e-07, "loss": 0.0176, "step": 13499 }, { "epoch": 3.0716723549488054, "grad_norm": 2.0555136435708143, "learning_rate": 4.053847596871843e-07, "loss": 0.0962, "step": 13500 }, { "epoch": 3.071899886234357, "grad_norm": 3.7364014902550267, "learning_rate": 4.0530110842436494e-07, "loss": 0.0402, "step": 13501 }, { "epoch": 3.072127417519909, "grad_norm": 0.4557133784445649, "learning_rate": 4.0521746165178865e-07, "loss": 0.0029, "step": 13502 }, { "epoch": 3.0723549488054607, "grad_norm": 1.9812207397091883, "learning_rate": 4.0513381937116487e-07, "loss": 0.0888, "step": 13503 }, { "epoch": 3.0725824800910124, "grad_norm": 1.9405191227862628, "learning_rate": 4.050501815842034e-07, "loss": 0.0721, "step": 13504 }, { "epoch": 3.072810011376564, "grad_norm": 1.9314472471509796, "learning_rate": 4.0496654829261323e-07, "loss": 0.0153, "step": 13505 }, { "epoch": 3.073037542662116, "grad_norm": 1.808527730362304, "learning_rate": 4.048829194981038e-07, "loss": 0.0169, "step": 13506 }, { "epoch": 3.0732650739476677, "grad_norm": 1.5635600604522804, "learning_rate": 4.0479929520238456e-07, "loss": 0.0612, "step": 13507 }, { "epoch": 3.0734926052332194, "grad_norm": 2.1474426369563218, "learning_rate": 4.047156754071642e-07, "loss": 0.0225, "step": 13508 }, { "epoch": 3.073720136518771, "grad_norm": 1.8673939408032696, "learning_rate": 4.046320601141522e-07, "loss": 0.0251, "step": 13509 }, { "epoch": 3.073947667804323, "grad_norm": 1.6083302145752891, "learning_rate": 4.045484493250573e-07, "loss": 0.1088, "step": 13510 }, { "epoch": 3.0741751990898747, "grad_norm": 1.11479900856408, "learning_rate": 4.0446484304158815e-07, "loss": 0.0168, "step": 13511 }, { "epoch": 3.0744027303754264, "grad_norm": 1.3724966365543199, "learning_rate": 4.0438124126545396e-07, "loss": 0.0643, "step": 13512 }, { "epoch": 3.074630261660978, "grad_norm": 1.2023256503137727, "learning_rate": 4.042976439983629e-07, "loss": 0.0773, "step": 13513 }, { "epoch": 3.07485779294653, "grad_norm": 1.0758297574056426, "learning_rate": 4.0421405124202407e-07, "loss": 0.0218, "step": 13514 }, { "epoch": 3.0750853242320817, "grad_norm": 0.9968522427579356, "learning_rate": 4.0413046299814547e-07, "loss": 0.0177, "step": 13515 }, { "epoch": 3.075312855517634, "grad_norm": 0.7690823506210563, "learning_rate": 4.0404687926843583e-07, "loss": 0.0068, "step": 13516 }, { "epoch": 3.0755403868031856, "grad_norm": 1.9359591671385443, "learning_rate": 4.039633000546034e-07, "loss": 0.0185, "step": 13517 }, { "epoch": 3.0757679180887374, "grad_norm": 0.906157425833498, "learning_rate": 4.038797253583561e-07, "loss": 0.0129, "step": 13518 }, { "epoch": 3.075995449374289, "grad_norm": 0.7705445834367937, "learning_rate": 4.0379615518140235e-07, "loss": 0.0176, "step": 13519 }, { "epoch": 3.076222980659841, "grad_norm": 0.9305813210185895, "learning_rate": 4.0371258952545e-07, "loss": 0.0093, "step": 13520 }, { "epoch": 3.0764505119453927, "grad_norm": 1.4660603538082584, "learning_rate": 4.036290283922072e-07, "loss": 0.0487, "step": 13521 }, { "epoch": 3.0766780432309444, "grad_norm": 1.035703611412051, "learning_rate": 4.0354547178338146e-07, "loss": 0.0086, "step": 13522 }, { "epoch": 3.076905574516496, "grad_norm": 1.2022629355464796, "learning_rate": 4.0346191970068086e-07, "loss": 0.0501, "step": 13523 }, { "epoch": 3.077133105802048, "grad_norm": 0.8862816640384154, "learning_rate": 4.0337837214581294e-07, "loss": 0.0179, "step": 13524 }, { "epoch": 3.0773606370875997, "grad_norm": 1.1910454629000577, "learning_rate": 4.0329482912048504e-07, "loss": 0.0368, "step": 13525 }, { "epoch": 3.0775881683731514, "grad_norm": 1.0126958331368516, "learning_rate": 4.0321129062640484e-07, "loss": 0.0137, "step": 13526 }, { "epoch": 3.077815699658703, "grad_norm": 1.6635571241226488, "learning_rate": 4.031277566652796e-07, "loss": 0.0833, "step": 13527 }, { "epoch": 3.078043230944255, "grad_norm": 1.4145634698707976, "learning_rate": 4.030442272388169e-07, "loss": 0.1069, "step": 13528 }, { "epoch": 3.0782707622298067, "grad_norm": 1.2118014045720968, "learning_rate": 4.0296070234872363e-07, "loss": 0.0163, "step": 13529 }, { "epoch": 3.0784982935153584, "grad_norm": 1.6818453480918845, "learning_rate": 4.0287718199670675e-07, "loss": 0.0364, "step": 13530 }, { "epoch": 3.07872582480091, "grad_norm": 1.3975149217143883, "learning_rate": 4.0279366618447375e-07, "loss": 0.0309, "step": 13531 }, { "epoch": 3.078953356086462, "grad_norm": 1.3719357764136466, "learning_rate": 4.0271015491373093e-07, "loss": 0.044, "step": 13532 }, { "epoch": 3.0791808873720137, "grad_norm": 1.6631681895046133, "learning_rate": 4.0262664818618575e-07, "loss": 0.0999, "step": 13533 }, { "epoch": 3.0794084186575654, "grad_norm": 1.4486732064483316, "learning_rate": 4.0254314600354444e-07, "loss": 0.0339, "step": 13534 }, { "epoch": 3.079635949943117, "grad_norm": 1.995944337440893, "learning_rate": 4.024596483675139e-07, "loss": 0.032, "step": 13535 }, { "epoch": 3.079863481228669, "grad_norm": 1.2073157058850934, "learning_rate": 4.023761552798007e-07, "loss": 0.0456, "step": 13536 }, { "epoch": 3.0800910125142207, "grad_norm": 0.9590832452537267, "learning_rate": 4.0229266674211094e-07, "loss": 0.0177, "step": 13537 }, { "epoch": 3.0803185437997724, "grad_norm": 1.5612287708932804, "learning_rate": 4.0220918275615134e-07, "loss": 0.0506, "step": 13538 }, { "epoch": 3.080546075085324, "grad_norm": 1.5408183457651807, "learning_rate": 4.0212570332362783e-07, "loss": 0.1125, "step": 13539 }, { "epoch": 3.080773606370876, "grad_norm": 2.0341934839510305, "learning_rate": 4.0204222844624713e-07, "loss": 0.0428, "step": 13540 }, { "epoch": 3.0810011376564277, "grad_norm": 1.3743426350604653, "learning_rate": 4.0195875812571465e-07, "loss": 0.0151, "step": 13541 }, { "epoch": 3.0812286689419794, "grad_norm": 0.7938899439658227, "learning_rate": 4.018752923637367e-07, "loss": 0.0082, "step": 13542 }, { "epoch": 3.081456200227531, "grad_norm": 1.4093232529786845, "learning_rate": 4.0179183116201943e-07, "loss": 0.0418, "step": 13543 }, { "epoch": 3.081683731513083, "grad_norm": 1.614215210398318, "learning_rate": 4.01708374522268e-07, "loss": 0.0197, "step": 13544 }, { "epoch": 3.0819112627986347, "grad_norm": 1.9083495567702569, "learning_rate": 4.0162492244618867e-07, "loss": 0.0591, "step": 13545 }, { "epoch": 3.0821387940841865, "grad_norm": 1.8561542510845073, "learning_rate": 4.0154147493548657e-07, "loss": 0.0283, "step": 13546 }, { "epoch": 3.082366325369738, "grad_norm": 0.8625508657530548, "learning_rate": 4.014580319918678e-07, "loss": 0.0075, "step": 13547 }, { "epoch": 3.08259385665529, "grad_norm": 1.2051216114066197, "learning_rate": 4.0137459361703734e-07, "loss": 0.0279, "step": 13548 }, { "epoch": 3.0828213879408417, "grad_norm": 1.5980120260106725, "learning_rate": 4.0129115981270046e-07, "loss": 0.0413, "step": 13549 }, { "epoch": 3.0830489192263935, "grad_norm": 0.7754847132563644, "learning_rate": 4.012077305805629e-07, "loss": 0.0099, "step": 13550 }, { "epoch": 3.083276450511945, "grad_norm": 1.4837595515239845, "learning_rate": 4.011243059223292e-07, "loss": 0.0615, "step": 13551 }, { "epoch": 3.083503981797497, "grad_norm": 1.4069182622620378, "learning_rate": 4.0104088583970477e-07, "loss": 0.0805, "step": 13552 }, { "epoch": 3.0837315130830487, "grad_norm": 2.239945715980311, "learning_rate": 4.0095747033439436e-07, "loss": 0.0463, "step": 13553 }, { "epoch": 3.0839590443686005, "grad_norm": 1.4361460309354477, "learning_rate": 4.0087405940810314e-07, "loss": 0.0191, "step": 13554 }, { "epoch": 3.0841865756541527, "grad_norm": 1.6887858906545936, "learning_rate": 4.0079065306253567e-07, "loss": 0.0223, "step": 13555 }, { "epoch": 3.0844141069397044, "grad_norm": 1.3309005270757588, "learning_rate": 4.007072512993965e-07, "loss": 0.0466, "step": 13556 }, { "epoch": 3.084641638225256, "grad_norm": 0.901185618562385, "learning_rate": 4.006238541203905e-07, "loss": 0.0264, "step": 13557 }, { "epoch": 3.084869169510808, "grad_norm": 1.1283872665207775, "learning_rate": 4.005404615272217e-07, "loss": 0.0198, "step": 13558 }, { "epoch": 3.0850967007963597, "grad_norm": 1.6944661587147454, "learning_rate": 4.00457073521595e-07, "loss": 0.0146, "step": 13559 }, { "epoch": 3.0853242320819114, "grad_norm": 1.2088392354032453, "learning_rate": 4.0037369010521464e-07, "loss": 0.0113, "step": 13560 }, { "epoch": 3.085551763367463, "grad_norm": 1.7537302403034813, "learning_rate": 4.002903112797844e-07, "loss": 0.0576, "step": 13561 }, { "epoch": 3.085779294653015, "grad_norm": 1.9189260524778622, "learning_rate": 4.002069370470088e-07, "loss": 0.0345, "step": 13562 }, { "epoch": 3.0860068259385667, "grad_norm": 1.3230019191488758, "learning_rate": 4.001235674085915e-07, "loss": 0.0519, "step": 13563 }, { "epoch": 3.0862343572241184, "grad_norm": 2.5876457830681905, "learning_rate": 4.0004020236623695e-07, "loss": 0.0401, "step": 13564 }, { "epoch": 3.08646188850967, "grad_norm": 1.1830801872337653, "learning_rate": 3.9995684192164847e-07, "loss": 0.0188, "step": 13565 }, { "epoch": 3.086689419795222, "grad_norm": 1.1105391937933662, "learning_rate": 3.9987348607653005e-07, "loss": 0.0191, "step": 13566 }, { "epoch": 3.0869169510807737, "grad_norm": 1.6853885755963305, "learning_rate": 3.997901348325854e-07, "loss": 0.0088, "step": 13567 }, { "epoch": 3.0871444823663254, "grad_norm": 2.1078178126028653, "learning_rate": 3.997067881915177e-07, "loss": 0.0638, "step": 13568 }, { "epoch": 3.087372013651877, "grad_norm": 1.7444940340830641, "learning_rate": 3.996234461550308e-07, "loss": 0.0835, "step": 13569 }, { "epoch": 3.087599544937429, "grad_norm": 1.1967540988763263, "learning_rate": 3.995401087248278e-07, "loss": 0.0388, "step": 13570 }, { "epoch": 3.0878270762229807, "grad_norm": 1.2383315920243803, "learning_rate": 3.994567759026123e-07, "loss": 0.0411, "step": 13571 }, { "epoch": 3.0880546075085324, "grad_norm": 1.5793452629516955, "learning_rate": 3.993734476900869e-07, "loss": 0.0171, "step": 13572 }, { "epoch": 3.088282138794084, "grad_norm": 0.7155357908004026, "learning_rate": 3.992901240889552e-07, "loss": 0.0139, "step": 13573 }, { "epoch": 3.088509670079636, "grad_norm": 1.9476577944738236, "learning_rate": 3.9920680510092015e-07, "loss": 0.0094, "step": 13574 }, { "epoch": 3.0887372013651877, "grad_norm": 1.5767731743172757, "learning_rate": 3.991234907276842e-07, "loss": 0.0343, "step": 13575 }, { "epoch": 3.0889647326507395, "grad_norm": 0.806702065755519, "learning_rate": 3.9904018097095055e-07, "loss": 0.0102, "step": 13576 }, { "epoch": 3.089192263936291, "grad_norm": 1.620963288889041, "learning_rate": 3.9895687583242164e-07, "loss": 0.1153, "step": 13577 }, { "epoch": 3.089419795221843, "grad_norm": 1.1294542271760653, "learning_rate": 3.9887357531380053e-07, "loss": 0.0204, "step": 13578 }, { "epoch": 3.0896473265073947, "grad_norm": 1.1160415915299098, "learning_rate": 3.987902794167892e-07, "loss": 0.0103, "step": 13579 }, { "epoch": 3.0898748577929465, "grad_norm": 1.1652597855733946, "learning_rate": 3.987069881430902e-07, "loss": 0.0058, "step": 13580 }, { "epoch": 3.090102389078498, "grad_norm": 1.4609879707033124, "learning_rate": 3.986237014944062e-07, "loss": 0.1052, "step": 13581 }, { "epoch": 3.09032992036405, "grad_norm": 2.960132136121817, "learning_rate": 3.985404194724388e-07, "loss": 0.0369, "step": 13582 }, { "epoch": 3.0905574516496017, "grad_norm": 1.7689916717088179, "learning_rate": 3.9845714207889073e-07, "loss": 0.0171, "step": 13583 }, { "epoch": 3.0907849829351535, "grad_norm": 1.7127618790413806, "learning_rate": 3.983738693154636e-07, "loss": 0.1069, "step": 13584 }, { "epoch": 3.091012514220705, "grad_norm": 1.9746894042761092, "learning_rate": 3.982906011838598e-07, "loss": 0.0283, "step": 13585 }, { "epoch": 3.091240045506257, "grad_norm": 1.6912074577139427, "learning_rate": 3.982073376857808e-07, "loss": 0.1166, "step": 13586 }, { "epoch": 3.0914675767918087, "grad_norm": 1.2974283541468137, "learning_rate": 3.981240788229282e-07, "loss": 0.1101, "step": 13587 }, { "epoch": 3.0916951080773605, "grad_norm": 0.6586818410934965, "learning_rate": 3.980408245970044e-07, "loss": 0.0043, "step": 13588 }, { "epoch": 3.0919226393629122, "grad_norm": 1.843241594417675, "learning_rate": 3.979575750097099e-07, "loss": 0.0636, "step": 13589 }, { "epoch": 3.092150170648464, "grad_norm": 2.9225776197220914, "learning_rate": 3.978743300627473e-07, "loss": 0.0497, "step": 13590 }, { "epoch": 3.0923777019340157, "grad_norm": 0.9766718854823248, "learning_rate": 3.977910897578171e-07, "loss": 0.0267, "step": 13591 }, { "epoch": 3.0926052332195675, "grad_norm": 0.9843035078742755, "learning_rate": 3.97707854096621e-07, "loss": 0.0343, "step": 13592 }, { "epoch": 3.0928327645051192, "grad_norm": 0.7849049760636766, "learning_rate": 3.9762462308086033e-07, "loss": 0.0101, "step": 13593 }, { "epoch": 3.0930602957906714, "grad_norm": 1.4449212290046136, "learning_rate": 3.975413967122356e-07, "loss": 0.0537, "step": 13594 }, { "epoch": 3.093287827076223, "grad_norm": 0.7746423469912095, "learning_rate": 3.9745817499244834e-07, "loss": 0.0223, "step": 13595 }, { "epoch": 3.093515358361775, "grad_norm": 1.581893902486248, "learning_rate": 3.973749579231992e-07, "loss": 0.0209, "step": 13596 }, { "epoch": 3.0937428896473267, "grad_norm": 2.040787185687584, "learning_rate": 3.972917455061891e-07, "loss": 0.0185, "step": 13597 }, { "epoch": 3.0939704209328784, "grad_norm": 0.5868462089913166, "learning_rate": 3.9720853774311866e-07, "loss": 0.0173, "step": 13598 }, { "epoch": 3.09419795221843, "grad_norm": 2.758236698811097, "learning_rate": 3.9712533463568846e-07, "loss": 0.0652, "step": 13599 }, { "epoch": 3.094425483503982, "grad_norm": 1.0679837585237097, "learning_rate": 3.970421361855993e-07, "loss": 0.0228, "step": 13600 }, { "epoch": 3.0946530147895337, "grad_norm": 0.7191902231456109, "learning_rate": 3.9695894239455123e-07, "loss": 0.0148, "step": 13601 }, { "epoch": 3.0948805460750854, "grad_norm": 2.052964662530032, "learning_rate": 3.968757532642448e-07, "loss": 0.0659, "step": 13602 }, { "epoch": 3.095108077360637, "grad_norm": 2.796019311282256, "learning_rate": 3.9679256879637995e-07, "loss": 0.0599, "step": 13603 }, { "epoch": 3.095335608646189, "grad_norm": 1.4177295671233534, "learning_rate": 3.9670938899265735e-07, "loss": 0.0631, "step": 13604 }, { "epoch": 3.0955631399317407, "grad_norm": 0.96470390354775, "learning_rate": 3.966262138547767e-07, "loss": 0.0391, "step": 13605 }, { "epoch": 3.0957906712172925, "grad_norm": 1.8997638680758662, "learning_rate": 3.9654304338443776e-07, "loss": 0.046, "step": 13606 }, { "epoch": 3.096018202502844, "grad_norm": 2.3329637416332756, "learning_rate": 3.96459877583341e-07, "loss": 0.0108, "step": 13607 }, { "epoch": 3.096245733788396, "grad_norm": 1.8311459269138874, "learning_rate": 3.9637671645318545e-07, "loss": 0.0477, "step": 13608 }, { "epoch": 3.0964732650739477, "grad_norm": 1.5398449246725303, "learning_rate": 3.9629355999567124e-07, "loss": 0.0216, "step": 13609 }, { "epoch": 3.0967007963594995, "grad_norm": 2.985761137005442, "learning_rate": 3.9621040821249767e-07, "loss": 0.033, "step": 13610 }, { "epoch": 3.096928327645051, "grad_norm": 1.7459871382127772, "learning_rate": 3.9612726110536466e-07, "loss": 0.0125, "step": 13611 }, { "epoch": 3.097155858930603, "grad_norm": 1.4356542353456119, "learning_rate": 3.9604411867597115e-07, "loss": 0.0092, "step": 13612 }, { "epoch": 3.0973833902161547, "grad_norm": 1.0027829786598457, "learning_rate": 3.9596098092601635e-07, "loss": 0.0462, "step": 13613 }, { "epoch": 3.0976109215017065, "grad_norm": 0.9289704234462367, "learning_rate": 3.9587784785719993e-07, "loss": 0.0066, "step": 13614 }, { "epoch": 3.0978384527872582, "grad_norm": 1.8415802777750643, "learning_rate": 3.957947194712204e-07, "loss": 0.0264, "step": 13615 }, { "epoch": 3.09806598407281, "grad_norm": 2.012133337720865, "learning_rate": 3.9571159576977714e-07, "loss": 0.0707, "step": 13616 }, { "epoch": 3.0982935153583617, "grad_norm": 0.8305792303825693, "learning_rate": 3.956284767545691e-07, "loss": 0.0049, "step": 13617 }, { "epoch": 3.0985210466439135, "grad_norm": 1.6277206846245693, "learning_rate": 3.9554536242729464e-07, "loss": 0.0181, "step": 13618 }, { "epoch": 3.0987485779294652, "grad_norm": 2.2393511732835205, "learning_rate": 3.9546225278965296e-07, "loss": 0.0941, "step": 13619 }, { "epoch": 3.098976109215017, "grad_norm": 1.30612697707262, "learning_rate": 3.953791478433423e-07, "loss": 0.0502, "step": 13620 }, { "epoch": 3.0992036405005687, "grad_norm": 1.1988701689538506, "learning_rate": 3.952960475900615e-07, "loss": 0.0216, "step": 13621 }, { "epoch": 3.0994311717861205, "grad_norm": 1.7057160110973104, "learning_rate": 3.952129520315086e-07, "loss": 0.0497, "step": 13622 }, { "epoch": 3.0996587030716722, "grad_norm": 2.2501576736675144, "learning_rate": 3.951298611693823e-07, "loss": 0.0105, "step": 13623 }, { "epoch": 3.099886234357224, "grad_norm": 1.81134407811434, "learning_rate": 3.9504677500538065e-07, "loss": 0.1098, "step": 13624 }, { "epoch": 3.1001137656427757, "grad_norm": 0.9554496622975158, "learning_rate": 3.9496369354120163e-07, "loss": 0.0085, "step": 13625 }, { "epoch": 3.1003412969283275, "grad_norm": 1.751586546616023, "learning_rate": 3.948806167785435e-07, "loss": 0.0231, "step": 13626 }, { "epoch": 3.1005688282138792, "grad_norm": 1.4083255423837844, "learning_rate": 3.947975447191041e-07, "loss": 0.0514, "step": 13627 }, { "epoch": 3.100796359499431, "grad_norm": 2.6419060692642082, "learning_rate": 3.947144773645814e-07, "loss": 0.0417, "step": 13628 }, { "epoch": 3.1010238907849828, "grad_norm": 1.0200515477962708, "learning_rate": 3.9463141471667286e-07, "loss": 0.0147, "step": 13629 }, { "epoch": 3.1012514220705345, "grad_norm": 1.1512948924057376, "learning_rate": 3.9454835677707635e-07, "loss": 0.011, "step": 13630 }, { "epoch": 3.1014789533560863, "grad_norm": 2.3890227280139555, "learning_rate": 3.944653035474896e-07, "loss": 0.0659, "step": 13631 }, { "epoch": 3.101706484641638, "grad_norm": 2.328996965700167, "learning_rate": 3.943822550296096e-07, "loss": 0.0472, "step": 13632 }, { "epoch": 3.10193401592719, "grad_norm": 1.4595592760149405, "learning_rate": 3.9429921122513397e-07, "loss": 0.0189, "step": 13633 }, { "epoch": 3.102161547212742, "grad_norm": 1.5823663697569976, "learning_rate": 3.9421617213575993e-07, "loss": 0.0543, "step": 13634 }, { "epoch": 3.1023890784982937, "grad_norm": 0.6113103262859129, "learning_rate": 3.941331377631849e-07, "loss": 0.0101, "step": 13635 }, { "epoch": 3.1026166097838455, "grad_norm": 1.198317104994379, "learning_rate": 3.9405010810910555e-07, "loss": 0.0736, "step": 13636 }, { "epoch": 3.102844141069397, "grad_norm": 0.9068924114879524, "learning_rate": 3.939670831752189e-07, "loss": 0.0194, "step": 13637 }, { "epoch": 3.103071672354949, "grad_norm": 1.8700229818759475, "learning_rate": 3.938840629632222e-07, "loss": 0.0418, "step": 13638 }, { "epoch": 3.1032992036405007, "grad_norm": 0.8857194230085054, "learning_rate": 3.9380104747481176e-07, "loss": 0.0415, "step": 13639 }, { "epoch": 3.1035267349260525, "grad_norm": 2.6494756041361365, "learning_rate": 3.937180367116847e-07, "loss": 0.1394, "step": 13640 }, { "epoch": 3.103754266211604, "grad_norm": 2.1222672408863192, "learning_rate": 3.9363503067553714e-07, "loss": 0.0594, "step": 13641 }, { "epoch": 3.103981797497156, "grad_norm": 1.2948911808426278, "learning_rate": 3.935520293680661e-07, "loss": 0.0212, "step": 13642 }, { "epoch": 3.1042093287827077, "grad_norm": 0.9864418066480387, "learning_rate": 3.9346903279096765e-07, "loss": 0.0103, "step": 13643 }, { "epoch": 3.1044368600682595, "grad_norm": 0.9656232432263048, "learning_rate": 3.9338604094593795e-07, "loss": 0.0161, "step": 13644 }, { "epoch": 3.1046643913538112, "grad_norm": 1.9090975175227, "learning_rate": 3.933030538346737e-07, "loss": 0.0408, "step": 13645 }, { "epoch": 3.104891922639363, "grad_norm": 1.0361535183730601, "learning_rate": 3.9322007145887043e-07, "loss": 0.0368, "step": 13646 }, { "epoch": 3.1051194539249147, "grad_norm": 1.064350619988742, "learning_rate": 3.931370938202245e-07, "loss": 0.031, "step": 13647 }, { "epoch": 3.1053469852104665, "grad_norm": 1.5147584116740491, "learning_rate": 3.9305412092043164e-07, "loss": 0.0288, "step": 13648 }, { "epoch": 3.1055745164960182, "grad_norm": 1.2921007829929048, "learning_rate": 3.9297115276118796e-07, "loss": 0.0266, "step": 13649 }, { "epoch": 3.10580204778157, "grad_norm": 1.7582586715190742, "learning_rate": 3.928881893441889e-07, "loss": 0.0159, "step": 13650 }, { "epoch": 3.1060295790671217, "grad_norm": 1.9597148289645372, "learning_rate": 3.9280523067113003e-07, "loss": 0.0306, "step": 13651 }, { "epoch": 3.1062571103526735, "grad_norm": 1.515148510373826, "learning_rate": 3.9272227674370725e-07, "loss": 0.0427, "step": 13652 }, { "epoch": 3.1064846416382252, "grad_norm": 1.8699740341754119, "learning_rate": 3.9263932756361535e-07, "loss": 0.0685, "step": 13653 }, { "epoch": 3.106712172923777, "grad_norm": 1.5704775352303968, "learning_rate": 3.9255638313255046e-07, "loss": 0.0316, "step": 13654 }, { "epoch": 3.1069397042093287, "grad_norm": 3.7040589361078022, "learning_rate": 3.9247344345220735e-07, "loss": 0.0234, "step": 13655 }, { "epoch": 3.1071672354948805, "grad_norm": 2.0484494655074887, "learning_rate": 3.923905085242808e-07, "loss": 0.011, "step": 13656 }, { "epoch": 3.1073947667804322, "grad_norm": 9.054963879376832, "learning_rate": 3.9230757835046666e-07, "loss": 0.0186, "step": 13657 }, { "epoch": 3.107622298065984, "grad_norm": 2.807142764086387, "learning_rate": 3.922246529324593e-07, "loss": 0.0424, "step": 13658 }, { "epoch": 3.1078498293515358, "grad_norm": 1.8081178720985454, "learning_rate": 3.921417322719537e-07, "loss": 0.0341, "step": 13659 }, { "epoch": 3.1080773606370875, "grad_norm": 1.2137707170936725, "learning_rate": 3.920588163706446e-07, "loss": 0.0532, "step": 13660 }, { "epoch": 3.1083048919226393, "grad_norm": 1.8002316567985217, "learning_rate": 3.919759052302269e-07, "loss": 0.0673, "step": 13661 }, { "epoch": 3.108532423208191, "grad_norm": 1.586673130644875, "learning_rate": 3.918929988523948e-07, "loss": 0.0284, "step": 13662 }, { "epoch": 3.1087599544937428, "grad_norm": 1.4857284190430624, "learning_rate": 3.918100972388428e-07, "loss": 0.0897, "step": 13663 }, { "epoch": 3.1089874857792945, "grad_norm": 3.9463839452653167, "learning_rate": 3.917272003912656e-07, "loss": 0.0367, "step": 13664 }, { "epoch": 3.1092150170648463, "grad_norm": 1.9057021462513661, "learning_rate": 3.916443083113569e-07, "loss": 0.0882, "step": 13665 }, { "epoch": 3.109442548350398, "grad_norm": 3.9980963426640614, "learning_rate": 3.915614210008113e-07, "loss": 0.0548, "step": 13666 }, { "epoch": 3.1096700796359498, "grad_norm": 2.1212580668937076, "learning_rate": 3.9147853846132294e-07, "loss": 0.0341, "step": 13667 }, { "epoch": 3.1098976109215015, "grad_norm": 0.9910576452904495, "learning_rate": 3.913956606945853e-07, "loss": 0.0826, "step": 13668 }, { "epoch": 3.1101251422070533, "grad_norm": 1.6992016631236222, "learning_rate": 3.913127877022926e-07, "loss": 0.0424, "step": 13669 }, { "epoch": 3.110352673492605, "grad_norm": 1.6663451648992658, "learning_rate": 3.912299194861385e-07, "loss": 0.0529, "step": 13670 }, { "epoch": 3.1105802047781568, "grad_norm": 3.2616390416158794, "learning_rate": 3.911470560478169e-07, "loss": 0.0195, "step": 13671 }, { "epoch": 3.110807736063709, "grad_norm": 1.3238193840375847, "learning_rate": 3.9106419738902094e-07, "loss": 0.0107, "step": 13672 }, { "epoch": 3.1110352673492607, "grad_norm": 1.7233847329716374, "learning_rate": 3.9098134351144445e-07, "loss": 0.0971, "step": 13673 }, { "epoch": 3.1112627986348125, "grad_norm": 1.279322109562346, "learning_rate": 3.908984944167809e-07, "loss": 0.1073, "step": 13674 }, { "epoch": 3.1114903299203642, "grad_norm": 1.0843274788211255, "learning_rate": 3.9081565010672313e-07, "loss": 0.0712, "step": 13675 }, { "epoch": 3.111717861205916, "grad_norm": 1.7803529965783085, "learning_rate": 3.907328105829647e-07, "loss": 0.0375, "step": 13676 }, { "epoch": 3.1119453924914677, "grad_norm": 1.4769656432212854, "learning_rate": 3.906499758471984e-07, "loss": 0.0232, "step": 13677 }, { "epoch": 3.1121729237770195, "grad_norm": 2.992656370036915, "learning_rate": 3.9056714590111775e-07, "loss": 0.0499, "step": 13678 }, { "epoch": 3.1124004550625712, "grad_norm": 2.2659616211691556, "learning_rate": 3.90484320746415e-07, "loss": 0.0472, "step": 13679 }, { "epoch": 3.112627986348123, "grad_norm": 0.9065031192625356, "learning_rate": 3.904015003847833e-07, "loss": 0.0348, "step": 13680 }, { "epoch": 3.1128555176336747, "grad_norm": 0.9142569450976825, "learning_rate": 3.903186848179155e-07, "loss": 0.0236, "step": 13681 }, { "epoch": 3.1130830489192265, "grad_norm": 0.6320240567400595, "learning_rate": 3.902358740475037e-07, "loss": 0.0626, "step": 13682 }, { "epoch": 3.1133105802047782, "grad_norm": 1.5144696810706013, "learning_rate": 3.9015306807524077e-07, "loss": 0.0217, "step": 13683 }, { "epoch": 3.11353811149033, "grad_norm": 1.970796630174994, "learning_rate": 3.90070266902819e-07, "loss": 0.1633, "step": 13684 }, { "epoch": 3.1137656427758817, "grad_norm": 2.5014489093708114, "learning_rate": 3.899874705319309e-07, "loss": 0.0218, "step": 13685 }, { "epoch": 3.1139931740614335, "grad_norm": 1.448495354816247, "learning_rate": 3.899046789642684e-07, "loss": 0.0729, "step": 13686 }, { "epoch": 3.1142207053469853, "grad_norm": 1.422845895027724, "learning_rate": 3.898218922015236e-07, "loss": 0.0284, "step": 13687 }, { "epoch": 3.114448236632537, "grad_norm": 1.24680904771161, "learning_rate": 3.8973911024538883e-07, "loss": 0.0231, "step": 13688 }, { "epoch": 3.1146757679180888, "grad_norm": 1.8083655960309712, "learning_rate": 3.896563330975556e-07, "loss": 0.0301, "step": 13689 }, { "epoch": 3.1149032992036405, "grad_norm": 1.0732449577351268, "learning_rate": 3.8957356075971593e-07, "loss": 0.0077, "step": 13690 }, { "epoch": 3.1151308304891923, "grad_norm": 2.857856867193602, "learning_rate": 3.894907932335614e-07, "loss": 0.0889, "step": 13691 }, { "epoch": 3.115358361774744, "grad_norm": 1.390889378820175, "learning_rate": 3.89408030520784e-07, "loss": 0.0282, "step": 13692 }, { "epoch": 3.1155858930602958, "grad_norm": 1.4756927121440322, "learning_rate": 3.893252726230749e-07, "loss": 0.0139, "step": 13693 }, { "epoch": 3.1158134243458475, "grad_norm": 1.292534314074829, "learning_rate": 3.8924251954212553e-07, "loss": 0.06, "step": 13694 }, { "epoch": 3.1160409556313993, "grad_norm": 1.2385493691497786, "learning_rate": 3.8915977127962743e-07, "loss": 0.0244, "step": 13695 }, { "epoch": 3.116268486916951, "grad_norm": 1.251082270402003, "learning_rate": 3.890770278372716e-07, "loss": 0.0362, "step": 13696 }, { "epoch": 3.1164960182025028, "grad_norm": 1.026264019443252, "learning_rate": 3.889942892167492e-07, "loss": 0.0352, "step": 13697 }, { "epoch": 3.1167235494880545, "grad_norm": 1.3017515651570033, "learning_rate": 3.8891155541975124e-07, "loss": 0.0149, "step": 13698 }, { "epoch": 3.1169510807736063, "grad_norm": 0.6406566816077041, "learning_rate": 3.8882882644796894e-07, "loss": 0.0082, "step": 13699 }, { "epoch": 3.117178612059158, "grad_norm": 0.6400461242409065, "learning_rate": 3.887461023030929e-07, "loss": 0.0068, "step": 13700 }, { "epoch": 3.11740614334471, "grad_norm": 0.8363418615724226, "learning_rate": 3.8866338298681363e-07, "loss": 0.0349, "step": 13701 }, { "epoch": 3.1176336746302615, "grad_norm": 1.1635107676949128, "learning_rate": 3.8858066850082214e-07, "loss": 0.0166, "step": 13702 }, { "epoch": 3.1178612059158133, "grad_norm": 1.216717864730756, "learning_rate": 3.8849795884680866e-07, "loss": 0.0117, "step": 13703 }, { "epoch": 3.118088737201365, "grad_norm": 2.1827838495743825, "learning_rate": 3.884152540264639e-07, "loss": 0.0688, "step": 13704 }, { "epoch": 3.118316268486917, "grad_norm": 2.7600992365275214, "learning_rate": 3.8833255404147813e-07, "loss": 0.0521, "step": 13705 }, { "epoch": 3.1185437997724685, "grad_norm": 1.5553898122813838, "learning_rate": 3.8824985889354137e-07, "loss": 0.084, "step": 13706 }, { "epoch": 3.1187713310580203, "grad_norm": 0.9501418867733942, "learning_rate": 3.8816716858434395e-07, "loss": 0.0226, "step": 13707 }, { "epoch": 3.118998862343572, "grad_norm": 1.7594833253521522, "learning_rate": 3.880844831155757e-07, "loss": 0.013, "step": 13708 }, { "epoch": 3.119226393629124, "grad_norm": 1.065379980704756, "learning_rate": 3.8800180248892706e-07, "loss": 0.065, "step": 13709 }, { "epoch": 3.1194539249146755, "grad_norm": 1.80255790909447, "learning_rate": 3.879191267060871e-07, "loss": 0.0959, "step": 13710 }, { "epoch": 3.1196814562002277, "grad_norm": 1.580664596024811, "learning_rate": 3.878364557687464e-07, "loss": 0.0429, "step": 13711 }, { "epoch": 3.1199089874857795, "grad_norm": 1.2120843343067373, "learning_rate": 3.877537896785942e-07, "loss": 0.0977, "step": 13712 }, { "epoch": 3.1201365187713312, "grad_norm": 1.7192225590602361, "learning_rate": 3.8767112843731966e-07, "loss": 0.0882, "step": 13713 }, { "epoch": 3.120364050056883, "grad_norm": 0.7460997920116451, "learning_rate": 3.87588472046613e-07, "loss": 0.0039, "step": 13714 }, { "epoch": 3.1205915813424348, "grad_norm": 0.7578751565468316, "learning_rate": 3.8750582050816295e-07, "loss": 0.0089, "step": 13715 }, { "epoch": 3.1208191126279865, "grad_norm": 1.4190206220826054, "learning_rate": 3.8742317382365905e-07, "loss": 0.0251, "step": 13716 }, { "epoch": 3.1210466439135383, "grad_norm": 1.6231657644085622, "learning_rate": 3.873405319947903e-07, "loss": 0.0203, "step": 13717 }, { "epoch": 3.12127417519909, "grad_norm": 1.206230705645918, "learning_rate": 3.8725789502324605e-07, "loss": 0.0661, "step": 13718 }, { "epoch": 3.1215017064846418, "grad_norm": 1.7823339556521005, "learning_rate": 3.871752629107149e-07, "loss": 0.0782, "step": 13719 }, { "epoch": 3.1217292377701935, "grad_norm": 1.1669512137232827, "learning_rate": 3.870926356588857e-07, "loss": 0.0429, "step": 13720 }, { "epoch": 3.1219567690557453, "grad_norm": 1.2907653285682879, "learning_rate": 3.870100132694475e-07, "loss": 0.0554, "step": 13721 }, { "epoch": 3.122184300341297, "grad_norm": 0.6106132291584392, "learning_rate": 3.869273957440886e-07, "loss": 0.0354, "step": 13722 }, { "epoch": 3.1224118316268488, "grad_norm": 0.9498443896459235, "learning_rate": 3.8684478308449786e-07, "loss": 0.0199, "step": 13723 }, { "epoch": 3.1226393629124005, "grad_norm": 0.9430231377783396, "learning_rate": 3.8676217529236364e-07, "loss": 0.0281, "step": 13724 }, { "epoch": 3.1228668941979523, "grad_norm": 2.0294497525449406, "learning_rate": 3.866795723693741e-07, "loss": 0.1024, "step": 13725 }, { "epoch": 3.123094425483504, "grad_norm": 2.7031013458834288, "learning_rate": 3.8659697431721767e-07, "loss": 0.0306, "step": 13726 }, { "epoch": 3.1233219567690558, "grad_norm": 1.0170711126473129, "learning_rate": 3.865143811375824e-07, "loss": 0.0188, "step": 13727 }, { "epoch": 3.1235494880546075, "grad_norm": 1.2040063183993628, "learning_rate": 3.864317928321566e-07, "loss": 0.0436, "step": 13728 }, { "epoch": 3.1237770193401593, "grad_norm": 1.976069585568679, "learning_rate": 3.8634920940262785e-07, "loss": 0.0544, "step": 13729 }, { "epoch": 3.124004550625711, "grad_norm": 1.452565754627796, "learning_rate": 3.8626663085068433e-07, "loss": 0.0133, "step": 13730 }, { "epoch": 3.124232081911263, "grad_norm": 1.8650633525008145, "learning_rate": 3.861840571780137e-07, "loss": 0.0376, "step": 13731 }, { "epoch": 3.1244596131968145, "grad_norm": 0.7195128742968746, "learning_rate": 3.861014883863033e-07, "loss": 0.0068, "step": 13732 }, { "epoch": 3.1246871444823663, "grad_norm": 2.065281478040351, "learning_rate": 3.8601892447724126e-07, "loss": 0.1061, "step": 13733 }, { "epoch": 3.124914675767918, "grad_norm": 1.6227354963944618, "learning_rate": 3.8593636545251446e-07, "loss": 0.0699, "step": 13734 }, { "epoch": 3.12514220705347, "grad_norm": 1.4969227867233603, "learning_rate": 3.8585381131381076e-07, "loss": 0.0139, "step": 13735 }, { "epoch": 3.1253697383390215, "grad_norm": 1.6202663870832155, "learning_rate": 3.85771262062817e-07, "loss": 0.0292, "step": 13736 }, { "epoch": 3.1255972696245733, "grad_norm": 0.5372475140973506, "learning_rate": 3.856887177012206e-07, "loss": 0.0056, "step": 13737 }, { "epoch": 3.125824800910125, "grad_norm": 1.076529429521248, "learning_rate": 3.856061782307087e-07, "loss": 0.0162, "step": 13738 }, { "epoch": 3.126052332195677, "grad_norm": 2.0657366793148744, "learning_rate": 3.855236436529678e-07, "loss": 0.0889, "step": 13739 }, { "epoch": 3.1262798634812285, "grad_norm": 1.972938092921441, "learning_rate": 3.8544111396968523e-07, "loss": 0.0439, "step": 13740 }, { "epoch": 3.1265073947667803, "grad_norm": 1.629699284405217, "learning_rate": 3.853585891825473e-07, "loss": 0.075, "step": 13741 }, { "epoch": 3.126734926052332, "grad_norm": 1.2983153573773878, "learning_rate": 3.852760692932413e-07, "loss": 0.0158, "step": 13742 }, { "epoch": 3.126962457337884, "grad_norm": 1.3186305110321934, "learning_rate": 3.8519355430345326e-07, "loss": 0.1058, "step": 13743 }, { "epoch": 3.1271899886234356, "grad_norm": 1.8779761061896214, "learning_rate": 3.851110442148696e-07, "loss": 0.0963, "step": 13744 }, { "epoch": 3.1274175199089873, "grad_norm": 2.1772945018569465, "learning_rate": 3.850285390291772e-07, "loss": 0.0708, "step": 13745 }, { "epoch": 3.127645051194539, "grad_norm": 1.1955270084323741, "learning_rate": 3.849460387480617e-07, "loss": 0.0429, "step": 13746 }, { "epoch": 3.127872582480091, "grad_norm": 1.2063209216097588, "learning_rate": 3.8486354337320966e-07, "loss": 0.0916, "step": 13747 }, { "epoch": 3.128100113765643, "grad_norm": 1.2101592638185685, "learning_rate": 3.847810529063068e-07, "loss": 0.0393, "step": 13748 }, { "epoch": 3.1283276450511943, "grad_norm": 1.7149548734262663, "learning_rate": 3.8469856734903955e-07, "loss": 0.0835, "step": 13749 }, { "epoch": 3.1285551763367465, "grad_norm": 0.7134067318633924, "learning_rate": 3.8461608670309346e-07, "loss": 0.0065, "step": 13750 }, { "epoch": 3.1287827076222983, "grad_norm": 0.42775060274445503, "learning_rate": 3.845336109701542e-07, "loss": 0.003, "step": 13751 }, { "epoch": 3.12901023890785, "grad_norm": 0.8310430486225371, "learning_rate": 3.844511401519077e-07, "loss": 0.0122, "step": 13752 }, { "epoch": 3.1292377701934018, "grad_norm": 2.6955314575378067, "learning_rate": 3.843686742500391e-07, "loss": 0.0186, "step": 13753 }, { "epoch": 3.1294653014789535, "grad_norm": 1.4612361075994247, "learning_rate": 3.8428621326623424e-07, "loss": 0.0141, "step": 13754 }, { "epoch": 3.1296928327645053, "grad_norm": 1.449607858162352, "learning_rate": 3.842037572021783e-07, "loss": 0.0643, "step": 13755 }, { "epoch": 3.129920364050057, "grad_norm": 2.10718098276556, "learning_rate": 3.841213060595567e-07, "loss": 0.019, "step": 13756 }, { "epoch": 3.1301478953356088, "grad_norm": 0.8617572878363544, "learning_rate": 3.8403885984005433e-07, "loss": 0.0141, "step": 13757 }, { "epoch": 3.1303754266211605, "grad_norm": 0.7611032581689884, "learning_rate": 3.8395641854535627e-07, "loss": 0.0071, "step": 13758 }, { "epoch": 3.1306029579067123, "grad_norm": 0.8456014211529498, "learning_rate": 3.8387398217714784e-07, "loss": 0.0532, "step": 13759 }, { "epoch": 3.130830489192264, "grad_norm": 1.8473153724168192, "learning_rate": 3.837915507371133e-07, "loss": 0.0684, "step": 13760 }, { "epoch": 3.131058020477816, "grad_norm": 3.134351375345956, "learning_rate": 3.8370912422693795e-07, "loss": 0.0427, "step": 13761 }, { "epoch": 3.1312855517633675, "grad_norm": 1.0954175688770225, "learning_rate": 3.836267026483062e-07, "loss": 0.0147, "step": 13762 }, { "epoch": 3.1315130830489193, "grad_norm": 1.6727184400724522, "learning_rate": 3.835442860029024e-07, "loss": 0.0511, "step": 13763 }, { "epoch": 3.131740614334471, "grad_norm": 1.7186892215644787, "learning_rate": 3.834618742924113e-07, "loss": 0.1063, "step": 13764 }, { "epoch": 3.131968145620023, "grad_norm": 2.6779668542494326, "learning_rate": 3.83379467518517e-07, "loss": 0.0329, "step": 13765 }, { "epoch": 3.1321956769055745, "grad_norm": 1.2438620432668854, "learning_rate": 3.8329706568290416e-07, "loss": 0.016, "step": 13766 }, { "epoch": 3.1324232081911263, "grad_norm": 1.8580798151240492, "learning_rate": 3.8321466878725634e-07, "loss": 0.0216, "step": 13767 }, { "epoch": 3.132650739476678, "grad_norm": 0.9917570964458639, "learning_rate": 3.831322768332581e-07, "loss": 0.0061, "step": 13768 }, { "epoch": 3.13287827076223, "grad_norm": 1.22431639229182, "learning_rate": 3.830498898225932e-07, "loss": 0.015, "step": 13769 }, { "epoch": 3.1331058020477816, "grad_norm": 1.0666553235346454, "learning_rate": 3.829675077569452e-07, "loss": 0.0117, "step": 13770 }, { "epoch": 3.1333333333333333, "grad_norm": 1.1872091221783199, "learning_rate": 3.8288513063799826e-07, "loss": 0.0324, "step": 13771 }, { "epoch": 3.133560864618885, "grad_norm": 1.6202609936705508, "learning_rate": 3.828027584674357e-07, "loss": 0.024, "step": 13772 }, { "epoch": 3.133788395904437, "grad_norm": 2.1636277568318634, "learning_rate": 3.827203912469414e-07, "loss": 0.0608, "step": 13773 }, { "epoch": 3.1340159271899886, "grad_norm": 1.428976173518792, "learning_rate": 3.8263802897819847e-07, "loss": 0.0392, "step": 13774 }, { "epoch": 3.1342434584755403, "grad_norm": 3.37529423269316, "learning_rate": 3.8255567166289023e-07, "loss": 0.0256, "step": 13775 }, { "epoch": 3.134470989761092, "grad_norm": 1.360564685648355, "learning_rate": 3.8247331930270033e-07, "loss": 0.0636, "step": 13776 }, { "epoch": 3.134698521046644, "grad_norm": 0.9001157179249929, "learning_rate": 3.8239097189931124e-07, "loss": 0.0391, "step": 13777 }, { "epoch": 3.1349260523321956, "grad_norm": 1.6108051060300312, "learning_rate": 3.823086294544067e-07, "loss": 0.0622, "step": 13778 }, { "epoch": 3.1351535836177473, "grad_norm": 1.4012945493541338, "learning_rate": 3.822262919696691e-07, "loss": 0.1033, "step": 13779 }, { "epoch": 3.135381114903299, "grad_norm": 1.441413942146335, "learning_rate": 3.821439594467816e-07, "loss": 0.0275, "step": 13780 }, { "epoch": 3.135608646188851, "grad_norm": 1.5147017421898716, "learning_rate": 3.8206163188742695e-07, "loss": 0.052, "step": 13781 }, { "epoch": 3.1358361774744026, "grad_norm": 1.2408090278155843, "learning_rate": 3.8197930929328724e-07, "loss": 0.015, "step": 13782 }, { "epoch": 3.1360637087599543, "grad_norm": 2.643474390232461, "learning_rate": 3.818969916660456e-07, "loss": 0.0908, "step": 13783 }, { "epoch": 3.136291240045506, "grad_norm": 1.0609309830837907, "learning_rate": 3.8181467900738415e-07, "loss": 0.0076, "step": 13784 }, { "epoch": 3.136518771331058, "grad_norm": 1.5863301329631765, "learning_rate": 3.8173237131898545e-07, "loss": 0.0515, "step": 13785 }, { "epoch": 3.1367463026166096, "grad_norm": 1.1450382741296343, "learning_rate": 3.816500686025314e-07, "loss": 0.054, "step": 13786 }, { "epoch": 3.136973833902162, "grad_norm": 2.2455237449149017, "learning_rate": 3.815677708597043e-07, "loss": 0.0686, "step": 13787 }, { "epoch": 3.137201365187713, "grad_norm": 1.8970651256788846, "learning_rate": 3.814854780921863e-07, "loss": 0.0585, "step": 13788 }, { "epoch": 3.1374288964732653, "grad_norm": 1.5561533059231962, "learning_rate": 3.8140319030165886e-07, "loss": 0.0246, "step": 13789 }, { "epoch": 3.137656427758817, "grad_norm": 1.3876527256788471, "learning_rate": 3.813209074898043e-07, "loss": 0.0659, "step": 13790 }, { "epoch": 3.137883959044369, "grad_norm": 1.5296387414333932, "learning_rate": 3.8123862965830384e-07, "loss": 0.0369, "step": 13791 }, { "epoch": 3.1381114903299205, "grad_norm": 0.7664810135290934, "learning_rate": 3.811563568088398e-07, "loss": 0.0076, "step": 13792 }, { "epoch": 3.1383390216154723, "grad_norm": 1.298426141284068, "learning_rate": 3.8107408894309306e-07, "loss": 0.0481, "step": 13793 }, { "epoch": 3.138566552901024, "grad_norm": 1.5359466781662514, "learning_rate": 3.80991826062745e-07, "loss": 0.0141, "step": 13794 }, { "epoch": 3.138794084186576, "grad_norm": 1.0760114303467743, "learning_rate": 3.8090956816947743e-07, "loss": 0.0622, "step": 13795 }, { "epoch": 3.1390216154721275, "grad_norm": 1.7222742341711936, "learning_rate": 3.808273152649711e-07, "loss": 0.0972, "step": 13796 }, { "epoch": 3.1392491467576793, "grad_norm": 0.7060732352253923, "learning_rate": 3.807450673509073e-07, "loss": 0.004, "step": 13797 }, { "epoch": 3.139476678043231, "grad_norm": 1.7307470088457282, "learning_rate": 3.8066282442896686e-07, "loss": 0.072, "step": 13798 }, { "epoch": 3.139704209328783, "grad_norm": 0.8539657536949081, "learning_rate": 3.805805865008311e-07, "loss": 0.0078, "step": 13799 }, { "epoch": 3.1399317406143346, "grad_norm": 0.9772002643153225, "learning_rate": 3.8049835356818034e-07, "loss": 0.0092, "step": 13800 }, { "epoch": 3.1401592718998863, "grad_norm": 2.1153164233238884, "learning_rate": 3.804161256326953e-07, "loss": 0.0306, "step": 13801 }, { "epoch": 3.140386803185438, "grad_norm": 1.9128041660051684, "learning_rate": 3.803339026960571e-07, "loss": 0.072, "step": 13802 }, { "epoch": 3.14061433447099, "grad_norm": 1.3783891484075808, "learning_rate": 3.802516847599455e-07, "loss": 0.0332, "step": 13803 }, { "epoch": 3.1408418657565416, "grad_norm": 1.4634633737057754, "learning_rate": 3.801694718260414e-07, "loss": 0.0514, "step": 13804 }, { "epoch": 3.1410693970420933, "grad_norm": 1.0938086272883338, "learning_rate": 3.8008726389602473e-07, "loss": 0.0205, "step": 13805 }, { "epoch": 3.141296928327645, "grad_norm": 0.6830342791571454, "learning_rate": 3.8000506097157617e-07, "loss": 0.0299, "step": 13806 }, { "epoch": 3.141524459613197, "grad_norm": 1.3448865495999955, "learning_rate": 3.7992286305437527e-07, "loss": 0.0201, "step": 13807 }, { "epoch": 3.1417519908987486, "grad_norm": 1.6387076567693086, "learning_rate": 3.7984067014610216e-07, "loss": 0.11, "step": 13808 }, { "epoch": 3.1419795221843003, "grad_norm": 2.5576501001695555, "learning_rate": 3.7975848224843694e-07, "loss": 0.0623, "step": 13809 }, { "epoch": 3.142207053469852, "grad_norm": 2.2021177686268287, "learning_rate": 3.79676299363059e-07, "loss": 0.0943, "step": 13810 }, { "epoch": 3.142434584755404, "grad_norm": 1.3529649601654208, "learning_rate": 3.7959412149164836e-07, "loss": 0.0239, "step": 13811 }, { "epoch": 3.1426621160409556, "grad_norm": 1.960122131079808, "learning_rate": 3.795119486358845e-07, "loss": 0.061, "step": 13812 }, { "epoch": 3.1428896473265073, "grad_norm": 2.398858609617929, "learning_rate": 3.7942978079744663e-07, "loss": 0.0162, "step": 13813 }, { "epoch": 3.143117178612059, "grad_norm": 1.1353971809557037, "learning_rate": 3.7934761797801437e-07, "loss": 0.0262, "step": 13814 }, { "epoch": 3.143344709897611, "grad_norm": 1.6276385341410764, "learning_rate": 3.792654601792668e-07, "loss": 0.0448, "step": 13815 }, { "epoch": 3.1435722411831626, "grad_norm": 1.2259004138732084, "learning_rate": 3.7918330740288346e-07, "loss": 0.055, "step": 13816 }, { "epoch": 3.1437997724687143, "grad_norm": 1.7840113823261248, "learning_rate": 3.791011596505428e-07, "loss": 0.1692, "step": 13817 }, { "epoch": 3.144027303754266, "grad_norm": 1.1598976681512179, "learning_rate": 3.790190169239243e-07, "loss": 0.0513, "step": 13818 }, { "epoch": 3.144254835039818, "grad_norm": 1.4366871843583766, "learning_rate": 3.7893687922470676e-07, "loss": 0.0197, "step": 13819 }, { "epoch": 3.1444823663253696, "grad_norm": 1.4173308988851177, "learning_rate": 3.7885474655456845e-07, "loss": 0.0123, "step": 13820 }, { "epoch": 3.1447098976109213, "grad_norm": 1.0507006842648654, "learning_rate": 3.787726189151884e-07, "loss": 0.0287, "step": 13821 }, { "epoch": 3.144937428896473, "grad_norm": 1.6782156751525465, "learning_rate": 3.786904963082451e-07, "loss": 0.0598, "step": 13822 }, { "epoch": 3.145164960182025, "grad_norm": 1.5110399636641703, "learning_rate": 3.786083787354171e-07, "loss": 0.0946, "step": 13823 }, { "epoch": 3.1453924914675766, "grad_norm": 0.6603391391767558, "learning_rate": 3.785262661983823e-07, "loss": 0.0041, "step": 13824 }, { "epoch": 3.1456200227531284, "grad_norm": 1.6841545209999165, "learning_rate": 3.784441586988194e-07, "loss": 0.0134, "step": 13825 }, { "epoch": 3.1458475540386805, "grad_norm": 0.8562604979725685, "learning_rate": 3.7836205623840653e-07, "loss": 0.0298, "step": 13826 }, { "epoch": 3.146075085324232, "grad_norm": 2.339152771312919, "learning_rate": 3.782799588188212e-07, "loss": 0.0269, "step": 13827 }, { "epoch": 3.146302616609784, "grad_norm": 2.0551092460118148, "learning_rate": 3.7819786644174184e-07, "loss": 0.0201, "step": 13828 }, { "epoch": 3.146530147895336, "grad_norm": 1.6970659589814046, "learning_rate": 3.78115779108846e-07, "loss": 0.02, "step": 13829 }, { "epoch": 3.1467576791808876, "grad_norm": 1.2911558632510887, "learning_rate": 3.7803369682181166e-07, "loss": 0.0386, "step": 13830 }, { "epoch": 3.1469852104664393, "grad_norm": 1.1353990141933716, "learning_rate": 3.7795161958231616e-07, "loss": 0.0616, "step": 13831 }, { "epoch": 3.147212741751991, "grad_norm": 2.1607677013183126, "learning_rate": 3.77869547392037e-07, "loss": 0.0353, "step": 13832 }, { "epoch": 3.147440273037543, "grad_norm": 1.478576930402691, "learning_rate": 3.7778748025265194e-07, "loss": 0.0231, "step": 13833 }, { "epoch": 3.1476678043230946, "grad_norm": 1.917640083907175, "learning_rate": 3.777054181658377e-07, "loss": 0.0825, "step": 13834 }, { "epoch": 3.1478953356086463, "grad_norm": 1.821627592016001, "learning_rate": 3.776233611332722e-07, "loss": 0.0649, "step": 13835 }, { "epoch": 3.148122866894198, "grad_norm": 1.6914326745634896, "learning_rate": 3.7754130915663187e-07, "loss": 0.0333, "step": 13836 }, { "epoch": 3.14835039817975, "grad_norm": 1.1427313249490716, "learning_rate": 3.7745926223759403e-07, "loss": 0.0279, "step": 13837 }, { "epoch": 3.1485779294653016, "grad_norm": 3.3182212999388434, "learning_rate": 3.773772203778357e-07, "loss": 0.0117, "step": 13838 }, { "epoch": 3.1488054607508533, "grad_norm": 1.4198422317803325, "learning_rate": 3.772951835790332e-07, "loss": 0.049, "step": 13839 }, { "epoch": 3.149032992036405, "grad_norm": 0.8606112698798716, "learning_rate": 3.772131518428637e-07, "loss": 0.0307, "step": 13840 }, { "epoch": 3.149260523321957, "grad_norm": 1.3791258087939562, "learning_rate": 3.771311251710034e-07, "loss": 0.0522, "step": 13841 }, { "epoch": 3.1494880546075086, "grad_norm": 1.3799811862351457, "learning_rate": 3.770491035651292e-07, "loss": 0.0802, "step": 13842 }, { "epoch": 3.1497155858930603, "grad_norm": 0.7558921417653969, "learning_rate": 3.7696708702691695e-07, "loss": 0.0077, "step": 13843 }, { "epoch": 3.149943117178612, "grad_norm": 1.4567410579149824, "learning_rate": 3.768850755580433e-07, "loss": 0.0269, "step": 13844 }, { "epoch": 3.150170648464164, "grad_norm": 1.4293443948226692, "learning_rate": 3.7680306916018457e-07, "loss": 0.0209, "step": 13845 }, { "epoch": 3.1503981797497156, "grad_norm": 1.3774255263513295, "learning_rate": 3.7672106783501626e-07, "loss": 0.0164, "step": 13846 }, { "epoch": 3.1506257110352673, "grad_norm": 2.3884841009184457, "learning_rate": 3.7663907158421465e-07, "loss": 0.0843, "step": 13847 }, { "epoch": 3.150853242320819, "grad_norm": 1.6489598433072892, "learning_rate": 3.7655708040945554e-07, "loss": 0.017, "step": 13848 }, { "epoch": 3.151080773606371, "grad_norm": 0.8998232206166086, "learning_rate": 3.7647509431241485e-07, "loss": 0.009, "step": 13849 }, { "epoch": 3.1513083048919226, "grad_norm": 2.039532597350182, "learning_rate": 3.7639311329476806e-07, "loss": 0.0615, "step": 13850 }, { "epoch": 3.1515358361774743, "grad_norm": 2.132808487068888, "learning_rate": 3.7631113735819055e-07, "loss": 0.1592, "step": 13851 }, { "epoch": 3.151763367463026, "grad_norm": 1.4363837412211613, "learning_rate": 3.762291665043581e-07, "loss": 0.0646, "step": 13852 }, { "epoch": 3.151990898748578, "grad_norm": 1.8300347886705317, "learning_rate": 3.7614720073494576e-07, "loss": 0.0965, "step": 13853 }, { "epoch": 3.1522184300341296, "grad_norm": 1.6197111375106328, "learning_rate": 3.7606524005162904e-07, "loss": 0.0265, "step": 13854 }, { "epoch": 3.1524459613196814, "grad_norm": 1.5683842217475705, "learning_rate": 3.759832844560827e-07, "loss": 0.0273, "step": 13855 }, { "epoch": 3.152673492605233, "grad_norm": 1.3182427273474142, "learning_rate": 3.759013339499823e-07, "loss": 0.0546, "step": 13856 }, { "epoch": 3.152901023890785, "grad_norm": 2.100806467219255, "learning_rate": 3.758193885350022e-07, "loss": 0.0179, "step": 13857 }, { "epoch": 3.1531285551763366, "grad_norm": 0.6967769127366987, "learning_rate": 3.757374482128174e-07, "loss": 0.0092, "step": 13858 }, { "epoch": 3.1533560864618884, "grad_norm": 0.4414444264315907, "learning_rate": 3.756555129851029e-07, "loss": 0.0019, "step": 13859 }, { "epoch": 3.15358361774744, "grad_norm": 1.3299447344821882, "learning_rate": 3.755735828535328e-07, "loss": 0.106, "step": 13860 }, { "epoch": 3.153811149032992, "grad_norm": 1.3042560126306324, "learning_rate": 3.7549165781978195e-07, "loss": 0.0196, "step": 13861 }, { "epoch": 3.1540386803185436, "grad_norm": 1.4381103273339793, "learning_rate": 3.7540973788552456e-07, "loss": 0.0339, "step": 13862 }, { "epoch": 3.1542662116040954, "grad_norm": 1.4867550685398856, "learning_rate": 3.7532782305243526e-07, "loss": 0.0611, "step": 13863 }, { "epoch": 3.154493742889647, "grad_norm": 1.3001220458189475, "learning_rate": 3.752459133221879e-07, "loss": 0.0057, "step": 13864 }, { "epoch": 3.1547212741751993, "grad_norm": 1.008715539532686, "learning_rate": 3.7516400869645655e-07, "loss": 0.0578, "step": 13865 }, { "epoch": 3.1549488054607506, "grad_norm": 1.532640249835676, "learning_rate": 3.7508210917691556e-07, "loss": 0.0436, "step": 13866 }, { "epoch": 3.155176336746303, "grad_norm": 1.8371133117048846, "learning_rate": 3.750002147652383e-07, "loss": 0.116, "step": 13867 }, { "epoch": 3.1554038680318546, "grad_norm": 1.2771519862095264, "learning_rate": 3.7491832546309894e-07, "loss": 0.0309, "step": 13868 }, { "epoch": 3.1556313993174063, "grad_norm": 0.811302669676039, "learning_rate": 3.748364412721711e-07, "loss": 0.0046, "step": 13869 }, { "epoch": 3.155858930602958, "grad_norm": 2.213045857238132, "learning_rate": 3.74754562194128e-07, "loss": 0.0456, "step": 13870 }, { "epoch": 3.15608646188851, "grad_norm": 1.1073942845077938, "learning_rate": 3.7467268823064354e-07, "loss": 0.0679, "step": 13871 }, { "epoch": 3.1563139931740616, "grad_norm": 0.5340069345145287, "learning_rate": 3.745908193833907e-07, "loss": 0.0027, "step": 13872 }, { "epoch": 3.1565415244596133, "grad_norm": 1.520559905046947, "learning_rate": 3.7450895565404316e-07, "loss": 0.0773, "step": 13873 }, { "epoch": 3.156769055745165, "grad_norm": 0.543443493953861, "learning_rate": 3.7442709704427357e-07, "loss": 0.006, "step": 13874 }, { "epoch": 3.156996587030717, "grad_norm": 0.8294897308947107, "learning_rate": 3.7434524355575527e-07, "loss": 0.0386, "step": 13875 }, { "epoch": 3.1572241183162686, "grad_norm": 2.4524875211539423, "learning_rate": 3.7426339519016133e-07, "loss": 0.02, "step": 13876 }, { "epoch": 3.1574516496018203, "grad_norm": 0.7811629084594014, "learning_rate": 3.7418155194916415e-07, "loss": 0.0035, "step": 13877 }, { "epoch": 3.157679180887372, "grad_norm": 1.2745292185221264, "learning_rate": 3.7409971383443684e-07, "loss": 0.015, "step": 13878 }, { "epoch": 3.157906712172924, "grad_norm": 2.8878970415386185, "learning_rate": 3.740178808476517e-07, "loss": 0.1176, "step": 13879 }, { "epoch": 3.1581342434584756, "grad_norm": 2.308265058328603, "learning_rate": 3.739360529904817e-07, "loss": 0.0588, "step": 13880 }, { "epoch": 3.1583617747440274, "grad_norm": 1.1950275562025403, "learning_rate": 3.738542302645988e-07, "loss": 0.0088, "step": 13881 }, { "epoch": 3.158589306029579, "grad_norm": 1.5369317902931667, "learning_rate": 3.737724126716755e-07, "loss": 0.0731, "step": 13882 }, { "epoch": 3.158816837315131, "grad_norm": 0.9570033839786857, "learning_rate": 3.73690600213384e-07, "loss": 0.0227, "step": 13883 }, { "epoch": 3.1590443686006826, "grad_norm": 3.080053445268308, "learning_rate": 3.7360879289139633e-07, "loss": 0.0201, "step": 13884 }, { "epoch": 3.1592718998862344, "grad_norm": 1.6104354092359685, "learning_rate": 3.7352699070738455e-07, "loss": 0.0259, "step": 13885 }, { "epoch": 3.159499431171786, "grad_norm": 1.6126346730888486, "learning_rate": 3.734451936630204e-07, "loss": 0.0144, "step": 13886 }, { "epoch": 3.159726962457338, "grad_norm": 1.4024520415734605, "learning_rate": 3.733634017599762e-07, "loss": 0.0785, "step": 13887 }, { "epoch": 3.1599544937428896, "grad_norm": 1.286975282960194, "learning_rate": 3.732816149999229e-07, "loss": 0.0502, "step": 13888 }, { "epoch": 3.1601820250284414, "grad_norm": 1.5831496092670823, "learning_rate": 3.731998333845324e-07, "loss": 0.1025, "step": 13889 }, { "epoch": 3.160409556313993, "grad_norm": 1.3918742086207971, "learning_rate": 3.731180569154763e-07, "loss": 0.0683, "step": 13890 }, { "epoch": 3.160637087599545, "grad_norm": 1.7034898873479414, "learning_rate": 3.730362855944256e-07, "loss": 0.0284, "step": 13891 }, { "epoch": 3.1608646188850966, "grad_norm": 1.5683104689234246, "learning_rate": 3.7295451942305187e-07, "loss": 0.114, "step": 13892 }, { "epoch": 3.1610921501706484, "grad_norm": 3.1401864587853447, "learning_rate": 3.728727584030261e-07, "loss": 0.09, "step": 13893 }, { "epoch": 3.1613196814562, "grad_norm": 1.9830451833140417, "learning_rate": 3.7279100253601963e-07, "loss": 0.0367, "step": 13894 }, { "epoch": 3.161547212741752, "grad_norm": 2.3532037659805485, "learning_rate": 3.7270925182370306e-07, "loss": 0.0437, "step": 13895 }, { "epoch": 3.1617747440273036, "grad_norm": 1.2223270803303379, "learning_rate": 3.7262750626774714e-07, "loss": 0.0212, "step": 13896 }, { "epoch": 3.1620022753128554, "grad_norm": 1.005261123818386, "learning_rate": 3.725457658698231e-07, "loss": 0.0162, "step": 13897 }, { "epoch": 3.162229806598407, "grad_norm": 3.0133294797280445, "learning_rate": 3.724640306316009e-07, "loss": 0.0592, "step": 13898 }, { "epoch": 3.162457337883959, "grad_norm": 3.8995418020498542, "learning_rate": 3.723823005547518e-07, "loss": 0.0794, "step": 13899 }, { "epoch": 3.1626848691695106, "grad_norm": 0.8571441287559585, "learning_rate": 3.7230057564094574e-07, "loss": 0.0359, "step": 13900 }, { "epoch": 3.1629124004550624, "grad_norm": 1.6733717984824525, "learning_rate": 3.7221885589185286e-07, "loss": 0.0695, "step": 13901 }, { "epoch": 3.163139931740614, "grad_norm": 1.6746679223460776, "learning_rate": 3.721371413091439e-07, "loss": 0.0639, "step": 13902 }, { "epoch": 3.163367463026166, "grad_norm": 1.260133341542016, "learning_rate": 3.720554318944884e-07, "loss": 0.011, "step": 13903 }, { "epoch": 3.163594994311718, "grad_norm": 5.05888075340167, "learning_rate": 3.7197372764955683e-07, "loss": 0.0267, "step": 13904 }, { "epoch": 3.1638225255972694, "grad_norm": 1.5594475149097615, "learning_rate": 3.718920285760187e-07, "loss": 0.1163, "step": 13905 }, { "epoch": 3.1640500568828216, "grad_norm": 1.41168431181034, "learning_rate": 3.718103346755443e-07, "loss": 0.0775, "step": 13906 }, { "epoch": 3.1642775881683733, "grad_norm": 2.6510191175924263, "learning_rate": 3.717286459498028e-07, "loss": 0.0152, "step": 13907 }, { "epoch": 3.164505119453925, "grad_norm": 1.6611511299021215, "learning_rate": 3.716469624004637e-07, "loss": 0.0319, "step": 13908 }, { "epoch": 3.164732650739477, "grad_norm": 1.2729058462701697, "learning_rate": 3.715652840291971e-07, "loss": 0.0045, "step": 13909 }, { "epoch": 3.1649601820250286, "grad_norm": 1.5547879472596926, "learning_rate": 3.7148361083767155e-07, "loss": 0.0827, "step": 13910 }, { "epoch": 3.1651877133105804, "grad_norm": 0.9484541490890365, "learning_rate": 3.714019428275569e-07, "loss": 0.0148, "step": 13911 }, { "epoch": 3.165415244596132, "grad_norm": 0.7391151042270085, "learning_rate": 3.7132028000052196e-07, "loss": 0.0181, "step": 13912 }, { "epoch": 3.165642775881684, "grad_norm": 1.5604944619544545, "learning_rate": 3.712386223582362e-07, "loss": 0.0131, "step": 13913 }, { "epoch": 3.1658703071672356, "grad_norm": 1.7556016077642513, "learning_rate": 3.711569699023681e-07, "loss": 0.0288, "step": 13914 }, { "epoch": 3.1660978384527874, "grad_norm": 1.0370049480230432, "learning_rate": 3.7107532263458653e-07, "loss": 0.0416, "step": 13915 }, { "epoch": 3.166325369738339, "grad_norm": 1.0258506987075688, "learning_rate": 3.7099368055656054e-07, "loss": 0.0628, "step": 13916 }, { "epoch": 3.166552901023891, "grad_norm": 1.6930629612704402, "learning_rate": 3.709120436699583e-07, "loss": 0.0219, "step": 13917 }, { "epoch": 3.1667804323094426, "grad_norm": 1.1735522455962333, "learning_rate": 3.708304119764487e-07, "loss": 0.1327, "step": 13918 }, { "epoch": 3.1670079635949944, "grad_norm": 1.8287418444841044, "learning_rate": 3.7074878547770005e-07, "loss": 0.0236, "step": 13919 }, { "epoch": 3.167235494880546, "grad_norm": 1.6679902751464148, "learning_rate": 3.7066716417538035e-07, "loss": 0.1484, "step": 13920 }, { "epoch": 3.167463026166098, "grad_norm": 1.6273992303442903, "learning_rate": 3.705855480711582e-07, "loss": 0.1096, "step": 13921 }, { "epoch": 3.1676905574516496, "grad_norm": 0.7688529761257368, "learning_rate": 3.7050393716670135e-07, "loss": 0.0331, "step": 13922 }, { "epoch": 3.1679180887372014, "grad_norm": 1.2349635653609385, "learning_rate": 3.7042233146367816e-07, "loss": 0.0793, "step": 13923 }, { "epoch": 3.168145620022753, "grad_norm": 1.0545781811962225, "learning_rate": 3.7034073096375607e-07, "loss": 0.0257, "step": 13924 }, { "epoch": 3.168373151308305, "grad_norm": 2.9213269870696865, "learning_rate": 3.702591356686032e-07, "loss": 0.0499, "step": 13925 }, { "epoch": 3.1686006825938566, "grad_norm": 0.3675398132350555, "learning_rate": 3.7017754557988716e-07, "loss": 0.0016, "step": 13926 }, { "epoch": 3.1688282138794084, "grad_norm": 3.455939263892689, "learning_rate": 3.7009596069927516e-07, "loss": 0.068, "step": 13927 }, { "epoch": 3.16905574516496, "grad_norm": 1.5005562161696462, "learning_rate": 3.70014381028435e-07, "loss": 0.045, "step": 13928 }, { "epoch": 3.169283276450512, "grad_norm": 1.2887943411921012, "learning_rate": 3.6993280656903376e-07, "loss": 0.0378, "step": 13929 }, { "epoch": 3.1695108077360636, "grad_norm": 2.426986305815661, "learning_rate": 3.698512373227391e-07, "loss": 0.0328, "step": 13930 }, { "epoch": 3.1697383390216154, "grad_norm": 0.910172465949971, "learning_rate": 3.697696732912176e-07, "loss": 0.0623, "step": 13931 }, { "epoch": 3.169965870307167, "grad_norm": 1.3816472094899088, "learning_rate": 3.6968811447613677e-07, "loss": 0.1122, "step": 13932 }, { "epoch": 3.170193401592719, "grad_norm": 0.7333612879176401, "learning_rate": 3.6960656087916324e-07, "loss": 0.0063, "step": 13933 }, { "epoch": 3.1704209328782706, "grad_norm": 1.8419054223436113, "learning_rate": 3.695250125019638e-07, "loss": 0.05, "step": 13934 }, { "epoch": 3.1706484641638224, "grad_norm": 2.0717965883934735, "learning_rate": 3.694434693462053e-07, "loss": 0.0134, "step": 13935 }, { "epoch": 3.170875995449374, "grad_norm": 1.7659746652779136, "learning_rate": 3.69361931413554e-07, "loss": 0.1095, "step": 13936 }, { "epoch": 3.171103526734926, "grad_norm": 3.457104784942819, "learning_rate": 3.69280398705677e-07, "loss": 0.0273, "step": 13937 }, { "epoch": 3.1713310580204777, "grad_norm": 1.6048709364257023, "learning_rate": 3.6919887122424015e-07, "loss": 0.0843, "step": 13938 }, { "epoch": 3.1715585893060294, "grad_norm": 1.1176572333742696, "learning_rate": 3.691173489709099e-07, "loss": 0.0564, "step": 13939 }, { "epoch": 3.171786120591581, "grad_norm": 2.1804493970393786, "learning_rate": 3.6903583194735256e-07, "loss": 0.0217, "step": 13940 }, { "epoch": 3.172013651877133, "grad_norm": 0.9396354648071963, "learning_rate": 3.6895432015523375e-07, "loss": 0.0391, "step": 13941 }, { "epoch": 3.1722411831626847, "grad_norm": 0.9525540862555474, "learning_rate": 3.6887281359621996e-07, "loss": 0.0121, "step": 13942 }, { "epoch": 3.172468714448237, "grad_norm": 1.5634523366320552, "learning_rate": 3.6879131227197657e-07, "loss": 0.0575, "step": 13943 }, { "epoch": 3.172696245733788, "grad_norm": 2.6229741107107585, "learning_rate": 3.687098161841698e-07, "loss": 0.0177, "step": 13944 }, { "epoch": 3.1729237770193404, "grad_norm": 2.3252190329139726, "learning_rate": 3.6862832533446483e-07, "loss": 0.0341, "step": 13945 }, { "epoch": 3.173151308304892, "grad_norm": 1.5659442937735852, "learning_rate": 3.6854683972452734e-07, "loss": 0.038, "step": 13946 }, { "epoch": 3.173378839590444, "grad_norm": 1.30829591927033, "learning_rate": 3.6846535935602295e-07, "loss": 0.0533, "step": 13947 }, { "epoch": 3.1736063708759956, "grad_norm": 1.6230999255055047, "learning_rate": 3.6838388423061655e-07, "loss": 0.0283, "step": 13948 }, { "epoch": 3.1738339021615474, "grad_norm": 1.606678000385043, "learning_rate": 3.6830241434997374e-07, "loss": 0.0227, "step": 13949 }, { "epoch": 3.174061433447099, "grad_norm": 2.4503446328429024, "learning_rate": 3.6822094971575936e-07, "loss": 0.1212, "step": 13950 }, { "epoch": 3.174288964732651, "grad_norm": 1.1800329804888954, "learning_rate": 3.681394903296387e-07, "loss": 0.0589, "step": 13951 }, { "epoch": 3.1745164960182026, "grad_norm": 1.0004538723367997, "learning_rate": 3.680580361932764e-07, "loss": 0.0209, "step": 13952 }, { "epoch": 3.1747440273037544, "grad_norm": 1.4713710676608531, "learning_rate": 3.6797658730833704e-07, "loss": 0.0223, "step": 13953 }, { "epoch": 3.174971558589306, "grad_norm": 1.3646503588970937, "learning_rate": 3.6789514367648584e-07, "loss": 0.0189, "step": 13954 }, { "epoch": 3.175199089874858, "grad_norm": 1.6650930850486658, "learning_rate": 3.6781370529938666e-07, "loss": 0.0319, "step": 13955 }, { "epoch": 3.1754266211604096, "grad_norm": 1.6135211522035569, "learning_rate": 3.677322721787047e-07, "loss": 0.0704, "step": 13956 }, { "epoch": 3.1756541524459614, "grad_norm": 2.2184633843784325, "learning_rate": 3.6765084431610394e-07, "loss": 0.0155, "step": 13957 }, { "epoch": 3.175881683731513, "grad_norm": 0.9356293296357032, "learning_rate": 3.675694217132482e-07, "loss": 0.0403, "step": 13958 }, { "epoch": 3.176109215017065, "grad_norm": 2.0801302010129463, "learning_rate": 3.674880043718024e-07, "loss": 0.0457, "step": 13959 }, { "epoch": 3.1763367463026166, "grad_norm": 1.3903411580431047, "learning_rate": 3.6740659229342985e-07, "loss": 0.0177, "step": 13960 }, { "epoch": 3.1765642775881684, "grad_norm": 1.6577498920391787, "learning_rate": 3.6732518547979503e-07, "loss": 0.0308, "step": 13961 }, { "epoch": 3.17679180887372, "grad_norm": 1.273155335345335, "learning_rate": 3.672437839325614e-07, "loss": 0.0271, "step": 13962 }, { "epoch": 3.177019340159272, "grad_norm": 1.9252510263550708, "learning_rate": 3.6716238765339295e-07, "loss": 0.0179, "step": 13963 }, { "epoch": 3.1772468714448237, "grad_norm": 0.9584782264607207, "learning_rate": 3.67080996643953e-07, "loss": 0.0465, "step": 13964 }, { "epoch": 3.1774744027303754, "grad_norm": 0.9698949447074896, "learning_rate": 3.6699961090590497e-07, "loss": 0.0369, "step": 13965 }, { "epoch": 3.177701934015927, "grad_norm": 1.5147965170144926, "learning_rate": 3.6691823044091267e-07, "loss": 0.0094, "step": 13966 }, { "epoch": 3.177929465301479, "grad_norm": 0.9402628722659118, "learning_rate": 3.668368552506388e-07, "loss": 0.0075, "step": 13967 }, { "epoch": 3.1781569965870307, "grad_norm": 3.5247631093954697, "learning_rate": 3.667554853367469e-07, "loss": 0.0146, "step": 13968 }, { "epoch": 3.1783845278725824, "grad_norm": 1.0103663650973742, "learning_rate": 3.6667412070089986e-07, "loss": 0.0272, "step": 13969 }, { "epoch": 3.178612059158134, "grad_norm": 0.5022589867554276, "learning_rate": 3.6659276134476106e-07, "loss": 0.0033, "step": 13970 }, { "epoch": 3.178839590443686, "grad_norm": 1.4274113061616498, "learning_rate": 3.665114072699927e-07, "loss": 0.0291, "step": 13971 }, { "epoch": 3.1790671217292377, "grad_norm": 1.0620666972921853, "learning_rate": 3.6643005847825765e-07, "loss": 0.0615, "step": 13972 }, { "epoch": 3.1792946530147894, "grad_norm": 1.5703714244623361, "learning_rate": 3.6634871497121887e-07, "loss": 0.0647, "step": 13973 }, { "epoch": 3.179522184300341, "grad_norm": 2.196711564006803, "learning_rate": 3.662673767505385e-07, "loss": 0.008, "step": 13974 }, { "epoch": 3.179749715585893, "grad_norm": 1.270266727635661, "learning_rate": 3.661860438178792e-07, "loss": 0.0381, "step": 13975 }, { "epoch": 3.1799772468714447, "grad_norm": 1.6564263674745914, "learning_rate": 3.661047161749033e-07, "loss": 0.0621, "step": 13976 }, { "epoch": 3.1802047781569964, "grad_norm": 1.5693479996876114, "learning_rate": 3.660233938232725e-07, "loss": 0.033, "step": 13977 }, { "epoch": 3.180432309442548, "grad_norm": 1.0819687660425352, "learning_rate": 3.659420767646495e-07, "loss": 0.0774, "step": 13978 }, { "epoch": 3.1806598407281, "grad_norm": 2.0255580957384183, "learning_rate": 3.6586076500069583e-07, "loss": 0.0573, "step": 13979 }, { "epoch": 3.1808873720136517, "grad_norm": 1.3421891505603667, "learning_rate": 3.657794585330738e-07, "loss": 0.0751, "step": 13980 }, { "epoch": 3.1811149032992034, "grad_norm": 1.6494799416408936, "learning_rate": 3.6569815736344467e-07, "loss": 0.0958, "step": 13981 }, { "epoch": 3.1813424345847556, "grad_norm": 1.5783983432798596, "learning_rate": 3.6561686149347034e-07, "loss": 0.109, "step": 13982 }, { "epoch": 3.181569965870307, "grad_norm": 2.0450531970870296, "learning_rate": 3.6553557092481254e-07, "loss": 0.0657, "step": 13983 }, { "epoch": 3.181797497155859, "grad_norm": 1.651622614723497, "learning_rate": 3.6545428565913225e-07, "loss": 0.1671, "step": 13984 }, { "epoch": 3.182025028441411, "grad_norm": 0.9387537582342375, "learning_rate": 3.653730056980911e-07, "loss": 0.0107, "step": 13985 }, { "epoch": 3.1822525597269626, "grad_norm": 1.1577559462350464, "learning_rate": 3.6529173104335013e-07, "loss": 0.0081, "step": 13986 }, { "epoch": 3.1824800910125144, "grad_norm": 1.1272163619347588, "learning_rate": 3.6521046169657073e-07, "loss": 0.0562, "step": 13987 }, { "epoch": 3.182707622298066, "grad_norm": 1.258246017359122, "learning_rate": 3.651291976594136e-07, "loss": 0.0663, "step": 13988 }, { "epoch": 3.182935153583618, "grad_norm": 1.4978067871556147, "learning_rate": 3.650479389335397e-07, "loss": 0.0519, "step": 13989 }, { "epoch": 3.1831626848691696, "grad_norm": 0.8369964799683921, "learning_rate": 3.6496668552061007e-07, "loss": 0.07, "step": 13990 }, { "epoch": 3.1833902161547214, "grad_norm": 1.5344196192190485, "learning_rate": 3.6488543742228473e-07, "loss": 0.0145, "step": 13991 }, { "epoch": 3.183617747440273, "grad_norm": 1.0922037020701252, "learning_rate": 3.6480419464022504e-07, "loss": 0.0724, "step": 13992 }, { "epoch": 3.183845278725825, "grad_norm": 1.4190739316296355, "learning_rate": 3.647229571760909e-07, "loss": 0.0165, "step": 13993 }, { "epoch": 3.1840728100113767, "grad_norm": 1.3445557759492501, "learning_rate": 3.64641725031543e-07, "loss": 0.0687, "step": 13994 }, { "epoch": 3.1843003412969284, "grad_norm": 1.570058478401071, "learning_rate": 3.645604982082415e-07, "loss": 0.0949, "step": 13995 }, { "epoch": 3.18452787258248, "grad_norm": 1.0162554693608417, "learning_rate": 3.6447927670784623e-07, "loss": 0.0174, "step": 13996 }, { "epoch": 3.184755403868032, "grad_norm": 1.1665608789283854, "learning_rate": 3.643980605320174e-07, "loss": 0.0367, "step": 13997 }, { "epoch": 3.1849829351535837, "grad_norm": 1.8621243637488156, "learning_rate": 3.643168496824148e-07, "loss": 0.106, "step": 13998 }, { "epoch": 3.1852104664391354, "grad_norm": 1.1837527378817143, "learning_rate": 3.6423564416069884e-07, "loss": 0.0138, "step": 13999 }, { "epoch": 3.185437997724687, "grad_norm": 1.439194320170073, "learning_rate": 3.6415444396852835e-07, "loss": 0.085, "step": 14000 }, { "epoch": 3.185665529010239, "grad_norm": 1.5654526757975469, "learning_rate": 3.640732491075636e-07, "loss": 0.0189, "step": 14001 }, { "epoch": 3.1858930602957907, "grad_norm": 1.3185802945248495, "learning_rate": 3.6399205957946384e-07, "loss": 0.0296, "step": 14002 }, { "epoch": 3.1861205915813424, "grad_norm": 1.0708488171057455, "learning_rate": 3.6391087538588797e-07, "loss": 0.0275, "step": 14003 }, { "epoch": 3.186348122866894, "grad_norm": 1.1559731604217067, "learning_rate": 3.638296965284957e-07, "loss": 0.0416, "step": 14004 }, { "epoch": 3.186575654152446, "grad_norm": 2.3260721031985025, "learning_rate": 3.6374852300894615e-07, "loss": 0.0441, "step": 14005 }, { "epoch": 3.1868031854379977, "grad_norm": 1.1073003851914283, "learning_rate": 3.636673548288986e-07, "loss": 0.017, "step": 14006 }, { "epoch": 3.1870307167235494, "grad_norm": 1.9637808356439925, "learning_rate": 3.6358619199001173e-07, "loss": 0.1205, "step": 14007 }, { "epoch": 3.187258248009101, "grad_norm": 1.1452875875258182, "learning_rate": 3.635050344939441e-07, "loss": 0.0304, "step": 14008 }, { "epoch": 3.187485779294653, "grad_norm": 0.758887587668667, "learning_rate": 3.634238823423548e-07, "loss": 0.0205, "step": 14009 }, { "epoch": 3.1877133105802047, "grad_norm": 0.5877787575008205, "learning_rate": 3.633427355369021e-07, "loss": 0.004, "step": 14010 }, { "epoch": 3.1879408418657564, "grad_norm": 1.9996744223164014, "learning_rate": 3.6326159407924464e-07, "loss": 0.0874, "step": 14011 }, { "epoch": 3.188168373151308, "grad_norm": 0.8568629455211323, "learning_rate": 3.6318045797104086e-07, "loss": 0.0083, "step": 14012 }, { "epoch": 3.18839590443686, "grad_norm": 1.83359385363956, "learning_rate": 3.6309932721394936e-07, "loss": 0.0141, "step": 14013 }, { "epoch": 3.1886234357224117, "grad_norm": 0.6828603383208813, "learning_rate": 3.630182018096279e-07, "loss": 0.0054, "step": 14014 }, { "epoch": 3.1888509670079634, "grad_norm": 1.506562111900859, "learning_rate": 3.629370817597343e-07, "loss": 0.0831, "step": 14015 }, { "epoch": 3.189078498293515, "grad_norm": 1.2878882750390608, "learning_rate": 3.6285596706592705e-07, "loss": 0.0257, "step": 14016 }, { "epoch": 3.189306029579067, "grad_norm": 2.728622205951661, "learning_rate": 3.6277485772986336e-07, "loss": 0.0063, "step": 14017 }, { "epoch": 3.1895335608646187, "grad_norm": 1.550402846073723, "learning_rate": 3.6269375375320174e-07, "loss": 0.1394, "step": 14018 }, { "epoch": 3.1897610921501705, "grad_norm": 1.9901782374572121, "learning_rate": 3.626126551375992e-07, "loss": 0.0851, "step": 14019 }, { "epoch": 3.189988623435722, "grad_norm": 1.7635632576189388, "learning_rate": 3.6253156188471363e-07, "loss": 0.1065, "step": 14020 }, { "epoch": 3.1902161547212744, "grad_norm": 1.1647595646541204, "learning_rate": 3.624504739962023e-07, "loss": 0.0251, "step": 14021 }, { "epoch": 3.1904436860068257, "grad_norm": 1.5570315312134482, "learning_rate": 3.6236939147372223e-07, "loss": 0.0113, "step": 14022 }, { "epoch": 3.190671217292378, "grad_norm": 2.8969565987336274, "learning_rate": 3.6228831431893074e-07, "loss": 0.0223, "step": 14023 }, { "epoch": 3.1908987485779297, "grad_norm": 1.9056844034062967, "learning_rate": 3.6220724253348503e-07, "loss": 0.0688, "step": 14024 }, { "epoch": 3.1911262798634814, "grad_norm": 0.8616679904105784, "learning_rate": 3.621261761190422e-07, "loss": 0.0201, "step": 14025 }, { "epoch": 3.191353811149033, "grad_norm": 1.276915414604766, "learning_rate": 3.6204511507725894e-07, "loss": 0.0259, "step": 14026 }, { "epoch": 3.191581342434585, "grad_norm": 1.3625026078286497, "learning_rate": 3.619640594097917e-07, "loss": 0.0416, "step": 14027 }, { "epoch": 3.1918088737201367, "grad_norm": 0.9455831189547533, "learning_rate": 3.618830091182976e-07, "loss": 0.0057, "step": 14028 }, { "epoch": 3.1920364050056884, "grad_norm": 1.6282999361115436, "learning_rate": 3.618019642044326e-07, "loss": 0.0577, "step": 14029 }, { "epoch": 3.19226393629124, "grad_norm": 1.6969119335164908, "learning_rate": 3.6172092466985346e-07, "loss": 0.0971, "step": 14030 }, { "epoch": 3.192491467576792, "grad_norm": 1.1635697668983531, "learning_rate": 3.6163989051621647e-07, "loss": 0.0083, "step": 14031 }, { "epoch": 3.1927189988623437, "grad_norm": 1.2421161131525835, "learning_rate": 3.6155886174517804e-07, "loss": 0.0798, "step": 14032 }, { "epoch": 3.1929465301478954, "grad_norm": 0.7659895781943893, "learning_rate": 3.61477838358394e-07, "loss": 0.0152, "step": 14033 }, { "epoch": 3.193174061433447, "grad_norm": 1.6167402847932797, "learning_rate": 3.6139682035752017e-07, "loss": 0.0561, "step": 14034 }, { "epoch": 3.193401592718999, "grad_norm": 1.9258809536577204, "learning_rate": 3.613158077442127e-07, "loss": 0.0981, "step": 14035 }, { "epoch": 3.1936291240045507, "grad_norm": 2.897237114901181, "learning_rate": 3.6123480052012705e-07, "loss": 0.0351, "step": 14036 }, { "epoch": 3.1938566552901024, "grad_norm": 1.491119578750828, "learning_rate": 3.611537986869189e-07, "loss": 0.0315, "step": 14037 }, { "epoch": 3.194084186575654, "grad_norm": 1.0882184827562822, "learning_rate": 3.6107280224624396e-07, "loss": 0.0382, "step": 14038 }, { "epoch": 3.194311717861206, "grad_norm": 1.1316346835916191, "learning_rate": 3.6099181119975775e-07, "loss": 0.0207, "step": 14039 }, { "epoch": 3.1945392491467577, "grad_norm": 1.4844704020911295, "learning_rate": 3.609108255491155e-07, "loss": 0.0638, "step": 14040 }, { "epoch": 3.1947667804323094, "grad_norm": 1.274566031351992, "learning_rate": 3.6082984529597205e-07, "loss": 0.0351, "step": 14041 }, { "epoch": 3.194994311717861, "grad_norm": 1.373571643361966, "learning_rate": 3.60748870441983e-07, "loss": 0.0226, "step": 14042 }, { "epoch": 3.195221843003413, "grad_norm": 0.7014938586412542, "learning_rate": 3.6066790098880283e-07, "loss": 0.0129, "step": 14043 }, { "epoch": 3.1954493742889647, "grad_norm": 1.8208896202092755, "learning_rate": 3.605869369380867e-07, "loss": 0.1164, "step": 14044 }, { "epoch": 3.1956769055745164, "grad_norm": 1.7897780232587652, "learning_rate": 3.6050597829148957e-07, "loss": 0.0434, "step": 14045 }, { "epoch": 3.195904436860068, "grad_norm": 2.1539355263168645, "learning_rate": 3.604250250506656e-07, "loss": 0.0692, "step": 14046 }, { "epoch": 3.19613196814562, "grad_norm": 1.6066638218137692, "learning_rate": 3.6034407721726975e-07, "loss": 0.0579, "step": 14047 }, { "epoch": 3.1963594994311717, "grad_norm": 0.8412692523956905, "learning_rate": 3.602631347929561e-07, "loss": 0.0231, "step": 14048 }, { "epoch": 3.1965870307167235, "grad_norm": 1.5361299069391456, "learning_rate": 3.601821977793794e-07, "loss": 0.0377, "step": 14049 }, { "epoch": 3.196814562002275, "grad_norm": 0.8586667868382049, "learning_rate": 3.601012661781932e-07, "loss": 0.0056, "step": 14050 }, { "epoch": 3.197042093287827, "grad_norm": 0.5945342128312673, "learning_rate": 3.600203399910521e-07, "loss": 0.0039, "step": 14051 }, { "epoch": 3.1972696245733787, "grad_norm": 1.2995750066140501, "learning_rate": 3.5993941921961017e-07, "loss": 0.0115, "step": 14052 }, { "epoch": 3.1974971558589305, "grad_norm": 1.3802316620237751, "learning_rate": 3.5985850386552083e-07, "loss": 0.0109, "step": 14053 }, { "epoch": 3.197724687144482, "grad_norm": 1.844683951568489, "learning_rate": 3.5977759393043834e-07, "loss": 0.0309, "step": 14054 }, { "epoch": 3.197952218430034, "grad_norm": 1.394763502123708, "learning_rate": 3.5969668941601587e-07, "loss": 0.0207, "step": 14055 }, { "epoch": 3.1981797497155857, "grad_norm": 0.7519523253817547, "learning_rate": 3.5961579032390737e-07, "loss": 0.0052, "step": 14056 }, { "epoch": 3.1984072810011375, "grad_norm": 1.1982278235990893, "learning_rate": 3.5953489665576594e-07, "loss": 0.0422, "step": 14057 }, { "epoch": 3.198634812286689, "grad_norm": 0.6890338476380227, "learning_rate": 3.59454008413245e-07, "loss": 0.0074, "step": 14058 }, { "epoch": 3.198862343572241, "grad_norm": 1.1874943794196997, "learning_rate": 3.5937312559799804e-07, "loss": 0.0296, "step": 14059 }, { "epoch": 3.199089874857793, "grad_norm": 1.576775364683743, "learning_rate": 3.592922482116777e-07, "loss": 0.0281, "step": 14060 }, { "epoch": 3.1993174061433445, "grad_norm": 1.5451706279604864, "learning_rate": 3.5921137625593737e-07, "loss": 0.0156, "step": 14061 }, { "epoch": 3.1995449374288967, "grad_norm": 1.8443787454918126, "learning_rate": 3.591305097324295e-07, "loss": 0.0325, "step": 14062 }, { "epoch": 3.1997724687144484, "grad_norm": 1.4118500148858186, "learning_rate": 3.5904964864280745e-07, "loss": 0.0289, "step": 14063 }, { "epoch": 3.2, "grad_norm": 1.640995216231316, "learning_rate": 3.589687929887232e-07, "loss": 0.0157, "step": 14064 }, { "epoch": 3.200227531285552, "grad_norm": 2.647869379533056, "learning_rate": 3.588879427718296e-07, "loss": 0.0138, "step": 14065 }, { "epoch": 3.2004550625711037, "grad_norm": 2.0042695182815904, "learning_rate": 3.588070979937793e-07, "loss": 0.1295, "step": 14066 }, { "epoch": 3.2006825938566554, "grad_norm": 2.1569960359656477, "learning_rate": 3.5872625865622423e-07, "loss": 0.1062, "step": 14067 }, { "epoch": 3.200910125142207, "grad_norm": 1.6188418093588681, "learning_rate": 3.58645424760817e-07, "loss": 0.0619, "step": 14068 }, { "epoch": 3.201137656427759, "grad_norm": 1.111833903645787, "learning_rate": 3.585645963092092e-07, "loss": 0.0064, "step": 14069 }, { "epoch": 3.2013651877133107, "grad_norm": 1.2922618817003027, "learning_rate": 3.5848377330305335e-07, "loss": 0.0576, "step": 14070 }, { "epoch": 3.2015927189988624, "grad_norm": 1.9978020868988746, "learning_rate": 3.5840295574400087e-07, "loss": 0.014, "step": 14071 }, { "epoch": 3.201820250284414, "grad_norm": 0.9443131705253055, "learning_rate": 3.5832214363370366e-07, "loss": 0.0209, "step": 14072 }, { "epoch": 3.202047781569966, "grad_norm": 1.5971067897370914, "learning_rate": 3.582413369738137e-07, "loss": 0.0523, "step": 14073 }, { "epoch": 3.2022753128555177, "grad_norm": 0.7708122239827447, "learning_rate": 3.581605357659821e-07, "loss": 0.0464, "step": 14074 }, { "epoch": 3.2025028441410694, "grad_norm": 0.8282272549806302, "learning_rate": 3.580797400118607e-07, "loss": 0.0096, "step": 14075 }, { "epoch": 3.202730375426621, "grad_norm": 2.0653445779471165, "learning_rate": 3.579989497131002e-07, "loss": 0.0227, "step": 14076 }, { "epoch": 3.202957906712173, "grad_norm": 1.6681275685837629, "learning_rate": 3.5791816487135256e-07, "loss": 0.0266, "step": 14077 }, { "epoch": 3.2031854379977247, "grad_norm": 0.5816076124992333, "learning_rate": 3.5783738548826814e-07, "loss": 0.0042, "step": 14078 }, { "epoch": 3.2034129692832765, "grad_norm": 4.550104638565782, "learning_rate": 3.5775661156549834e-07, "loss": 0.0554, "step": 14079 }, { "epoch": 3.203640500568828, "grad_norm": 1.4324964684911277, "learning_rate": 3.5767584310469424e-07, "loss": 0.0598, "step": 14080 }, { "epoch": 3.20386803185438, "grad_norm": 1.1469350153039646, "learning_rate": 3.57595080107506e-07, "loss": 0.0575, "step": 14081 }, { "epoch": 3.2040955631399317, "grad_norm": 2.6494036444995177, "learning_rate": 3.575143225755849e-07, "loss": 0.0224, "step": 14082 }, { "epoch": 3.2043230944254835, "grad_norm": 1.2853119589664757, "learning_rate": 3.574335705105811e-07, "loss": 0.0243, "step": 14083 }, { "epoch": 3.204550625711035, "grad_norm": 1.354880106516372, "learning_rate": 3.5735282391414467e-07, "loss": 0.0123, "step": 14084 }, { "epoch": 3.204778156996587, "grad_norm": 1.3258945118459937, "learning_rate": 3.5727208278792683e-07, "loss": 0.0565, "step": 14085 }, { "epoch": 3.2050056882821387, "grad_norm": 2.3235858310406394, "learning_rate": 3.5719134713357704e-07, "loss": 0.0383, "step": 14086 }, { "epoch": 3.2052332195676905, "grad_norm": 1.916765260106145, "learning_rate": 3.5711061695274585e-07, "loss": 0.021, "step": 14087 }, { "epoch": 3.2054607508532422, "grad_norm": 1.231019966269424, "learning_rate": 3.570298922470829e-07, "loss": 0.0195, "step": 14088 }, { "epoch": 3.205688282138794, "grad_norm": 1.7680621621366714, "learning_rate": 3.5694917301823826e-07, "loss": 0.0703, "step": 14089 }, { "epoch": 3.2059158134243457, "grad_norm": 1.988499624305372, "learning_rate": 3.568684592678615e-07, "loss": 0.0159, "step": 14090 }, { "epoch": 3.2061433447098975, "grad_norm": 1.1408102284591872, "learning_rate": 3.567877509976023e-07, "loss": 0.0863, "step": 14091 }, { "epoch": 3.2063708759954492, "grad_norm": 1.2124674702969647, "learning_rate": 3.5670704820911063e-07, "loss": 0.0451, "step": 14092 }, { "epoch": 3.206598407281001, "grad_norm": 1.9559710528432783, "learning_rate": 3.5662635090403517e-07, "loss": 0.1008, "step": 14093 }, { "epoch": 3.2068259385665527, "grad_norm": 0.8100562214823082, "learning_rate": 3.565456590840259e-07, "loss": 0.0216, "step": 14094 }, { "epoch": 3.2070534698521045, "grad_norm": 1.4633205478463978, "learning_rate": 3.564649727507316e-07, "loss": 0.0487, "step": 14095 }, { "epoch": 3.2072810011376562, "grad_norm": 1.4195246483692285, "learning_rate": 3.563842919058014e-07, "loss": 0.0281, "step": 14096 }, { "epoch": 3.207508532423208, "grad_norm": 1.4830025972929204, "learning_rate": 3.5630361655088415e-07, "loss": 0.0835, "step": 14097 }, { "epoch": 3.2077360637087597, "grad_norm": 2.0145802781255253, "learning_rate": 3.5622294668762896e-07, "loss": 0.04, "step": 14098 }, { "epoch": 3.207963594994312, "grad_norm": 1.1761184407147305, "learning_rate": 3.561422823176848e-07, "loss": 0.022, "step": 14099 }, { "epoch": 3.2081911262798632, "grad_norm": 1.047950704273198, "learning_rate": 3.560616234426997e-07, "loss": 0.0166, "step": 14100 }, { "epoch": 3.2084186575654154, "grad_norm": 0.7668196795918685, "learning_rate": 3.559809700643227e-07, "loss": 0.0077, "step": 14101 }, { "epoch": 3.208646188850967, "grad_norm": 1.5277863633350106, "learning_rate": 3.5590032218420204e-07, "loss": 0.0319, "step": 14102 }, { "epoch": 3.208873720136519, "grad_norm": 1.6837959902693331, "learning_rate": 3.5581967980398564e-07, "loss": 0.0206, "step": 14103 }, { "epoch": 3.2091012514220707, "grad_norm": 1.3189768847088477, "learning_rate": 3.557390429253221e-07, "loss": 0.0037, "step": 14104 }, { "epoch": 3.2093287827076225, "grad_norm": 2.20128488168973, "learning_rate": 3.5565841154985933e-07, "loss": 0.054, "step": 14105 }, { "epoch": 3.209556313993174, "grad_norm": 1.52956164459477, "learning_rate": 3.5557778567924554e-07, "loss": 0.1083, "step": 14106 }, { "epoch": 3.209783845278726, "grad_norm": 2.05354990193377, "learning_rate": 3.5549716531512824e-07, "loss": 0.0435, "step": 14107 }, { "epoch": 3.2100113765642777, "grad_norm": 1.0103114954274413, "learning_rate": 3.5541655045915556e-07, "loss": 0.0456, "step": 14108 }, { "epoch": 3.2102389078498295, "grad_norm": 1.48101483174127, "learning_rate": 3.553359411129748e-07, "loss": 0.0506, "step": 14109 }, { "epoch": 3.210466439135381, "grad_norm": 1.44520790269126, "learning_rate": 3.552553372782334e-07, "loss": 0.0297, "step": 14110 }, { "epoch": 3.210693970420933, "grad_norm": 1.4370459685027197, "learning_rate": 3.5517473895657877e-07, "loss": 0.0154, "step": 14111 }, { "epoch": 3.2109215017064847, "grad_norm": 1.0607725574781075, "learning_rate": 3.550941461496583e-07, "loss": 0.0349, "step": 14112 }, { "epoch": 3.2111490329920365, "grad_norm": 2.243483431080323, "learning_rate": 3.5501355885911957e-07, "loss": 0.0763, "step": 14113 }, { "epoch": 3.211376564277588, "grad_norm": 1.1196991585829743, "learning_rate": 3.549329770866092e-07, "loss": 0.0178, "step": 14114 }, { "epoch": 3.21160409556314, "grad_norm": 1.1915800691030551, "learning_rate": 3.5485240083377376e-07, "loss": 0.0146, "step": 14115 }, { "epoch": 3.2118316268486917, "grad_norm": 2.3452566213705484, "learning_rate": 3.54771830102261e-07, "loss": 0.0377, "step": 14116 }, { "epoch": 3.2120591581342435, "grad_norm": 0.7227725112591336, "learning_rate": 3.546912648937167e-07, "loss": 0.0036, "step": 14117 }, { "epoch": 3.2122866894197952, "grad_norm": 1.9996219542463158, "learning_rate": 3.5461070520978797e-07, "loss": 0.0131, "step": 14118 }, { "epoch": 3.212514220705347, "grad_norm": 2.2568222707502317, "learning_rate": 3.5453015105212117e-07, "loss": 0.0093, "step": 14119 }, { "epoch": 3.2127417519908987, "grad_norm": 1.131445770258146, "learning_rate": 3.54449602422363e-07, "loss": 0.0342, "step": 14120 }, { "epoch": 3.2129692832764505, "grad_norm": 1.892883652709601, "learning_rate": 3.543690593221595e-07, "loss": 0.0244, "step": 14121 }, { "epoch": 3.2131968145620022, "grad_norm": 2.236510230660248, "learning_rate": 3.5428852175315656e-07, "loss": 0.0312, "step": 14122 }, { "epoch": 3.213424345847554, "grad_norm": 3.1105236897881485, "learning_rate": 3.5420798971700074e-07, "loss": 0.0099, "step": 14123 }, { "epoch": 3.2136518771331057, "grad_norm": 1.062952665881116, "learning_rate": 3.541274632153373e-07, "loss": 0.0344, "step": 14124 }, { "epoch": 3.2138794084186575, "grad_norm": 2.558317341206331, "learning_rate": 3.540469422498125e-07, "loss": 0.0202, "step": 14125 }, { "epoch": 3.2141069397042092, "grad_norm": 1.8448499891389487, "learning_rate": 3.5396642682207204e-07, "loss": 0.0734, "step": 14126 }, { "epoch": 3.214334470989761, "grad_norm": 1.0159140061595509, "learning_rate": 3.538859169337616e-07, "loss": 0.006, "step": 14127 }, { "epoch": 3.2145620022753127, "grad_norm": 1.1282772784231305, "learning_rate": 3.538054125865265e-07, "loss": 0.0091, "step": 14128 }, { "epoch": 3.2147895335608645, "grad_norm": 0.9423437903676266, "learning_rate": 3.537249137820119e-07, "loss": 0.0169, "step": 14129 }, { "epoch": 3.2150170648464163, "grad_norm": 1.4818987818268567, "learning_rate": 3.536444205218634e-07, "loss": 0.141, "step": 14130 }, { "epoch": 3.215244596131968, "grad_norm": 1.6335895721211446, "learning_rate": 3.535639328077258e-07, "loss": 0.1593, "step": 14131 }, { "epoch": 3.2154721274175198, "grad_norm": 1.216720397216977, "learning_rate": 3.534834506412443e-07, "loss": 0.0319, "step": 14132 }, { "epoch": 3.2156996587030715, "grad_norm": 0.8804275341152431, "learning_rate": 3.534029740240641e-07, "loss": 0.0108, "step": 14133 }, { "epoch": 3.2159271899886233, "grad_norm": 1.2433923932205284, "learning_rate": 3.5332250295782937e-07, "loss": 0.0265, "step": 14134 }, { "epoch": 3.216154721274175, "grad_norm": 1.53953426441039, "learning_rate": 3.5324203744418544e-07, "loss": 0.0269, "step": 14135 }, { "epoch": 3.2163822525597268, "grad_norm": 0.7767735129765837, "learning_rate": 3.5316157748477625e-07, "loss": 0.0093, "step": 14136 }, { "epoch": 3.2166097838452785, "grad_norm": 1.6381717371602027, "learning_rate": 3.5308112308124685e-07, "loss": 0.0449, "step": 14137 }, { "epoch": 3.2168373151308307, "grad_norm": 5.903234183798949, "learning_rate": 3.5300067423524086e-07, "loss": 0.0943, "step": 14138 }, { "epoch": 3.217064846416382, "grad_norm": 1.4434879028107634, "learning_rate": 3.5292023094840336e-07, "loss": 0.048, "step": 14139 }, { "epoch": 3.217292377701934, "grad_norm": 1.5525363594894295, "learning_rate": 3.5283979322237804e-07, "loss": 0.015, "step": 14140 }, { "epoch": 3.217519908987486, "grad_norm": 1.0638315916406267, "learning_rate": 3.527593610588087e-07, "loss": 0.0207, "step": 14141 }, { "epoch": 3.2177474402730377, "grad_norm": 1.030194918581785, "learning_rate": 3.5267893445933967e-07, "loss": 0.0177, "step": 14142 }, { "epoch": 3.2179749715585895, "grad_norm": 1.1044825341913411, "learning_rate": 3.525985134256143e-07, "loss": 0.0271, "step": 14143 }, { "epoch": 3.218202502844141, "grad_norm": 1.1389023869061903, "learning_rate": 3.5251809795927637e-07, "loss": 0.0363, "step": 14144 }, { "epoch": 3.218430034129693, "grad_norm": 1.2988081403043172, "learning_rate": 3.5243768806196954e-07, "loss": 0.0511, "step": 14145 }, { "epoch": 3.2186575654152447, "grad_norm": 2.1722465630261487, "learning_rate": 3.5235728373533736e-07, "loss": 0.0661, "step": 14146 }, { "epoch": 3.2188850967007965, "grad_norm": 0.8695332946074085, "learning_rate": 3.522768849810231e-07, "loss": 0.0098, "step": 14147 }, { "epoch": 3.2191126279863482, "grad_norm": 1.2944627017819108, "learning_rate": 3.521964918006694e-07, "loss": 0.0861, "step": 14148 }, { "epoch": 3.2193401592719, "grad_norm": 1.6120254271045824, "learning_rate": 3.521161041959202e-07, "loss": 0.0462, "step": 14149 }, { "epoch": 3.2195676905574517, "grad_norm": 3.4687189385907247, "learning_rate": 3.520357221684178e-07, "loss": 0.1071, "step": 14150 }, { "epoch": 3.2197952218430035, "grad_norm": 1.4140402450511422, "learning_rate": 3.519553457198053e-07, "loss": 0.0051, "step": 14151 }, { "epoch": 3.2200227531285552, "grad_norm": 1.5571100555179476, "learning_rate": 3.518749748517257e-07, "loss": 0.0662, "step": 14152 }, { "epoch": 3.220250284414107, "grad_norm": 2.431116348775614, "learning_rate": 3.517946095658212e-07, "loss": 0.0269, "step": 14153 }, { "epoch": 3.2204778156996587, "grad_norm": 0.9295092809465975, "learning_rate": 3.5171424986373477e-07, "loss": 0.0094, "step": 14154 }, { "epoch": 3.2207053469852105, "grad_norm": 1.6993313198223268, "learning_rate": 3.516338957471083e-07, "loss": 0.0278, "step": 14155 }, { "epoch": 3.2209328782707622, "grad_norm": 0.8094564322090295, "learning_rate": 3.515535472175846e-07, "loss": 0.0302, "step": 14156 }, { "epoch": 3.221160409556314, "grad_norm": 1.1273524089369207, "learning_rate": 3.5147320427680543e-07, "loss": 0.0221, "step": 14157 }, { "epoch": 3.2213879408418657, "grad_norm": 0.7613192616580398, "learning_rate": 3.51392866926413e-07, "loss": 0.0241, "step": 14158 }, { "epoch": 3.2216154721274175, "grad_norm": 1.7743994465188766, "learning_rate": 3.513125351680495e-07, "loss": 0.0272, "step": 14159 }, { "epoch": 3.2218430034129693, "grad_norm": 1.3567463118342542, "learning_rate": 3.5123220900335623e-07, "loss": 0.0678, "step": 14160 }, { "epoch": 3.222070534698521, "grad_norm": 1.4609907894626792, "learning_rate": 3.5115188843397557e-07, "loss": 0.0141, "step": 14161 }, { "epoch": 3.2222980659840728, "grad_norm": 1.2202850563655494, "learning_rate": 3.510715734615485e-07, "loss": 0.0289, "step": 14162 }, { "epoch": 3.2225255972696245, "grad_norm": 1.6149905679820162, "learning_rate": 3.5099126408771707e-07, "loss": 0.0195, "step": 14163 }, { "epoch": 3.2227531285551763, "grad_norm": 1.4647499675617914, "learning_rate": 3.509109603141221e-07, "loss": 0.0494, "step": 14164 }, { "epoch": 3.222980659840728, "grad_norm": 1.188006207250539, "learning_rate": 3.5083066214240513e-07, "loss": 0.038, "step": 14165 }, { "epoch": 3.2232081911262798, "grad_norm": 1.5895257008803365, "learning_rate": 3.507503695742076e-07, "loss": 0.0589, "step": 14166 }, { "epoch": 3.2234357224118315, "grad_norm": 1.9726659923293806, "learning_rate": 3.5067008261116994e-07, "loss": 0.0213, "step": 14167 }, { "epoch": 3.2236632536973833, "grad_norm": 1.264516503646631, "learning_rate": 3.5058980125493366e-07, "loss": 0.0132, "step": 14168 }, { "epoch": 3.223890784982935, "grad_norm": 1.1137357654647504, "learning_rate": 3.5050952550713906e-07, "loss": 0.0132, "step": 14169 }, { "epoch": 3.2241183162684868, "grad_norm": 3.050262038136745, "learning_rate": 3.5042925536942727e-07, "loss": 0.0888, "step": 14170 }, { "epoch": 3.2243458475540385, "grad_norm": 1.5296867084515862, "learning_rate": 3.5034899084343835e-07, "loss": 0.0643, "step": 14171 }, { "epoch": 3.2245733788395903, "grad_norm": 0.7977872259214123, "learning_rate": 3.5026873193081316e-07, "loss": 0.0055, "step": 14172 }, { "epoch": 3.224800910125142, "grad_norm": 1.200324351573714, "learning_rate": 3.5018847863319215e-07, "loss": 0.0363, "step": 14173 }, { "epoch": 3.225028441410694, "grad_norm": 1.4294154042817355, "learning_rate": 3.501082309522151e-07, "loss": 0.039, "step": 14174 }, { "epoch": 3.2252559726962455, "grad_norm": 1.1682733691035285, "learning_rate": 3.500279888895226e-07, "loss": 0.0126, "step": 14175 }, { "epoch": 3.2254835039817973, "grad_norm": 1.1633522690895461, "learning_rate": 3.499477524467541e-07, "loss": 0.0538, "step": 14176 }, { "epoch": 3.2257110352673495, "grad_norm": 1.1942659784788554, "learning_rate": 3.498675216255502e-07, "loss": 0.0915, "step": 14177 }, { "epoch": 3.225938566552901, "grad_norm": 0.9977154886788188, "learning_rate": 3.4978729642754993e-07, "loss": 0.0097, "step": 14178 }, { "epoch": 3.226166097838453, "grad_norm": 0.933989073969292, "learning_rate": 3.4970707685439335e-07, "loss": 0.0126, "step": 14179 }, { "epoch": 3.2263936291240047, "grad_norm": 1.741770736898083, "learning_rate": 3.4962686290772015e-07, "loss": 0.0157, "step": 14180 }, { "epoch": 3.2266211604095565, "grad_norm": 1.2119160605178922, "learning_rate": 3.495466545891693e-07, "loss": 0.0342, "step": 14181 }, { "epoch": 3.2268486916951082, "grad_norm": 1.1083456780759795, "learning_rate": 3.494664519003806e-07, "loss": 0.0228, "step": 14182 }, { "epoch": 3.22707622298066, "grad_norm": 1.4178156446613785, "learning_rate": 3.4938625484299286e-07, "loss": 0.0129, "step": 14183 }, { "epoch": 3.2273037542662117, "grad_norm": 0.8730938802669684, "learning_rate": 3.493060634186454e-07, "loss": 0.0077, "step": 14184 }, { "epoch": 3.2275312855517635, "grad_norm": 0.9905030197492665, "learning_rate": 3.4922587762897695e-07, "loss": 0.0257, "step": 14185 }, { "epoch": 3.2277588168373152, "grad_norm": 1.2804023517071121, "learning_rate": 3.4914569747562645e-07, "loss": 0.0172, "step": 14186 }, { "epoch": 3.227986348122867, "grad_norm": 0.9533188647900838, "learning_rate": 3.4906552296023297e-07, "loss": 0.045, "step": 14187 }, { "epoch": 3.2282138794084188, "grad_norm": 2.4293241298516137, "learning_rate": 3.4898535408443466e-07, "loss": 0.0872, "step": 14188 }, { "epoch": 3.2284414106939705, "grad_norm": 1.5329107050313489, "learning_rate": 3.4890519084987044e-07, "loss": 0.0134, "step": 14189 }, { "epoch": 3.2286689419795223, "grad_norm": 1.536786005272848, "learning_rate": 3.488250332581784e-07, "loss": 0.0376, "step": 14190 }, { "epoch": 3.228896473265074, "grad_norm": 2.134439687158072, "learning_rate": 3.4874488131099673e-07, "loss": 0.041, "step": 14191 }, { "epoch": 3.2291240045506258, "grad_norm": 0.7903350102057523, "learning_rate": 3.4866473500996367e-07, "loss": 0.0103, "step": 14192 }, { "epoch": 3.2293515358361775, "grad_norm": 1.3515491876891756, "learning_rate": 3.4858459435671734e-07, "loss": 0.0414, "step": 14193 }, { "epoch": 3.2295790671217293, "grad_norm": 0.6752184582196755, "learning_rate": 3.485044593528959e-07, "loss": 0.0044, "step": 14194 }, { "epoch": 3.229806598407281, "grad_norm": 1.0558749672784962, "learning_rate": 3.4842433000013677e-07, "loss": 0.0117, "step": 14195 }, { "epoch": 3.2300341296928328, "grad_norm": 1.9250338706358912, "learning_rate": 3.48344206300078e-07, "loss": 0.0587, "step": 14196 }, { "epoch": 3.2302616609783845, "grad_norm": 2.350815251376095, "learning_rate": 3.4826408825435704e-07, "loss": 0.0312, "step": 14197 }, { "epoch": 3.2304891922639363, "grad_norm": 1.4566011488801036, "learning_rate": 3.4818397586461106e-07, "loss": 0.0285, "step": 14198 }, { "epoch": 3.230716723549488, "grad_norm": 1.5536992037918995, "learning_rate": 3.4810386913247755e-07, "loss": 0.0335, "step": 14199 }, { "epoch": 3.2309442548350398, "grad_norm": 1.6283734845773477, "learning_rate": 3.4802376805959393e-07, "loss": 0.0708, "step": 14200 }, { "epoch": 3.2311717861205915, "grad_norm": 0.5817842089590879, "learning_rate": 3.4794367264759746e-07, "loss": 0.0039, "step": 14201 }, { "epoch": 3.2313993174061433, "grad_norm": 3.03758359059035, "learning_rate": 3.478635828981249e-07, "loss": 0.1039, "step": 14202 }, { "epoch": 3.231626848691695, "grad_norm": 1.3089638667225447, "learning_rate": 3.4778349881281286e-07, "loss": 0.0452, "step": 14203 }, { "epoch": 3.231854379977247, "grad_norm": 3.3897824893782302, "learning_rate": 3.477034203932987e-07, "loss": 0.0332, "step": 14204 }, { "epoch": 3.2320819112627985, "grad_norm": 1.7054519014805007, "learning_rate": 3.476233476412183e-07, "loss": 0.03, "step": 14205 }, { "epoch": 3.2323094425483503, "grad_norm": 1.0143372738768477, "learning_rate": 3.475432805582092e-07, "loss": 0.0246, "step": 14206 }, { "epoch": 3.232536973833902, "grad_norm": 1.4274289568145697, "learning_rate": 3.474632191459071e-07, "loss": 0.0742, "step": 14207 }, { "epoch": 3.232764505119454, "grad_norm": 2.5410438403229323, "learning_rate": 3.4738316340594866e-07, "loss": 0.155, "step": 14208 }, { "epoch": 3.2329920364050055, "grad_norm": 0.8907535713333513, "learning_rate": 3.4730311333996997e-07, "loss": 0.0247, "step": 14209 }, { "epoch": 3.2332195676905573, "grad_norm": 1.2491570091164752, "learning_rate": 3.472230689496068e-07, "loss": 0.07, "step": 14210 }, { "epoch": 3.233447098976109, "grad_norm": 0.9747408127024365, "learning_rate": 3.4714303023649536e-07, "loss": 0.0336, "step": 14211 }, { "epoch": 3.233674630261661, "grad_norm": 1.6730340854731904, "learning_rate": 3.470629972022715e-07, "loss": 0.0222, "step": 14212 }, { "epoch": 3.2339021615472126, "grad_norm": 1.1891626575542127, "learning_rate": 3.4698296984857134e-07, "loss": 0.0192, "step": 14213 }, { "epoch": 3.2341296928327643, "grad_norm": 2.8359664897005357, "learning_rate": 3.4690294817702974e-07, "loss": 0.019, "step": 14214 }, { "epoch": 3.234357224118316, "grad_norm": 1.4728581072889684, "learning_rate": 3.468229321892828e-07, "loss": 0.0205, "step": 14215 }, { "epoch": 3.2345847554038683, "grad_norm": 1.4669652837412488, "learning_rate": 3.467429218869657e-07, "loss": 0.0165, "step": 14216 }, { "epoch": 3.23481228668942, "grad_norm": 1.1949575993336956, "learning_rate": 3.466629172717135e-07, "loss": 0.0148, "step": 14217 }, { "epoch": 3.2350398179749718, "grad_norm": 1.113524540985447, "learning_rate": 3.4658291834516145e-07, "loss": 0.0751, "step": 14218 }, { "epoch": 3.2352673492605235, "grad_norm": 0.9511259709993009, "learning_rate": 3.465029251089447e-07, "loss": 0.009, "step": 14219 }, { "epoch": 3.2354948805460753, "grad_norm": 4.182617777379817, "learning_rate": 3.464229375646983e-07, "loss": 0.0437, "step": 14220 }, { "epoch": 3.235722411831627, "grad_norm": 1.2690027422204482, "learning_rate": 3.4634295571405686e-07, "loss": 0.0898, "step": 14221 }, { "epoch": 3.2359499431171788, "grad_norm": 1.7582056704046929, "learning_rate": 3.4626297955865485e-07, "loss": 0.052, "step": 14222 }, { "epoch": 3.2361774744027305, "grad_norm": 1.047736508508155, "learning_rate": 3.461830091001274e-07, "loss": 0.0375, "step": 14223 }, { "epoch": 3.2364050056882823, "grad_norm": 0.5634853052744635, "learning_rate": 3.461030443401082e-07, "loss": 0.0113, "step": 14224 }, { "epoch": 3.236632536973834, "grad_norm": 1.7286123562576503, "learning_rate": 3.460230852802321e-07, "loss": 0.0166, "step": 14225 }, { "epoch": 3.2368600682593858, "grad_norm": 0.9137983685709291, "learning_rate": 3.459431319221332e-07, "loss": 0.0333, "step": 14226 }, { "epoch": 3.2370875995449375, "grad_norm": 1.3665395550081187, "learning_rate": 3.4586318426744585e-07, "loss": 0.0145, "step": 14227 }, { "epoch": 3.2373151308304893, "grad_norm": 0.8420242299596451, "learning_rate": 3.457832423178038e-07, "loss": 0.0111, "step": 14228 }, { "epoch": 3.237542662116041, "grad_norm": 0.928578061074645, "learning_rate": 3.457033060748406e-07, "loss": 0.0126, "step": 14229 }, { "epoch": 3.2377701934015928, "grad_norm": 1.0533099496982945, "learning_rate": 3.4562337554019066e-07, "loss": 0.0266, "step": 14230 }, { "epoch": 3.2379977246871445, "grad_norm": 1.1864853245755906, "learning_rate": 3.45543450715487e-07, "loss": 0.0265, "step": 14231 }, { "epoch": 3.2382252559726963, "grad_norm": 1.177901003538926, "learning_rate": 3.454635316023634e-07, "loss": 0.046, "step": 14232 }, { "epoch": 3.238452787258248, "grad_norm": 1.5641648164435225, "learning_rate": 3.453836182024533e-07, "loss": 0.0618, "step": 14233 }, { "epoch": 3.2386803185438, "grad_norm": 1.4991076158315002, "learning_rate": 3.4530371051739024e-07, "loss": 0.0545, "step": 14234 }, { "epoch": 3.2389078498293515, "grad_norm": 1.6407771011205, "learning_rate": 3.4522380854880695e-07, "loss": 0.0974, "step": 14235 }, { "epoch": 3.2391353811149033, "grad_norm": 1.8974255545499432, "learning_rate": 3.451439122983365e-07, "loss": 0.0183, "step": 14236 }, { "epoch": 3.239362912400455, "grad_norm": 2.275496062392656, "learning_rate": 3.450640217676121e-07, "loss": 0.0204, "step": 14237 }, { "epoch": 3.239590443686007, "grad_norm": 1.0304643956785893, "learning_rate": 3.4498413695826624e-07, "loss": 0.0333, "step": 14238 }, { "epoch": 3.2398179749715585, "grad_norm": 0.43369369611663267, "learning_rate": 3.449042578719318e-07, "loss": 0.0028, "step": 14239 }, { "epoch": 3.2400455062571103, "grad_norm": 2.914500605804129, "learning_rate": 3.4482438451024154e-07, "loss": 0.0749, "step": 14240 }, { "epoch": 3.240273037542662, "grad_norm": 1.8475088856597326, "learning_rate": 3.4474451687482756e-07, "loss": 0.1072, "step": 14241 }, { "epoch": 3.240500568828214, "grad_norm": 1.2720700930387634, "learning_rate": 3.446646549673226e-07, "loss": 0.0627, "step": 14242 }, { "epoch": 3.2407281001137656, "grad_norm": 1.6985327241235997, "learning_rate": 3.4458479878935844e-07, "loss": 0.0639, "step": 14243 }, { "epoch": 3.2409556313993173, "grad_norm": 2.241340889569551, "learning_rate": 3.4450494834256776e-07, "loss": 0.03, "step": 14244 }, { "epoch": 3.241183162684869, "grad_norm": 1.3291789289281983, "learning_rate": 3.4442510362858187e-07, "loss": 0.0219, "step": 14245 }, { "epoch": 3.241410693970421, "grad_norm": 1.5377431782610747, "learning_rate": 3.443452646490331e-07, "loss": 0.025, "step": 14246 }, { "epoch": 3.2416382252559726, "grad_norm": 0.9910307837437928, "learning_rate": 3.442654314055534e-07, "loss": 0.0552, "step": 14247 }, { "epoch": 3.2418657565415243, "grad_norm": 1.1421753567344568, "learning_rate": 3.441856038997737e-07, "loss": 0.0428, "step": 14248 }, { "epoch": 3.242093287827076, "grad_norm": 1.2274749816573758, "learning_rate": 3.4410578213332645e-07, "loss": 0.0218, "step": 14249 }, { "epoch": 3.242320819112628, "grad_norm": 0.5582154570661995, "learning_rate": 3.440259661078422e-07, "loss": 0.0041, "step": 14250 }, { "epoch": 3.2425483503981796, "grad_norm": 1.8703295223607241, "learning_rate": 3.4394615582495295e-07, "loss": 0.0819, "step": 14251 }, { "epoch": 3.2427758816837313, "grad_norm": 1.376800280921143, "learning_rate": 3.4386635128628916e-07, "loss": 0.065, "step": 14252 }, { "epoch": 3.243003412969283, "grad_norm": 1.117492606359382, "learning_rate": 3.437865524934824e-07, "loss": 0.0223, "step": 14253 }, { "epoch": 3.243230944254835, "grad_norm": 2.7205673578735663, "learning_rate": 3.437067594481637e-07, "loss": 0.0238, "step": 14254 }, { "epoch": 3.243458475540387, "grad_norm": 1.335724808195048, "learning_rate": 3.4362697215196347e-07, "loss": 0.0341, "step": 14255 }, { "epoch": 3.2436860068259388, "grad_norm": 1.0442912124474084, "learning_rate": 3.4354719060651286e-07, "loss": 0.0112, "step": 14256 }, { "epoch": 3.2439135381114905, "grad_norm": 1.5974383309319915, "learning_rate": 3.434674148134419e-07, "loss": 0.0135, "step": 14257 }, { "epoch": 3.2441410693970423, "grad_norm": 2.3538874451110257, "learning_rate": 3.433876447743817e-07, "loss": 0.08, "step": 14258 }, { "epoch": 3.244368600682594, "grad_norm": 2.0785532215322027, "learning_rate": 3.4330788049096196e-07, "loss": 0.0721, "step": 14259 }, { "epoch": 3.244596131968146, "grad_norm": 1.3000201624950973, "learning_rate": 3.432281219648133e-07, "loss": 0.0876, "step": 14260 }, { "epoch": 3.2448236632536975, "grad_norm": 1.3705374148020093, "learning_rate": 3.431483691975661e-07, "loss": 0.059, "step": 14261 }, { "epoch": 3.2450511945392493, "grad_norm": 1.3790226154436707, "learning_rate": 3.430686221908497e-07, "loss": 0.0102, "step": 14262 }, { "epoch": 3.245278725824801, "grad_norm": 1.8342303364197996, "learning_rate": 3.429888809462946e-07, "loss": 0.0242, "step": 14263 }, { "epoch": 3.245506257110353, "grad_norm": 1.2135914775779888, "learning_rate": 3.4290914546553006e-07, "loss": 0.0212, "step": 14264 }, { "epoch": 3.2457337883959045, "grad_norm": 0.7807715886172751, "learning_rate": 3.428294157501859e-07, "loss": 0.0191, "step": 14265 }, { "epoch": 3.2459613196814563, "grad_norm": 1.4887871684248475, "learning_rate": 3.4274969180189203e-07, "loss": 0.0195, "step": 14266 }, { "epoch": 3.246188850967008, "grad_norm": 1.6467326709504786, "learning_rate": 3.426699736222773e-07, "loss": 0.1917, "step": 14267 }, { "epoch": 3.24641638225256, "grad_norm": 1.6405296421374231, "learning_rate": 3.4259026121297145e-07, "loss": 0.0406, "step": 14268 }, { "epoch": 3.2466439135381115, "grad_norm": 1.4624632944796276, "learning_rate": 3.4251055457560326e-07, "loss": 0.0302, "step": 14269 }, { "epoch": 3.2468714448236633, "grad_norm": 1.759450847685765, "learning_rate": 3.4243085371180223e-07, "loss": 0.0538, "step": 14270 }, { "epoch": 3.247098976109215, "grad_norm": 2.115324870765557, "learning_rate": 3.423511586231967e-07, "loss": 0.013, "step": 14271 }, { "epoch": 3.247326507394767, "grad_norm": 1.8390610653406043, "learning_rate": 3.42271469311416e-07, "loss": 0.0866, "step": 14272 }, { "epoch": 3.2475540386803186, "grad_norm": 1.240693771884126, "learning_rate": 3.4219178577808874e-07, "loss": 0.0212, "step": 14273 }, { "epoch": 3.2477815699658703, "grad_norm": 1.1633684282933432, "learning_rate": 3.421121080248433e-07, "loss": 0.0107, "step": 14274 }, { "epoch": 3.248009101251422, "grad_norm": 1.513108675950562, "learning_rate": 3.4203243605330854e-07, "loss": 0.0496, "step": 14275 }, { "epoch": 3.248236632536974, "grad_norm": 0.7294263288113205, "learning_rate": 3.419527698651123e-07, "loss": 0.0099, "step": 14276 }, { "epoch": 3.2484641638225256, "grad_norm": 2.001373482049415, "learning_rate": 3.4187310946188323e-07, "loss": 0.0504, "step": 14277 }, { "epoch": 3.2486916951080773, "grad_norm": 1.714326034376967, "learning_rate": 3.4179345484524907e-07, "loss": 0.0316, "step": 14278 }, { "epoch": 3.248919226393629, "grad_norm": 1.6477476098259418, "learning_rate": 3.417138060168381e-07, "loss": 0.0207, "step": 14279 }, { "epoch": 3.249146757679181, "grad_norm": 1.974308711923056, "learning_rate": 3.4163416297827835e-07, "loss": 0.0968, "step": 14280 }, { "epoch": 3.2493742889647326, "grad_norm": 2.0258805994584903, "learning_rate": 3.415545257311971e-07, "loss": 0.0286, "step": 14281 }, { "epoch": 3.2496018202502843, "grad_norm": 1.1420186837514275, "learning_rate": 3.4147489427722236e-07, "loss": 0.0093, "step": 14282 }, { "epoch": 3.249829351535836, "grad_norm": 0.340898434128629, "learning_rate": 3.4139526861798143e-07, "loss": 0.0018, "step": 14283 }, { "epoch": 3.250056882821388, "grad_norm": 2.0804994249994606, "learning_rate": 3.4131564875510206e-07, "loss": 0.0318, "step": 14284 }, { "epoch": 3.2502844141069396, "grad_norm": 1.182858382208802, "learning_rate": 3.4123603469021095e-07, "loss": 0.0253, "step": 14285 }, { "epoch": 3.2505119453924913, "grad_norm": 1.2008061973120427, "learning_rate": 3.4115642642493565e-07, "loss": 0.0125, "step": 14286 }, { "epoch": 3.250739476678043, "grad_norm": 2.1717390070226554, "learning_rate": 3.4107682396090345e-07, "loss": 0.0265, "step": 14287 }, { "epoch": 3.250967007963595, "grad_norm": 2.1831062341633887, "learning_rate": 3.409972272997407e-07, "loss": 0.1465, "step": 14288 }, { "epoch": 3.2511945392491466, "grad_norm": 1.5661394635549473, "learning_rate": 3.409176364430747e-07, "loss": 0.0247, "step": 14289 }, { "epoch": 3.2514220705346983, "grad_norm": 1.599999641908275, "learning_rate": 3.4083805139253174e-07, "loss": 0.0678, "step": 14290 }, { "epoch": 3.25164960182025, "grad_norm": 1.6242118199157112, "learning_rate": 3.407584721497388e-07, "loss": 0.0949, "step": 14291 }, { "epoch": 3.2518771331058023, "grad_norm": 1.5435689354668418, "learning_rate": 3.406788987163219e-07, "loss": 0.0846, "step": 14292 }, { "epoch": 3.2521046643913536, "grad_norm": 1.6416406624327815, "learning_rate": 3.4059933109390766e-07, "loss": 0.0994, "step": 14293 }, { "epoch": 3.252332195676906, "grad_norm": 2.0165968227915636, "learning_rate": 3.4051976928412237e-07, "loss": 0.0629, "step": 14294 }, { "epoch": 3.252559726962457, "grad_norm": 1.0864612414646433, "learning_rate": 3.404402132885919e-07, "loss": 0.0547, "step": 14295 }, { "epoch": 3.2527872582480093, "grad_norm": 0.959913787419441, "learning_rate": 3.403606631089424e-07, "loss": 0.0083, "step": 14296 }, { "epoch": 3.253014789533561, "grad_norm": 1.0912643884754598, "learning_rate": 3.402811187467997e-07, "loss": 0.0773, "step": 14297 }, { "epoch": 3.253242320819113, "grad_norm": 1.8796370769630626, "learning_rate": 3.402015802037893e-07, "loss": 0.0234, "step": 14298 }, { "epoch": 3.2534698521046646, "grad_norm": 1.1086790524004024, "learning_rate": 3.40122047481537e-07, "loss": 0.0089, "step": 14299 }, { "epoch": 3.2536973833902163, "grad_norm": 1.000275245247726, "learning_rate": 3.4004252058166833e-07, "loss": 0.0072, "step": 14300 }, { "epoch": 3.253924914675768, "grad_norm": 1.4090860332587833, "learning_rate": 3.399629995058089e-07, "loss": 0.0872, "step": 14301 }, { "epoch": 3.25415244596132, "grad_norm": 1.2559119505799416, "learning_rate": 3.398834842555835e-07, "loss": 0.0727, "step": 14302 }, { "epoch": 3.2543799772468716, "grad_norm": 1.0641461092341085, "learning_rate": 3.3980397483261775e-07, "loss": 0.0269, "step": 14303 }, { "epoch": 3.2546075085324233, "grad_norm": 3.8269077730041596, "learning_rate": 3.3972447123853644e-07, "loss": 0.0222, "step": 14304 }, { "epoch": 3.254835039817975, "grad_norm": 1.4564536565941053, "learning_rate": 3.396449734749643e-07, "loss": 0.1038, "step": 14305 }, { "epoch": 3.255062571103527, "grad_norm": 1.838583264916514, "learning_rate": 3.395654815435262e-07, "loss": 0.0453, "step": 14306 }, { "epoch": 3.2552901023890786, "grad_norm": 0.9904403013989204, "learning_rate": 3.3948599544584697e-07, "loss": 0.0068, "step": 14307 }, { "epoch": 3.2555176336746303, "grad_norm": 1.2912663263370714, "learning_rate": 3.394065151835513e-07, "loss": 0.017, "step": 14308 }, { "epoch": 3.255745164960182, "grad_norm": 1.3558654468993598, "learning_rate": 3.3932704075826344e-07, "loss": 0.0233, "step": 14309 }, { "epoch": 3.255972696245734, "grad_norm": 1.8875607259046794, "learning_rate": 3.3924757217160745e-07, "loss": 0.0365, "step": 14310 }, { "epoch": 3.2562002275312856, "grad_norm": 1.454221478591199, "learning_rate": 3.391681094252079e-07, "loss": 0.0357, "step": 14311 }, { "epoch": 3.2564277588168373, "grad_norm": 1.6784780216344173, "learning_rate": 3.3908865252068866e-07, "loss": 0.0793, "step": 14312 }, { "epoch": 3.256655290102389, "grad_norm": 1.043596096767074, "learning_rate": 3.390092014596736e-07, "loss": 0.0136, "step": 14313 }, { "epoch": 3.256882821387941, "grad_norm": 1.4216150550136462, "learning_rate": 3.3892975624378677e-07, "loss": 0.0242, "step": 14314 }, { "epoch": 3.2571103526734926, "grad_norm": 1.017133209477059, "learning_rate": 3.3885031687465197e-07, "loss": 0.0173, "step": 14315 }, { "epoch": 3.2573378839590443, "grad_norm": 2.4622713363500117, "learning_rate": 3.3877088335389273e-07, "loss": 0.0592, "step": 14316 }, { "epoch": 3.257565415244596, "grad_norm": 0.7758472927856728, "learning_rate": 3.386914556831321e-07, "loss": 0.0131, "step": 14317 }, { "epoch": 3.257792946530148, "grad_norm": 1.4547848930114706, "learning_rate": 3.386120338639941e-07, "loss": 0.1383, "step": 14318 }, { "epoch": 3.2580204778156996, "grad_norm": 0.5778671821660917, "learning_rate": 3.385326178981013e-07, "loss": 0.0198, "step": 14319 }, { "epoch": 3.2582480091012513, "grad_norm": 1.7318327795933735, "learning_rate": 3.3845320778707726e-07, "loss": 0.0075, "step": 14320 }, { "epoch": 3.258475540386803, "grad_norm": 1.5318572645815751, "learning_rate": 3.383738035325447e-07, "loss": 0.0424, "step": 14321 }, { "epoch": 3.258703071672355, "grad_norm": 2.365783370359913, "learning_rate": 3.3829440513612697e-07, "loss": 0.04, "step": 14322 }, { "epoch": 3.2589306029579066, "grad_norm": 2.1134056363419784, "learning_rate": 3.382150125994466e-07, "loss": 0.0446, "step": 14323 }, { "epoch": 3.2591581342434583, "grad_norm": 0.9786628929275357, "learning_rate": 3.3813562592412586e-07, "loss": 0.06, "step": 14324 }, { "epoch": 3.25938566552901, "grad_norm": 2.502565680174159, "learning_rate": 3.3805624511178784e-07, "loss": 0.0241, "step": 14325 }, { "epoch": 3.259613196814562, "grad_norm": 1.3176807261083403, "learning_rate": 3.379768701640541e-07, "loss": 0.0436, "step": 14326 }, { "epoch": 3.2598407281001136, "grad_norm": 1.2831831646850218, "learning_rate": 3.3789750108254803e-07, "loss": 0.0141, "step": 14327 }, { "epoch": 3.2600682593856654, "grad_norm": 1.053182044271938, "learning_rate": 3.378181378688912e-07, "loss": 0.0318, "step": 14328 }, { "epoch": 3.260295790671217, "grad_norm": 6.052824857772029, "learning_rate": 3.3773878052470544e-07, "loss": 0.0376, "step": 14329 }, { "epoch": 3.260523321956769, "grad_norm": 1.3337937841982275, "learning_rate": 3.3765942905161317e-07, "loss": 0.0324, "step": 14330 }, { "epoch": 3.260750853242321, "grad_norm": 1.8802660358810024, "learning_rate": 3.3758008345123565e-07, "loss": 0.0379, "step": 14331 }, { "epoch": 3.2609783845278724, "grad_norm": 2.5779377474079896, "learning_rate": 3.375007437251949e-07, "loss": 0.0434, "step": 14332 }, { "epoch": 3.2612059158134246, "grad_norm": 2.3132691624105823, "learning_rate": 3.374214098751124e-07, "loss": 0.0114, "step": 14333 }, { "epoch": 3.261433447098976, "grad_norm": 0.9206088271183956, "learning_rate": 3.373420819026098e-07, "loss": 0.0286, "step": 14334 }, { "epoch": 3.261660978384528, "grad_norm": 2.0420145947128243, "learning_rate": 3.3726275980930826e-07, "loss": 0.0601, "step": 14335 }, { "epoch": 3.26188850967008, "grad_norm": 2.102311711094768, "learning_rate": 3.371834435968287e-07, "loss": 0.0881, "step": 14336 }, { "epoch": 3.2621160409556316, "grad_norm": 1.5000138669584753, "learning_rate": 3.371041332667927e-07, "loss": 0.0183, "step": 14337 }, { "epoch": 3.2623435722411833, "grad_norm": 1.5022161951377642, "learning_rate": 3.370248288208207e-07, "loss": 0.0122, "step": 14338 }, { "epoch": 3.262571103526735, "grad_norm": 5.376785470316891, "learning_rate": 3.369455302605338e-07, "loss": 0.0386, "step": 14339 }, { "epoch": 3.262798634812287, "grad_norm": 2.1417963411042913, "learning_rate": 3.368662375875527e-07, "loss": 0.0896, "step": 14340 }, { "epoch": 3.2630261660978386, "grad_norm": 1.124011450788897, "learning_rate": 3.367869508034983e-07, "loss": 0.0201, "step": 14341 }, { "epoch": 3.2632536973833903, "grad_norm": 0.7772226850662518, "learning_rate": 3.3670766990999075e-07, "loss": 0.0059, "step": 14342 }, { "epoch": 3.263481228668942, "grad_norm": 1.894533899809289, "learning_rate": 3.3662839490865016e-07, "loss": 0.0362, "step": 14343 }, { "epoch": 3.263708759954494, "grad_norm": 2.978945422509964, "learning_rate": 3.365491258010974e-07, "loss": 0.0333, "step": 14344 }, { "epoch": 3.2639362912400456, "grad_norm": 4.099270697423593, "learning_rate": 3.364698625889519e-07, "loss": 0.0642, "step": 14345 }, { "epoch": 3.2641638225255973, "grad_norm": 2.2241836627807885, "learning_rate": 3.3639060527383403e-07, "loss": 0.0836, "step": 14346 }, { "epoch": 3.264391353811149, "grad_norm": 1.5968866539662832, "learning_rate": 3.3631135385736385e-07, "loss": 0.0476, "step": 14347 }, { "epoch": 3.264618885096701, "grad_norm": 1.0750963877978335, "learning_rate": 3.362321083411607e-07, "loss": 0.0193, "step": 14348 }, { "epoch": 3.2648464163822526, "grad_norm": 0.6925096313706516, "learning_rate": 3.361528687268446e-07, "loss": 0.0131, "step": 14349 }, { "epoch": 3.2650739476678043, "grad_norm": 0.8682799371805342, "learning_rate": 3.3607363501603457e-07, "loss": 0.0111, "step": 14350 }, { "epoch": 3.265301478953356, "grad_norm": 0.8745826056186502, "learning_rate": 3.359944072103506e-07, "loss": 0.0192, "step": 14351 }, { "epoch": 3.265529010238908, "grad_norm": 7.002431315556783, "learning_rate": 3.3591518531141146e-07, "loss": 0.1414, "step": 14352 }, { "epoch": 3.2657565415244596, "grad_norm": 1.7524714176414802, "learning_rate": 3.3583596932083645e-07, "loss": 0.0312, "step": 14353 }, { "epoch": 3.2659840728100114, "grad_norm": 1.4292484109238062, "learning_rate": 3.3575675924024483e-07, "loss": 0.0624, "step": 14354 }, { "epoch": 3.266211604095563, "grad_norm": 1.406348466835685, "learning_rate": 3.3567755507125513e-07, "loss": 0.0557, "step": 14355 }, { "epoch": 3.266439135381115, "grad_norm": 1.0062659762412862, "learning_rate": 3.355983568154866e-07, "loss": 0.043, "step": 14356 }, { "epoch": 3.2666666666666666, "grad_norm": 1.3583179890606034, "learning_rate": 3.355191644745574e-07, "loss": 0.0372, "step": 14357 }, { "epoch": 3.2668941979522184, "grad_norm": 2.3146131052932137, "learning_rate": 3.354399780500866e-07, "loss": 0.0435, "step": 14358 }, { "epoch": 3.26712172923777, "grad_norm": 1.2855371995975688, "learning_rate": 3.3536079754369206e-07, "loss": 0.0252, "step": 14359 }, { "epoch": 3.267349260523322, "grad_norm": 1.3981433826660459, "learning_rate": 3.352816229569923e-07, "loss": 0.0165, "step": 14360 }, { "epoch": 3.2675767918088736, "grad_norm": 1.160920234787398, "learning_rate": 3.3520245429160596e-07, "loss": 0.0199, "step": 14361 }, { "epoch": 3.2678043230944254, "grad_norm": 1.2612264838535614, "learning_rate": 3.3512329154915033e-07, "loss": 0.0211, "step": 14362 }, { "epoch": 3.268031854379977, "grad_norm": 1.7970899320746265, "learning_rate": 3.3504413473124415e-07, "loss": 0.0453, "step": 14363 }, { "epoch": 3.268259385665529, "grad_norm": 1.4816020845321332, "learning_rate": 3.349649838395044e-07, "loss": 0.0573, "step": 14364 }, { "epoch": 3.2684869169510806, "grad_norm": 2.785369544727005, "learning_rate": 3.348858388755495e-07, "loss": 0.0365, "step": 14365 }, { "epoch": 3.2687144482366324, "grad_norm": 1.778261323383784, "learning_rate": 3.348066998409966e-07, "loss": 0.1012, "step": 14366 }, { "epoch": 3.268941979522184, "grad_norm": 1.9273569473114718, "learning_rate": 3.347275667374632e-07, "loss": 0.0464, "step": 14367 }, { "epoch": 3.269169510807736, "grad_norm": 1.1379308138055817, "learning_rate": 3.3464843956656694e-07, "loss": 0.0094, "step": 14368 }, { "epoch": 3.2693970420932876, "grad_norm": 3.0659115724868693, "learning_rate": 3.3456931832992465e-07, "loss": 0.0108, "step": 14369 }, { "epoch": 3.26962457337884, "grad_norm": 1.5730156928424777, "learning_rate": 3.344902030291538e-07, "loss": 0.0107, "step": 14370 }, { "epoch": 3.269852104664391, "grad_norm": 1.2022879692337776, "learning_rate": 3.3441109366587095e-07, "loss": 0.0177, "step": 14371 }, { "epoch": 3.2700796359499433, "grad_norm": 1.1881802200840361, "learning_rate": 3.343319902416933e-07, "loss": 0.0274, "step": 14372 }, { "epoch": 3.2703071672354946, "grad_norm": 2.3574140232991483, "learning_rate": 3.3425289275823724e-07, "loss": 0.0516, "step": 14373 }, { "epoch": 3.270534698521047, "grad_norm": 2.1181344997856666, "learning_rate": 3.341738012171196e-07, "loss": 0.1214, "step": 14374 }, { "epoch": 3.2707622298065986, "grad_norm": 0.9169018981237813, "learning_rate": 3.340947156199571e-07, "loss": 0.005, "step": 14375 }, { "epoch": 3.2709897610921503, "grad_norm": 1.5474799149425598, "learning_rate": 3.3401563596836556e-07, "loss": 0.0462, "step": 14376 }, { "epoch": 3.271217292377702, "grad_norm": 1.7992740393950117, "learning_rate": 3.339365622639618e-07, "loss": 0.0225, "step": 14377 }, { "epoch": 3.271444823663254, "grad_norm": 1.166569672732423, "learning_rate": 3.338574945083614e-07, "loss": 0.0278, "step": 14378 }, { "epoch": 3.2716723549488056, "grad_norm": 1.544768775044554, "learning_rate": 3.337784327031808e-07, "loss": 0.0508, "step": 14379 }, { "epoch": 3.2718998862343573, "grad_norm": 2.190765612097965, "learning_rate": 3.3369937685003546e-07, "loss": 0.0088, "step": 14380 }, { "epoch": 3.272127417519909, "grad_norm": 1.364080344216024, "learning_rate": 3.3362032695054144e-07, "loss": 0.0323, "step": 14381 }, { "epoch": 3.272354948805461, "grad_norm": 1.4400410090585332, "learning_rate": 3.335412830063145e-07, "loss": 0.0754, "step": 14382 }, { "epoch": 3.2725824800910126, "grad_norm": 0.8636603623870648, "learning_rate": 3.3346224501896963e-07, "loss": 0.0231, "step": 14383 }, { "epoch": 3.2728100113765644, "grad_norm": 0.847949150466118, "learning_rate": 3.3338321299012285e-07, "loss": 0.0072, "step": 14384 }, { "epoch": 3.273037542662116, "grad_norm": 2.231918952152114, "learning_rate": 3.333041869213892e-07, "loss": 0.0121, "step": 14385 }, { "epoch": 3.273265073947668, "grad_norm": 1.3289656871955662, "learning_rate": 3.332251668143831e-07, "loss": 0.0179, "step": 14386 }, { "epoch": 3.2734926052332196, "grad_norm": 0.9589913762814583, "learning_rate": 3.331461526707208e-07, "loss": 0.0152, "step": 14387 }, { "epoch": 3.2737201365187714, "grad_norm": 1.4514515697469015, "learning_rate": 3.3306714449201647e-07, "loss": 0.0289, "step": 14388 }, { "epoch": 3.273947667804323, "grad_norm": 2.1633803130412295, "learning_rate": 3.329881422798852e-07, "loss": 0.1296, "step": 14389 }, { "epoch": 3.274175199089875, "grad_norm": 1.357132680444134, "learning_rate": 3.3290914603594136e-07, "loss": 0.038, "step": 14390 }, { "epoch": 3.2744027303754266, "grad_norm": 1.0691553422336972, "learning_rate": 3.328301557617998e-07, "loss": 0.0181, "step": 14391 }, { "epoch": 3.2746302616609784, "grad_norm": 1.2045417398332507, "learning_rate": 3.3275117145907457e-07, "loss": 0.0665, "step": 14392 }, { "epoch": 3.27485779294653, "grad_norm": 1.3485281927728379, "learning_rate": 3.3267219312938014e-07, "loss": 0.0284, "step": 14393 }, { "epoch": 3.275085324232082, "grad_norm": 2.376276574970545, "learning_rate": 3.325932207743309e-07, "loss": 0.0365, "step": 14394 }, { "epoch": 3.2753128555176336, "grad_norm": 1.5071604675378039, "learning_rate": 3.3251425439554054e-07, "loss": 0.0151, "step": 14395 }, { "epoch": 3.2755403868031854, "grad_norm": 1.6705333525885306, "learning_rate": 3.3243529399462336e-07, "loss": 0.073, "step": 14396 }, { "epoch": 3.275767918088737, "grad_norm": 1.129407566511268, "learning_rate": 3.323563395731928e-07, "loss": 0.0109, "step": 14397 }, { "epoch": 3.275995449374289, "grad_norm": 1.904412733558171, "learning_rate": 3.322773911328629e-07, "loss": 0.0281, "step": 14398 }, { "epoch": 3.2762229806598406, "grad_norm": 0.9702350512315121, "learning_rate": 3.321984486752468e-07, "loss": 0.0209, "step": 14399 }, { "epoch": 3.2764505119453924, "grad_norm": 2.693028124424915, "learning_rate": 3.3211951220195813e-07, "loss": 0.0128, "step": 14400 }, { "epoch": 3.276678043230944, "grad_norm": 1.27518551564825, "learning_rate": 3.320405817146105e-07, "loss": 0.0884, "step": 14401 }, { "epoch": 3.276905574516496, "grad_norm": 2.1000557119944063, "learning_rate": 3.319616572148166e-07, "loss": 0.0476, "step": 14402 }, { "epoch": 3.2771331058020476, "grad_norm": 1.8149552462254628, "learning_rate": 3.3188273870419e-07, "loss": 0.0084, "step": 14403 }, { "epoch": 3.2773606370875994, "grad_norm": 1.1920730654615261, "learning_rate": 3.3180382618434344e-07, "loss": 0.0097, "step": 14404 }, { "epoch": 3.277588168373151, "grad_norm": 1.7830606060563776, "learning_rate": 3.3172491965688947e-07, "loss": 0.0291, "step": 14405 }, { "epoch": 3.277815699658703, "grad_norm": 1.012857655105847, "learning_rate": 3.3164601912344096e-07, "loss": 0.0082, "step": 14406 }, { "epoch": 3.2780432309442546, "grad_norm": 2.5442629168746227, "learning_rate": 3.3156712458561057e-07, "loss": 0.0532, "step": 14407 }, { "epoch": 3.2782707622298064, "grad_norm": 1.3499468790589704, "learning_rate": 3.3148823604501114e-07, "loss": 0.0336, "step": 14408 }, { "epoch": 3.2784982935153586, "grad_norm": 3.3438893098348563, "learning_rate": 3.314093535032542e-07, "loss": 0.0678, "step": 14409 }, { "epoch": 3.27872582480091, "grad_norm": 1.6206141266824388, "learning_rate": 3.313304769619527e-07, "loss": 0.0265, "step": 14410 }, { "epoch": 3.278953356086462, "grad_norm": 0.9755249875892891, "learning_rate": 3.312516064227185e-07, "loss": 0.0125, "step": 14411 }, { "epoch": 3.2791808873720134, "grad_norm": 0.8975311295488787, "learning_rate": 3.311727418871631e-07, "loss": 0.0053, "step": 14412 }, { "epoch": 3.2794084186575656, "grad_norm": 1.6476115841202366, "learning_rate": 3.3109388335689885e-07, "loss": 0.0379, "step": 14413 }, { "epoch": 3.2796359499431174, "grad_norm": 1.8393746638162314, "learning_rate": 3.3101503083353736e-07, "loss": 0.0333, "step": 14414 }, { "epoch": 3.279863481228669, "grad_norm": 0.6146473669190659, "learning_rate": 3.3093618431869043e-07, "loss": 0.005, "step": 14415 }, { "epoch": 3.280091012514221, "grad_norm": 1.2283024299527978, "learning_rate": 3.3085734381396937e-07, "loss": 0.0734, "step": 14416 }, { "epoch": 3.2803185437997726, "grad_norm": 1.0841900157202604, "learning_rate": 3.307785093209852e-07, "loss": 0.0502, "step": 14417 }, { "epoch": 3.2805460750853244, "grad_norm": 2.0782053116458177, "learning_rate": 3.306996808413498e-07, "loss": 0.0704, "step": 14418 }, { "epoch": 3.280773606370876, "grad_norm": 0.5698165191923525, "learning_rate": 3.306208583766736e-07, "loss": 0.0151, "step": 14419 }, { "epoch": 3.281001137656428, "grad_norm": 1.8436969474680571, "learning_rate": 3.305420419285679e-07, "loss": 0.0166, "step": 14420 }, { "epoch": 3.2812286689419796, "grad_norm": 1.138837226265821, "learning_rate": 3.3046323149864377e-07, "loss": 0.018, "step": 14421 }, { "epoch": 3.2814562002275314, "grad_norm": 1.6127371779618365, "learning_rate": 3.303844270885118e-07, "loss": 0.0181, "step": 14422 }, { "epoch": 3.281683731513083, "grad_norm": 0.9967013212860726, "learning_rate": 3.303056286997827e-07, "loss": 0.0279, "step": 14423 }, { "epoch": 3.281911262798635, "grad_norm": 1.3738260825412998, "learning_rate": 3.302268363340666e-07, "loss": 0.0205, "step": 14424 }, { "epoch": 3.2821387940841866, "grad_norm": 1.2869707108780761, "learning_rate": 3.3014804999297433e-07, "loss": 0.0676, "step": 14425 }, { "epoch": 3.2823663253697384, "grad_norm": 2.0360080603649506, "learning_rate": 3.3006926967811566e-07, "loss": 0.0473, "step": 14426 }, { "epoch": 3.28259385665529, "grad_norm": 1.4884168322871965, "learning_rate": 3.29990495391101e-07, "loss": 0.057, "step": 14427 }, { "epoch": 3.282821387940842, "grad_norm": 1.1344752311450672, "learning_rate": 3.299117271335403e-07, "loss": 0.0088, "step": 14428 }, { "epoch": 3.2830489192263936, "grad_norm": 2.0146155842013957, "learning_rate": 3.298329649070438e-07, "loss": 0.0223, "step": 14429 }, { "epoch": 3.2832764505119454, "grad_norm": 1.600313311528726, "learning_rate": 3.2975420871322087e-07, "loss": 0.0334, "step": 14430 }, { "epoch": 3.283503981797497, "grad_norm": 1.2840792175688844, "learning_rate": 3.2967545855368094e-07, "loss": 0.054, "step": 14431 }, { "epoch": 3.283731513083049, "grad_norm": 1.1071123545204806, "learning_rate": 3.2959671443003395e-07, "loss": 0.0623, "step": 14432 }, { "epoch": 3.2839590443686006, "grad_norm": 1.418154995847858, "learning_rate": 3.2951797634388894e-07, "loss": 0.0306, "step": 14433 }, { "epoch": 3.2841865756541524, "grad_norm": 1.351819288541589, "learning_rate": 3.294392442968554e-07, "loss": 0.0939, "step": 14434 }, { "epoch": 3.284414106939704, "grad_norm": 1.4288120046878339, "learning_rate": 3.293605182905426e-07, "loss": 0.0075, "step": 14435 }, { "epoch": 3.284641638225256, "grad_norm": 1.515377988024722, "learning_rate": 3.2928179832655916e-07, "loss": 0.1009, "step": 14436 }, { "epoch": 3.2848691695108077, "grad_norm": 1.9262575842495406, "learning_rate": 3.292030844065144e-07, "loss": 0.0717, "step": 14437 }, { "epoch": 3.2850967007963594, "grad_norm": 2.0413184650829628, "learning_rate": 3.291243765320166e-07, "loss": 0.0194, "step": 14438 }, { "epoch": 3.285324232081911, "grad_norm": 0.8286884913704072, "learning_rate": 3.290456747046749e-07, "loss": 0.0199, "step": 14439 }, { "epoch": 3.285551763367463, "grad_norm": 2.4998911958618293, "learning_rate": 3.289669789260974e-07, "loss": 0.0425, "step": 14440 }, { "epoch": 3.2857792946530147, "grad_norm": 1.40255372100382, "learning_rate": 3.288882891978927e-07, "loss": 0.0229, "step": 14441 }, { "epoch": 3.2860068259385664, "grad_norm": 1.244929620492713, "learning_rate": 3.2880960552166926e-07, "loss": 0.009, "step": 14442 }, { "epoch": 3.286234357224118, "grad_norm": 2.704212495492077, "learning_rate": 3.287309278990349e-07, "loss": 0.0297, "step": 14443 }, { "epoch": 3.28646188850967, "grad_norm": 0.9802820535904603, "learning_rate": 3.286522563315979e-07, "loss": 0.0126, "step": 14444 }, { "epoch": 3.2866894197952217, "grad_norm": 1.1835012330865278, "learning_rate": 3.2857359082096576e-07, "loss": 0.0363, "step": 14445 }, { "epoch": 3.2869169510807734, "grad_norm": 1.0812811691123976, "learning_rate": 3.284949313687469e-07, "loss": 0.0525, "step": 14446 }, { "epoch": 3.287144482366325, "grad_norm": 1.4313219955638292, "learning_rate": 3.284162779765481e-07, "loss": 0.0524, "step": 14447 }, { "epoch": 3.2873720136518774, "grad_norm": 1.237797213916977, "learning_rate": 3.283376306459779e-07, "loss": 0.0495, "step": 14448 }, { "epoch": 3.2875995449374287, "grad_norm": 2.668464874660663, "learning_rate": 3.2825898937864325e-07, "loss": 0.0323, "step": 14449 }, { "epoch": 3.287827076222981, "grad_norm": 1.8106943674273364, "learning_rate": 3.2818035417615107e-07, "loss": 0.0552, "step": 14450 }, { "epoch": 3.288054607508532, "grad_norm": 2.059770468353781, "learning_rate": 3.2810172504010917e-07, "loss": 0.0402, "step": 14451 }, { "epoch": 3.2882821387940844, "grad_norm": 2.5660491774964442, "learning_rate": 3.28023101972124e-07, "loss": 0.0182, "step": 14452 }, { "epoch": 3.288509670079636, "grad_norm": 1.037681494570665, "learning_rate": 3.2794448497380283e-07, "loss": 0.0125, "step": 14453 }, { "epoch": 3.288737201365188, "grad_norm": 0.5237565188035443, "learning_rate": 3.2786587404675246e-07, "loss": 0.0028, "step": 14454 }, { "epoch": 3.2889647326507396, "grad_norm": 1.045111025165098, "learning_rate": 3.277872691925793e-07, "loss": 0.011, "step": 14455 }, { "epoch": 3.2891922639362914, "grad_norm": 1.5101699520980605, "learning_rate": 3.2770867041289024e-07, "loss": 0.0365, "step": 14456 }, { "epoch": 3.289419795221843, "grad_norm": 1.681254560894784, "learning_rate": 3.2763007770929125e-07, "loss": 0.0771, "step": 14457 }, { "epoch": 3.289647326507395, "grad_norm": 4.038092806554366, "learning_rate": 3.2755149108338907e-07, "loss": 0.0275, "step": 14458 }, { "epoch": 3.2898748577929466, "grad_norm": 1.3157724768461958, "learning_rate": 3.2747291053678953e-07, "loss": 0.0243, "step": 14459 }, { "epoch": 3.2901023890784984, "grad_norm": 1.3697375335584578, "learning_rate": 3.2739433607109865e-07, "loss": 0.0505, "step": 14460 }, { "epoch": 3.29032992036405, "grad_norm": 1.698693180775414, "learning_rate": 3.2731576768792283e-07, "loss": 0.0213, "step": 14461 }, { "epoch": 3.290557451649602, "grad_norm": 1.143601975976637, "learning_rate": 3.2723720538886725e-07, "loss": 0.0091, "step": 14462 }, { "epoch": 3.2907849829351536, "grad_norm": 1.122475244807288, "learning_rate": 3.2715864917553826e-07, "loss": 0.1059, "step": 14463 }, { "epoch": 3.2910125142207054, "grad_norm": 1.079826723881356, "learning_rate": 3.2708009904954055e-07, "loss": 0.0198, "step": 14464 }, { "epoch": 3.291240045506257, "grad_norm": 1.1172407871459806, "learning_rate": 3.2700155501248043e-07, "loss": 0.079, "step": 14465 }, { "epoch": 3.291467576791809, "grad_norm": 1.7522851580891667, "learning_rate": 3.269230170659624e-07, "loss": 0.0312, "step": 14466 }, { "epoch": 3.2916951080773607, "grad_norm": 1.288133308425476, "learning_rate": 3.2684448521159206e-07, "loss": 0.0462, "step": 14467 }, { "epoch": 3.2919226393629124, "grad_norm": 1.0359168731734174, "learning_rate": 3.2676595945097465e-07, "loss": 0.021, "step": 14468 }, { "epoch": 3.292150170648464, "grad_norm": 1.1866575912102564, "learning_rate": 3.266874397857145e-07, "loss": 0.0163, "step": 14469 }, { "epoch": 3.292377701934016, "grad_norm": 1.6850814104683463, "learning_rate": 3.2660892621741706e-07, "loss": 0.0626, "step": 14470 }, { "epoch": 3.2926052332195677, "grad_norm": 1.52326942245301, "learning_rate": 3.2653041874768645e-07, "loss": 0.0149, "step": 14471 }, { "epoch": 3.2928327645051194, "grad_norm": 1.6299876680034489, "learning_rate": 3.2645191737812766e-07, "loss": 0.0544, "step": 14472 }, { "epoch": 3.293060295790671, "grad_norm": 1.5684203789249382, "learning_rate": 3.263734221103447e-07, "loss": 0.098, "step": 14473 }, { "epoch": 3.293287827076223, "grad_norm": 1.0644734247934262, "learning_rate": 3.262949329459421e-07, "loss": 0.0075, "step": 14474 }, { "epoch": 3.2935153583617747, "grad_norm": 1.5673928069857044, "learning_rate": 3.262164498865243e-07, "loss": 0.098, "step": 14475 }, { "epoch": 3.2937428896473264, "grad_norm": 1.829434696499108, "learning_rate": 3.261379729336948e-07, "loss": 0.0648, "step": 14476 }, { "epoch": 3.293970420932878, "grad_norm": 1.7430605196888007, "learning_rate": 3.2605950208905793e-07, "loss": 0.0094, "step": 14477 }, { "epoch": 3.29419795221843, "grad_norm": 1.283045766998469, "learning_rate": 3.2598103735421723e-07, "loss": 0.0309, "step": 14478 }, { "epoch": 3.2944254835039817, "grad_norm": 1.758426967530472, "learning_rate": 3.2590257873077673e-07, "loss": 0.0322, "step": 14479 }, { "epoch": 3.2946530147895334, "grad_norm": 1.8780370225039424, "learning_rate": 3.2582412622033945e-07, "loss": 0.0174, "step": 14480 }, { "epoch": 3.294880546075085, "grad_norm": 2.2048084350505466, "learning_rate": 3.2574567982450913e-07, "loss": 0.0383, "step": 14481 }, { "epoch": 3.295108077360637, "grad_norm": 0.8396347217969281, "learning_rate": 3.2566723954488933e-07, "loss": 0.0524, "step": 14482 }, { "epoch": 3.2953356086461887, "grad_norm": 1.5364742670498355, "learning_rate": 3.255888053830827e-07, "loss": 0.1052, "step": 14483 }, { "epoch": 3.2955631399317404, "grad_norm": 1.3159061227885127, "learning_rate": 3.255103773406928e-07, "loss": 0.0629, "step": 14484 }, { "epoch": 3.295790671217292, "grad_norm": 1.8840693009546432, "learning_rate": 3.254319554193221e-07, "loss": 0.0239, "step": 14485 }, { "epoch": 3.296018202502844, "grad_norm": 2.8898465614749735, "learning_rate": 3.253535396205737e-07, "loss": 0.0626, "step": 14486 }, { "epoch": 3.296245733788396, "grad_norm": 1.2665678383657097, "learning_rate": 3.2527512994605e-07, "loss": 0.0431, "step": 14487 }, { "epoch": 3.2964732650739474, "grad_norm": 1.7388010494501436, "learning_rate": 3.251967263973538e-07, "loss": 0.097, "step": 14488 }, { "epoch": 3.2967007963594996, "grad_norm": 2.1752754110304022, "learning_rate": 3.251183289760876e-07, "loss": 0.0853, "step": 14489 }, { "epoch": 3.296928327645051, "grad_norm": 1.7365227413657693, "learning_rate": 3.2503993768385325e-07, "loss": 0.0462, "step": 14490 }, { "epoch": 3.297155858930603, "grad_norm": 1.028620457013897, "learning_rate": 3.249615525222536e-07, "loss": 0.0072, "step": 14491 }, { "epoch": 3.297383390216155, "grad_norm": 1.611560240206184, "learning_rate": 3.248831734928903e-07, "loss": 0.0668, "step": 14492 }, { "epoch": 3.2976109215017066, "grad_norm": 1.9409913659708906, "learning_rate": 3.2480480059736495e-07, "loss": 0.0219, "step": 14493 }, { "epoch": 3.2978384527872584, "grad_norm": 2.38051256846389, "learning_rate": 3.247264338372798e-07, "loss": 0.0113, "step": 14494 }, { "epoch": 3.29806598407281, "grad_norm": 2.3850349972223666, "learning_rate": 3.246480732142364e-07, "loss": 0.1196, "step": 14495 }, { "epoch": 3.298293515358362, "grad_norm": 1.9222175338677443, "learning_rate": 3.245697187298365e-07, "loss": 0.0937, "step": 14496 }, { "epoch": 3.2985210466439137, "grad_norm": 1.1182012749727364, "learning_rate": 3.244913703856811e-07, "loss": 0.0112, "step": 14497 }, { "epoch": 3.2987485779294654, "grad_norm": 1.2100329969942514, "learning_rate": 3.2441302818337205e-07, "loss": 0.0262, "step": 14498 }, { "epoch": 3.298976109215017, "grad_norm": 1.3433749794547998, "learning_rate": 3.243346921245101e-07, "loss": 0.0213, "step": 14499 }, { "epoch": 3.299203640500569, "grad_norm": 1.131384448254923, "learning_rate": 3.2425636221069633e-07, "loss": 0.0303, "step": 14500 }, { "epoch": 3.2994311717861207, "grad_norm": 1.1163374285539531, "learning_rate": 3.2417803844353156e-07, "loss": 0.0607, "step": 14501 }, { "epoch": 3.2996587030716724, "grad_norm": 2.3441666468453315, "learning_rate": 3.240997208246168e-07, "loss": 0.0676, "step": 14502 }, { "epoch": 3.299886234357224, "grad_norm": 1.5406517304952583, "learning_rate": 3.2402140935555297e-07, "loss": 0.0843, "step": 14503 }, { "epoch": 3.300113765642776, "grad_norm": 1.5308010903294749, "learning_rate": 3.2394310403794005e-07, "loss": 0.0553, "step": 14504 }, { "epoch": 3.3003412969283277, "grad_norm": 2.0224531654240896, "learning_rate": 3.238648048733789e-07, "loss": 0.1157, "step": 14505 }, { "epoch": 3.3005688282138794, "grad_norm": 0.9856058284573992, "learning_rate": 3.237865118634697e-07, "loss": 0.0165, "step": 14506 }, { "epoch": 3.300796359499431, "grad_norm": 1.379644045363057, "learning_rate": 3.2370822500981213e-07, "loss": 0.0925, "step": 14507 }, { "epoch": 3.301023890784983, "grad_norm": 1.1707727721315198, "learning_rate": 3.2362994431400703e-07, "loss": 0.0884, "step": 14508 }, { "epoch": 3.3012514220705347, "grad_norm": 1.1869086573466543, "learning_rate": 3.2355166977765367e-07, "loss": 0.0541, "step": 14509 }, { "epoch": 3.3014789533560864, "grad_norm": 1.7673670368978935, "learning_rate": 3.2347340140235243e-07, "loss": 0.0154, "step": 14510 }, { "epoch": 3.301706484641638, "grad_norm": 1.8409568021023464, "learning_rate": 3.2339513918970266e-07, "loss": 0.119, "step": 14511 }, { "epoch": 3.30193401592719, "grad_norm": 1.5992216435605073, "learning_rate": 3.2331688314130355e-07, "loss": 0.0472, "step": 14512 }, { "epoch": 3.3021615472127417, "grad_norm": 0.7841335204106334, "learning_rate": 3.232386332587549e-07, "loss": 0.0234, "step": 14513 }, { "epoch": 3.3023890784982934, "grad_norm": 1.921733899665934, "learning_rate": 3.231603895436559e-07, "loss": 0.0132, "step": 14514 }, { "epoch": 3.302616609783845, "grad_norm": 0.9736361635866566, "learning_rate": 3.23082151997606e-07, "loss": 0.0066, "step": 14515 }, { "epoch": 3.302844141069397, "grad_norm": 1.0148089152854247, "learning_rate": 3.230039206222037e-07, "loss": 0.0638, "step": 14516 }, { "epoch": 3.3030716723549487, "grad_norm": 1.3464395381927423, "learning_rate": 3.229256954190485e-07, "loss": 0.0276, "step": 14517 }, { "epoch": 3.3032992036405004, "grad_norm": 0.8144595601228222, "learning_rate": 3.2284747638973877e-07, "loss": 0.028, "step": 14518 }, { "epoch": 3.303526734926052, "grad_norm": 5.583232952086528, "learning_rate": 3.227692635358731e-07, "loss": 0.0443, "step": 14519 }, { "epoch": 3.303754266211604, "grad_norm": 0.8541761714248414, "learning_rate": 3.2269105685905023e-07, "loss": 0.0076, "step": 14520 }, { "epoch": 3.3039817974971557, "grad_norm": 0.6720155019610812, "learning_rate": 3.2261285636086837e-07, "loss": 0.0029, "step": 14521 }, { "epoch": 3.3042093287827075, "grad_norm": 0.9701070352728578, "learning_rate": 3.2253466204292624e-07, "loss": 0.0529, "step": 14522 }, { "epoch": 3.304436860068259, "grad_norm": 1.8952214994557972, "learning_rate": 3.2245647390682146e-07, "loss": 0.0426, "step": 14523 }, { "epoch": 3.304664391353811, "grad_norm": 1.394018446126657, "learning_rate": 3.2237829195415244e-07, "loss": 0.035, "step": 14524 }, { "epoch": 3.3048919226393627, "grad_norm": 1.819756502787894, "learning_rate": 3.223001161865169e-07, "loss": 0.037, "step": 14525 }, { "epoch": 3.305119453924915, "grad_norm": 1.3540631403040517, "learning_rate": 3.222219466055125e-07, "loss": 0.0116, "step": 14526 }, { "epoch": 3.305346985210466, "grad_norm": 1.4302177517132728, "learning_rate": 3.2214378321273694e-07, "loss": 0.0804, "step": 14527 }, { "epoch": 3.3055745164960184, "grad_norm": 2.133475269435852, "learning_rate": 3.220656260097877e-07, "loss": 0.0286, "step": 14528 }, { "epoch": 3.3058020477815697, "grad_norm": 1.0756036106862221, "learning_rate": 3.219874749982626e-07, "loss": 0.0417, "step": 14529 }, { "epoch": 3.306029579067122, "grad_norm": 1.2058380392397772, "learning_rate": 3.219093301797585e-07, "loss": 0.058, "step": 14530 }, { "epoch": 3.3062571103526737, "grad_norm": 0.9622162743325816, "learning_rate": 3.218311915558725e-07, "loss": 0.0052, "step": 14531 }, { "epoch": 3.3064846416382254, "grad_norm": 2.770206966788687, "learning_rate": 3.2175305912820184e-07, "loss": 0.0232, "step": 14532 }, { "epoch": 3.306712172923777, "grad_norm": 0.8624710545358333, "learning_rate": 3.21674932898343e-07, "loss": 0.0148, "step": 14533 }, { "epoch": 3.306939704209329, "grad_norm": 1.136338428176675, "learning_rate": 3.2159681286789314e-07, "loss": 0.0738, "step": 14534 }, { "epoch": 3.3071672354948807, "grad_norm": 1.5590872909327393, "learning_rate": 3.2151869903844876e-07, "loss": 0.021, "step": 14535 }, { "epoch": 3.3073947667804324, "grad_norm": 1.0992351463774699, "learning_rate": 3.214405914116066e-07, "loss": 0.0369, "step": 14536 }, { "epoch": 3.307622298065984, "grad_norm": 1.0883987967466435, "learning_rate": 3.2136248998896273e-07, "loss": 0.0501, "step": 14537 }, { "epoch": 3.307849829351536, "grad_norm": 0.8214853888195601, "learning_rate": 3.212843947721133e-07, "loss": 0.0099, "step": 14538 }, { "epoch": 3.3080773606370877, "grad_norm": 1.547495962409173, "learning_rate": 3.212063057626548e-07, "loss": 0.0512, "step": 14539 }, { "epoch": 3.3083048919226394, "grad_norm": 1.543337303250029, "learning_rate": 3.2112822296218287e-07, "loss": 0.021, "step": 14540 }, { "epoch": 3.308532423208191, "grad_norm": 0.8778448530567136, "learning_rate": 3.2105014637229357e-07, "loss": 0.0054, "step": 14541 }, { "epoch": 3.308759954493743, "grad_norm": 1.4015007267550432, "learning_rate": 3.209720759945828e-07, "loss": 0.0651, "step": 14542 }, { "epoch": 3.3089874857792947, "grad_norm": 1.5582447636838344, "learning_rate": 3.208940118306457e-07, "loss": 0.0199, "step": 14543 }, { "epoch": 3.3092150170648464, "grad_norm": 1.2938259950323876, "learning_rate": 3.208159538820783e-07, "loss": 0.1079, "step": 14544 }, { "epoch": 3.309442548350398, "grad_norm": 1.3029585003828135, "learning_rate": 3.207379021504756e-07, "loss": 0.0201, "step": 14545 }, { "epoch": 3.30967007963595, "grad_norm": 1.5402932157334908, "learning_rate": 3.206598566374332e-07, "loss": 0.0232, "step": 14546 }, { "epoch": 3.3098976109215017, "grad_norm": 1.3517500703928838, "learning_rate": 3.205818173445456e-07, "loss": 0.04, "step": 14547 }, { "epoch": 3.3101251422070535, "grad_norm": 1.804780339003789, "learning_rate": 3.2050378427340816e-07, "loss": 0.0641, "step": 14548 }, { "epoch": 3.310352673492605, "grad_norm": 3.361196175636947, "learning_rate": 3.2042575742561604e-07, "loss": 0.0373, "step": 14549 }, { "epoch": 3.310580204778157, "grad_norm": 1.36051694471996, "learning_rate": 3.2034773680276327e-07, "loss": 0.0135, "step": 14550 }, { "epoch": 3.3108077360637087, "grad_norm": 4.680298425724318, "learning_rate": 3.202697224064451e-07, "loss": 0.0486, "step": 14551 }, { "epoch": 3.3110352673492605, "grad_norm": 3.024806335708162, "learning_rate": 3.2019171423825547e-07, "loss": 0.0534, "step": 14552 }, { "epoch": 3.311262798634812, "grad_norm": 1.0958910633155812, "learning_rate": 3.2011371229978925e-07, "loss": 0.022, "step": 14553 }, { "epoch": 3.311490329920364, "grad_norm": 1.3563700568933843, "learning_rate": 3.200357165926402e-07, "loss": 0.008, "step": 14554 }, { "epoch": 3.3117178612059157, "grad_norm": 1.916880656911069, "learning_rate": 3.199577271184025e-07, "loss": 0.0172, "step": 14555 }, { "epoch": 3.3119453924914675, "grad_norm": 1.9961555775098683, "learning_rate": 3.1987974387867037e-07, "loss": 0.0186, "step": 14556 }, { "epoch": 3.312172923777019, "grad_norm": 1.5555621813549403, "learning_rate": 3.198017668750374e-07, "loss": 0.0791, "step": 14557 }, { "epoch": 3.312400455062571, "grad_norm": 1.3646783963561209, "learning_rate": 3.1972379610909743e-07, "loss": 0.0601, "step": 14558 }, { "epoch": 3.3126279863481227, "grad_norm": 1.1714857396127356, "learning_rate": 3.1964583158244383e-07, "loss": 0.0224, "step": 14559 }, { "epoch": 3.3128555176336745, "grad_norm": 1.1644463200064878, "learning_rate": 3.195678732966705e-07, "loss": 0.0126, "step": 14560 }, { "epoch": 3.3130830489192262, "grad_norm": 1.286595573685147, "learning_rate": 3.1948992125337015e-07, "loss": 0.065, "step": 14561 }, { "epoch": 3.313310580204778, "grad_norm": 0.9134086342123098, "learning_rate": 3.1941197545413633e-07, "loss": 0.0524, "step": 14562 }, { "epoch": 3.3135381114903297, "grad_norm": 1.1859801046130867, "learning_rate": 3.1933403590056227e-07, "loss": 0.0953, "step": 14563 }, { "epoch": 3.3137656427758815, "grad_norm": 1.7403772173648193, "learning_rate": 3.1925610259424047e-07, "loss": 0.0177, "step": 14564 }, { "epoch": 3.3139931740614337, "grad_norm": 1.5502000594379817, "learning_rate": 3.191781755367643e-07, "loss": 0.0133, "step": 14565 }, { "epoch": 3.314220705346985, "grad_norm": 1.1107409451462966, "learning_rate": 3.191002547297259e-07, "loss": 0.0037, "step": 14566 }, { "epoch": 3.314448236632537, "grad_norm": 1.7833876602409031, "learning_rate": 3.190223401747182e-07, "loss": 0.0353, "step": 14567 }, { "epoch": 3.3146757679180885, "grad_norm": 0.9274479326482576, "learning_rate": 3.189444318733333e-07, "loss": 0.0648, "step": 14568 }, { "epoch": 3.3149032992036407, "grad_norm": 1.4682553719589237, "learning_rate": 3.188665298271638e-07, "loss": 0.0095, "step": 14569 }, { "epoch": 3.3151308304891924, "grad_norm": 1.5614177892677912, "learning_rate": 3.1878863403780184e-07, "loss": 0.1074, "step": 14570 }, { "epoch": 3.315358361774744, "grad_norm": 1.1614237199232365, "learning_rate": 3.187107445068393e-07, "loss": 0.0291, "step": 14571 }, { "epoch": 3.315585893060296, "grad_norm": 1.333845135966056, "learning_rate": 3.1863286123586844e-07, "loss": 0.0703, "step": 14572 }, { "epoch": 3.3158134243458477, "grad_norm": 1.4071747366187026, "learning_rate": 3.185549842264805e-07, "loss": 0.0189, "step": 14573 }, { "epoch": 3.3160409556313994, "grad_norm": 2.35310187868844, "learning_rate": 3.184771134802675e-07, "loss": 0.0262, "step": 14574 }, { "epoch": 3.316268486916951, "grad_norm": 1.2711163204673857, "learning_rate": 3.1839924899882106e-07, "loss": 0.006, "step": 14575 }, { "epoch": 3.316496018202503, "grad_norm": 0.919246689927728, "learning_rate": 3.1832139078373234e-07, "loss": 0.0215, "step": 14576 }, { "epoch": 3.3167235494880547, "grad_norm": 1.2100623072686236, "learning_rate": 3.18243538836593e-07, "loss": 0.0404, "step": 14577 }, { "epoch": 3.3169510807736065, "grad_norm": 2.499173838359781, "learning_rate": 3.1816569315899353e-07, "loss": 0.0243, "step": 14578 }, { "epoch": 3.317178612059158, "grad_norm": 1.4706855236263612, "learning_rate": 3.1808785375252573e-07, "loss": 0.0341, "step": 14579 }, { "epoch": 3.31740614334471, "grad_norm": 2.0885915004135276, "learning_rate": 3.1801002061877985e-07, "loss": 0.0464, "step": 14580 }, { "epoch": 3.3176336746302617, "grad_norm": 1.111303265714185, "learning_rate": 3.1793219375934693e-07, "loss": 0.035, "step": 14581 }, { "epoch": 3.3178612059158135, "grad_norm": 1.5343177644797608, "learning_rate": 3.178543731758178e-07, "loss": 0.0585, "step": 14582 }, { "epoch": 3.318088737201365, "grad_norm": 1.3168337869886175, "learning_rate": 3.177765588697826e-07, "loss": 0.0231, "step": 14583 }, { "epoch": 3.318316268486917, "grad_norm": 0.8600945946691892, "learning_rate": 3.1769875084283205e-07, "loss": 0.0341, "step": 14584 }, { "epoch": 3.3185437997724687, "grad_norm": 1.8343688171033268, "learning_rate": 3.17620949096556e-07, "loss": 0.0303, "step": 14585 }, { "epoch": 3.3187713310580205, "grad_norm": 1.694979319188401, "learning_rate": 3.175431536325451e-07, "loss": 0.0356, "step": 14586 }, { "epoch": 3.318998862343572, "grad_norm": 1.6795507413622859, "learning_rate": 3.174653644523888e-07, "loss": 0.0121, "step": 14587 }, { "epoch": 3.319226393629124, "grad_norm": 1.2916748369768642, "learning_rate": 3.1738758155767725e-07, "loss": 0.0304, "step": 14588 }, { "epoch": 3.3194539249146757, "grad_norm": 1.1911085267421697, "learning_rate": 3.1730980495000034e-07, "loss": 0.0655, "step": 14589 }, { "epoch": 3.3196814562002275, "grad_norm": 1.452097216546665, "learning_rate": 3.172320346309474e-07, "loss": 0.0148, "step": 14590 }, { "epoch": 3.3199089874857792, "grad_norm": 1.4043907367480253, "learning_rate": 3.171542706021081e-07, "loss": 0.0689, "step": 14591 }, { "epoch": 3.320136518771331, "grad_norm": 2.056570375627022, "learning_rate": 3.1707651286507155e-07, "loss": 0.0662, "step": 14592 }, { "epoch": 3.3203640500568827, "grad_norm": 1.6792483507321443, "learning_rate": 3.1699876142142737e-07, "loss": 0.0833, "step": 14593 }, { "epoch": 3.3205915813424345, "grad_norm": 2.990368511671242, "learning_rate": 3.1692101627276424e-07, "loss": 0.0131, "step": 14594 }, { "epoch": 3.3208191126279862, "grad_norm": 1.6047488261006135, "learning_rate": 3.1684327742067124e-07, "loss": 0.0159, "step": 14595 }, { "epoch": 3.321046643913538, "grad_norm": 1.0350041021359966, "learning_rate": 3.1676554486673756e-07, "loss": 0.0165, "step": 14596 }, { "epoch": 3.3212741751990897, "grad_norm": 0.9882096958325985, "learning_rate": 3.1668781861255145e-07, "loss": 0.0364, "step": 14597 }, { "epoch": 3.3215017064846415, "grad_norm": 1.7998100796840073, "learning_rate": 3.1661009865970183e-07, "loss": 0.0792, "step": 14598 }, { "epoch": 3.3217292377701932, "grad_norm": 1.267000281925239, "learning_rate": 3.1653238500977705e-07, "loss": 0.0214, "step": 14599 }, { "epoch": 3.321956769055745, "grad_norm": 1.3087839719475194, "learning_rate": 3.164546776643651e-07, "loss": 0.0226, "step": 14600 }, { "epoch": 3.3221843003412967, "grad_norm": 2.0289046884833075, "learning_rate": 3.1637697662505454e-07, "loss": 0.0269, "step": 14601 }, { "epoch": 3.3224118316268485, "grad_norm": 0.6334179622093955, "learning_rate": 3.162992818934334e-07, "loss": 0.002, "step": 14602 }, { "epoch": 3.3226393629124003, "grad_norm": 0.712842002598528, "learning_rate": 3.1622159347108967e-07, "loss": 0.0278, "step": 14603 }, { "epoch": 3.3228668941979524, "grad_norm": 1.436381863630472, "learning_rate": 3.16143911359611e-07, "loss": 0.0982, "step": 14604 }, { "epoch": 3.3230944254835038, "grad_norm": 1.8104692849890829, "learning_rate": 3.160662355605852e-07, "loss": 0.1056, "step": 14605 }, { "epoch": 3.323321956769056, "grad_norm": 1.9705885776874246, "learning_rate": 3.1598856607559986e-07, "loss": 0.0096, "step": 14606 }, { "epoch": 3.3235494880546073, "grad_norm": 1.806839913918426, "learning_rate": 3.159109029062421e-07, "loss": 0.0824, "step": 14607 }, { "epoch": 3.3237770193401595, "grad_norm": 6.696243270043342, "learning_rate": 3.1583324605409927e-07, "loss": 0.022, "step": 14608 }, { "epoch": 3.324004550625711, "grad_norm": 1.2112119419770353, "learning_rate": 3.1575559552075885e-07, "loss": 0.0457, "step": 14609 }, { "epoch": 3.324232081911263, "grad_norm": 1.4621640495068626, "learning_rate": 3.1567795130780787e-07, "loss": 0.0219, "step": 14610 }, { "epoch": 3.3244596131968147, "grad_norm": 1.3285265006984184, "learning_rate": 3.156003134168328e-07, "loss": 0.0583, "step": 14611 }, { "epoch": 3.3246871444823665, "grad_norm": 8.2759771689802, "learning_rate": 3.1552268184942094e-07, "loss": 0.0284, "step": 14612 }, { "epoch": 3.324914675767918, "grad_norm": 1.1464381857380532, "learning_rate": 3.154450566071587e-07, "loss": 0.0151, "step": 14613 }, { "epoch": 3.32514220705347, "grad_norm": 1.4398153775720661, "learning_rate": 3.153674376916323e-07, "loss": 0.0603, "step": 14614 }, { "epoch": 3.3253697383390217, "grad_norm": 1.1820132626210968, "learning_rate": 3.152898251044285e-07, "loss": 0.0121, "step": 14615 }, { "epoch": 3.3255972696245735, "grad_norm": 1.1856668124359406, "learning_rate": 3.1521221884713335e-07, "loss": 0.0119, "step": 14616 }, { "epoch": 3.3258248009101252, "grad_norm": 0.9674131569241703, "learning_rate": 3.151346189213334e-07, "loss": 0.0154, "step": 14617 }, { "epoch": 3.326052332195677, "grad_norm": 2.309062300732699, "learning_rate": 3.1505702532861434e-07, "loss": 0.05, "step": 14618 }, { "epoch": 3.3262798634812287, "grad_norm": 0.4961570483065019, "learning_rate": 3.149794380705618e-07, "loss": 0.0022, "step": 14619 }, { "epoch": 3.3265073947667805, "grad_norm": 2.3709958511462688, "learning_rate": 3.149018571487621e-07, "loss": 0.0899, "step": 14620 }, { "epoch": 3.3267349260523322, "grad_norm": 0.7185618524867428, "learning_rate": 3.148242825648001e-07, "loss": 0.0099, "step": 14621 }, { "epoch": 3.326962457337884, "grad_norm": 1.012718827560525, "learning_rate": 3.147467143202619e-07, "loss": 0.0419, "step": 14622 }, { "epoch": 3.3271899886234357, "grad_norm": 2.275925085233542, "learning_rate": 3.146691524167325e-07, "loss": 0.0353, "step": 14623 }, { "epoch": 3.3274175199089875, "grad_norm": 1.4717821469228372, "learning_rate": 3.145915968557976e-07, "loss": 0.0554, "step": 14624 }, { "epoch": 3.3276450511945392, "grad_norm": 2.1726991353464515, "learning_rate": 3.1451404763904193e-07, "loss": 0.0233, "step": 14625 }, { "epoch": 3.327872582480091, "grad_norm": 1.237572767085576, "learning_rate": 3.1443650476805033e-07, "loss": 0.0386, "step": 14626 }, { "epoch": 3.3281001137656427, "grad_norm": 1.481154722188762, "learning_rate": 3.14358968244408e-07, "loss": 0.0587, "step": 14627 }, { "epoch": 3.3283276450511945, "grad_norm": 1.5784166801510915, "learning_rate": 3.1428143806969896e-07, "loss": 0.0253, "step": 14628 }, { "epoch": 3.3285551763367462, "grad_norm": 1.956452371127045, "learning_rate": 3.1420391424550875e-07, "loss": 0.0575, "step": 14629 }, { "epoch": 3.328782707622298, "grad_norm": 4.1153614392114815, "learning_rate": 3.141263967734211e-07, "loss": 0.0731, "step": 14630 }, { "epoch": 3.3290102389078498, "grad_norm": 2.4915288978445576, "learning_rate": 3.1404888565502086e-07, "loss": 0.0446, "step": 14631 }, { "epoch": 3.3292377701934015, "grad_norm": 1.9277598406523992, "learning_rate": 3.139713808918918e-07, "loss": 0.092, "step": 14632 }, { "epoch": 3.3294653014789533, "grad_norm": 1.7776678669349442, "learning_rate": 3.1389388248561787e-07, "loss": 0.0718, "step": 14633 }, { "epoch": 3.329692832764505, "grad_norm": 1.345347576841499, "learning_rate": 3.1381639043778334e-07, "loss": 0.0228, "step": 14634 }, { "epoch": 3.3299203640500568, "grad_norm": 1.5356010699005433, "learning_rate": 3.1373890474997176e-07, "loss": 0.1027, "step": 14635 }, { "epoch": 3.3301478953356085, "grad_norm": 1.2575061751482728, "learning_rate": 3.136614254237673e-07, "loss": 0.0281, "step": 14636 }, { "epoch": 3.3303754266211603, "grad_norm": 0.7160439382181919, "learning_rate": 3.1358395246075296e-07, "loss": 0.0057, "step": 14637 }, { "epoch": 3.330602957906712, "grad_norm": 1.2042908755158956, "learning_rate": 3.135064858625121e-07, "loss": 0.0623, "step": 14638 }, { "epoch": 3.3308304891922638, "grad_norm": 2.100497623049278, "learning_rate": 3.134290256306285e-07, "loss": 0.0614, "step": 14639 }, { "epoch": 3.3310580204778155, "grad_norm": 1.6083549308595473, "learning_rate": 3.133515717666847e-07, "loss": 0.0176, "step": 14640 }, { "epoch": 3.3312855517633673, "grad_norm": 0.8816046740317734, "learning_rate": 3.13274124272264e-07, "loss": 0.006, "step": 14641 }, { "epoch": 3.331513083048919, "grad_norm": 1.6616179946620417, "learning_rate": 3.1319668314894943e-07, "loss": 0.0234, "step": 14642 }, { "epoch": 3.331740614334471, "grad_norm": 1.6076319421408434, "learning_rate": 3.131192483983237e-07, "loss": 0.0098, "step": 14643 }, { "epoch": 3.3319681456200225, "grad_norm": 1.0367967708556964, "learning_rate": 3.130418200219694e-07, "loss": 0.0202, "step": 14644 }, { "epoch": 3.3321956769055747, "grad_norm": 0.9505634378510246, "learning_rate": 3.129643980214687e-07, "loss": 0.0177, "step": 14645 }, { "epoch": 3.3324232081911265, "grad_norm": 2.0073494087332944, "learning_rate": 3.128869823984046e-07, "loss": 0.0807, "step": 14646 }, { "epoch": 3.3326507394766782, "grad_norm": 1.7767894845394268, "learning_rate": 3.1280957315435857e-07, "loss": 0.0518, "step": 14647 }, { "epoch": 3.33287827076223, "grad_norm": 1.160297411806398, "learning_rate": 3.127321702909132e-07, "loss": 0.0239, "step": 14648 }, { "epoch": 3.3331058020477817, "grad_norm": 1.7370001963246793, "learning_rate": 3.1265477380965067e-07, "loss": 0.0503, "step": 14649 }, { "epoch": 3.3333333333333335, "grad_norm": 1.362089599433178, "learning_rate": 3.125773837121522e-07, "loss": 0.0156, "step": 14650 }, { "epoch": 3.3335608646188852, "grad_norm": 1.2855289361919833, "learning_rate": 3.125000000000002e-07, "loss": 0.0193, "step": 14651 }, { "epoch": 3.333788395904437, "grad_norm": 1.3011230438929162, "learning_rate": 3.124226226747755e-07, "loss": 0.0609, "step": 14652 }, { "epoch": 3.3340159271899887, "grad_norm": 1.4281691035742545, "learning_rate": 3.123452517380602e-07, "loss": 0.0144, "step": 14653 }, { "epoch": 3.3342434584755405, "grad_norm": 1.185671602971906, "learning_rate": 3.1226788719143515e-07, "loss": 0.0306, "step": 14654 }, { "epoch": 3.3344709897610922, "grad_norm": 1.1272181058104853, "learning_rate": 3.1219052903648177e-07, "loss": 0.0469, "step": 14655 }, { "epoch": 3.334698521046644, "grad_norm": 1.972368367701384, "learning_rate": 3.121131772747813e-07, "loss": 0.0261, "step": 14656 }, { "epoch": 3.3349260523321957, "grad_norm": 1.7345728824634954, "learning_rate": 3.120358319079142e-07, "loss": 0.0565, "step": 14657 }, { "epoch": 3.3351535836177475, "grad_norm": 1.6629047338679537, "learning_rate": 3.119584929374618e-07, "loss": 0.0633, "step": 14658 }, { "epoch": 3.3353811149032992, "grad_norm": 2.7148069669815142, "learning_rate": 3.1188116036500427e-07, "loss": 0.0777, "step": 14659 }, { "epoch": 3.335608646188851, "grad_norm": 1.6565106986982103, "learning_rate": 3.1180383419212254e-07, "loss": 0.0879, "step": 14660 }, { "epoch": 3.3358361774744028, "grad_norm": 1.7675989033836474, "learning_rate": 3.1172651442039665e-07, "loss": 0.0255, "step": 14661 }, { "epoch": 3.3360637087599545, "grad_norm": 1.4577175293117388, "learning_rate": 3.116492010514071e-07, "loss": 0.0158, "step": 14662 }, { "epoch": 3.3362912400455063, "grad_norm": 0.9893647605958169, "learning_rate": 3.115718940867342e-07, "loss": 0.011, "step": 14663 }, { "epoch": 3.336518771331058, "grad_norm": 0.795595688370377, "learning_rate": 3.1149459352795757e-07, "loss": 0.0171, "step": 14664 }, { "epoch": 3.3367463026166098, "grad_norm": 0.5764425225670202, "learning_rate": 3.114172993766575e-07, "loss": 0.0163, "step": 14665 }, { "epoch": 3.3369738339021615, "grad_norm": 1.2387707754059423, "learning_rate": 3.1134001163441325e-07, "loss": 0.0207, "step": 14666 }, { "epoch": 3.3372013651877133, "grad_norm": 2.0402196041466922, "learning_rate": 3.1126273030280507e-07, "loss": 0.02, "step": 14667 }, { "epoch": 3.337428896473265, "grad_norm": 1.737328239230783, "learning_rate": 3.111854553834118e-07, "loss": 0.0277, "step": 14668 }, { "epoch": 3.3376564277588168, "grad_norm": 1.5336053461803216, "learning_rate": 3.1110818687781314e-07, "loss": 0.1019, "step": 14669 }, { "epoch": 3.3378839590443685, "grad_norm": 1.0032156010113102, "learning_rate": 3.1103092478758846e-07, "loss": 0.0536, "step": 14670 }, { "epoch": 3.3381114903299203, "grad_norm": 1.788726434388919, "learning_rate": 3.109536691143164e-07, "loss": 0.0567, "step": 14671 }, { "epoch": 3.338339021615472, "grad_norm": 1.8692679171862747, "learning_rate": 3.108764198595765e-07, "loss": 0.0248, "step": 14672 }, { "epoch": 3.3385665529010238, "grad_norm": 1.7108296107654786, "learning_rate": 3.10799177024947e-07, "loss": 0.0755, "step": 14673 }, { "epoch": 3.3387940841865755, "grad_norm": 1.114904349441082, "learning_rate": 3.107219406120072e-07, "loss": 0.0062, "step": 14674 }, { "epoch": 3.3390216154721273, "grad_norm": 1.3266263496680128, "learning_rate": 3.106447106223351e-07, "loss": 0.0178, "step": 14675 }, { "epoch": 3.339249146757679, "grad_norm": 1.8474139788129285, "learning_rate": 3.1056748705750935e-07, "loss": 0.0808, "step": 14676 }, { "epoch": 3.339476678043231, "grad_norm": 1.4625900671650427, "learning_rate": 3.1049026991910856e-07, "loss": 0.0321, "step": 14677 }, { "epoch": 3.3397042093287825, "grad_norm": 3.7406110035679534, "learning_rate": 3.1041305920871047e-07, "loss": 0.0367, "step": 14678 }, { "epoch": 3.3399317406143343, "grad_norm": 2.2421865081910153, "learning_rate": 3.1033585492789347e-07, "loss": 0.0868, "step": 14679 }, { "epoch": 3.3401592718998865, "grad_norm": 0.7779855559395559, "learning_rate": 3.102586570782351e-07, "loss": 0.007, "step": 14680 }, { "epoch": 3.340386803185438, "grad_norm": 1.0114066069459813, "learning_rate": 3.101814656613136e-07, "loss": 0.0713, "step": 14681 }, { "epoch": 3.34061433447099, "grad_norm": 1.4871743897898062, "learning_rate": 3.101042806787062e-07, "loss": 0.1018, "step": 14682 }, { "epoch": 3.3408418657565413, "grad_norm": 0.8958661961123583, "learning_rate": 3.1002710213199055e-07, "loss": 0.0103, "step": 14683 }, { "epoch": 3.3410693970420935, "grad_norm": 1.6553015555237978, "learning_rate": 3.099499300227443e-07, "loss": 0.142, "step": 14684 }, { "epoch": 3.3412969283276452, "grad_norm": 1.429670645800284, "learning_rate": 3.0987276435254435e-07, "loss": 0.0294, "step": 14685 }, { "epoch": 3.341524459613197, "grad_norm": 2.1461712097957806, "learning_rate": 3.097956051229681e-07, "loss": 0.0375, "step": 14686 }, { "epoch": 3.3417519908987487, "grad_norm": 1.1309455766695722, "learning_rate": 3.097184523355925e-07, "loss": 0.0172, "step": 14687 }, { "epoch": 3.3419795221843005, "grad_norm": 1.022857579156663, "learning_rate": 3.09641305991994e-07, "loss": 0.012, "step": 14688 }, { "epoch": 3.3422070534698523, "grad_norm": 1.2060710874305718, "learning_rate": 3.095641660937497e-07, "loss": 0.0745, "step": 14689 }, { "epoch": 3.342434584755404, "grad_norm": 1.9037769845682486, "learning_rate": 3.0948703264243614e-07, "loss": 0.0644, "step": 14690 }, { "epoch": 3.3426621160409558, "grad_norm": 0.6843018093278181, "learning_rate": 3.0940990563963004e-07, "loss": 0.0026, "step": 14691 }, { "epoch": 3.3428896473265075, "grad_norm": 3.453036039441402, "learning_rate": 3.0933278508690724e-07, "loss": 0.0183, "step": 14692 }, { "epoch": 3.3431171786120593, "grad_norm": 0.9814776570902782, "learning_rate": 3.092556709858444e-07, "loss": 0.0127, "step": 14693 }, { "epoch": 3.343344709897611, "grad_norm": 1.0831275961012594, "learning_rate": 3.0917856333801744e-07, "loss": 0.0783, "step": 14694 }, { "epoch": 3.3435722411831628, "grad_norm": 1.3051348430607752, "learning_rate": 3.091014621450018e-07, "loss": 0.0066, "step": 14695 }, { "epoch": 3.3437997724687145, "grad_norm": 1.635191868133217, "learning_rate": 3.090243674083742e-07, "loss": 0.1142, "step": 14696 }, { "epoch": 3.3440273037542663, "grad_norm": 0.9242528468250679, "learning_rate": 3.0894727912970954e-07, "loss": 0.0075, "step": 14697 }, { "epoch": 3.344254835039818, "grad_norm": 1.6441391255602655, "learning_rate": 3.0887019731058397e-07, "loss": 0.0389, "step": 14698 }, { "epoch": 3.3444823663253698, "grad_norm": 1.14139203894365, "learning_rate": 3.0879312195257235e-07, "loss": 0.0335, "step": 14699 }, { "epoch": 3.3447098976109215, "grad_norm": 1.425371802352277, "learning_rate": 3.087160530572505e-07, "loss": 0.1265, "step": 14700 }, { "epoch": 3.3449374288964733, "grad_norm": 2.654946205010141, "learning_rate": 3.0863899062619296e-07, "loss": 0.071, "step": 14701 }, { "epoch": 3.345164960182025, "grad_norm": 2.3743351649714333, "learning_rate": 3.085619346609751e-07, "loss": 0.0991, "step": 14702 }, { "epoch": 3.345392491467577, "grad_norm": 1.4676692390435837, "learning_rate": 3.08484885163172e-07, "loss": 0.0548, "step": 14703 }, { "epoch": 3.3456200227531285, "grad_norm": 1.94458169107466, "learning_rate": 3.084078421343579e-07, "loss": 0.0571, "step": 14704 }, { "epoch": 3.3458475540386803, "grad_norm": 1.3721682177364642, "learning_rate": 3.08330805576108e-07, "loss": 0.1214, "step": 14705 }, { "epoch": 3.346075085324232, "grad_norm": 2.227971359804579, "learning_rate": 3.082537754899964e-07, "loss": 0.0255, "step": 14706 }, { "epoch": 3.346302616609784, "grad_norm": 0.6336166574450531, "learning_rate": 3.0817675187759735e-07, "loss": 0.0021, "step": 14707 }, { "epoch": 3.3465301478953355, "grad_norm": 1.3966680049601294, "learning_rate": 3.080997347404853e-07, "loss": 0.0441, "step": 14708 }, { "epoch": 3.3467576791808873, "grad_norm": 1.119770808390108, "learning_rate": 3.080227240802342e-07, "loss": 0.0218, "step": 14709 }, { "epoch": 3.346985210466439, "grad_norm": 1.4823471845289218, "learning_rate": 3.0794571989841845e-07, "loss": 0.0234, "step": 14710 }, { "epoch": 3.347212741751991, "grad_norm": 6.653780431726855, "learning_rate": 3.0786872219661116e-07, "loss": 0.0323, "step": 14711 }, { "epoch": 3.3474402730375425, "grad_norm": 1.099646286126563, "learning_rate": 3.077917309763868e-07, "loss": 0.0139, "step": 14712 }, { "epoch": 3.3476678043230943, "grad_norm": 1.1465489379010443, "learning_rate": 3.077147462393184e-07, "loss": 0.0162, "step": 14713 }, { "epoch": 3.347895335608646, "grad_norm": 0.6263266851692418, "learning_rate": 3.076377679869793e-07, "loss": 0.0063, "step": 14714 }, { "epoch": 3.348122866894198, "grad_norm": 0.9447798023446894, "learning_rate": 3.0756079622094294e-07, "loss": 0.0072, "step": 14715 }, { "epoch": 3.3483503981797496, "grad_norm": 1.7589540237181653, "learning_rate": 3.074838309427826e-07, "loss": 0.1188, "step": 14716 }, { "epoch": 3.3485779294653013, "grad_norm": 1.6875885606212098, "learning_rate": 3.074068721540715e-07, "loss": 0.0111, "step": 14717 }, { "epoch": 3.348805460750853, "grad_norm": 1.2118476220881194, "learning_rate": 3.07329919856382e-07, "loss": 0.034, "step": 14718 }, { "epoch": 3.3490329920364053, "grad_norm": 2.565500333135366, "learning_rate": 3.0725297405128746e-07, "loss": 0.1074, "step": 14719 }, { "epoch": 3.3492605233219566, "grad_norm": 0.9141593240328452, "learning_rate": 3.071760347403602e-07, "loss": 0.0277, "step": 14720 }, { "epoch": 3.3494880546075088, "grad_norm": 3.2738694165594544, "learning_rate": 3.070991019251724e-07, "loss": 0.0303, "step": 14721 }, { "epoch": 3.34971558589306, "grad_norm": 0.961059834843016, "learning_rate": 3.0702217560729685e-07, "loss": 0.0405, "step": 14722 }, { "epoch": 3.3499431171786123, "grad_norm": 1.141188575332633, "learning_rate": 3.0694525578830564e-07, "loss": 0.0413, "step": 14723 }, { "epoch": 3.350170648464164, "grad_norm": 1.160830694842454, "learning_rate": 3.068683424697711e-07, "loss": 0.025, "step": 14724 }, { "epoch": 3.3503981797497158, "grad_norm": 1.4771050779840158, "learning_rate": 3.0679143565326503e-07, "loss": 0.0914, "step": 14725 }, { "epoch": 3.3506257110352675, "grad_norm": 0.7876564820571798, "learning_rate": 3.06714535340359e-07, "loss": 0.0343, "step": 14726 }, { "epoch": 3.3508532423208193, "grad_norm": 0.7937501820732329, "learning_rate": 3.0663764153262524e-07, "loss": 0.0098, "step": 14727 }, { "epoch": 3.351080773606371, "grad_norm": 1.6666896096653052, "learning_rate": 3.065607542316348e-07, "loss": 0.0679, "step": 14728 }, { "epoch": 3.3513083048919228, "grad_norm": 3.4186323948411697, "learning_rate": 3.0648387343895936e-07, "loss": 0.0153, "step": 14729 }, { "epoch": 3.3515358361774745, "grad_norm": 1.6053209129268575, "learning_rate": 3.0640699915617013e-07, "loss": 0.0777, "step": 14730 }, { "epoch": 3.3517633674630263, "grad_norm": 1.5653214005727312, "learning_rate": 3.063301313848387e-07, "loss": 0.0198, "step": 14731 }, { "epoch": 3.351990898748578, "grad_norm": 2.2425098044360783, "learning_rate": 3.062532701265357e-07, "loss": 0.0662, "step": 14732 }, { "epoch": 3.35221843003413, "grad_norm": 1.2298071511035011, "learning_rate": 3.0617641538283186e-07, "loss": 0.0471, "step": 14733 }, { "epoch": 3.3524459613196815, "grad_norm": 1.4186605253579319, "learning_rate": 3.0609956715529845e-07, "loss": 0.0039, "step": 14734 }, { "epoch": 3.3526734926052333, "grad_norm": 1.031149535747485, "learning_rate": 3.060227254455056e-07, "loss": 0.0176, "step": 14735 }, { "epoch": 3.352901023890785, "grad_norm": 1.1866355355413813, "learning_rate": 3.0594589025502413e-07, "loss": 0.0672, "step": 14736 }, { "epoch": 3.353128555176337, "grad_norm": 1.2636669890935972, "learning_rate": 3.058690615854243e-07, "loss": 0.0142, "step": 14737 }, { "epoch": 3.3533560864618885, "grad_norm": 1.474201788618803, "learning_rate": 3.057922394382766e-07, "loss": 0.0921, "step": 14738 }, { "epoch": 3.3535836177474403, "grad_norm": 2.8898402249917234, "learning_rate": 3.057154238151509e-07, "loss": 0.0624, "step": 14739 }, { "epoch": 3.353811149032992, "grad_norm": 0.9445921565427629, "learning_rate": 3.05638614717617e-07, "loss": 0.0102, "step": 14740 }, { "epoch": 3.354038680318544, "grad_norm": 1.3041720478664252, "learning_rate": 3.055618121472452e-07, "loss": 0.0244, "step": 14741 }, { "epoch": 3.3542662116040955, "grad_norm": 1.1869245764273868, "learning_rate": 3.0548501610560467e-07, "loss": 0.019, "step": 14742 }, { "epoch": 3.3544937428896473, "grad_norm": 9.140264369197789, "learning_rate": 3.0540822659426524e-07, "loss": 0.1306, "step": 14743 }, { "epoch": 3.354721274175199, "grad_norm": 1.57853499328683, "learning_rate": 3.053314436147966e-07, "loss": 0.0912, "step": 14744 }, { "epoch": 3.354948805460751, "grad_norm": 1.0344665644372595, "learning_rate": 3.0525466716876756e-07, "loss": 0.0362, "step": 14745 }, { "epoch": 3.3551763367463026, "grad_norm": 1.183955852235188, "learning_rate": 3.051778972577478e-07, "loss": 0.0101, "step": 14746 }, { "epoch": 3.3554038680318543, "grad_norm": 1.7181631360165388, "learning_rate": 3.051011338833058e-07, "loss": 0.0241, "step": 14747 }, { "epoch": 3.355631399317406, "grad_norm": 1.7481702590705932, "learning_rate": 3.050243770470111e-07, "loss": 0.0498, "step": 14748 }, { "epoch": 3.355858930602958, "grad_norm": 1.6976722339431798, "learning_rate": 3.049476267504317e-07, "loss": 0.0592, "step": 14749 }, { "epoch": 3.3560864618885096, "grad_norm": 1.6184559336850524, "learning_rate": 3.0487088299513684e-07, "loss": 0.0362, "step": 14750 }, { "epoch": 3.3563139931740613, "grad_norm": 1.1552304394558495, "learning_rate": 3.04794145782695e-07, "loss": 0.071, "step": 14751 }, { "epoch": 3.356541524459613, "grad_norm": 1.392900930218477, "learning_rate": 3.0471741511467416e-07, "loss": 0.0197, "step": 14752 }, { "epoch": 3.356769055745165, "grad_norm": 1.4099188381854135, "learning_rate": 3.04640690992643e-07, "loss": 0.0129, "step": 14753 }, { "epoch": 3.3569965870307166, "grad_norm": 1.8558401049893891, "learning_rate": 3.045639734181693e-07, "loss": 0.0104, "step": 14754 }, { "epoch": 3.3572241183162683, "grad_norm": 0.8425429881028806, "learning_rate": 3.0448726239282097e-07, "loss": 0.0175, "step": 14755 }, { "epoch": 3.35745164960182, "grad_norm": 1.5713907387506492, "learning_rate": 3.044105579181663e-07, "loss": 0.008, "step": 14756 }, { "epoch": 3.357679180887372, "grad_norm": 1.3672663380341616, "learning_rate": 3.043338599957725e-07, "loss": 0.0732, "step": 14757 }, { "epoch": 3.357906712172924, "grad_norm": 0.9344697849490872, "learning_rate": 3.042571686272075e-07, "loss": 0.0351, "step": 14758 }, { "epoch": 3.3581342434584753, "grad_norm": 1.2422473221512695, "learning_rate": 3.041804838140384e-07, "loss": 0.0157, "step": 14759 }, { "epoch": 3.3583617747440275, "grad_norm": 1.3784966512475414, "learning_rate": 3.0410380555783283e-07, "loss": 0.0563, "step": 14760 }, { "epoch": 3.358589306029579, "grad_norm": 1.5701260516403008, "learning_rate": 3.0402713386015754e-07, "loss": 0.0756, "step": 14761 }, { "epoch": 3.358816837315131, "grad_norm": 1.286948628392537, "learning_rate": 3.0395046872257985e-07, "loss": 0.0109, "step": 14762 }, { "epoch": 3.359044368600683, "grad_norm": 1.7075117229239813, "learning_rate": 3.0387381014666676e-07, "loss": 0.0257, "step": 14763 }, { "epoch": 3.3592718998862345, "grad_norm": 1.9691466981512107, "learning_rate": 3.037971581339847e-07, "loss": 0.0102, "step": 14764 }, { "epoch": 3.3594994311717863, "grad_norm": 1.2434371735043548, "learning_rate": 3.0372051268610074e-07, "loss": 0.0105, "step": 14765 }, { "epoch": 3.359726962457338, "grad_norm": 0.509548430442267, "learning_rate": 3.036438738045808e-07, "loss": 0.005, "step": 14766 }, { "epoch": 3.35995449374289, "grad_norm": 1.615816972623469, "learning_rate": 3.035672414909918e-07, "loss": 0.0104, "step": 14767 }, { "epoch": 3.3601820250284415, "grad_norm": 1.4161093342722173, "learning_rate": 3.0349061574689955e-07, "loss": 0.0172, "step": 14768 }, { "epoch": 3.3604095563139933, "grad_norm": 1.005666654255352, "learning_rate": 3.034139965738702e-07, "loss": 0.0616, "step": 14769 }, { "epoch": 3.360637087599545, "grad_norm": 1.3117953692989075, "learning_rate": 3.0333738397347003e-07, "loss": 0.0702, "step": 14770 }, { "epoch": 3.360864618885097, "grad_norm": 1.8008050302191312, "learning_rate": 3.0326077794726446e-07, "loss": 0.0809, "step": 14771 }, { "epoch": 3.3610921501706486, "grad_norm": 1.8330296684362792, "learning_rate": 3.031841784968196e-07, "loss": 0.0611, "step": 14772 }, { "epoch": 3.3613196814562003, "grad_norm": 1.4526631233417984, "learning_rate": 3.031075856237005e-07, "loss": 0.0347, "step": 14773 }, { "epoch": 3.361547212741752, "grad_norm": 1.8007304708639449, "learning_rate": 3.0303099932947306e-07, "loss": 0.0637, "step": 14774 }, { "epoch": 3.361774744027304, "grad_norm": 1.2710692758521684, "learning_rate": 3.029544196157021e-07, "loss": 0.0425, "step": 14775 }, { "epoch": 3.3620022753128556, "grad_norm": 2.4365310129506947, "learning_rate": 3.0287784648395295e-07, "loss": 0.0709, "step": 14776 }, { "epoch": 3.3622298065984073, "grad_norm": 1.4481867653157143, "learning_rate": 3.0280127993579095e-07, "loss": 0.0279, "step": 14777 }, { "epoch": 3.362457337883959, "grad_norm": 1.17882327757228, "learning_rate": 3.027247199727805e-07, "loss": 0.0526, "step": 14778 }, { "epoch": 3.362684869169511, "grad_norm": 0.5977882693393903, "learning_rate": 3.026481665964868e-07, "loss": 0.0088, "step": 14779 }, { "epoch": 3.3629124004550626, "grad_norm": 1.8355305001308944, "learning_rate": 3.025716198084739e-07, "loss": 0.0251, "step": 14780 }, { "epoch": 3.3631399317406143, "grad_norm": 1.9387318340584834, "learning_rate": 3.024950796103069e-07, "loss": 0.0196, "step": 14781 }, { "epoch": 3.363367463026166, "grad_norm": 0.5838682034934873, "learning_rate": 3.0241854600354954e-07, "loss": 0.0033, "step": 14782 }, { "epoch": 3.363594994311718, "grad_norm": 2.6022670378838857, "learning_rate": 3.023420189897664e-07, "loss": 0.0168, "step": 14783 }, { "epoch": 3.3638225255972696, "grad_norm": 1.6840409731100994, "learning_rate": 3.022654985705217e-07, "loss": 0.0391, "step": 14784 }, { "epoch": 3.3640500568828213, "grad_norm": 1.2276141963442864, "learning_rate": 3.0218898474737883e-07, "loss": 0.081, "step": 14785 }, { "epoch": 3.364277588168373, "grad_norm": 1.3703827766063412, "learning_rate": 3.021124775219022e-07, "loss": 0.0449, "step": 14786 }, { "epoch": 3.364505119453925, "grad_norm": 1.5068853613947943, "learning_rate": 3.0203597689565495e-07, "loss": 0.0302, "step": 14787 }, { "epoch": 3.3647326507394766, "grad_norm": 1.4581108886737988, "learning_rate": 3.0195948287020106e-07, "loss": 0.0925, "step": 14788 }, { "epoch": 3.3649601820250283, "grad_norm": 1.211762483712993, "learning_rate": 3.018829954471035e-07, "loss": 0.0273, "step": 14789 }, { "epoch": 3.36518771331058, "grad_norm": 1.6482287696592541, "learning_rate": 3.018065146279258e-07, "loss": 0.0343, "step": 14790 }, { "epoch": 3.365415244596132, "grad_norm": 1.0564026583820798, "learning_rate": 3.017300404142312e-07, "loss": 0.0315, "step": 14791 }, { "epoch": 3.3656427758816836, "grad_norm": 1.524836694799892, "learning_rate": 3.016535728075824e-07, "loss": 0.082, "step": 14792 }, { "epoch": 3.3658703071672353, "grad_norm": 1.215090695898506, "learning_rate": 3.015771118095425e-07, "loss": 0.0414, "step": 14793 }, { "epoch": 3.366097838452787, "grad_norm": 2.028212410090114, "learning_rate": 3.0150065742167417e-07, "loss": 0.0587, "step": 14794 }, { "epoch": 3.366325369738339, "grad_norm": 1.2429012611466213, "learning_rate": 3.014242096455397e-07, "loss": 0.0135, "step": 14795 }, { "epoch": 3.3665529010238906, "grad_norm": 0.8330148350645136, "learning_rate": 3.0134776848270183e-07, "loss": 0.0064, "step": 14796 }, { "epoch": 3.366780432309443, "grad_norm": 0.7680326805344982, "learning_rate": 3.012713339347228e-07, "loss": 0.0151, "step": 14797 }, { "epoch": 3.367007963594994, "grad_norm": 1.3994063959822587, "learning_rate": 3.01194906003165e-07, "loss": 0.0353, "step": 14798 }, { "epoch": 3.3672354948805463, "grad_norm": 1.4460377093816255, "learning_rate": 3.0111848468959016e-07, "loss": 0.0237, "step": 14799 }, { "epoch": 3.3674630261660976, "grad_norm": 2.2242168814727683, "learning_rate": 3.0104206999556053e-07, "loss": 0.0162, "step": 14800 }, { "epoch": 3.36769055745165, "grad_norm": 1.305559215214942, "learning_rate": 3.009656619226377e-07, "loss": 0.0201, "step": 14801 }, { "epoch": 3.3679180887372016, "grad_norm": 0.8185289886549934, "learning_rate": 3.0088926047238303e-07, "loss": 0.0127, "step": 14802 }, { "epoch": 3.3681456200227533, "grad_norm": 0.9795959471289694, "learning_rate": 3.0081286564635826e-07, "loss": 0.078, "step": 14803 }, { "epoch": 3.368373151308305, "grad_norm": 1.0867200907798764, "learning_rate": 3.0073647744612485e-07, "loss": 0.0077, "step": 14804 }, { "epoch": 3.368600682593857, "grad_norm": 1.1131993417151782, "learning_rate": 3.0066009587324424e-07, "loss": 0.0581, "step": 14805 }, { "epoch": 3.3688282138794086, "grad_norm": 2.4008889312459814, "learning_rate": 3.0058372092927704e-07, "loss": 0.0307, "step": 14806 }, { "epoch": 3.3690557451649603, "grad_norm": 1.8715124861183843, "learning_rate": 3.0050735261578465e-07, "loss": 0.0555, "step": 14807 }, { "epoch": 3.369283276450512, "grad_norm": 1.732974493851481, "learning_rate": 3.004309909343277e-07, "loss": 0.0169, "step": 14808 }, { "epoch": 3.369510807736064, "grad_norm": 1.8643066643682582, "learning_rate": 3.003546358864666e-07, "loss": 0.0464, "step": 14809 }, { "epoch": 3.3697383390216156, "grad_norm": 2.0106212045778307, "learning_rate": 3.0027828747376217e-07, "loss": 0.0651, "step": 14810 }, { "epoch": 3.3699658703071673, "grad_norm": 2.3838175856683694, "learning_rate": 3.002019456977749e-07, "loss": 0.0312, "step": 14811 }, { "epoch": 3.370193401592719, "grad_norm": 2.882302722918285, "learning_rate": 3.0012561056006526e-07, "loss": 0.0193, "step": 14812 }, { "epoch": 3.370420932878271, "grad_norm": 1.079084551507786, "learning_rate": 3.0004928206219316e-07, "loss": 0.0407, "step": 14813 }, { "epoch": 3.3706484641638226, "grad_norm": 1.5242507736965492, "learning_rate": 2.999729602057183e-07, "loss": 0.0278, "step": 14814 }, { "epoch": 3.3708759954493743, "grad_norm": 1.5045601814494585, "learning_rate": 2.998966449922012e-07, "loss": 0.0781, "step": 14815 }, { "epoch": 3.371103526734926, "grad_norm": 2.4225255048597285, "learning_rate": 2.998203364232008e-07, "loss": 0.0284, "step": 14816 }, { "epoch": 3.371331058020478, "grad_norm": 0.8523914398569103, "learning_rate": 2.9974403450027765e-07, "loss": 0.0562, "step": 14817 }, { "epoch": 3.3715585893060296, "grad_norm": 1.44544676974627, "learning_rate": 2.9966773922499047e-07, "loss": 0.0626, "step": 14818 }, { "epoch": 3.3717861205915813, "grad_norm": 2.5010057862323647, "learning_rate": 2.9959145059889916e-07, "loss": 0.049, "step": 14819 }, { "epoch": 3.372013651877133, "grad_norm": 1.3037462528403896, "learning_rate": 2.9951516862356265e-07, "loss": 0.0503, "step": 14820 }, { "epoch": 3.372241183162685, "grad_norm": 1.0423313981439828, "learning_rate": 2.994388933005397e-07, "loss": 0.0236, "step": 14821 }, { "epoch": 3.3724687144482366, "grad_norm": 1.4037925868824581, "learning_rate": 2.993626246313897e-07, "loss": 0.0703, "step": 14822 }, { "epoch": 3.3726962457337883, "grad_norm": 1.4208843260151565, "learning_rate": 2.9928636261767105e-07, "loss": 0.015, "step": 14823 }, { "epoch": 3.37292377701934, "grad_norm": 1.585167387100322, "learning_rate": 2.99210107260943e-07, "loss": 0.1341, "step": 14824 }, { "epoch": 3.373151308304892, "grad_norm": 1.2400833892292444, "learning_rate": 2.991338585627634e-07, "loss": 0.0185, "step": 14825 }, { "epoch": 3.3733788395904436, "grad_norm": 1.4948232943240138, "learning_rate": 2.9905761652469124e-07, "loss": 0.1259, "step": 14826 }, { "epoch": 3.3736063708759954, "grad_norm": 1.3165046031736303, "learning_rate": 2.989813811482845e-07, "loss": 0.069, "step": 14827 }, { "epoch": 3.373833902161547, "grad_norm": 1.4723393788078045, "learning_rate": 2.989051524351009e-07, "loss": 0.0225, "step": 14828 }, { "epoch": 3.374061433447099, "grad_norm": 2.333486048177497, "learning_rate": 2.9882893038669883e-07, "loss": 0.0979, "step": 14829 }, { "epoch": 3.3742889647326506, "grad_norm": 4.249971504689784, "learning_rate": 2.987527150046361e-07, "loss": 0.0835, "step": 14830 }, { "epoch": 3.3745164960182024, "grad_norm": 1.5738228738020221, "learning_rate": 2.986765062904706e-07, "loss": 0.1116, "step": 14831 }, { "epoch": 3.374744027303754, "grad_norm": 1.0897715438290072, "learning_rate": 2.9860030424575965e-07, "loss": 0.0589, "step": 14832 }, { "epoch": 3.374971558589306, "grad_norm": 2.8019278856521477, "learning_rate": 2.985241088720604e-07, "loss": 0.0216, "step": 14833 }, { "epoch": 3.3751990898748576, "grad_norm": 1.380335103559008, "learning_rate": 2.984479201709308e-07, "loss": 0.0136, "step": 14834 }, { "epoch": 3.3754266211604094, "grad_norm": 2.253554753342801, "learning_rate": 2.9837173814392736e-07, "loss": 0.0275, "step": 14835 }, { "epoch": 3.3756541524459616, "grad_norm": 0.8801802948716918, "learning_rate": 2.982955627926075e-07, "loss": 0.0198, "step": 14836 }, { "epoch": 3.375881683731513, "grad_norm": 1.7257584694169628, "learning_rate": 2.9821939411852787e-07, "loss": 0.011, "step": 14837 }, { "epoch": 3.376109215017065, "grad_norm": 1.2534272163560212, "learning_rate": 2.9814323212324564e-07, "loss": 0.1143, "step": 14838 }, { "epoch": 3.3763367463026164, "grad_norm": 1.0785387726938953, "learning_rate": 2.980670768083172e-07, "loss": 0.0387, "step": 14839 }, { "epoch": 3.3765642775881686, "grad_norm": 1.4228548108529242, "learning_rate": 2.9799092817529867e-07, "loss": 0.0218, "step": 14840 }, { "epoch": 3.3767918088737203, "grad_norm": 2.3483532891655097, "learning_rate": 2.9791478622574693e-07, "loss": 0.0188, "step": 14841 }, { "epoch": 3.377019340159272, "grad_norm": 1.7431980079798415, "learning_rate": 2.978386509612177e-07, "loss": 0.0952, "step": 14842 }, { "epoch": 3.377246871444824, "grad_norm": 0.8299678680801658, "learning_rate": 2.9776252238326725e-07, "loss": 0.0538, "step": 14843 }, { "epoch": 3.3774744027303756, "grad_norm": 1.8691483710017847, "learning_rate": 2.9768640049345157e-07, "loss": 0.0176, "step": 14844 }, { "epoch": 3.3777019340159273, "grad_norm": 1.332105819785803, "learning_rate": 2.9761028529332667e-07, "loss": 0.0796, "step": 14845 }, { "epoch": 3.377929465301479, "grad_norm": 1.916442092621782, "learning_rate": 2.9753417678444785e-07, "loss": 0.0736, "step": 14846 }, { "epoch": 3.378156996587031, "grad_norm": 1.262301692580466, "learning_rate": 2.974580749683706e-07, "loss": 0.0941, "step": 14847 }, { "epoch": 3.3783845278725826, "grad_norm": 0.9783584794552792, "learning_rate": 2.9738197984665063e-07, "loss": 0.0115, "step": 14848 }, { "epoch": 3.3786120591581343, "grad_norm": 2.090595900123147, "learning_rate": 2.9730589142084274e-07, "loss": 0.0312, "step": 14849 }, { "epoch": 3.378839590443686, "grad_norm": 1.4878738524369697, "learning_rate": 2.972298096925023e-07, "loss": 0.0816, "step": 14850 }, { "epoch": 3.379067121729238, "grad_norm": 1.2228400318844936, "learning_rate": 2.971537346631845e-07, "loss": 0.0161, "step": 14851 }, { "epoch": 3.3792946530147896, "grad_norm": 1.41666010756741, "learning_rate": 2.970776663344437e-07, "loss": 0.0359, "step": 14852 }, { "epoch": 3.3795221843003413, "grad_norm": 1.3595156704576326, "learning_rate": 2.9700160470783507e-07, "loss": 0.0547, "step": 14853 }, { "epoch": 3.379749715585893, "grad_norm": 2.2135983154988925, "learning_rate": 2.969255497849127e-07, "loss": 0.0392, "step": 14854 }, { "epoch": 3.379977246871445, "grad_norm": 3.0529420064483523, "learning_rate": 2.968495015672315e-07, "loss": 0.0235, "step": 14855 }, { "epoch": 3.3802047781569966, "grad_norm": 1.37951614943802, "learning_rate": 2.967734600563453e-07, "loss": 0.0177, "step": 14856 }, { "epoch": 3.3804323094425484, "grad_norm": 1.8358739362890417, "learning_rate": 2.966974252538084e-07, "loss": 0.1302, "step": 14857 }, { "epoch": 3.3806598407281, "grad_norm": 1.359544911615277, "learning_rate": 2.966213971611752e-07, "loss": 0.0222, "step": 14858 }, { "epoch": 3.380887372013652, "grad_norm": 1.6076444707649975, "learning_rate": 2.9654537577999897e-07, "loss": 0.0243, "step": 14859 }, { "epoch": 3.3811149032992036, "grad_norm": 1.9186173224187786, "learning_rate": 2.964693611118339e-07, "loss": 0.0178, "step": 14860 }, { "epoch": 3.3813424345847554, "grad_norm": 1.7561485983357892, "learning_rate": 2.9639335315823323e-07, "loss": 0.0748, "step": 14861 }, { "epoch": 3.381569965870307, "grad_norm": 1.511714250856182, "learning_rate": 2.963173519207508e-07, "loss": 0.0589, "step": 14862 }, { "epoch": 3.381797497155859, "grad_norm": 0.6111824291849376, "learning_rate": 2.9624135740093956e-07, "loss": 0.0035, "step": 14863 }, { "epoch": 3.3820250284414106, "grad_norm": 1.5940242665468247, "learning_rate": 2.961653696003529e-07, "loss": 0.0442, "step": 14864 }, { "epoch": 3.3822525597269624, "grad_norm": 1.671152043275416, "learning_rate": 2.9608938852054405e-07, "loss": 0.025, "step": 14865 }, { "epoch": 3.382480091012514, "grad_norm": 1.7125576225227865, "learning_rate": 2.9601341416306545e-07, "loss": 0.0323, "step": 14866 }, { "epoch": 3.382707622298066, "grad_norm": 0.9256041471294837, "learning_rate": 2.9593744652947044e-07, "loss": 0.0102, "step": 14867 }, { "epoch": 3.3829351535836176, "grad_norm": 1.0608187158789284, "learning_rate": 2.958614856213111e-07, "loss": 0.0125, "step": 14868 }, { "epoch": 3.3831626848691694, "grad_norm": 1.7291200553799568, "learning_rate": 2.9578553144014047e-07, "loss": 0.0705, "step": 14869 }, { "epoch": 3.383390216154721, "grad_norm": 1.112255270008984, "learning_rate": 2.957095839875104e-07, "loss": 0.0323, "step": 14870 }, { "epoch": 3.383617747440273, "grad_norm": 0.9473132123376466, "learning_rate": 2.956336432649734e-07, "loss": 0.0085, "step": 14871 }, { "epoch": 3.3838452787258246, "grad_norm": 1.4424789969123815, "learning_rate": 2.955577092740817e-07, "loss": 0.05, "step": 14872 }, { "epoch": 3.3840728100113764, "grad_norm": 1.3450714050912291, "learning_rate": 2.954817820163869e-07, "loss": 0.0438, "step": 14873 }, { "epoch": 3.384300341296928, "grad_norm": 1.8068632727866685, "learning_rate": 2.9540586149344126e-07, "loss": 0.098, "step": 14874 }, { "epoch": 3.3845278725824803, "grad_norm": 1.1330102449338475, "learning_rate": 2.9532994770679585e-07, "loss": 0.0563, "step": 14875 }, { "epoch": 3.3847554038680316, "grad_norm": 1.6296501405031436, "learning_rate": 2.952540406580026e-07, "loss": 0.0284, "step": 14876 }, { "epoch": 3.384982935153584, "grad_norm": 1.5570351252547414, "learning_rate": 2.951781403486131e-07, "loss": 0.0484, "step": 14877 }, { "epoch": 3.385210466439135, "grad_norm": 3.6400293124864698, "learning_rate": 2.951022467801781e-07, "loss": 0.0933, "step": 14878 }, { "epoch": 3.3854379977246873, "grad_norm": 1.4233963584695275, "learning_rate": 2.9502635995424926e-07, "loss": 0.0358, "step": 14879 }, { "epoch": 3.385665529010239, "grad_norm": 0.6688297956273078, "learning_rate": 2.949504798723771e-07, "loss": 0.0157, "step": 14880 }, { "epoch": 3.385893060295791, "grad_norm": 1.351311340363586, "learning_rate": 2.9487460653611285e-07, "loss": 0.0248, "step": 14881 }, { "epoch": 3.3861205915813426, "grad_norm": 2.2586794754589716, "learning_rate": 2.947987399470068e-07, "loss": 0.0442, "step": 14882 }, { "epoch": 3.3863481228668944, "grad_norm": 1.3024969961499355, "learning_rate": 2.947228801066098e-07, "loss": 0.0345, "step": 14883 }, { "epoch": 3.386575654152446, "grad_norm": 1.6756961411373452, "learning_rate": 2.9464702701647254e-07, "loss": 0.0281, "step": 14884 }, { "epoch": 3.386803185437998, "grad_norm": 0.5085723733975758, "learning_rate": 2.9457118067814476e-07, "loss": 0.0034, "step": 14885 }, { "epoch": 3.3870307167235496, "grad_norm": 1.8148955641913396, "learning_rate": 2.944953410931771e-07, "loss": 0.0197, "step": 14886 }, { "epoch": 3.3872582480091014, "grad_norm": 0.994978742279857, "learning_rate": 2.9441950826311917e-07, "loss": 0.0194, "step": 14887 }, { "epoch": 3.387485779294653, "grad_norm": 1.689769710451231, "learning_rate": 2.9434368218952134e-07, "loss": 0.0293, "step": 14888 }, { "epoch": 3.387713310580205, "grad_norm": 0.978578718369039, "learning_rate": 2.9426786287393273e-07, "loss": 0.0533, "step": 14889 }, { "epoch": 3.3879408418657566, "grad_norm": 2.3992506829526414, "learning_rate": 2.941920503179034e-07, "loss": 0.0278, "step": 14890 }, { "epoch": 3.3881683731513084, "grad_norm": 1.582344013624743, "learning_rate": 2.9411624452298294e-07, "loss": 0.0489, "step": 14891 }, { "epoch": 3.38839590443686, "grad_norm": 0.9615719655798494, "learning_rate": 2.9404044549072015e-07, "loss": 0.0415, "step": 14892 }, { "epoch": 3.388623435722412, "grad_norm": 1.8028272180773508, "learning_rate": 2.9396465322266486e-07, "loss": 0.0321, "step": 14893 }, { "epoch": 3.3888509670079636, "grad_norm": 1.3502263496818583, "learning_rate": 2.9388886772036554e-07, "loss": 0.0391, "step": 14894 }, { "epoch": 3.3890784982935154, "grad_norm": 2.0886305301602452, "learning_rate": 2.938130889853715e-07, "loss": 0.0196, "step": 14895 }, { "epoch": 3.389306029579067, "grad_norm": 1.3542788588555996, "learning_rate": 2.9373731701923124e-07, "loss": 0.1004, "step": 14896 }, { "epoch": 3.389533560864619, "grad_norm": 1.4008626806281974, "learning_rate": 2.936615518234935e-07, "loss": 0.016, "step": 14897 }, { "epoch": 3.3897610921501706, "grad_norm": 1.3516916316938041, "learning_rate": 2.935857933997071e-07, "loss": 0.0213, "step": 14898 }, { "epoch": 3.3899886234357224, "grad_norm": 1.4977063104547874, "learning_rate": 2.935100417494199e-07, "loss": 0.046, "step": 14899 }, { "epoch": 3.390216154721274, "grad_norm": 1.1342627573455877, "learning_rate": 2.9343429687418053e-07, "loss": 0.0153, "step": 14900 }, { "epoch": 3.390443686006826, "grad_norm": 1.477932759720769, "learning_rate": 2.9335855877553695e-07, "loss": 0.0499, "step": 14901 }, { "epoch": 3.3906712172923776, "grad_norm": 0.9607495064575746, "learning_rate": 2.932828274550367e-07, "loss": 0.0276, "step": 14902 }, { "epoch": 3.3908987485779294, "grad_norm": 1.1532994068441995, "learning_rate": 2.932071029142281e-07, "loss": 0.1053, "step": 14903 }, { "epoch": 3.391126279863481, "grad_norm": 0.6973759160558909, "learning_rate": 2.931313851546586e-07, "loss": 0.0062, "step": 14904 }, { "epoch": 3.391353811149033, "grad_norm": 1.9728660321601041, "learning_rate": 2.9305567417787605e-07, "loss": 0.0213, "step": 14905 }, { "epoch": 3.3915813424345846, "grad_norm": 0.8513746003065995, "learning_rate": 2.9297996998542734e-07, "loss": 0.0112, "step": 14906 }, { "epoch": 3.3918088737201364, "grad_norm": 0.9652534347741566, "learning_rate": 2.929042725788602e-07, "loss": 0.0103, "step": 14907 }, { "epoch": 3.392036405005688, "grad_norm": 1.1431617412731871, "learning_rate": 2.928285819597215e-07, "loss": 0.0607, "step": 14908 }, { "epoch": 3.39226393629124, "grad_norm": 1.162533937732281, "learning_rate": 2.9275289812955814e-07, "loss": 0.0834, "step": 14909 }, { "epoch": 3.3924914675767917, "grad_norm": 1.856094988649172, "learning_rate": 2.9267722108991697e-07, "loss": 0.1264, "step": 14910 }, { "epoch": 3.3927189988623434, "grad_norm": 2.572299415431304, "learning_rate": 2.926015508423447e-07, "loss": 0.0338, "step": 14911 }, { "epoch": 3.392946530147895, "grad_norm": 0.9742966878708694, "learning_rate": 2.9252588738838837e-07, "loss": 0.0059, "step": 14912 }, { "epoch": 3.393174061433447, "grad_norm": 1.1218394662505338, "learning_rate": 2.9245023072959367e-07, "loss": 0.0289, "step": 14913 }, { "epoch": 3.393401592718999, "grad_norm": 1.8184992342729327, "learning_rate": 2.9237458086750744e-07, "loss": 0.016, "step": 14914 }, { "epoch": 3.3936291240045504, "grad_norm": 1.9235723145904988, "learning_rate": 2.9229893780367566e-07, "loss": 0.0521, "step": 14915 }, { "epoch": 3.3938566552901026, "grad_norm": 1.4770282108271728, "learning_rate": 2.9222330153964396e-07, "loss": 0.0611, "step": 14916 }, { "epoch": 3.394084186575654, "grad_norm": 1.1933866465799607, "learning_rate": 2.9214767207695856e-07, "loss": 0.0191, "step": 14917 }, { "epoch": 3.394311717861206, "grad_norm": 1.4267906133713217, "learning_rate": 2.920720494171652e-07, "loss": 0.0089, "step": 14918 }, { "epoch": 3.394539249146758, "grad_norm": 1.4240831307374182, "learning_rate": 2.919964335618097e-07, "loss": 0.0218, "step": 14919 }, { "epoch": 3.3947667804323096, "grad_norm": 2.138328849638314, "learning_rate": 2.919208245124371e-07, "loss": 0.0349, "step": 14920 }, { "epoch": 3.3949943117178614, "grad_norm": 1.9600129485671367, "learning_rate": 2.9184522227059266e-07, "loss": 0.0195, "step": 14921 }, { "epoch": 3.395221843003413, "grad_norm": 1.2729474314976954, "learning_rate": 2.91769626837822e-07, "loss": 0.0099, "step": 14922 }, { "epoch": 3.395449374288965, "grad_norm": 0.9460230419194392, "learning_rate": 2.916940382156696e-07, "loss": 0.0157, "step": 14923 }, { "epoch": 3.3956769055745166, "grad_norm": 1.179101943033029, "learning_rate": 2.9161845640568063e-07, "loss": 0.0642, "step": 14924 }, { "epoch": 3.3959044368600684, "grad_norm": 1.295801712766819, "learning_rate": 2.915428814093999e-07, "loss": 0.0114, "step": 14925 }, { "epoch": 3.39613196814562, "grad_norm": 1.355099723450223, "learning_rate": 2.914673132283722e-07, "loss": 0.013, "step": 14926 }, { "epoch": 3.396359499431172, "grad_norm": 1.9670543501553797, "learning_rate": 2.913917518641418e-07, "loss": 0.0491, "step": 14927 }, { "epoch": 3.3965870307167236, "grad_norm": 0.6610058460574739, "learning_rate": 2.9131619731825274e-07, "loss": 0.0211, "step": 14928 }, { "epoch": 3.3968145620022754, "grad_norm": 1.0380218372824097, "learning_rate": 2.9124064959224976e-07, "loss": 0.0612, "step": 14929 }, { "epoch": 3.397042093287827, "grad_norm": 1.5099514102968772, "learning_rate": 2.911651086876764e-07, "loss": 0.0463, "step": 14930 }, { "epoch": 3.397269624573379, "grad_norm": 2.195311166897284, "learning_rate": 2.9108957460607683e-07, "loss": 0.0629, "step": 14931 }, { "epoch": 3.3974971558589306, "grad_norm": 1.2640577908447361, "learning_rate": 2.910140473489948e-07, "loss": 0.0218, "step": 14932 }, { "epoch": 3.3977246871444824, "grad_norm": 0.9654815570877894, "learning_rate": 2.909385269179742e-07, "loss": 0.0074, "step": 14933 }, { "epoch": 3.397952218430034, "grad_norm": 1.61079984073111, "learning_rate": 2.908630133145584e-07, "loss": 0.0182, "step": 14934 }, { "epoch": 3.398179749715586, "grad_norm": 0.6518301285487628, "learning_rate": 2.9078750654029037e-07, "loss": 0.0151, "step": 14935 }, { "epoch": 3.3984072810011376, "grad_norm": 1.712090593268371, "learning_rate": 2.9071200659671386e-07, "loss": 0.0216, "step": 14936 }, { "epoch": 3.3986348122866894, "grad_norm": 0.6497952529743519, "learning_rate": 2.906365134853713e-07, "loss": 0.0069, "step": 14937 }, { "epoch": 3.398862343572241, "grad_norm": 1.5240884746879817, "learning_rate": 2.905610272078064e-07, "loss": 0.0498, "step": 14938 }, { "epoch": 3.399089874857793, "grad_norm": 0.7113489485994693, "learning_rate": 2.9048554776556166e-07, "loss": 0.0061, "step": 14939 }, { "epoch": 3.3993174061433447, "grad_norm": 1.4641127483385166, "learning_rate": 2.9041007516017946e-07, "loss": 0.0161, "step": 14940 }, { "epoch": 3.3995449374288964, "grad_norm": 0.5845804608066288, "learning_rate": 2.9033460939320283e-07, "loss": 0.0082, "step": 14941 }, { "epoch": 3.399772468714448, "grad_norm": 1.1754484539799392, "learning_rate": 2.902591504661736e-07, "loss": 0.0294, "step": 14942 }, { "epoch": 3.4, "grad_norm": 1.0362387522131857, "learning_rate": 2.901836983806343e-07, "loss": 0.0252, "step": 14943 }, { "epoch": 3.4002275312855517, "grad_norm": 1.0802771682982424, "learning_rate": 2.90108253138127e-07, "loss": 0.0404, "step": 14944 }, { "epoch": 3.4004550625711034, "grad_norm": 2.4403433646470942, "learning_rate": 2.9003281474019393e-07, "loss": 0.024, "step": 14945 }, { "epoch": 3.400682593856655, "grad_norm": 1.1653822133260074, "learning_rate": 2.8995738318837665e-07, "loss": 0.0138, "step": 14946 }, { "epoch": 3.400910125142207, "grad_norm": 1.5937206773874015, "learning_rate": 2.898819584842166e-07, "loss": 0.0354, "step": 14947 }, { "epoch": 3.4011376564277587, "grad_norm": 1.2654339004401682, "learning_rate": 2.898065406292559e-07, "loss": 0.0281, "step": 14948 }, { "epoch": 3.4013651877133104, "grad_norm": 0.8490360911797463, "learning_rate": 2.8973112962503525e-07, "loss": 0.0218, "step": 14949 }, { "epoch": 3.401592718998862, "grad_norm": 2.056621835623002, "learning_rate": 2.896557254730964e-07, "loss": 0.0581, "step": 14950 }, { "epoch": 3.401820250284414, "grad_norm": 1.3994023961559694, "learning_rate": 2.895803281749804e-07, "loss": 0.0153, "step": 14951 }, { "epoch": 3.4020477815699657, "grad_norm": 1.6386060213611127, "learning_rate": 2.895049377322284e-07, "loss": 0.1077, "step": 14952 }, { "epoch": 3.402275312855518, "grad_norm": 0.7956161343991847, "learning_rate": 2.8942955414638094e-07, "loss": 0.0307, "step": 14953 }, { "epoch": 3.402502844141069, "grad_norm": 1.7178286065813737, "learning_rate": 2.893541774189787e-07, "loss": 0.089, "step": 14954 }, { "epoch": 3.4027303754266214, "grad_norm": 1.1075225419604202, "learning_rate": 2.8927880755156257e-07, "loss": 0.0073, "step": 14955 }, { "epoch": 3.4029579067121727, "grad_norm": 1.6012872198606511, "learning_rate": 2.8920344454567257e-07, "loss": 0.0322, "step": 14956 }, { "epoch": 3.403185437997725, "grad_norm": 1.5809888953331286, "learning_rate": 2.8912808840284913e-07, "loss": 0.025, "step": 14957 }, { "epoch": 3.4034129692832766, "grad_norm": 1.5443909411315084, "learning_rate": 2.890527391246327e-07, "loss": 0.033, "step": 14958 }, { "epoch": 3.4036405005688284, "grad_norm": 0.646984810113513, "learning_rate": 2.8897739671256283e-07, "loss": 0.0111, "step": 14959 }, { "epoch": 3.40386803185438, "grad_norm": 1.2830766717341167, "learning_rate": 2.889020611681798e-07, "loss": 0.1046, "step": 14960 }, { "epoch": 3.404095563139932, "grad_norm": 1.3076628985989138, "learning_rate": 2.888267324930228e-07, "loss": 0.0758, "step": 14961 }, { "epoch": 3.4043230944254836, "grad_norm": 2.1735184397363914, "learning_rate": 2.8875141068863193e-07, "loss": 0.004, "step": 14962 }, { "epoch": 3.4045506257110354, "grad_norm": 1.5094906991257437, "learning_rate": 2.8867609575654625e-07, "loss": 0.0227, "step": 14963 }, { "epoch": 3.404778156996587, "grad_norm": 1.1842515884527327, "learning_rate": 2.8860078769830526e-07, "loss": 0.0087, "step": 14964 }, { "epoch": 3.405005688282139, "grad_norm": 1.8175821650343278, "learning_rate": 2.885254865154483e-07, "loss": 0.0671, "step": 14965 }, { "epoch": 3.4052332195676907, "grad_norm": 21.30110366129306, "learning_rate": 2.8845019220951384e-07, "loss": 0.0616, "step": 14966 }, { "epoch": 3.4054607508532424, "grad_norm": 1.1672325184086567, "learning_rate": 2.8837490478204147e-07, "loss": 0.0375, "step": 14967 }, { "epoch": 3.405688282138794, "grad_norm": 1.4736658520806252, "learning_rate": 2.8829962423456917e-07, "loss": 0.1671, "step": 14968 }, { "epoch": 3.405915813424346, "grad_norm": 1.371671733605225, "learning_rate": 2.882243505686362e-07, "loss": 0.0755, "step": 14969 }, { "epoch": 3.4061433447098977, "grad_norm": 1.107801387790148, "learning_rate": 2.8814908378578056e-07, "loss": 0.0089, "step": 14970 }, { "epoch": 3.4063708759954494, "grad_norm": 1.319330161436145, "learning_rate": 2.8807382388754067e-07, "loss": 0.0622, "step": 14971 }, { "epoch": 3.406598407281001, "grad_norm": 3.0435211943876497, "learning_rate": 2.87998570875455e-07, "loss": 0.0625, "step": 14972 }, { "epoch": 3.406825938566553, "grad_norm": 1.0929319385261955, "learning_rate": 2.8792332475106114e-07, "loss": 0.0094, "step": 14973 }, { "epoch": 3.4070534698521047, "grad_norm": 1.0136330001107445, "learning_rate": 2.8784808551589745e-07, "loss": 0.0065, "step": 14974 }, { "epoch": 3.4072810011376564, "grad_norm": 1.562507370657703, "learning_rate": 2.8777285317150116e-07, "loss": 0.0263, "step": 14975 }, { "epoch": 3.407508532423208, "grad_norm": 2.1567668666001034, "learning_rate": 2.8769762771941037e-07, "loss": 0.09, "step": 14976 }, { "epoch": 3.40773606370876, "grad_norm": 1.561062775986053, "learning_rate": 2.8762240916116213e-07, "loss": 0.0803, "step": 14977 }, { "epoch": 3.4079635949943117, "grad_norm": 2.0049371696604963, "learning_rate": 2.875471974982939e-07, "loss": 0.0573, "step": 14978 }, { "epoch": 3.4081911262798634, "grad_norm": 1.333543141644847, "learning_rate": 2.8747199273234325e-07, "loss": 0.0127, "step": 14979 }, { "epoch": 3.408418657565415, "grad_norm": 1.9157068385176448, "learning_rate": 2.8739679486484664e-07, "loss": 0.11, "step": 14980 }, { "epoch": 3.408646188850967, "grad_norm": 2.8705205273359953, "learning_rate": 2.8732160389734157e-07, "loss": 0.0188, "step": 14981 }, { "epoch": 3.4088737201365187, "grad_norm": 1.151985847699398, "learning_rate": 2.872464198313642e-07, "loss": 0.0198, "step": 14982 }, { "epoch": 3.4091012514220704, "grad_norm": 1.6856223067200773, "learning_rate": 2.871712426684516e-07, "loss": 0.0194, "step": 14983 }, { "epoch": 3.409328782707622, "grad_norm": 2.292079422080405, "learning_rate": 2.8709607241013993e-07, "loss": 0.0854, "step": 14984 }, { "epoch": 3.409556313993174, "grad_norm": 2.014071918587574, "learning_rate": 2.870209090579657e-07, "loss": 0.0658, "step": 14985 }, { "epoch": 3.4097838452787257, "grad_norm": 1.4991464596207364, "learning_rate": 2.869457526134653e-07, "loss": 0.0133, "step": 14986 }, { "epoch": 3.4100113765642774, "grad_norm": 1.790178009999013, "learning_rate": 2.868706030781744e-07, "loss": 0.0304, "step": 14987 }, { "epoch": 3.410238907849829, "grad_norm": 1.7631151348721683, "learning_rate": 2.867954604536294e-07, "loss": 0.0361, "step": 14988 }, { "epoch": 3.410466439135381, "grad_norm": 1.314739848496795, "learning_rate": 2.867203247413657e-07, "loss": 0.0515, "step": 14989 }, { "epoch": 3.4106939704209327, "grad_norm": 1.1960335156389108, "learning_rate": 2.866451959429189e-07, "loss": 0.0334, "step": 14990 }, { "epoch": 3.4109215017064844, "grad_norm": 0.9229938782166055, "learning_rate": 2.8657007405982457e-07, "loss": 0.0136, "step": 14991 }, { "epoch": 3.4111490329920366, "grad_norm": 1.8481676792057289, "learning_rate": 2.8649495909361817e-07, "loss": 0.025, "step": 14992 }, { "epoch": 3.411376564277588, "grad_norm": 2.3572575284599946, "learning_rate": 2.86419851045835e-07, "loss": 0.027, "step": 14993 }, { "epoch": 3.41160409556314, "grad_norm": 1.4595908718715993, "learning_rate": 2.8634474991800984e-07, "loss": 0.041, "step": 14994 }, { "epoch": 3.4118316268486915, "grad_norm": 2.174891618114461, "learning_rate": 2.8626965571167793e-07, "loss": 0.0841, "step": 14995 }, { "epoch": 3.4120591581342437, "grad_norm": 0.7433806087176603, "learning_rate": 2.8619456842837394e-07, "loss": 0.0129, "step": 14996 }, { "epoch": 3.4122866894197954, "grad_norm": 0.8607328521105923, "learning_rate": 2.8611948806963206e-07, "loss": 0.0055, "step": 14997 }, { "epoch": 3.412514220705347, "grad_norm": 3.0799172519589946, "learning_rate": 2.8604441463698763e-07, "loss": 0.0281, "step": 14998 }, { "epoch": 3.412741751990899, "grad_norm": 1.6473183464916588, "learning_rate": 2.859693481319744e-07, "loss": 0.0158, "step": 14999 }, { "epoch": 3.4129692832764507, "grad_norm": 0.7760020698536536, "learning_rate": 2.85894288556127e-07, "loss": 0.0273, "step": 15000 }, { "epoch": 3.4131968145620024, "grad_norm": 1.1430736244678603, "learning_rate": 2.8581923591097904e-07, "loss": 0.0223, "step": 15001 }, { "epoch": 3.413424345847554, "grad_norm": 1.3876145899915096, "learning_rate": 2.85744190198065e-07, "loss": 0.0467, "step": 15002 }, { "epoch": 3.413651877133106, "grad_norm": 2.049976909353948, "learning_rate": 2.856691514189181e-07, "loss": 0.175, "step": 15003 }, { "epoch": 3.4138794084186577, "grad_norm": 2.345646790877689, "learning_rate": 2.8559411957507234e-07, "loss": 0.0262, "step": 15004 }, { "epoch": 3.4141069397042094, "grad_norm": 1.217081619785433, "learning_rate": 2.855190946680614e-07, "loss": 0.0224, "step": 15005 }, { "epoch": 3.414334470989761, "grad_norm": 2.6934891252531408, "learning_rate": 2.854440766994182e-07, "loss": 0.1129, "step": 15006 }, { "epoch": 3.414562002275313, "grad_norm": 0.8947828447926367, "learning_rate": 2.8536906567067647e-07, "loss": 0.0309, "step": 15007 }, { "epoch": 3.4147895335608647, "grad_norm": 1.717416398714058, "learning_rate": 2.85294061583369e-07, "loss": 0.0826, "step": 15008 }, { "epoch": 3.4150170648464164, "grad_norm": 1.16050812271313, "learning_rate": 2.8521906443902856e-07, "loss": 0.101, "step": 15009 }, { "epoch": 3.415244596131968, "grad_norm": 1.5397885244121516, "learning_rate": 2.8514407423918816e-07, "loss": 0.0569, "step": 15010 }, { "epoch": 3.41547212741752, "grad_norm": 1.1578679235228588, "learning_rate": 2.8506909098538046e-07, "loss": 0.0511, "step": 15011 }, { "epoch": 3.4156996587030717, "grad_norm": 0.35241839261995705, "learning_rate": 2.849941146791382e-07, "loss": 0.0022, "step": 15012 }, { "epoch": 3.4159271899886234, "grad_norm": 2.1275204178265663, "learning_rate": 2.8491914532199334e-07, "loss": 0.1385, "step": 15013 }, { "epoch": 3.416154721274175, "grad_norm": 3.8157159636362104, "learning_rate": 2.8484418291547853e-07, "loss": 0.0444, "step": 15014 }, { "epoch": 3.416382252559727, "grad_norm": 0.9160277378655813, "learning_rate": 2.8476922746112567e-07, "loss": 0.0564, "step": 15015 }, { "epoch": 3.4166097838452787, "grad_norm": 1.5478167899202044, "learning_rate": 2.846942789604665e-07, "loss": 0.0147, "step": 15016 }, { "epoch": 3.4168373151308304, "grad_norm": 1.3076909210268743, "learning_rate": 2.8461933741503296e-07, "loss": 0.0172, "step": 15017 }, { "epoch": 3.417064846416382, "grad_norm": 1.6995412019741944, "learning_rate": 2.845444028263568e-07, "loss": 0.0488, "step": 15018 }, { "epoch": 3.417292377701934, "grad_norm": 1.4689991630572004, "learning_rate": 2.844694751959698e-07, "loss": 0.1057, "step": 15019 }, { "epoch": 3.4175199089874857, "grad_norm": 0.8655281524502126, "learning_rate": 2.843945545254028e-07, "loss": 0.0101, "step": 15020 }, { "epoch": 3.4177474402730375, "grad_norm": 1.264332654475305, "learning_rate": 2.843196408161875e-07, "loss": 0.0099, "step": 15021 }, { "epoch": 3.417974971558589, "grad_norm": 5.513093898586165, "learning_rate": 2.8424473406985494e-07, "loss": 0.0505, "step": 15022 }, { "epoch": 3.418202502844141, "grad_norm": 1.2934887698461128, "learning_rate": 2.8416983428793563e-07, "loss": 0.0147, "step": 15023 }, { "epoch": 3.4184300341296927, "grad_norm": 2.2010349240230065, "learning_rate": 2.8409494147196075e-07, "loss": 0.0952, "step": 15024 }, { "epoch": 3.4186575654152445, "grad_norm": 1.3656493524031197, "learning_rate": 2.840200556234609e-07, "loss": 0.0058, "step": 15025 }, { "epoch": 3.418885096700796, "grad_norm": 1.5028125324151669, "learning_rate": 2.839451767439669e-07, "loss": 0.078, "step": 15026 }, { "epoch": 3.419112627986348, "grad_norm": 1.8318991354193082, "learning_rate": 2.838703048350089e-07, "loss": 0.08, "step": 15027 }, { "epoch": 3.4193401592718997, "grad_norm": 1.0481177368536916, "learning_rate": 2.837954398981168e-07, "loss": 0.0073, "step": 15028 }, { "epoch": 3.4195676905574515, "grad_norm": 1.8410699912303081, "learning_rate": 2.8372058193482134e-07, "loss": 0.0355, "step": 15029 }, { "epoch": 3.419795221843003, "grad_norm": 0.7985411757075739, "learning_rate": 2.8364573094665185e-07, "loss": 0.0079, "step": 15030 }, { "epoch": 3.4200227531285554, "grad_norm": 0.8381546458892681, "learning_rate": 2.835708869351385e-07, "loss": 0.0287, "step": 15031 }, { "epoch": 3.4202502844141067, "grad_norm": 2.8782833872621008, "learning_rate": 2.834960499018109e-07, "loss": 0.0306, "step": 15032 }, { "epoch": 3.420477815699659, "grad_norm": 1.5843440221144776, "learning_rate": 2.8342121984819895e-07, "loss": 0.0689, "step": 15033 }, { "epoch": 3.4207053469852102, "grad_norm": 0.9136121006433044, "learning_rate": 2.833463967758315e-07, "loss": 0.0122, "step": 15034 }, { "epoch": 3.4209328782707624, "grad_norm": 1.5261137483385396, "learning_rate": 2.8327158068623797e-07, "loss": 0.0284, "step": 15035 }, { "epoch": 3.421160409556314, "grad_norm": 1.6082996196649788, "learning_rate": 2.831967715809476e-07, "loss": 0.0381, "step": 15036 }, { "epoch": 3.421387940841866, "grad_norm": 1.3242962105164593, "learning_rate": 2.8312196946148897e-07, "loss": 0.0792, "step": 15037 }, { "epoch": 3.4216154721274177, "grad_norm": 0.583081256938025, "learning_rate": 2.8304717432939116e-07, "loss": 0.0037, "step": 15038 }, { "epoch": 3.4218430034129694, "grad_norm": 1.9600517456416902, "learning_rate": 2.8297238618618295e-07, "loss": 0.0685, "step": 15039 }, { "epoch": 3.422070534698521, "grad_norm": 1.070026805543693, "learning_rate": 2.828976050333929e-07, "loss": 0.0221, "step": 15040 }, { "epoch": 3.422298065984073, "grad_norm": 0.9366854421038127, "learning_rate": 2.828228308725494e-07, "loss": 0.0076, "step": 15041 }, { "epoch": 3.4225255972696247, "grad_norm": 1.3996265669069983, "learning_rate": 2.827480637051802e-07, "loss": 0.0811, "step": 15042 }, { "epoch": 3.4227531285551764, "grad_norm": 2.086768665313088, "learning_rate": 2.8267330353281405e-07, "loss": 0.0255, "step": 15043 }, { "epoch": 3.422980659840728, "grad_norm": 1.7051110204226436, "learning_rate": 2.825985503569784e-07, "loss": 0.1245, "step": 15044 }, { "epoch": 3.42320819112628, "grad_norm": 1.7230925519354003, "learning_rate": 2.825238041792014e-07, "loss": 0.096, "step": 15045 }, { "epoch": 3.4234357224118317, "grad_norm": 1.8386102637236141, "learning_rate": 2.8244906500101076e-07, "loss": 0.1113, "step": 15046 }, { "epoch": 3.4236632536973834, "grad_norm": 1.501350285860591, "learning_rate": 2.8237433282393367e-07, "loss": 0.0565, "step": 15047 }, { "epoch": 3.423890784982935, "grad_norm": 0.6855227144056881, "learning_rate": 2.8229960764949797e-07, "loss": 0.0233, "step": 15048 }, { "epoch": 3.424118316268487, "grad_norm": 1.8857317571159506, "learning_rate": 2.822248894792304e-07, "loss": 0.0371, "step": 15049 }, { "epoch": 3.4243458475540387, "grad_norm": 1.07249235627489, "learning_rate": 2.821501783146586e-07, "loss": 0.055, "step": 15050 }, { "epoch": 3.4245733788395905, "grad_norm": 1.409129122320421, "learning_rate": 2.8207547415730894e-07, "loss": 0.0808, "step": 15051 }, { "epoch": 3.424800910125142, "grad_norm": 1.6529687298691769, "learning_rate": 2.820007770087087e-07, "loss": 0.0799, "step": 15052 }, { "epoch": 3.425028441410694, "grad_norm": 1.3752289899924852, "learning_rate": 2.819260868703845e-07, "loss": 0.0124, "step": 15053 }, { "epoch": 3.4252559726962457, "grad_norm": 2.0615433685895694, "learning_rate": 2.818514037438626e-07, "loss": 0.0807, "step": 15054 }, { "epoch": 3.4254835039817975, "grad_norm": 1.1650116273754403, "learning_rate": 2.817767276306697e-07, "loss": 0.0232, "step": 15055 }, { "epoch": 3.425711035267349, "grad_norm": 1.3394942551842142, "learning_rate": 2.817020585323318e-07, "loss": 0.0412, "step": 15056 }, { "epoch": 3.425938566552901, "grad_norm": 0.9146227002419302, "learning_rate": 2.816273964503753e-07, "loss": 0.0111, "step": 15057 }, { "epoch": 3.4261660978384527, "grad_norm": 1.6477061762500302, "learning_rate": 2.8155274138632545e-07, "loss": 0.0161, "step": 15058 }, { "epoch": 3.4263936291240045, "grad_norm": 1.2226692075051924, "learning_rate": 2.814780933417091e-07, "loss": 0.0397, "step": 15059 }, { "epoch": 3.426621160409556, "grad_norm": 2.056847009408462, "learning_rate": 2.814034523180514e-07, "loss": 0.0319, "step": 15060 }, { "epoch": 3.426848691695108, "grad_norm": 1.3928365955791129, "learning_rate": 2.8132881831687756e-07, "loss": 0.0131, "step": 15061 }, { "epoch": 3.4270762229806597, "grad_norm": 2.072493196587677, "learning_rate": 2.8125419133971354e-07, "loss": 0.027, "step": 15062 }, { "epoch": 3.4273037542662115, "grad_norm": 1.9195776149531698, "learning_rate": 2.811795713880841e-07, "loss": 0.0676, "step": 15063 }, { "epoch": 3.4275312855517632, "grad_norm": 1.8589504451668815, "learning_rate": 2.8110495846351446e-07, "loss": 0.0368, "step": 15064 }, { "epoch": 3.427758816837315, "grad_norm": 1.4476191330253931, "learning_rate": 2.8103035256753e-07, "loss": 0.1232, "step": 15065 }, { "epoch": 3.4279863481228667, "grad_norm": 1.4577029055345783, "learning_rate": 2.809557537016549e-07, "loss": 0.0141, "step": 15066 }, { "epoch": 3.4282138794084185, "grad_norm": 0.8115831137231342, "learning_rate": 2.8088116186741435e-07, "loss": 0.0093, "step": 15067 }, { "epoch": 3.4284414106939702, "grad_norm": 1.9679245422519387, "learning_rate": 2.808065770663324e-07, "loss": 0.0702, "step": 15068 }, { "epoch": 3.428668941979522, "grad_norm": 1.18009516036074, "learning_rate": 2.8073199929993384e-07, "loss": 0.0196, "step": 15069 }, { "epoch": 3.428896473265074, "grad_norm": 2.821268187428797, "learning_rate": 2.806574285697425e-07, "loss": 0.0564, "step": 15070 }, { "epoch": 3.4291240045506255, "grad_norm": 1.5793934704038595, "learning_rate": 2.805828648772827e-07, "loss": 0.0141, "step": 15071 }, { "epoch": 3.4293515358361777, "grad_norm": 1.4132267756889303, "learning_rate": 2.8050830822407856e-07, "loss": 0.0254, "step": 15072 }, { "epoch": 3.429579067121729, "grad_norm": 1.5259373765655897, "learning_rate": 2.804337586116535e-07, "loss": 0.0789, "step": 15073 }, { "epoch": 3.429806598407281, "grad_norm": 1.1788719248893145, "learning_rate": 2.8035921604153163e-07, "loss": 0.018, "step": 15074 }, { "epoch": 3.430034129692833, "grad_norm": 1.1293058984498614, "learning_rate": 2.8028468051523596e-07, "loss": 0.0619, "step": 15075 }, { "epoch": 3.4302616609783847, "grad_norm": 0.9607460919596584, "learning_rate": 2.802101520342903e-07, "loss": 0.0411, "step": 15076 }, { "epoch": 3.4304891922639364, "grad_norm": 1.0135291469319474, "learning_rate": 2.801356306002175e-07, "loss": 0.0419, "step": 15077 }, { "epoch": 3.430716723549488, "grad_norm": 1.7935376449064948, "learning_rate": 2.800611162145408e-07, "loss": 0.0778, "step": 15078 }, { "epoch": 3.43094425483504, "grad_norm": 0.947516758630471, "learning_rate": 2.799866088787834e-07, "loss": 0.0556, "step": 15079 }, { "epoch": 3.4311717861205917, "grad_norm": 0.9771774410954746, "learning_rate": 2.7991210859446757e-07, "loss": 0.0094, "step": 15080 }, { "epoch": 3.4313993174061435, "grad_norm": 1.3812324735042163, "learning_rate": 2.7983761536311654e-07, "loss": 0.0908, "step": 15081 }, { "epoch": 3.431626848691695, "grad_norm": 1.5708019896039103, "learning_rate": 2.7976312918625225e-07, "loss": 0.0531, "step": 15082 }, { "epoch": 3.431854379977247, "grad_norm": 1.5899447134192768, "learning_rate": 2.796886500653975e-07, "loss": 0.0904, "step": 15083 }, { "epoch": 3.4320819112627987, "grad_norm": 1.0927796683151023, "learning_rate": 2.796141780020742e-07, "loss": 0.0151, "step": 15084 }, { "epoch": 3.4323094425483505, "grad_norm": 0.5587778209517523, "learning_rate": 2.7953971299780454e-07, "loss": 0.0058, "step": 15085 }, { "epoch": 3.432536973833902, "grad_norm": 2.5032850396358794, "learning_rate": 2.794652550541107e-07, "loss": 0.039, "step": 15086 }, { "epoch": 3.432764505119454, "grad_norm": 0.6316828129287865, "learning_rate": 2.7939080417251395e-07, "loss": 0.0059, "step": 15087 }, { "epoch": 3.4329920364050057, "grad_norm": 1.3342147801076802, "learning_rate": 2.7931636035453646e-07, "loss": 0.0155, "step": 15088 }, { "epoch": 3.4332195676905575, "grad_norm": 1.4298377390900772, "learning_rate": 2.7924192360169927e-07, "loss": 0.0167, "step": 15089 }, { "epoch": 3.4334470989761092, "grad_norm": 2.0585508066150573, "learning_rate": 2.791674939155242e-07, "loss": 0.094, "step": 15090 }, { "epoch": 3.433674630261661, "grad_norm": 1.5309483514231315, "learning_rate": 2.790930712975318e-07, "loss": 0.0251, "step": 15091 }, { "epoch": 3.4339021615472127, "grad_norm": 1.473301428400663, "learning_rate": 2.7901865574924375e-07, "loss": 0.0953, "step": 15092 }, { "epoch": 3.4341296928327645, "grad_norm": 1.6623999844554749, "learning_rate": 2.789442472721808e-07, "loss": 0.0312, "step": 15093 }, { "epoch": 3.4343572241183162, "grad_norm": 2.114321261250475, "learning_rate": 2.788698458678635e-07, "loss": 0.052, "step": 15094 }, { "epoch": 3.434584755403868, "grad_norm": 1.3409885252576648, "learning_rate": 2.787954515378129e-07, "loss": 0.0285, "step": 15095 }, { "epoch": 3.4348122866894197, "grad_norm": 0.9433933561461021, "learning_rate": 2.787210642835492e-07, "loss": 0.0422, "step": 15096 }, { "epoch": 3.4350398179749715, "grad_norm": 1.1531003257519068, "learning_rate": 2.786466841065925e-07, "loss": 0.019, "step": 15097 }, { "epoch": 3.4352673492605232, "grad_norm": 5.468321520186073, "learning_rate": 2.7857231100846324e-07, "loss": 0.0169, "step": 15098 }, { "epoch": 3.435494880546075, "grad_norm": 1.169048138863353, "learning_rate": 2.7849794499068155e-07, "loss": 0.0568, "step": 15099 }, { "epoch": 3.4357224118316267, "grad_norm": 1.8144576026766452, "learning_rate": 2.784235860547675e-07, "loss": 0.1472, "step": 15100 }, { "epoch": 3.4359499431171785, "grad_norm": 0.9684011923089273, "learning_rate": 2.783492342022404e-07, "loss": 0.051, "step": 15101 }, { "epoch": 3.4361774744027302, "grad_norm": 2.0224139049797336, "learning_rate": 2.782748894346203e-07, "loss": 0.0159, "step": 15102 }, { "epoch": 3.436405005688282, "grad_norm": 1.8444374309476286, "learning_rate": 2.7820055175342647e-07, "loss": 0.0251, "step": 15103 }, { "epoch": 3.4366325369738338, "grad_norm": 0.835307849648204, "learning_rate": 2.78126221160178e-07, "loss": 0.0412, "step": 15104 }, { "epoch": 3.4368600682593855, "grad_norm": 1.6546004493907962, "learning_rate": 2.780518976563943e-07, "loss": 0.0363, "step": 15105 }, { "epoch": 3.4370875995449373, "grad_norm": 2.1229361013253727, "learning_rate": 2.779775812435944e-07, "loss": 0.0202, "step": 15106 }, { "epoch": 3.437315130830489, "grad_norm": 1.2662932121455384, "learning_rate": 2.779032719232975e-07, "loss": 0.0215, "step": 15107 }, { "epoch": 3.4375426621160408, "grad_norm": 1.3090467692706818, "learning_rate": 2.778289696970218e-07, "loss": 0.0659, "step": 15108 }, { "epoch": 3.437770193401593, "grad_norm": 1.2443621328791692, "learning_rate": 2.7775467456628623e-07, "loss": 0.0204, "step": 15109 }, { "epoch": 3.4379977246871443, "grad_norm": 1.6164300663641964, "learning_rate": 2.776803865326093e-07, "loss": 0.071, "step": 15110 }, { "epoch": 3.4382252559726965, "grad_norm": 1.962837240928607, "learning_rate": 2.776061055975089e-07, "loss": 0.0384, "step": 15111 }, { "epoch": 3.4384527872582478, "grad_norm": 1.2221498287782715, "learning_rate": 2.775318317625035e-07, "loss": 0.0402, "step": 15112 }, { "epoch": 3.4386803185438, "grad_norm": 1.0446333214363077, "learning_rate": 2.774575650291111e-07, "loss": 0.0188, "step": 15113 }, { "epoch": 3.4389078498293517, "grad_norm": 1.8404887330479296, "learning_rate": 2.773833053988498e-07, "loss": 0.0301, "step": 15114 }, { "epoch": 3.4391353811149035, "grad_norm": 2.729011407564149, "learning_rate": 2.7730905287323706e-07, "loss": 0.1102, "step": 15115 }, { "epoch": 3.439362912400455, "grad_norm": 0.8258774655296246, "learning_rate": 2.772348074537904e-07, "loss": 0.0579, "step": 15116 }, { "epoch": 3.439590443686007, "grad_norm": 0.9447916355503901, "learning_rate": 2.7716056914202755e-07, "loss": 0.0185, "step": 15117 }, { "epoch": 3.4398179749715587, "grad_norm": 1.452240447175961, "learning_rate": 2.7708633793946537e-07, "loss": 0.0577, "step": 15118 }, { "epoch": 3.4400455062571105, "grad_norm": 2.013131316555558, "learning_rate": 2.770121138476213e-07, "loss": 0.0182, "step": 15119 }, { "epoch": 3.4402730375426622, "grad_norm": 1.0431510069575938, "learning_rate": 2.7693789686801224e-07, "loss": 0.0059, "step": 15120 }, { "epoch": 3.440500568828214, "grad_norm": 1.4704991344320781, "learning_rate": 2.768636870021554e-07, "loss": 0.098, "step": 15121 }, { "epoch": 3.4407281001137657, "grad_norm": 3.323417843633827, "learning_rate": 2.7678948425156726e-07, "loss": 0.037, "step": 15122 }, { "epoch": 3.4409556313993175, "grad_norm": 1.3911066679291442, "learning_rate": 2.767152886177641e-07, "loss": 0.0207, "step": 15123 }, { "epoch": 3.4411831626848692, "grad_norm": 3.3589013072500835, "learning_rate": 2.766411001022626e-07, "loss": 0.0238, "step": 15124 }, { "epoch": 3.441410693970421, "grad_norm": 1.4708786943832959, "learning_rate": 2.7656691870657893e-07, "loss": 0.0199, "step": 15125 }, { "epoch": 3.4416382252559727, "grad_norm": 1.0412192241826137, "learning_rate": 2.764927444322296e-07, "loss": 0.007, "step": 15126 }, { "epoch": 3.4418657565415245, "grad_norm": 1.0687769085763579, "learning_rate": 2.7641857728073013e-07, "loss": 0.0538, "step": 15127 }, { "epoch": 3.4420932878270762, "grad_norm": 1.5966061115533183, "learning_rate": 2.763444172535967e-07, "loss": 0.027, "step": 15128 }, { "epoch": 3.442320819112628, "grad_norm": 0.5292954560182, "learning_rate": 2.762702643523449e-07, "loss": 0.0034, "step": 15129 }, { "epoch": 3.4425483503981797, "grad_norm": 3.439041812656658, "learning_rate": 2.7619611857849e-07, "loss": 0.0924, "step": 15130 }, { "epoch": 3.4427758816837315, "grad_norm": 1.4365266938303072, "learning_rate": 2.761219799335476e-07, "loss": 0.0717, "step": 15131 }, { "epoch": 3.4430034129692833, "grad_norm": 3.0173486903137094, "learning_rate": 2.76047848419033e-07, "loss": 0.0306, "step": 15132 }, { "epoch": 3.443230944254835, "grad_norm": 2.5466680500854784, "learning_rate": 2.7597372403646155e-07, "loss": 0.0194, "step": 15133 }, { "epoch": 3.4434584755403868, "grad_norm": 1.5933654682757217, "learning_rate": 2.7589960678734796e-07, "loss": 0.0582, "step": 15134 }, { "epoch": 3.4436860068259385, "grad_norm": 0.9422619884868919, "learning_rate": 2.7582549667320683e-07, "loss": 0.0564, "step": 15135 }, { "epoch": 3.4439135381114903, "grad_norm": 1.046316105239072, "learning_rate": 2.757513936955533e-07, "loss": 0.0535, "step": 15136 }, { "epoch": 3.444141069397042, "grad_norm": 1.7439229634728903, "learning_rate": 2.756772978559014e-07, "loss": 0.0425, "step": 15137 }, { "epoch": 3.4443686006825938, "grad_norm": 0.9065803512416694, "learning_rate": 2.756032091557658e-07, "loss": 0.0242, "step": 15138 }, { "epoch": 3.4445961319681455, "grad_norm": 0.5339225128480739, "learning_rate": 2.7552912759666074e-07, "loss": 0.0049, "step": 15139 }, { "epoch": 3.4448236632536973, "grad_norm": 1.5211114079040156, "learning_rate": 2.754550531801005e-07, "loss": 0.0094, "step": 15140 }, { "epoch": 3.445051194539249, "grad_norm": 0.9842020188042084, "learning_rate": 2.7538098590759887e-07, "loss": 0.0199, "step": 15141 }, { "epoch": 3.4452787258248008, "grad_norm": 1.8584351294340367, "learning_rate": 2.7530692578066925e-07, "loss": 0.0151, "step": 15142 }, { "epoch": 3.4455062571103525, "grad_norm": 1.8820398065643364, "learning_rate": 2.752328728008259e-07, "loss": 0.1139, "step": 15143 }, { "epoch": 3.4457337883959043, "grad_norm": 0.953128074297268, "learning_rate": 2.7515882696958185e-07, "loss": 0.0209, "step": 15144 }, { "epoch": 3.445961319681456, "grad_norm": 1.4287883938267698, "learning_rate": 2.7508478828845067e-07, "loss": 0.0521, "step": 15145 }, { "epoch": 3.4461888509670078, "grad_norm": 1.0204889132019523, "learning_rate": 2.7501075675894556e-07, "loss": 0.019, "step": 15146 }, { "epoch": 3.4464163822525595, "grad_norm": 3.721264608215111, "learning_rate": 2.749367323825799e-07, "loss": 0.0206, "step": 15147 }, { "epoch": 3.4466439135381117, "grad_norm": 2.924577213017021, "learning_rate": 2.748627151608663e-07, "loss": 0.0712, "step": 15148 }, { "epoch": 3.446871444823663, "grad_norm": 1.4017000952992826, "learning_rate": 2.7478870509531725e-07, "loss": 0.083, "step": 15149 }, { "epoch": 3.4470989761092152, "grad_norm": 1.9433586884087768, "learning_rate": 2.747147021874459e-07, "loss": 0.0441, "step": 15150 }, { "epoch": 3.4473265073947665, "grad_norm": 1.5758130511873318, "learning_rate": 2.746407064387644e-07, "loss": 0.0938, "step": 15151 }, { "epoch": 3.4475540386803187, "grad_norm": 1.5206125869664207, "learning_rate": 2.7456671785078515e-07, "loss": 0.0787, "step": 15152 }, { "epoch": 3.4477815699658705, "grad_norm": 1.413264369774, "learning_rate": 2.7449273642502064e-07, "loss": 0.0677, "step": 15153 }, { "epoch": 3.4480091012514222, "grad_norm": 2.858322955427901, "learning_rate": 2.744187621629825e-07, "loss": 0.0877, "step": 15154 }, { "epoch": 3.448236632536974, "grad_norm": 1.89260177401183, "learning_rate": 2.7434479506618296e-07, "loss": 0.0242, "step": 15155 }, { "epoch": 3.4484641638225257, "grad_norm": 0.7709365456904015, "learning_rate": 2.742708351361334e-07, "loss": 0.0066, "step": 15156 }, { "epoch": 3.4486916951080775, "grad_norm": 1.2093987558043768, "learning_rate": 2.7419688237434587e-07, "loss": 0.0119, "step": 15157 }, { "epoch": 3.4489192263936292, "grad_norm": 1.3607057383012737, "learning_rate": 2.7412293678233136e-07, "loss": 0.0328, "step": 15158 }, { "epoch": 3.449146757679181, "grad_norm": 2.1465163042557407, "learning_rate": 2.740489983616014e-07, "loss": 0.023, "step": 15159 }, { "epoch": 3.4493742889647327, "grad_norm": 0.829713786658992, "learning_rate": 2.739750671136675e-07, "loss": 0.0866, "step": 15160 }, { "epoch": 3.4496018202502845, "grad_norm": 1.8117578080214887, "learning_rate": 2.7390114304004004e-07, "loss": 0.0724, "step": 15161 }, { "epoch": 3.4498293515358363, "grad_norm": 2.0259404094917173, "learning_rate": 2.738272261422304e-07, "loss": 0.0051, "step": 15162 }, { "epoch": 3.450056882821388, "grad_norm": 1.6343572850789048, "learning_rate": 2.73753316421749e-07, "loss": 0.0154, "step": 15163 }, { "epoch": 3.4502844141069398, "grad_norm": 1.2714655614007075, "learning_rate": 2.7367941388010666e-07, "loss": 0.038, "step": 15164 }, { "epoch": 3.4505119453924915, "grad_norm": 1.6523856798193484, "learning_rate": 2.736055185188136e-07, "loss": 0.0455, "step": 15165 }, { "epoch": 3.4507394766780433, "grad_norm": 1.824040242502262, "learning_rate": 2.735316303393801e-07, "loss": 0.1153, "step": 15166 }, { "epoch": 3.450967007963595, "grad_norm": 1.9997005360277091, "learning_rate": 2.734577493433166e-07, "loss": 0.055, "step": 15167 }, { "epoch": 3.4511945392491468, "grad_norm": 1.619713609528561, "learning_rate": 2.733838755321327e-07, "loss": 0.0095, "step": 15168 }, { "epoch": 3.4514220705346985, "grad_norm": 1.2641586923284944, "learning_rate": 2.733100089073386e-07, "loss": 0.0199, "step": 15169 }, { "epoch": 3.4516496018202503, "grad_norm": 2.1236410909297527, "learning_rate": 2.7323614947044367e-07, "loss": 0.0186, "step": 15170 }, { "epoch": 3.451877133105802, "grad_norm": 1.2251335893284745, "learning_rate": 2.7316229722295777e-07, "loss": 0.0255, "step": 15171 }, { "epoch": 3.4521046643913538, "grad_norm": 0.9051691688405964, "learning_rate": 2.7308845216639e-07, "loss": 0.0562, "step": 15172 }, { "epoch": 3.4523321956769055, "grad_norm": 1.7452163689930122, "learning_rate": 2.7301461430224977e-07, "loss": 0.0174, "step": 15173 }, { "epoch": 3.4525597269624573, "grad_norm": 1.7007792357875684, "learning_rate": 2.7294078363204634e-07, "loss": 0.0155, "step": 15174 }, { "epoch": 3.452787258248009, "grad_norm": 2.2295232354571173, "learning_rate": 2.7286696015728837e-07, "loss": 0.0193, "step": 15175 }, { "epoch": 3.453014789533561, "grad_norm": 2.0045400014765913, "learning_rate": 2.72793143879485e-07, "loss": 0.0432, "step": 15176 }, { "epoch": 3.4532423208191125, "grad_norm": 1.2100262913515882, "learning_rate": 2.727193348001446e-07, "loss": 0.0189, "step": 15177 }, { "epoch": 3.4534698521046643, "grad_norm": 1.4982894186434883, "learning_rate": 2.72645532920776e-07, "loss": 0.0599, "step": 15178 }, { "epoch": 3.453697383390216, "grad_norm": 0.963574451702531, "learning_rate": 2.725717382428872e-07, "loss": 0.0355, "step": 15179 }, { "epoch": 3.453924914675768, "grad_norm": 2.581674088199604, "learning_rate": 2.724979507679865e-07, "loss": 0.0281, "step": 15180 }, { "epoch": 3.4541524459613195, "grad_norm": 0.6450778042946467, "learning_rate": 2.7242417049758256e-07, "loss": 0.0053, "step": 15181 }, { "epoch": 3.4543799772468713, "grad_norm": 1.9152820977290028, "learning_rate": 2.7235039743318243e-07, "loss": 0.1028, "step": 15182 }, { "epoch": 3.454607508532423, "grad_norm": 1.5001110289558586, "learning_rate": 2.7227663157629465e-07, "loss": 0.0131, "step": 15183 }, { "epoch": 3.454835039817975, "grad_norm": 1.4141328449845767, "learning_rate": 2.7220287292842657e-07, "loss": 0.0304, "step": 15184 }, { "epoch": 3.4550625711035265, "grad_norm": 1.6152994249890102, "learning_rate": 2.721291214910851e-07, "loss": 0.1152, "step": 15185 }, { "epoch": 3.4552901023890783, "grad_norm": 2.627798173663294, "learning_rate": 2.7205537726577864e-07, "loss": 0.0491, "step": 15186 }, { "epoch": 3.4555176336746305, "grad_norm": 1.0755672038579398, "learning_rate": 2.719816402540137e-07, "loss": 0.0587, "step": 15187 }, { "epoch": 3.455745164960182, "grad_norm": 1.159412259550881, "learning_rate": 2.719079104572977e-07, "loss": 0.0826, "step": 15188 }, { "epoch": 3.455972696245734, "grad_norm": 1.4838443650729318, "learning_rate": 2.718341878771371e-07, "loss": 0.0126, "step": 15189 }, { "epoch": 3.4562002275312853, "grad_norm": 1.5205014200441902, "learning_rate": 2.717604725150392e-07, "loss": 0.0759, "step": 15190 }, { "epoch": 3.4564277588168375, "grad_norm": 1.6705855100677725, "learning_rate": 2.7168676437251004e-07, "loss": 0.1067, "step": 15191 }, { "epoch": 3.4566552901023893, "grad_norm": 1.1266041761718215, "learning_rate": 2.716130634510563e-07, "loss": 0.0587, "step": 15192 }, { "epoch": 3.456882821387941, "grad_norm": 1.9763991196959136, "learning_rate": 2.715393697521847e-07, "loss": 0.0357, "step": 15193 }, { "epoch": 3.4571103526734928, "grad_norm": 1.4927514610449646, "learning_rate": 2.714656832774007e-07, "loss": 0.0196, "step": 15194 }, { "epoch": 3.4573378839590445, "grad_norm": 1.3636577208273393, "learning_rate": 2.713920040282109e-07, "loss": 0.0321, "step": 15195 }, { "epoch": 3.4575654152445963, "grad_norm": 1.1698085821877768, "learning_rate": 2.713183320061208e-07, "loss": 0.0454, "step": 15196 }, { "epoch": 3.457792946530148, "grad_norm": 1.5687161985557179, "learning_rate": 2.712446672126364e-07, "loss": 0.0919, "step": 15197 }, { "epoch": 3.4580204778156998, "grad_norm": 1.3191102559943138, "learning_rate": 2.71171009649263e-07, "loss": 0.0134, "step": 15198 }, { "epoch": 3.4582480091012515, "grad_norm": 1.1908067086691507, "learning_rate": 2.7109735931750605e-07, "loss": 0.0568, "step": 15199 }, { "epoch": 3.4584755403868033, "grad_norm": 1.4902309767663755, "learning_rate": 2.7102371621887123e-07, "loss": 0.018, "step": 15200 }, { "epoch": 3.458703071672355, "grad_norm": 3.1745419078338504, "learning_rate": 2.7095008035486313e-07, "loss": 0.0425, "step": 15201 }, { "epoch": 3.4589306029579068, "grad_norm": 1.566037574948144, "learning_rate": 2.708764517269872e-07, "loss": 0.0247, "step": 15202 }, { "epoch": 3.4591581342434585, "grad_norm": 1.0331305151971764, "learning_rate": 2.7080283033674807e-07, "loss": 0.0332, "step": 15203 }, { "epoch": 3.4593856655290103, "grad_norm": 0.9627737219482738, "learning_rate": 2.7072921618565014e-07, "loss": 0.0437, "step": 15204 }, { "epoch": 3.459613196814562, "grad_norm": 1.798175339168015, "learning_rate": 2.706556092751982e-07, "loss": 0.0579, "step": 15205 }, { "epoch": 3.459840728100114, "grad_norm": 1.4899351535362715, "learning_rate": 2.705820096068967e-07, "loss": 0.0553, "step": 15206 }, { "epoch": 3.4600682593856655, "grad_norm": 0.9223206985463028, "learning_rate": 2.7050841718225e-07, "loss": 0.0095, "step": 15207 }, { "epoch": 3.4602957906712173, "grad_norm": 1.2650911382457823, "learning_rate": 2.704348320027619e-07, "loss": 0.0306, "step": 15208 }, { "epoch": 3.460523321956769, "grad_norm": 1.0850158395715543, "learning_rate": 2.7036125406993664e-07, "loss": 0.0081, "step": 15209 }, { "epoch": 3.460750853242321, "grad_norm": 4.674015107415367, "learning_rate": 2.7028768338527784e-07, "loss": 0.0758, "step": 15210 }, { "epoch": 3.4609783845278725, "grad_norm": 1.7657923545787002, "learning_rate": 2.7021411995028906e-07, "loss": 0.0272, "step": 15211 }, { "epoch": 3.4612059158134243, "grad_norm": 1.1686515177859975, "learning_rate": 2.7014056376647375e-07, "loss": 0.0111, "step": 15212 }, { "epoch": 3.461433447098976, "grad_norm": 1.293410682360866, "learning_rate": 2.700670148353356e-07, "loss": 0.0378, "step": 15213 }, { "epoch": 3.461660978384528, "grad_norm": 2.0059300279689314, "learning_rate": 2.699934731583777e-07, "loss": 0.0449, "step": 15214 }, { "epoch": 3.4618885096700796, "grad_norm": 3.531192636962808, "learning_rate": 2.6991993873710285e-07, "loss": 0.0328, "step": 15215 }, { "epoch": 3.4621160409556313, "grad_norm": 1.3929588930800876, "learning_rate": 2.698464115730144e-07, "loss": 0.0086, "step": 15216 }, { "epoch": 3.462343572241183, "grad_norm": 1.4640448864243247, "learning_rate": 2.697728916676149e-07, "loss": 0.0107, "step": 15217 }, { "epoch": 3.462571103526735, "grad_norm": 0.976228836191813, "learning_rate": 2.696993790224067e-07, "loss": 0.0164, "step": 15218 }, { "epoch": 3.4627986348122866, "grad_norm": 1.0528127175724744, "learning_rate": 2.696258736388924e-07, "loss": 0.0023, "step": 15219 }, { "epoch": 3.4630261660978383, "grad_norm": 1.8599796065727252, "learning_rate": 2.695523755185745e-07, "loss": 0.2123, "step": 15220 }, { "epoch": 3.46325369738339, "grad_norm": 1.0347728506232334, "learning_rate": 2.694788846629553e-07, "loss": 0.0235, "step": 15221 }, { "epoch": 3.463481228668942, "grad_norm": 2.50501576114314, "learning_rate": 2.6940540107353656e-07, "loss": 0.1023, "step": 15222 }, { "epoch": 3.4637087599544936, "grad_norm": 1.0544806959492217, "learning_rate": 2.6933192475181997e-07, "loss": 0.058, "step": 15223 }, { "epoch": 3.4639362912400453, "grad_norm": 1.1284292780366665, "learning_rate": 2.6925845569930767e-07, "loss": 0.0194, "step": 15224 }, { "epoch": 3.464163822525597, "grad_norm": 1.1508535485872318, "learning_rate": 2.691849939175008e-07, "loss": 0.0304, "step": 15225 }, { "epoch": 3.4643913538111493, "grad_norm": 1.2590349716600167, "learning_rate": 2.6911153940790103e-07, "loss": 0.012, "step": 15226 }, { "epoch": 3.4646188850967006, "grad_norm": 1.5867373079058664, "learning_rate": 2.6903809217200957e-07, "loss": 0.0565, "step": 15227 }, { "epoch": 3.4648464163822528, "grad_norm": 2.1223698661951684, "learning_rate": 2.689646522113278e-07, "loss": 0.0438, "step": 15228 }, { "epoch": 3.465073947667804, "grad_norm": 1.6167784898491726, "learning_rate": 2.6889121952735657e-07, "loss": 0.0562, "step": 15229 }, { "epoch": 3.4653014789533563, "grad_norm": 0.7872389403461044, "learning_rate": 2.688177941215964e-07, "loss": 0.0073, "step": 15230 }, { "epoch": 3.465529010238908, "grad_norm": 1.3941801443903261, "learning_rate": 2.687443759955484e-07, "loss": 0.0762, "step": 15231 }, { "epoch": 3.4657565415244598, "grad_norm": 1.4439382754175893, "learning_rate": 2.6867096515071267e-07, "loss": 0.1179, "step": 15232 }, { "epoch": 3.4659840728100115, "grad_norm": 0.9598940515463316, "learning_rate": 2.685975615885898e-07, "loss": 0.0169, "step": 15233 }, { "epoch": 3.4662116040955633, "grad_norm": 1.8665254001759004, "learning_rate": 2.6852416531068014e-07, "loss": 0.0242, "step": 15234 }, { "epoch": 3.466439135381115, "grad_norm": 2.5361426100188, "learning_rate": 2.684507763184838e-07, "loss": 0.0624, "step": 15235 }, { "epoch": 3.466666666666667, "grad_norm": 2.0852309393449944, "learning_rate": 2.683773946135007e-07, "loss": 0.029, "step": 15236 }, { "epoch": 3.4668941979522185, "grad_norm": 1.8058148790108268, "learning_rate": 2.6830402019723026e-07, "loss": 0.1132, "step": 15237 }, { "epoch": 3.4671217292377703, "grad_norm": 1.51529492832501, "learning_rate": 2.6823065307117263e-07, "loss": 0.0156, "step": 15238 }, { "epoch": 3.467349260523322, "grad_norm": 2.7433926344229906, "learning_rate": 2.6815729323682683e-07, "loss": 0.0839, "step": 15239 }, { "epoch": 3.467576791808874, "grad_norm": 1.5285145740475237, "learning_rate": 2.680839406956924e-07, "loss": 0.0125, "step": 15240 }, { "epoch": 3.4678043230944255, "grad_norm": 0.8772720944377869, "learning_rate": 2.6801059544926883e-07, "loss": 0.0334, "step": 15241 }, { "epoch": 3.4680318543799773, "grad_norm": 0.9616249192462, "learning_rate": 2.679372574990546e-07, "loss": 0.016, "step": 15242 }, { "epoch": 3.468259385665529, "grad_norm": 1.3067494666885902, "learning_rate": 2.6786392684654926e-07, "loss": 0.0296, "step": 15243 }, { "epoch": 3.468486916951081, "grad_norm": 1.5830751272604324, "learning_rate": 2.6779060349325085e-07, "loss": 0.0507, "step": 15244 }, { "epoch": 3.4687144482366326, "grad_norm": 1.1669639833164651, "learning_rate": 2.677172874406583e-07, "loss": 0.038, "step": 15245 }, { "epoch": 3.4689419795221843, "grad_norm": 2.179111086750479, "learning_rate": 2.6764397869027013e-07, "loss": 0.0122, "step": 15246 }, { "epoch": 3.469169510807736, "grad_norm": 1.1447253852052286, "learning_rate": 2.6757067724358473e-07, "loss": 0.0483, "step": 15247 }, { "epoch": 3.469397042093288, "grad_norm": 1.4199866536493457, "learning_rate": 2.6749738310210015e-07, "loss": 0.0074, "step": 15248 }, { "epoch": 3.4696245733788396, "grad_norm": 2.2688964600675714, "learning_rate": 2.6742409626731405e-07, "loss": 0.047, "step": 15249 }, { "epoch": 3.4698521046643913, "grad_norm": 0.8573343292901634, "learning_rate": 2.673508167407248e-07, "loss": 0.0064, "step": 15250 }, { "epoch": 3.470079635949943, "grad_norm": 1.2766578108630944, "learning_rate": 2.6727754452382967e-07, "loss": 0.063, "step": 15251 }, { "epoch": 3.470307167235495, "grad_norm": 1.6243592658745643, "learning_rate": 2.672042796181263e-07, "loss": 0.0634, "step": 15252 }, { "epoch": 3.4705346985210466, "grad_norm": 1.8563575407370787, "learning_rate": 2.671310220251122e-07, "loss": 0.0255, "step": 15253 }, { "epoch": 3.4707622298065983, "grad_norm": 1.256468966564515, "learning_rate": 2.6705777174628486e-07, "loss": 0.021, "step": 15254 }, { "epoch": 3.47098976109215, "grad_norm": 1.3684094177455075, "learning_rate": 2.6698452878314113e-07, "loss": 0.0406, "step": 15255 }, { "epoch": 3.471217292377702, "grad_norm": 0.7618182658247171, "learning_rate": 2.6691129313717776e-07, "loss": 0.0068, "step": 15256 }, { "epoch": 3.4714448236632536, "grad_norm": 1.1540589149055578, "learning_rate": 2.668380648098919e-07, "loss": 0.0655, "step": 15257 }, { "epoch": 3.4716723549488053, "grad_norm": 2.1603573663354982, "learning_rate": 2.6676484380277985e-07, "loss": 0.0177, "step": 15258 }, { "epoch": 3.471899886234357, "grad_norm": 1.0059966892245586, "learning_rate": 2.6669163011733836e-07, "loss": 0.0198, "step": 15259 }, { "epoch": 3.472127417519909, "grad_norm": 1.4585212719217695, "learning_rate": 2.6661842375506395e-07, "loss": 0.0277, "step": 15260 }, { "epoch": 3.4723549488054606, "grad_norm": 0.7936082956577524, "learning_rate": 2.6654522471745237e-07, "loss": 0.0042, "step": 15261 }, { "epoch": 3.4725824800910123, "grad_norm": 0.9200058434682415, "learning_rate": 2.664720330060001e-07, "loss": 0.0411, "step": 15262 }, { "epoch": 3.472810011376564, "grad_norm": 1.5560303729000928, "learning_rate": 2.663988486222027e-07, "loss": 0.0238, "step": 15263 }, { "epoch": 3.473037542662116, "grad_norm": 0.6545905393109085, "learning_rate": 2.6632567156755633e-07, "loss": 0.0306, "step": 15264 }, { "epoch": 3.473265073947668, "grad_norm": 3.0446546072214833, "learning_rate": 2.6625250184355605e-07, "loss": 0.0301, "step": 15265 }, { "epoch": 3.4734926052332193, "grad_norm": 0.987452885777539, "learning_rate": 2.661793394516977e-07, "loss": 0.0149, "step": 15266 }, { "epoch": 3.4737201365187715, "grad_norm": 2.0302987068566556, "learning_rate": 2.661061843934767e-07, "loss": 0.0121, "step": 15267 }, { "epoch": 3.473947667804323, "grad_norm": 0.8629377813294012, "learning_rate": 2.6603303667038773e-07, "loss": 0.0839, "step": 15268 }, { "epoch": 3.474175199089875, "grad_norm": 1.5053968996021094, "learning_rate": 2.6595989628392637e-07, "loss": 0.0152, "step": 15269 }, { "epoch": 3.474402730375427, "grad_norm": 1.6767864635038041, "learning_rate": 2.6588676323558693e-07, "loss": 0.0356, "step": 15270 }, { "epoch": 3.4746302616609785, "grad_norm": 1.7960576747566381, "learning_rate": 2.658136375268646e-07, "loss": 0.0744, "step": 15271 }, { "epoch": 3.4748577929465303, "grad_norm": 1.6216300964340566, "learning_rate": 2.6574051915925344e-07, "loss": 0.0278, "step": 15272 }, { "epoch": 3.475085324232082, "grad_norm": 2.280922539895665, "learning_rate": 2.6566740813424815e-07, "loss": 0.029, "step": 15273 }, { "epoch": 3.475312855517634, "grad_norm": 1.8655571875679453, "learning_rate": 2.6559430445334313e-07, "loss": 0.0534, "step": 15274 }, { "epoch": 3.4755403868031856, "grad_norm": 2.0010993392118017, "learning_rate": 2.6552120811803213e-07, "loss": 0.1135, "step": 15275 }, { "epoch": 3.4757679180887373, "grad_norm": 1.3786883575740296, "learning_rate": 2.6544811912980954e-07, "loss": 0.0706, "step": 15276 }, { "epoch": 3.475995449374289, "grad_norm": 0.99573516394255, "learning_rate": 2.653750374901686e-07, "loss": 0.01, "step": 15277 }, { "epoch": 3.476222980659841, "grad_norm": 0.8848438564140791, "learning_rate": 2.6530196320060355e-07, "loss": 0.023, "step": 15278 }, { "epoch": 3.4764505119453926, "grad_norm": 2.171152900756473, "learning_rate": 2.6522889626260734e-07, "loss": 0.0808, "step": 15279 }, { "epoch": 3.4766780432309443, "grad_norm": 0.8006148572476203, "learning_rate": 2.651558366776736e-07, "loss": 0.0088, "step": 15280 }, { "epoch": 3.476905574516496, "grad_norm": 2.1990706374035662, "learning_rate": 2.650827844472958e-07, "loss": 0.0773, "step": 15281 }, { "epoch": 3.477133105802048, "grad_norm": 0.7715815725223004, "learning_rate": 2.650097395729665e-07, "loss": 0.0551, "step": 15282 }, { "epoch": 3.4773606370875996, "grad_norm": 2.1652941995515174, "learning_rate": 2.64936702056179e-07, "loss": 0.0362, "step": 15283 }, { "epoch": 3.4775881683731513, "grad_norm": 2.7832339418814223, "learning_rate": 2.648636718984258e-07, "loss": 0.0634, "step": 15284 }, { "epoch": 3.477815699658703, "grad_norm": 1.7033275968458854, "learning_rate": 2.647906491011997e-07, "loss": 0.0451, "step": 15285 }, { "epoch": 3.478043230944255, "grad_norm": 0.3626813555226799, "learning_rate": 2.6471763366599283e-07, "loss": 0.0021, "step": 15286 }, { "epoch": 3.4782707622298066, "grad_norm": 1.0147554703923416, "learning_rate": 2.646446255942977e-07, "loss": 0.0133, "step": 15287 }, { "epoch": 3.4784982935153583, "grad_norm": 2.1472016581419298, "learning_rate": 2.6457162488760673e-07, "loss": 0.0192, "step": 15288 }, { "epoch": 3.47872582480091, "grad_norm": 1.8816662926065215, "learning_rate": 2.644986315474114e-07, "loss": 0.0406, "step": 15289 }, { "epoch": 3.478953356086462, "grad_norm": 1.4964192215498264, "learning_rate": 2.64425645575204e-07, "loss": 0.0244, "step": 15290 }, { "epoch": 3.4791808873720136, "grad_norm": 0.8070039932001472, "learning_rate": 2.643526669724761e-07, "loss": 0.0184, "step": 15291 }, { "epoch": 3.4794084186575653, "grad_norm": 1.3269397014290873, "learning_rate": 2.642796957407189e-07, "loss": 0.0193, "step": 15292 }, { "epoch": 3.479635949943117, "grad_norm": 1.2811936139598188, "learning_rate": 2.642067318814242e-07, "loss": 0.0146, "step": 15293 }, { "epoch": 3.479863481228669, "grad_norm": 2.043341403836222, "learning_rate": 2.6413377539608304e-07, "loss": 0.0392, "step": 15294 }, { "epoch": 3.4800910125142206, "grad_norm": 0.9779928207358911, "learning_rate": 2.640608262861869e-07, "loss": 0.013, "step": 15295 }, { "epoch": 3.4803185437997723, "grad_norm": 2.8627344913330393, "learning_rate": 2.639878845532262e-07, "loss": 0.0362, "step": 15296 }, { "epoch": 3.480546075085324, "grad_norm": 1.506214503859656, "learning_rate": 2.639149501986922e-07, "loss": 0.08, "step": 15297 }, { "epoch": 3.480773606370876, "grad_norm": 1.1631751491556244, "learning_rate": 2.638420232240753e-07, "loss": 0.028, "step": 15298 }, { "epoch": 3.4810011376564276, "grad_norm": 1.2185245202987836, "learning_rate": 2.637691036308658e-07, "loss": 0.0232, "step": 15299 }, { "epoch": 3.4812286689419794, "grad_norm": 1.5415915036033532, "learning_rate": 2.636961914205543e-07, "loss": 0.0195, "step": 15300 }, { "epoch": 3.481456200227531, "grad_norm": 1.8191779725992885, "learning_rate": 2.6362328659463093e-07, "loss": 0.1019, "step": 15301 }, { "epoch": 3.481683731513083, "grad_norm": 0.7465555002771523, "learning_rate": 2.63550389154586e-07, "loss": 0.0102, "step": 15302 }, { "epoch": 3.4819112627986346, "grad_norm": 1.1877953859273795, "learning_rate": 2.6347749910190887e-07, "loss": 0.0227, "step": 15303 }, { "epoch": 3.482138794084187, "grad_norm": 1.1051081450197195, "learning_rate": 2.634046164380898e-07, "loss": 0.027, "step": 15304 }, { "epoch": 3.482366325369738, "grad_norm": 1.1188140488063734, "learning_rate": 2.6333174116461813e-07, "loss": 0.0612, "step": 15305 }, { "epoch": 3.4825938566552903, "grad_norm": 0.970078017622188, "learning_rate": 2.6325887328298283e-07, "loss": 0.0142, "step": 15306 }, { "epoch": 3.4828213879408416, "grad_norm": 1.910656012091514, "learning_rate": 2.631860127946742e-07, "loss": 0.0201, "step": 15307 }, { "epoch": 3.483048919226394, "grad_norm": 1.428297392582113, "learning_rate": 2.631131597011806e-07, "loss": 0.1357, "step": 15308 }, { "epoch": 3.4832764505119456, "grad_norm": 1.584769009724363, "learning_rate": 2.6304031400399146e-07, "loss": 0.0234, "step": 15309 }, { "epoch": 3.4835039817974973, "grad_norm": 1.3383022265957034, "learning_rate": 2.629674757045954e-07, "loss": 0.0824, "step": 15310 }, { "epoch": 3.483731513083049, "grad_norm": 1.6821923955738927, "learning_rate": 2.628946448044808e-07, "loss": 0.0942, "step": 15311 }, { "epoch": 3.483959044368601, "grad_norm": 1.7294663629678346, "learning_rate": 2.628218213051366e-07, "loss": 0.0434, "step": 15312 }, { "epoch": 3.4841865756541526, "grad_norm": 1.587603375678173, "learning_rate": 2.627490052080511e-07, "loss": 0.0506, "step": 15313 }, { "epoch": 3.4844141069397043, "grad_norm": 1.6417063692570242, "learning_rate": 2.626761965147126e-07, "loss": 0.0985, "step": 15314 }, { "epoch": 3.484641638225256, "grad_norm": 1.0072719269651262, "learning_rate": 2.626033952266089e-07, "loss": 0.0326, "step": 15315 }, { "epoch": 3.484869169510808, "grad_norm": 2.683889454630421, "learning_rate": 2.625306013452284e-07, "loss": 0.0484, "step": 15316 }, { "epoch": 3.4850967007963596, "grad_norm": 1.4921741662052792, "learning_rate": 2.624578148720585e-07, "loss": 0.0851, "step": 15317 }, { "epoch": 3.4853242320819113, "grad_norm": 2.0882643331672694, "learning_rate": 2.623850358085867e-07, "loss": 0.0267, "step": 15318 }, { "epoch": 3.485551763367463, "grad_norm": 1.5582047239503092, "learning_rate": 2.623122641563007e-07, "loss": 0.0709, "step": 15319 }, { "epoch": 3.485779294653015, "grad_norm": 1.5323151227469798, "learning_rate": 2.6223949991668773e-07, "loss": 0.0454, "step": 15320 }, { "epoch": 3.4860068259385666, "grad_norm": 1.0634104893844392, "learning_rate": 2.621667430912353e-07, "loss": 0.0229, "step": 15321 }, { "epoch": 3.4862343572241183, "grad_norm": 1.8765587515002624, "learning_rate": 2.6209399368142987e-07, "loss": 0.0179, "step": 15322 }, { "epoch": 3.48646188850967, "grad_norm": 1.4794493753599869, "learning_rate": 2.620212516887588e-07, "loss": 0.0419, "step": 15323 }, { "epoch": 3.486689419795222, "grad_norm": 0.9760707334091709, "learning_rate": 2.619485171147086e-07, "loss": 0.0248, "step": 15324 }, { "epoch": 3.4869169510807736, "grad_norm": 0.8164379285818208, "learning_rate": 2.618757899607657e-07, "loss": 0.0046, "step": 15325 }, { "epoch": 3.4871444823663253, "grad_norm": 1.2012115920950828, "learning_rate": 2.6180307022841646e-07, "loss": 0.01, "step": 15326 }, { "epoch": 3.487372013651877, "grad_norm": 0.7530072651310784, "learning_rate": 2.6173035791914735e-07, "loss": 0.0152, "step": 15327 }, { "epoch": 3.487599544937429, "grad_norm": 0.8097480029312786, "learning_rate": 2.616576530344447e-07, "loss": 0.0076, "step": 15328 }, { "epoch": 3.4878270762229806, "grad_norm": 1.2678627067740935, "learning_rate": 2.615849555757941e-07, "loss": 0.0109, "step": 15329 }, { "epoch": 3.4880546075085324, "grad_norm": 1.9882044780997008, "learning_rate": 2.615122655446813e-07, "loss": 0.0394, "step": 15330 }, { "epoch": 3.488282138794084, "grad_norm": 1.021690158081257, "learning_rate": 2.6143958294259225e-07, "loss": 0.0118, "step": 15331 }, { "epoch": 3.488509670079636, "grad_norm": 1.8491746218965592, "learning_rate": 2.613669077710122e-07, "loss": 0.0229, "step": 15332 }, { "epoch": 3.4887372013651876, "grad_norm": 0.907966410079638, "learning_rate": 2.6129424003142646e-07, "loss": 0.008, "step": 15333 }, { "epoch": 3.4889647326507394, "grad_norm": 1.5537266518833495, "learning_rate": 2.6122157972532036e-07, "loss": 0.0658, "step": 15334 }, { "epoch": 3.489192263936291, "grad_norm": 2.4486838730118023, "learning_rate": 2.6114892685417926e-07, "loss": 0.0507, "step": 15335 }, { "epoch": 3.489419795221843, "grad_norm": 1.4433299388258842, "learning_rate": 2.610762814194877e-07, "loss": 0.0774, "step": 15336 }, { "epoch": 3.4896473265073946, "grad_norm": 2.081271387145862, "learning_rate": 2.6100364342273016e-07, "loss": 0.0099, "step": 15337 }, { "epoch": 3.4898748577929464, "grad_norm": 1.6996863759529877, "learning_rate": 2.609310128653918e-07, "loss": 0.0259, "step": 15338 }, { "epoch": 3.490102389078498, "grad_norm": 0.8348418261653794, "learning_rate": 2.608583897489566e-07, "loss": 0.0106, "step": 15339 }, { "epoch": 3.49032992036405, "grad_norm": 2.5963662042762716, "learning_rate": 2.6078577407490896e-07, "loss": 0.0525, "step": 15340 }, { "epoch": 3.4905574516496016, "grad_norm": 1.1284033319148845, "learning_rate": 2.6071316584473304e-07, "loss": 0.0222, "step": 15341 }, { "epoch": 3.4907849829351534, "grad_norm": 1.9394419216608918, "learning_rate": 2.6064056505991315e-07, "loss": 0.0213, "step": 15342 }, { "epoch": 3.4910125142207056, "grad_norm": 1.4549215897537366, "learning_rate": 2.6056797172193275e-07, "loss": 0.0283, "step": 15343 }, { "epoch": 3.491240045506257, "grad_norm": 1.226171961242363, "learning_rate": 2.6049538583227543e-07, "loss": 0.0439, "step": 15344 }, { "epoch": 3.491467576791809, "grad_norm": 2.5061816315120904, "learning_rate": 2.6042280739242503e-07, "loss": 0.0266, "step": 15345 }, { "epoch": 3.4916951080773604, "grad_norm": 1.093188950506333, "learning_rate": 2.603502364038645e-07, "loss": 0.0105, "step": 15346 }, { "epoch": 3.4919226393629126, "grad_norm": 1.030155856801887, "learning_rate": 2.602776728680774e-07, "loss": 0.0146, "step": 15347 }, { "epoch": 3.4921501706484643, "grad_norm": 1.5939119827649166, "learning_rate": 2.6020511678654695e-07, "loss": 0.0149, "step": 15348 }, { "epoch": 3.492377701934016, "grad_norm": 1.6737761168524612, "learning_rate": 2.6013256816075546e-07, "loss": 0.038, "step": 15349 }, { "epoch": 3.492605233219568, "grad_norm": 1.300245030636641, "learning_rate": 2.600600269921864e-07, "loss": 0.0177, "step": 15350 }, { "epoch": 3.4928327645051196, "grad_norm": 0.9644127314188987, "learning_rate": 2.5998749328232174e-07, "loss": 0.0711, "step": 15351 }, { "epoch": 3.4930602957906713, "grad_norm": 1.3933387021750474, "learning_rate": 2.599149670326445e-07, "loss": 0.0422, "step": 15352 }, { "epoch": 3.493287827076223, "grad_norm": 1.2292537571094126, "learning_rate": 2.5984244824463647e-07, "loss": 0.0502, "step": 15353 }, { "epoch": 3.493515358361775, "grad_norm": 1.2551463955897895, "learning_rate": 2.597699369197801e-07, "loss": 0.0592, "step": 15354 }, { "epoch": 3.4937428896473266, "grad_norm": 1.7896351924154659, "learning_rate": 2.5969743305955746e-07, "loss": 0.0236, "step": 15355 }, { "epoch": 3.4939704209328784, "grad_norm": 0.951593302476065, "learning_rate": 2.596249366654501e-07, "loss": 0.0233, "step": 15356 }, { "epoch": 3.49419795221843, "grad_norm": 1.61442684402443, "learning_rate": 2.595524477389401e-07, "loss": 0.0183, "step": 15357 }, { "epoch": 3.494425483503982, "grad_norm": 1.7042197924881128, "learning_rate": 2.594799662815085e-07, "loss": 0.0205, "step": 15358 }, { "epoch": 3.4946530147895336, "grad_norm": 2.309761663081815, "learning_rate": 2.5940749229463726e-07, "loss": 0.0346, "step": 15359 }, { "epoch": 3.4948805460750854, "grad_norm": 1.3094379732206123, "learning_rate": 2.593350257798071e-07, "loss": 0.0187, "step": 15360 }, { "epoch": 3.495108077360637, "grad_norm": 2.431733422754107, "learning_rate": 2.5926256673849933e-07, "loss": 0.0159, "step": 15361 }, { "epoch": 3.495335608646189, "grad_norm": 1.3916651768072137, "learning_rate": 2.591901151721951e-07, "loss": 0.0189, "step": 15362 }, { "epoch": 3.4955631399317406, "grad_norm": 1.0482150330819784, "learning_rate": 2.591176710823747e-07, "loss": 0.0086, "step": 15363 }, { "epoch": 3.4957906712172924, "grad_norm": 1.6464325967494275, "learning_rate": 2.590452344705193e-07, "loss": 0.062, "step": 15364 }, { "epoch": 3.496018202502844, "grad_norm": 2.2167783832659134, "learning_rate": 2.589728053381088e-07, "loss": 0.0658, "step": 15365 }, { "epoch": 3.496245733788396, "grad_norm": 1.3896336678050525, "learning_rate": 2.589003836866239e-07, "loss": 0.0962, "step": 15366 }, { "epoch": 3.4964732650739476, "grad_norm": 2.420112407976772, "learning_rate": 2.5882796951754485e-07, "loss": 0.0335, "step": 15367 }, { "epoch": 3.4967007963594994, "grad_norm": 1.2483701721253724, "learning_rate": 2.5875556283235124e-07, "loss": 0.0186, "step": 15368 }, { "epoch": 3.496928327645051, "grad_norm": 1.0690629107639853, "learning_rate": 2.586831636325235e-07, "loss": 0.058, "step": 15369 }, { "epoch": 3.497155858930603, "grad_norm": 1.9972037969272871, "learning_rate": 2.586107719195407e-07, "loss": 0.0619, "step": 15370 }, { "epoch": 3.4973833902161546, "grad_norm": 2.39237889264841, "learning_rate": 2.5853838769488297e-07, "loss": 0.0801, "step": 15371 }, { "epoch": 3.4976109215017064, "grad_norm": 2.673008397752983, "learning_rate": 2.5846601096002925e-07, "loss": 0.0174, "step": 15372 }, { "epoch": 3.497838452787258, "grad_norm": 1.0123344925186444, "learning_rate": 2.5839364171645895e-07, "loss": 0.0642, "step": 15373 }, { "epoch": 3.49806598407281, "grad_norm": 1.0544094295409463, "learning_rate": 2.583212799656515e-07, "loss": 0.0282, "step": 15374 }, { "epoch": 3.4982935153583616, "grad_norm": 1.4387279687250052, "learning_rate": 2.5824892570908526e-07, "loss": 0.0271, "step": 15375 }, { "epoch": 3.4985210466439134, "grad_norm": 1.1388110745887432, "learning_rate": 2.5817657894823954e-07, "loss": 0.022, "step": 15376 }, { "epoch": 3.498748577929465, "grad_norm": 0.638775029805815, "learning_rate": 2.581042396845925e-07, "loss": 0.0044, "step": 15377 }, { "epoch": 3.498976109215017, "grad_norm": 1.0805943808716787, "learning_rate": 2.5803190791962317e-07, "loss": 0.0074, "step": 15378 }, { "epoch": 3.4992036405005686, "grad_norm": 1.4033989907778937, "learning_rate": 2.579595836548093e-07, "loss": 0.0545, "step": 15379 }, { "epoch": 3.4994311717861204, "grad_norm": 3.3193598529218122, "learning_rate": 2.5788726689162926e-07, "loss": 0.0452, "step": 15380 }, { "epoch": 3.499658703071672, "grad_norm": 1.5355707068728912, "learning_rate": 2.5781495763156146e-07, "loss": 0.0469, "step": 15381 }, { "epoch": 3.4998862343572243, "grad_norm": 2.753049603258507, "learning_rate": 2.5774265587608313e-07, "loss": 0.0279, "step": 15382 }, { "epoch": 3.5001137656427757, "grad_norm": 1.4038622266253833, "learning_rate": 2.5767036162667266e-07, "loss": 0.0995, "step": 15383 }, { "epoch": 3.500341296928328, "grad_norm": 0.763646334228079, "learning_rate": 2.57598074884807e-07, "loss": 0.0059, "step": 15384 }, { "epoch": 3.500568828213879, "grad_norm": 1.65318904490769, "learning_rate": 2.57525795651964e-07, "loss": 0.0118, "step": 15385 }, { "epoch": 3.5007963594994314, "grad_norm": 1.1294261557976126, "learning_rate": 2.574535239296206e-07, "loss": 0.026, "step": 15386 }, { "epoch": 3.5010238907849827, "grad_norm": 1.379627040332973, "learning_rate": 2.5738125971925406e-07, "loss": 0.0562, "step": 15387 }, { "epoch": 3.501251422070535, "grad_norm": 1.5664962592107112, "learning_rate": 2.5730900302234145e-07, "loss": 0.0323, "step": 15388 }, { "epoch": 3.5014789533560866, "grad_norm": 1.2725562302930402, "learning_rate": 2.5723675384035925e-07, "loss": 0.022, "step": 15389 }, { "epoch": 3.5017064846416384, "grad_norm": 1.6360575041561838, "learning_rate": 2.571645121747844e-07, "loss": 0.1385, "step": 15390 }, { "epoch": 3.50193401592719, "grad_norm": 1.3781054129612047, "learning_rate": 2.570922780270932e-07, "loss": 0.0802, "step": 15391 }, { "epoch": 3.502161547212742, "grad_norm": 1.7569850017522877, "learning_rate": 2.570200513987622e-07, "loss": 0.0737, "step": 15392 }, { "epoch": 3.5023890784982936, "grad_norm": 1.153830944145152, "learning_rate": 2.569478322912671e-07, "loss": 0.0203, "step": 15393 }, { "epoch": 3.5026166097838454, "grad_norm": 0.886327561789869, "learning_rate": 2.5687562070608434e-07, "loss": 0.0298, "step": 15394 }, { "epoch": 3.502844141069397, "grad_norm": 2.1083634675814946, "learning_rate": 2.5680341664469e-07, "loss": 0.0247, "step": 15395 }, { "epoch": 3.503071672354949, "grad_norm": 1.84224373815693, "learning_rate": 2.5673122010855916e-07, "loss": 0.1582, "step": 15396 }, { "epoch": 3.5032992036405006, "grad_norm": 2.4896427841614894, "learning_rate": 2.56659031099168e-07, "loss": 0.0526, "step": 15397 }, { "epoch": 3.5035267349260524, "grad_norm": 2.7456826355301547, "learning_rate": 2.5658684961799164e-07, "loss": 0.0283, "step": 15398 }, { "epoch": 3.503754266211604, "grad_norm": 2.665239593184904, "learning_rate": 2.5651467566650516e-07, "loss": 0.0314, "step": 15399 }, { "epoch": 3.503981797497156, "grad_norm": 0.7042322687359703, "learning_rate": 2.564425092461839e-07, "loss": 0.0038, "step": 15400 }, { "epoch": 3.5042093287827076, "grad_norm": 1.963739641420533, "learning_rate": 2.5637035035850274e-07, "loss": 0.0206, "step": 15401 }, { "epoch": 3.5044368600682594, "grad_norm": 1.4527567451848191, "learning_rate": 2.562981990049367e-07, "loss": 0.0181, "step": 15402 }, { "epoch": 3.504664391353811, "grad_norm": 1.0269447482236778, "learning_rate": 2.5622605518695997e-07, "loss": 0.0242, "step": 15403 }, { "epoch": 3.504891922639363, "grad_norm": 3.5443775224845773, "learning_rate": 2.561539189060476e-07, "loss": 0.0305, "step": 15404 }, { "epoch": 3.5051194539249146, "grad_norm": 1.7463683892614872, "learning_rate": 2.5608179016367354e-07, "loss": 0.0498, "step": 15405 }, { "epoch": 3.5053469852104664, "grad_norm": 1.4988700969619684, "learning_rate": 2.5600966896131187e-07, "loss": 0.0274, "step": 15406 }, { "epoch": 3.505574516496018, "grad_norm": 1.3102131440846136, "learning_rate": 2.559375553004368e-07, "loss": 0.0463, "step": 15407 }, { "epoch": 3.50580204778157, "grad_norm": 1.2039508166955026, "learning_rate": 2.5586544918252224e-07, "loss": 0.0656, "step": 15408 }, { "epoch": 3.5060295790671216, "grad_norm": 1.2586457242313196, "learning_rate": 2.5579335060904196e-07, "loss": 0.0692, "step": 15409 }, { "epoch": 3.5062571103526734, "grad_norm": 0.622676488330328, "learning_rate": 2.5572125958146925e-07, "loss": 0.0087, "step": 15410 }, { "epoch": 3.506484641638225, "grad_norm": 1.3986997059886213, "learning_rate": 2.5564917610127795e-07, "loss": 0.0729, "step": 15411 }, { "epoch": 3.506712172923777, "grad_norm": 2.3612221646633476, "learning_rate": 2.5557710016994105e-07, "loss": 0.0334, "step": 15412 }, { "epoch": 3.5069397042093287, "grad_norm": 1.7895064075187574, "learning_rate": 2.5550503178893143e-07, "loss": 0.1146, "step": 15413 }, { "epoch": 3.5071672354948804, "grad_norm": 1.1166511981296527, "learning_rate": 2.5543297095972224e-07, "loss": 0.0347, "step": 15414 }, { "epoch": 3.507394766780432, "grad_norm": 1.1331924838864078, "learning_rate": 2.553609176837863e-07, "loss": 0.0246, "step": 15415 }, { "epoch": 3.507622298065984, "grad_norm": 1.897061085603471, "learning_rate": 2.552888719625965e-07, "loss": 0.0307, "step": 15416 }, { "epoch": 3.507849829351536, "grad_norm": 3.287950079036191, "learning_rate": 2.5521683379762507e-07, "loss": 0.0369, "step": 15417 }, { "epoch": 3.5080773606370874, "grad_norm": 1.1743168339742593, "learning_rate": 2.55144803190344e-07, "loss": 0.0082, "step": 15418 }, { "epoch": 3.5083048919226396, "grad_norm": 1.0485812482194363, "learning_rate": 2.5507278014222614e-07, "loss": 0.0184, "step": 15419 }, { "epoch": 3.508532423208191, "grad_norm": 2.2468065954321137, "learning_rate": 2.550007646547429e-07, "loss": 0.0216, "step": 15420 }, { "epoch": 3.508759954493743, "grad_norm": 1.789972304675693, "learning_rate": 2.5492875672936637e-07, "loss": 0.0346, "step": 15421 }, { "epoch": 3.5089874857792944, "grad_norm": 1.2778285059525158, "learning_rate": 2.5485675636756834e-07, "loss": 0.0119, "step": 15422 }, { "epoch": 3.5092150170648466, "grad_norm": 2.28092801811861, "learning_rate": 2.547847635708205e-07, "loss": 0.0714, "step": 15423 }, { "epoch": 3.509442548350398, "grad_norm": 2.0001014045545404, "learning_rate": 2.547127783405941e-07, "loss": 0.0186, "step": 15424 }, { "epoch": 3.50967007963595, "grad_norm": 1.403138481335072, "learning_rate": 2.5464080067836015e-07, "loss": 0.0172, "step": 15425 }, { "epoch": 3.5098976109215014, "grad_norm": 1.985404415564708, "learning_rate": 2.545688305855902e-07, "loss": 0.0268, "step": 15426 }, { "epoch": 3.5101251422070536, "grad_norm": 1.0597140943367578, "learning_rate": 2.5449686806375445e-07, "loss": 0.0259, "step": 15427 }, { "epoch": 3.5103526734926054, "grad_norm": 2.613132417951668, "learning_rate": 2.544249131143247e-07, "loss": 0.0225, "step": 15428 }, { "epoch": 3.510580204778157, "grad_norm": 1.7850824824138443, "learning_rate": 2.5435296573877076e-07, "loss": 0.0193, "step": 15429 }, { "epoch": 3.510807736063709, "grad_norm": 1.298265653565993, "learning_rate": 2.5428102593856363e-07, "loss": 0.0163, "step": 15430 }, { "epoch": 3.5110352673492606, "grad_norm": 1.7283264218565462, "learning_rate": 2.5420909371517336e-07, "loss": 0.0194, "step": 15431 }, { "epoch": 3.5112627986348124, "grad_norm": 0.7314098852013442, "learning_rate": 2.5413716907007e-07, "loss": 0.0073, "step": 15432 }, { "epoch": 3.511490329920364, "grad_norm": 4.729208781888009, "learning_rate": 2.5406525200472357e-07, "loss": 0.0568, "step": 15433 }, { "epoch": 3.511717861205916, "grad_norm": 1.686106477749392, "learning_rate": 2.5399334252060414e-07, "loss": 0.0225, "step": 15434 }, { "epoch": 3.5119453924914676, "grad_norm": 2.4058979961332385, "learning_rate": 2.5392144061918153e-07, "loss": 0.0341, "step": 15435 }, { "epoch": 3.5121729237770194, "grad_norm": 0.7364555215913106, "learning_rate": 2.5384954630192506e-07, "loss": 0.008, "step": 15436 }, { "epoch": 3.512400455062571, "grad_norm": 1.6264613555501226, "learning_rate": 2.5377765957030397e-07, "loss": 0.0273, "step": 15437 }, { "epoch": 3.512627986348123, "grad_norm": 2.66538757828235, "learning_rate": 2.537057804257878e-07, "loss": 0.0224, "step": 15438 }, { "epoch": 3.5128555176336747, "grad_norm": 1.4113017574268432, "learning_rate": 2.536339088698453e-07, "loss": 0.0367, "step": 15439 }, { "epoch": 3.5130830489192264, "grad_norm": 1.1688955291004894, "learning_rate": 2.535620449039456e-07, "loss": 0.0306, "step": 15440 }, { "epoch": 3.513310580204778, "grad_norm": 0.8790506394703284, "learning_rate": 2.5349018852955745e-07, "loss": 0.0394, "step": 15441 }, { "epoch": 3.51353811149033, "grad_norm": 1.29677266294789, "learning_rate": 2.534183397481496e-07, "loss": 0.0796, "step": 15442 }, { "epoch": 3.5137656427758817, "grad_norm": 1.393389240882117, "learning_rate": 2.533464985611905e-07, "loss": 0.0127, "step": 15443 }, { "epoch": 3.5139931740614334, "grad_norm": 0.8131824726637463, "learning_rate": 2.53274664970148e-07, "loss": 0.0104, "step": 15444 }, { "epoch": 3.514220705346985, "grad_norm": 1.079014261813888, "learning_rate": 2.5320283897649075e-07, "loss": 0.0133, "step": 15445 }, { "epoch": 3.514448236632537, "grad_norm": 1.2865942839873383, "learning_rate": 2.531310205816864e-07, "loss": 0.0098, "step": 15446 }, { "epoch": 3.5146757679180887, "grad_norm": 3.2110352591777716, "learning_rate": 2.530592097872029e-07, "loss": 0.0318, "step": 15447 }, { "epoch": 3.5149032992036404, "grad_norm": 0.8779439856979416, "learning_rate": 2.52987406594508e-07, "loss": 0.0474, "step": 15448 }, { "epoch": 3.515130830489192, "grad_norm": 2.0764576082848007, "learning_rate": 2.5291561100506947e-07, "loss": 0.0151, "step": 15449 }, { "epoch": 3.515358361774744, "grad_norm": 1.4106598331870297, "learning_rate": 2.5284382302035434e-07, "loss": 0.0117, "step": 15450 }, { "epoch": 3.5155858930602957, "grad_norm": 4.10415475636304, "learning_rate": 2.5277204264182974e-07, "loss": 0.0231, "step": 15451 }, { "epoch": 3.5158134243458474, "grad_norm": 2.2553160354405875, "learning_rate": 2.5270026987096306e-07, "loss": 0.0105, "step": 15452 }, { "epoch": 3.516040955631399, "grad_norm": 1.4574678353102435, "learning_rate": 2.526285047092208e-07, "loss": 0.0853, "step": 15453 }, { "epoch": 3.516268486916951, "grad_norm": 2.8532610544977013, "learning_rate": 2.5255674715807e-07, "loss": 0.0813, "step": 15454 }, { "epoch": 3.5164960182025027, "grad_norm": 1.4773224512630188, "learning_rate": 2.524849972189773e-07, "loss": 0.0489, "step": 15455 }, { "epoch": 3.516723549488055, "grad_norm": 1.6740128842095459, "learning_rate": 2.524132548934089e-07, "loss": 0.0191, "step": 15456 }, { "epoch": 3.516951080773606, "grad_norm": 1.2230551172730557, "learning_rate": 2.5234152018283143e-07, "loss": 0.1093, "step": 15457 }, { "epoch": 3.5171786120591584, "grad_norm": 1.7408732775412932, "learning_rate": 2.522697930887105e-07, "loss": 0.0237, "step": 15458 }, { "epoch": 3.5174061433447097, "grad_norm": 1.2084021425124398, "learning_rate": 2.521980736125127e-07, "loss": 0.0368, "step": 15459 }, { "epoch": 3.517633674630262, "grad_norm": 1.8061363682238836, "learning_rate": 2.521263617557032e-07, "loss": 0.0144, "step": 15460 }, { "epoch": 3.517861205915813, "grad_norm": 1.5019180862189292, "learning_rate": 2.520546575197481e-07, "loss": 0.0234, "step": 15461 }, { "epoch": 3.5180887372013654, "grad_norm": 1.3480436006394905, "learning_rate": 2.5198296090611286e-07, "loss": 0.0284, "step": 15462 }, { "epoch": 3.5183162684869167, "grad_norm": 1.2900794079548645, "learning_rate": 2.519112719162626e-07, "loss": 0.0397, "step": 15463 }, { "epoch": 3.518543799772469, "grad_norm": 1.1350709506291174, "learning_rate": 2.518395905516629e-07, "loss": 0.0849, "step": 15464 }, { "epoch": 3.51877133105802, "grad_norm": 1.4177755507406802, "learning_rate": 2.517679168137784e-07, "loss": 0.0783, "step": 15465 }, { "epoch": 3.5189988623435724, "grad_norm": 1.8662483512133767, "learning_rate": 2.516962507040742e-07, "loss": 0.0311, "step": 15466 }, { "epoch": 3.519226393629124, "grad_norm": 0.9047986383012805, "learning_rate": 2.516245922240148e-07, "loss": 0.0658, "step": 15467 }, { "epoch": 3.519453924914676, "grad_norm": 1.1229588721580563, "learning_rate": 2.5155294137506495e-07, "loss": 0.0097, "step": 15468 }, { "epoch": 3.5196814562002277, "grad_norm": 2.2520155026937596, "learning_rate": 2.5148129815868926e-07, "loss": 0.0307, "step": 15469 }, { "epoch": 3.5199089874857794, "grad_norm": 0.8420604835854284, "learning_rate": 2.514096625763516e-07, "loss": 0.0225, "step": 15470 }, { "epoch": 3.520136518771331, "grad_norm": 1.592094025954882, "learning_rate": 2.513380346295164e-07, "loss": 0.0887, "step": 15471 }, { "epoch": 3.520364050056883, "grad_norm": 1.6299961700101637, "learning_rate": 2.512664143196472e-07, "loss": 0.0914, "step": 15472 }, { "epoch": 3.5205915813424347, "grad_norm": 1.6435586311732118, "learning_rate": 2.5119480164820833e-07, "loss": 0.0203, "step": 15473 }, { "epoch": 3.5208191126279864, "grad_norm": 2.029589546983785, "learning_rate": 2.511231966166628e-07, "loss": 0.0615, "step": 15474 }, { "epoch": 3.521046643913538, "grad_norm": 2.3423294174552787, "learning_rate": 2.5105159922647447e-07, "loss": 0.0079, "step": 15475 }, { "epoch": 3.52127417519909, "grad_norm": 1.1150043777992498, "learning_rate": 2.5098000947910684e-07, "loss": 0.0205, "step": 15476 }, { "epoch": 3.5215017064846417, "grad_norm": 1.3696685010164515, "learning_rate": 2.509084273760226e-07, "loss": 0.0516, "step": 15477 }, { "epoch": 3.5217292377701934, "grad_norm": 2.24854609002265, "learning_rate": 2.508368529186852e-07, "loss": 0.0231, "step": 15478 }, { "epoch": 3.521956769055745, "grad_norm": 1.4640449521021968, "learning_rate": 2.5076528610855703e-07, "loss": 0.0112, "step": 15479 }, { "epoch": 3.522184300341297, "grad_norm": 1.6341153293738788, "learning_rate": 2.5069372694710125e-07, "loss": 0.114, "step": 15480 }, { "epoch": 3.5224118316268487, "grad_norm": 1.2091466178923054, "learning_rate": 2.5062217543578003e-07, "loss": 0.0556, "step": 15481 }, { "epoch": 3.5226393629124004, "grad_norm": 1.4377022443242953, "learning_rate": 2.505506315760559e-07, "loss": 0.069, "step": 15482 }, { "epoch": 3.522866894197952, "grad_norm": 1.635564827454851, "learning_rate": 2.5047909536939123e-07, "loss": 0.1395, "step": 15483 }, { "epoch": 3.523094425483504, "grad_norm": 2.27552965210789, "learning_rate": 2.504075668172477e-07, "loss": 0.1079, "step": 15484 }, { "epoch": 3.5233219567690557, "grad_norm": 1.1813462612040293, "learning_rate": 2.503360459210878e-07, "loss": 0.0455, "step": 15485 }, { "epoch": 3.5235494880546074, "grad_norm": 1.4028097096380328, "learning_rate": 2.5026453268237265e-07, "loss": 0.0911, "step": 15486 }, { "epoch": 3.523777019340159, "grad_norm": 1.4393251252991648, "learning_rate": 2.5019302710256427e-07, "loss": 0.0768, "step": 15487 }, { "epoch": 3.524004550625711, "grad_norm": 1.4126588442541914, "learning_rate": 2.501215291831241e-07, "loss": 0.0406, "step": 15488 }, { "epoch": 3.5242320819112627, "grad_norm": 0.6689450364687646, "learning_rate": 2.500500389255131e-07, "loss": 0.0156, "step": 15489 }, { "epoch": 3.5244596131968144, "grad_norm": 2.4295411665009308, "learning_rate": 2.4997855633119287e-07, "loss": 0.0764, "step": 15490 }, { "epoch": 3.524687144482366, "grad_norm": 1.5887861922166313, "learning_rate": 2.499070814016239e-07, "loss": 0.0304, "step": 15491 }, { "epoch": 3.524914675767918, "grad_norm": 1.0424908019228725, "learning_rate": 2.4983561413826753e-07, "loss": 0.0176, "step": 15492 }, { "epoch": 3.5251422070534697, "grad_norm": 2.775824634561684, "learning_rate": 2.4976415454258386e-07, "loss": 0.047, "step": 15493 }, { "epoch": 3.5253697383390215, "grad_norm": 1.2049332352730677, "learning_rate": 2.4969270261603363e-07, "loss": 0.0757, "step": 15494 }, { "epoch": 3.5255972696245736, "grad_norm": 1.3599073984335266, "learning_rate": 2.496212583600774e-07, "loss": 0.0997, "step": 15495 }, { "epoch": 3.525824800910125, "grad_norm": 1.3571817341754824, "learning_rate": 2.49549821776175e-07, "loss": 0.0516, "step": 15496 }, { "epoch": 3.526052332195677, "grad_norm": 1.033421805657187, "learning_rate": 2.4947839286578686e-07, "loss": 0.012, "step": 15497 }, { "epoch": 3.5262798634812285, "grad_norm": 1.633443485123172, "learning_rate": 2.4940697163037243e-07, "loss": 0.0509, "step": 15498 }, { "epoch": 3.5265073947667807, "grad_norm": 1.3356917400830146, "learning_rate": 2.4933555807139187e-07, "loss": 0.104, "step": 15499 }, { "epoch": 3.526734926052332, "grad_norm": 0.7913510446235369, "learning_rate": 2.492641521903042e-07, "loss": 0.0242, "step": 15500 }, { "epoch": 3.526962457337884, "grad_norm": 3.7339586469211326, "learning_rate": 2.491927539885692e-07, "loss": 0.0958, "step": 15501 }, { "epoch": 3.5271899886234355, "grad_norm": 1.467017795507774, "learning_rate": 2.4912136346764627e-07, "loss": 0.0144, "step": 15502 }, { "epoch": 3.5274175199089877, "grad_norm": 2.0926950065646506, "learning_rate": 2.49049980628994e-07, "loss": 0.0355, "step": 15503 }, { "epoch": 3.527645051194539, "grad_norm": 2.9118936544684693, "learning_rate": 2.489786054740719e-07, "loss": 0.0553, "step": 15504 }, { "epoch": 3.527872582480091, "grad_norm": 1.2663588254178175, "learning_rate": 2.489072380043384e-07, "loss": 0.0167, "step": 15505 }, { "epoch": 3.528100113765643, "grad_norm": 1.5070851902876832, "learning_rate": 2.48835878221252e-07, "loss": 0.0915, "step": 15506 }, { "epoch": 3.5283276450511947, "grad_norm": 1.3301825377385597, "learning_rate": 2.487645261262713e-07, "loss": 0.0173, "step": 15507 }, { "epoch": 3.5285551763367464, "grad_norm": 1.5444313178476419, "learning_rate": 2.4869318172085467e-07, "loss": 0.0105, "step": 15508 }, { "epoch": 3.528782707622298, "grad_norm": 1.4177073393231, "learning_rate": 2.486218450064605e-07, "loss": 0.0684, "step": 15509 }, { "epoch": 3.52901023890785, "grad_norm": 6.011500149184285, "learning_rate": 2.4855051598454626e-07, "loss": 0.0642, "step": 15510 }, { "epoch": 3.5292377701934017, "grad_norm": 1.9090725259558894, "learning_rate": 2.484791946565702e-07, "loss": 0.0863, "step": 15511 }, { "epoch": 3.5294653014789534, "grad_norm": 0.9190687584702811, "learning_rate": 2.484078810239898e-07, "loss": 0.0052, "step": 15512 }, { "epoch": 3.529692832764505, "grad_norm": 1.370940765637802, "learning_rate": 2.4833657508826256e-07, "loss": 0.017, "step": 15513 }, { "epoch": 3.529920364050057, "grad_norm": 1.9142721410871129, "learning_rate": 2.4826527685084576e-07, "loss": 0.0264, "step": 15514 }, { "epoch": 3.5301478953356087, "grad_norm": 1.1470168755824772, "learning_rate": 2.481939863131968e-07, "loss": 0.0755, "step": 15515 }, { "epoch": 3.5303754266211604, "grad_norm": 1.7395378739322815, "learning_rate": 2.481227034767729e-07, "loss": 0.0937, "step": 15516 }, { "epoch": 3.530602957906712, "grad_norm": 0.836823836469414, "learning_rate": 2.480514283430305e-07, "loss": 0.0287, "step": 15517 }, { "epoch": 3.530830489192264, "grad_norm": 1.1797579946576169, "learning_rate": 2.479801609134267e-07, "loss": 0.0936, "step": 15518 }, { "epoch": 3.5310580204778157, "grad_norm": 1.5990371568946538, "learning_rate": 2.4790890118941805e-07, "loss": 0.0806, "step": 15519 }, { "epoch": 3.5312855517633674, "grad_norm": 5.038482741749592, "learning_rate": 2.4783764917246054e-07, "loss": 0.0283, "step": 15520 }, { "epoch": 3.531513083048919, "grad_norm": 1.1123817759082977, "learning_rate": 2.4776640486401075e-07, "loss": 0.0261, "step": 15521 }, { "epoch": 3.531740614334471, "grad_norm": 0.9090076328722406, "learning_rate": 2.4769516826552484e-07, "loss": 0.042, "step": 15522 }, { "epoch": 3.5319681456200227, "grad_norm": 1.2496285553050879, "learning_rate": 2.4762393937845886e-07, "loss": 0.0912, "step": 15523 }, { "epoch": 3.5321956769055745, "grad_norm": 0.7828146232023303, "learning_rate": 2.4755271820426843e-07, "loss": 0.0093, "step": 15524 }, { "epoch": 3.532423208191126, "grad_norm": 1.0179373017606694, "learning_rate": 2.4748150474440897e-07, "loss": 0.018, "step": 15525 }, { "epoch": 3.532650739476678, "grad_norm": 1.699961065410633, "learning_rate": 2.4741029900033637e-07, "loss": 0.057, "step": 15526 }, { "epoch": 3.5328782707622297, "grad_norm": 1.0022302711186992, "learning_rate": 2.473391009735055e-07, "loss": 0.034, "step": 15527 }, { "epoch": 3.5331058020477815, "grad_norm": 1.0886292791993333, "learning_rate": 2.472679106653718e-07, "loss": 0.0066, "step": 15528 }, { "epoch": 3.533333333333333, "grad_norm": 1.4865095478413133, "learning_rate": 2.471967280773902e-07, "loss": 0.0701, "step": 15529 }, { "epoch": 3.533560864618885, "grad_norm": 1.1635780771576147, "learning_rate": 2.4712555321101574e-07, "loss": 0.0123, "step": 15530 }, { "epoch": 3.5337883959044367, "grad_norm": 1.0494566863790034, "learning_rate": 2.47054386067703e-07, "loss": 0.0076, "step": 15531 }, { "epoch": 3.5340159271899885, "grad_norm": 1.5388553247418095, "learning_rate": 2.469832266489062e-07, "loss": 0.0178, "step": 15532 }, { "epoch": 3.5342434584755402, "grad_norm": 1.0375002987829842, "learning_rate": 2.4691207495608006e-07, "loss": 0.0149, "step": 15533 }, { "epoch": 3.5344709897610924, "grad_norm": 1.5172367939274662, "learning_rate": 2.4684093099067856e-07, "loss": 0.0716, "step": 15534 }, { "epoch": 3.5346985210466437, "grad_norm": 1.1748832781885348, "learning_rate": 2.4676979475415586e-07, "loss": 0.0256, "step": 15535 }, { "epoch": 3.534926052332196, "grad_norm": 1.7998803843049718, "learning_rate": 2.466986662479658e-07, "loss": 0.1104, "step": 15536 }, { "epoch": 3.5351535836177472, "grad_norm": 2.457807421519632, "learning_rate": 2.466275454735625e-07, "loss": 0.0596, "step": 15537 }, { "epoch": 3.5353811149032994, "grad_norm": 1.3797570475465557, "learning_rate": 2.4655643243239913e-07, "loss": 0.013, "step": 15538 }, { "epoch": 3.5356086461888507, "grad_norm": 1.3396613936608213, "learning_rate": 2.46485327125929e-07, "loss": 0.0152, "step": 15539 }, { "epoch": 3.535836177474403, "grad_norm": 1.5625238438904292, "learning_rate": 2.464142295556058e-07, "loss": 0.0106, "step": 15540 }, { "epoch": 3.5360637087599542, "grad_norm": 0.9097885355304977, "learning_rate": 2.4634313972288214e-07, "loss": 0.0547, "step": 15541 }, { "epoch": 3.5362912400455064, "grad_norm": 0.4212728044151556, "learning_rate": 2.462720576292112e-07, "loss": 0.003, "step": 15542 }, { "epoch": 3.536518771331058, "grad_norm": 1.3380319393933513, "learning_rate": 2.462009832760461e-07, "loss": 0.0781, "step": 15543 }, { "epoch": 3.53674630261661, "grad_norm": 1.2249578237024648, "learning_rate": 2.461299166648389e-07, "loss": 0.0237, "step": 15544 }, { "epoch": 3.5369738339021617, "grad_norm": 1.254427672060212, "learning_rate": 2.4605885779704255e-07, "loss": 0.0686, "step": 15545 }, { "epoch": 3.5372013651877134, "grad_norm": 3.050183870676653, "learning_rate": 2.459878066741089e-07, "loss": 0.0364, "step": 15546 }, { "epoch": 3.537428896473265, "grad_norm": 1.4535398200622602, "learning_rate": 2.459167632974907e-07, "loss": 0.0902, "step": 15547 }, { "epoch": 3.537656427758817, "grad_norm": 2.0365442075395346, "learning_rate": 2.458457276686391e-07, "loss": 0.1075, "step": 15548 }, { "epoch": 3.5378839590443687, "grad_norm": 1.0460707204423247, "learning_rate": 2.457746997890068e-07, "loss": 0.0656, "step": 15549 }, { "epoch": 3.5381114903299204, "grad_norm": 2.6669120806496314, "learning_rate": 2.4570367966004517e-07, "loss": 0.132, "step": 15550 }, { "epoch": 3.538339021615472, "grad_norm": 1.6486344579376138, "learning_rate": 2.4563266728320546e-07, "loss": 0.0165, "step": 15551 }, { "epoch": 3.538566552901024, "grad_norm": 1.175331495653259, "learning_rate": 2.455616626599395e-07, "loss": 0.0453, "step": 15552 }, { "epoch": 3.5387940841865757, "grad_norm": 1.4909529228223226, "learning_rate": 2.4549066579169797e-07, "loss": 0.013, "step": 15553 }, { "epoch": 3.5390216154721275, "grad_norm": 1.6284926291457813, "learning_rate": 2.454196766799322e-07, "loss": 0.0421, "step": 15554 }, { "epoch": 3.539249146757679, "grad_norm": 1.450206759119342, "learning_rate": 2.453486953260931e-07, "loss": 0.0166, "step": 15555 }, { "epoch": 3.539476678043231, "grad_norm": 1.0349338688525762, "learning_rate": 2.452777217316316e-07, "loss": 0.0317, "step": 15556 }, { "epoch": 3.5397042093287827, "grad_norm": 1.4869233150038785, "learning_rate": 2.45206755897998e-07, "loss": 0.1158, "step": 15557 }, { "epoch": 3.5399317406143345, "grad_norm": 1.4293438163509875, "learning_rate": 2.4513579782664257e-07, "loss": 0.0976, "step": 15558 }, { "epoch": 3.540159271899886, "grad_norm": 1.569442537605322, "learning_rate": 2.4506484751901595e-07, "loss": 0.0284, "step": 15559 }, { "epoch": 3.540386803185438, "grad_norm": 1.9914822068207962, "learning_rate": 2.4499390497656784e-07, "loss": 0.0282, "step": 15560 }, { "epoch": 3.5406143344709897, "grad_norm": 1.2928078789812516, "learning_rate": 2.4492297020074826e-07, "loss": 0.0793, "step": 15561 }, { "epoch": 3.5408418657565415, "grad_norm": 0.6919531680991468, "learning_rate": 2.448520431930074e-07, "loss": 0.004, "step": 15562 }, { "epoch": 3.5410693970420932, "grad_norm": 1.2642417563904755, "learning_rate": 2.4478112395479433e-07, "loss": 0.0174, "step": 15563 }, { "epoch": 3.541296928327645, "grad_norm": 1.3888388997650936, "learning_rate": 2.4471021248755894e-07, "loss": 0.0211, "step": 15564 }, { "epoch": 3.5415244596131967, "grad_norm": 1.421567112154868, "learning_rate": 2.446393087927502e-07, "loss": 0.0261, "step": 15565 }, { "epoch": 3.5417519908987485, "grad_norm": 1.8711466197573932, "learning_rate": 2.4456841287181754e-07, "loss": 0.0211, "step": 15566 }, { "epoch": 3.5419795221843002, "grad_norm": 0.8869656414836038, "learning_rate": 2.444975247262096e-07, "loss": 0.009, "step": 15567 }, { "epoch": 3.542207053469852, "grad_norm": 1.7656766421309371, "learning_rate": 2.4442664435737535e-07, "loss": 0.0627, "step": 15568 }, { "epoch": 3.5424345847554037, "grad_norm": 0.9256570235388368, "learning_rate": 2.443557717667638e-07, "loss": 0.0076, "step": 15569 }, { "epoch": 3.5426621160409555, "grad_norm": 2.1325553821390804, "learning_rate": 2.4428490695582286e-07, "loss": 0.0328, "step": 15570 }, { "epoch": 3.5428896473265072, "grad_norm": 0.9370630129411086, "learning_rate": 2.442140499260014e-07, "loss": 0.0217, "step": 15571 }, { "epoch": 3.543117178612059, "grad_norm": 1.7713821156636973, "learning_rate": 2.441432006787473e-07, "loss": 0.0528, "step": 15572 }, { "epoch": 3.543344709897611, "grad_norm": 1.7656330266032172, "learning_rate": 2.440723592155087e-07, "loss": 0.0695, "step": 15573 }, { "epoch": 3.5435722411831625, "grad_norm": 1.5310403497850025, "learning_rate": 2.440015255377333e-07, "loss": 0.0849, "step": 15574 }, { "epoch": 3.5437997724687147, "grad_norm": 0.8613007392655174, "learning_rate": 2.43930699646869e-07, "loss": 0.0039, "step": 15575 }, { "epoch": 3.544027303754266, "grad_norm": 1.091650349644534, "learning_rate": 2.4385988154436346e-07, "loss": 0.0299, "step": 15576 }, { "epoch": 3.544254835039818, "grad_norm": 2.3445622357713236, "learning_rate": 2.4378907123166373e-07, "loss": 0.0487, "step": 15577 }, { "epoch": 3.5444823663253695, "grad_norm": 1.991982260068537, "learning_rate": 2.437182687102174e-07, "loss": 0.0312, "step": 15578 }, { "epoch": 3.5447098976109217, "grad_norm": 2.895141060449694, "learning_rate": 2.436474739814712e-07, "loss": 0.031, "step": 15579 }, { "epoch": 3.544937428896473, "grad_norm": 0.6077794168483958, "learning_rate": 2.435766870468725e-07, "loss": 0.0027, "step": 15580 }, { "epoch": 3.545164960182025, "grad_norm": 2.08311421836451, "learning_rate": 2.435059079078674e-07, "loss": 0.1029, "step": 15581 }, { "epoch": 3.545392491467577, "grad_norm": 2.1185639657699307, "learning_rate": 2.4343513656590303e-07, "loss": 0.077, "step": 15582 }, { "epoch": 3.5456200227531287, "grad_norm": 2.1852297957873352, "learning_rate": 2.4336437302242574e-07, "loss": 0.0269, "step": 15583 }, { "epoch": 3.5458475540386805, "grad_norm": 1.0885631378691798, "learning_rate": 2.432936172788816e-07, "loss": 0.0123, "step": 15584 }, { "epoch": 3.546075085324232, "grad_norm": 0.9915041763176655, "learning_rate": 2.432228693367171e-07, "loss": 0.0038, "step": 15585 }, { "epoch": 3.546302616609784, "grad_norm": 0.7516117995967515, "learning_rate": 2.431521291973777e-07, "loss": 0.0158, "step": 15586 }, { "epoch": 3.5465301478953357, "grad_norm": 1.4907397600991155, "learning_rate": 2.430813968623096e-07, "loss": 0.0471, "step": 15587 }, { "epoch": 3.5467576791808875, "grad_norm": 1.688686054458933, "learning_rate": 2.430106723329582e-07, "loss": 0.1423, "step": 15588 }, { "epoch": 3.546985210466439, "grad_norm": 1.2068859522739261, "learning_rate": 2.42939955610769e-07, "loss": 0.1001, "step": 15589 }, { "epoch": 3.547212741751991, "grad_norm": 1.6633839614109043, "learning_rate": 2.428692466971877e-07, "loss": 0.1121, "step": 15590 }, { "epoch": 3.5474402730375427, "grad_norm": 0.8111986368729226, "learning_rate": 2.4279854559365886e-07, "loss": 0.0155, "step": 15591 }, { "epoch": 3.5476678043230945, "grad_norm": 0.5962771631290074, "learning_rate": 2.4272785230162806e-07, "loss": 0.0025, "step": 15592 }, { "epoch": 3.5478953356086462, "grad_norm": 1.367829205647957, "learning_rate": 2.426571668225396e-07, "loss": 0.0375, "step": 15593 }, { "epoch": 3.548122866894198, "grad_norm": 2.2108688955798925, "learning_rate": 2.4258648915783863e-07, "loss": 0.0162, "step": 15594 }, { "epoch": 3.5483503981797497, "grad_norm": 0.917598416511136, "learning_rate": 2.4251581930896925e-07, "loss": 0.0663, "step": 15595 }, { "epoch": 3.5485779294653015, "grad_norm": 1.69604871001392, "learning_rate": 2.424451572773761e-07, "loss": 0.1076, "step": 15596 }, { "epoch": 3.5488054607508532, "grad_norm": 1.5831110919396045, "learning_rate": 2.4237450306450346e-07, "loss": 0.0291, "step": 15597 }, { "epoch": 3.549032992036405, "grad_norm": 0.3763317162600505, "learning_rate": 2.42303856671795e-07, "loss": 0.0018, "step": 15598 }, { "epoch": 3.5492605233219567, "grad_norm": 4.281971565193897, "learning_rate": 2.422332181006951e-07, "loss": 0.0475, "step": 15599 }, { "epoch": 3.5494880546075085, "grad_norm": 2.193256324521391, "learning_rate": 2.4216258735264725e-07, "loss": 0.0122, "step": 15600 }, { "epoch": 3.5497155858930602, "grad_norm": 1.8761067402129374, "learning_rate": 2.420919644290947e-07, "loss": 0.0383, "step": 15601 }, { "epoch": 3.549943117178612, "grad_norm": 1.3513102655022258, "learning_rate": 2.4202134933148117e-07, "loss": 0.0487, "step": 15602 }, { "epoch": 3.5501706484641637, "grad_norm": 1.468678015285961, "learning_rate": 2.4195074206124986e-07, "loss": 0.0523, "step": 15603 }, { "epoch": 3.5503981797497155, "grad_norm": 2.45405439915442, "learning_rate": 2.418801426198441e-07, "loss": 0.1219, "step": 15604 }, { "epoch": 3.5506257110352673, "grad_norm": 1.8702650511295689, "learning_rate": 2.418095510087063e-07, "loss": 0.093, "step": 15605 }, { "epoch": 3.550853242320819, "grad_norm": 1.2249016027745416, "learning_rate": 2.4173896722927975e-07, "loss": 0.0671, "step": 15606 }, { "epoch": 3.5510807736063708, "grad_norm": 0.6808952827560685, "learning_rate": 2.416683912830068e-07, "loss": 0.0151, "step": 15607 }, { "epoch": 3.5513083048919225, "grad_norm": 1.4641154292185607, "learning_rate": 2.4159782317132966e-07, "loss": 0.0218, "step": 15608 }, { "epoch": 3.5515358361774743, "grad_norm": 1.1185006855179749, "learning_rate": 2.4152726289569085e-07, "loss": 0.029, "step": 15609 }, { "epoch": 3.551763367463026, "grad_norm": 0.8956161067550987, "learning_rate": 2.414567104575325e-07, "loss": 0.0118, "step": 15610 }, { "epoch": 3.5519908987485778, "grad_norm": 0.7290538873463356, "learning_rate": 2.413861658582968e-07, "loss": 0.0042, "step": 15611 }, { "epoch": 3.55221843003413, "grad_norm": 2.364362331816741, "learning_rate": 2.413156290994253e-07, "loss": 0.0154, "step": 15612 }, { "epoch": 3.5524459613196813, "grad_norm": 3.288584337594145, "learning_rate": 2.4124510018235945e-07, "loss": 0.097, "step": 15613 }, { "epoch": 3.5526734926052335, "grad_norm": 1.2511364627393589, "learning_rate": 2.411745791085409e-07, "loss": 0.0525, "step": 15614 }, { "epoch": 3.5529010238907848, "grad_norm": 1.308217188108452, "learning_rate": 2.4110406587941103e-07, "loss": 0.0357, "step": 15615 }, { "epoch": 3.553128555176337, "grad_norm": 1.3989973883564253, "learning_rate": 2.410335604964112e-07, "loss": 0.0202, "step": 15616 }, { "epoch": 3.5533560864618883, "grad_norm": 1.1575250069146403, "learning_rate": 2.4096306296098196e-07, "loss": 0.0195, "step": 15617 }, { "epoch": 3.5535836177474405, "grad_norm": 0.8788297630175845, "learning_rate": 2.408925732745646e-07, "loss": 0.0515, "step": 15618 }, { "epoch": 3.553811149032992, "grad_norm": 1.296048838341406, "learning_rate": 2.408220914385996e-07, "loss": 0.0176, "step": 15619 }, { "epoch": 3.554038680318544, "grad_norm": 1.0357003716036794, "learning_rate": 2.407516174545273e-07, "loss": 0.0565, "step": 15620 }, { "epoch": 3.5542662116040957, "grad_norm": 0.7771099045645125, "learning_rate": 2.4068115132378814e-07, "loss": 0.0438, "step": 15621 }, { "epoch": 3.5544937428896475, "grad_norm": 0.7575785131308759, "learning_rate": 2.4061069304782243e-07, "loss": 0.0107, "step": 15622 }, { "epoch": 3.5547212741751992, "grad_norm": 1.5196692269185124, "learning_rate": 2.4054024262807036e-07, "loss": 0.0219, "step": 15623 }, { "epoch": 3.554948805460751, "grad_norm": 2.1526199000542383, "learning_rate": 2.404698000659714e-07, "loss": 0.0147, "step": 15624 }, { "epoch": 3.5551763367463027, "grad_norm": 0.6187529002464729, "learning_rate": 2.403993653629658e-07, "loss": 0.0033, "step": 15625 }, { "epoch": 3.5554038680318545, "grad_norm": 6.633695864278632, "learning_rate": 2.4032893852049274e-07, "loss": 0.0133, "step": 15626 }, { "epoch": 3.5556313993174062, "grad_norm": 1.7556409937486115, "learning_rate": 2.402585195399915e-07, "loss": 0.0931, "step": 15627 }, { "epoch": 3.555858930602958, "grad_norm": 1.9136316785472895, "learning_rate": 2.401881084229014e-07, "loss": 0.0781, "step": 15628 }, { "epoch": 3.5560864618885097, "grad_norm": 2.9814448682002195, "learning_rate": 2.401177051706618e-07, "loss": 0.0338, "step": 15629 }, { "epoch": 3.5563139931740615, "grad_norm": 2.3152179164284648, "learning_rate": 2.400473097847115e-07, "loss": 0.1145, "step": 15630 }, { "epoch": 3.5565415244596132, "grad_norm": 1.4201453683709062, "learning_rate": 2.3997692226648923e-07, "loss": 0.0614, "step": 15631 }, { "epoch": 3.556769055745165, "grad_norm": 1.0452739597592313, "learning_rate": 2.399065426174333e-07, "loss": 0.0119, "step": 15632 }, { "epoch": 3.5569965870307167, "grad_norm": 1.2142880125413678, "learning_rate": 2.398361708389826e-07, "loss": 0.0163, "step": 15633 }, { "epoch": 3.5572241183162685, "grad_norm": 2.447855602841095, "learning_rate": 2.39765806932575e-07, "loss": 0.078, "step": 15634 }, { "epoch": 3.5574516496018203, "grad_norm": 0.8177468443672763, "learning_rate": 2.3969545089964875e-07, "loss": 0.0184, "step": 15635 }, { "epoch": 3.557679180887372, "grad_norm": 1.185759476027867, "learning_rate": 2.396251027416418e-07, "loss": 0.0154, "step": 15636 }, { "epoch": 3.5579067121729238, "grad_norm": 0.5692653281392789, "learning_rate": 2.395547624599922e-07, "loss": 0.0045, "step": 15637 }, { "epoch": 3.5581342434584755, "grad_norm": 1.054093618127974, "learning_rate": 2.394844300561373e-07, "loss": 0.019, "step": 15638 }, { "epoch": 3.5583617747440273, "grad_norm": 1.6671771355432645, "learning_rate": 2.3941410553151446e-07, "loss": 0.0184, "step": 15639 }, { "epoch": 3.558589306029579, "grad_norm": 1.0816645491893502, "learning_rate": 2.393437888875614e-07, "loss": 0.0478, "step": 15640 }, { "epoch": 3.5588168373151308, "grad_norm": 2.1779090325236767, "learning_rate": 2.392734801257147e-07, "loss": 0.0425, "step": 15641 }, { "epoch": 3.5590443686006825, "grad_norm": 2.336356282850723, "learning_rate": 2.392031792474116e-07, "loss": 0.1255, "step": 15642 }, { "epoch": 3.5592718998862343, "grad_norm": 2.31053093545642, "learning_rate": 2.3913288625408906e-07, "loss": 0.0616, "step": 15643 }, { "epoch": 3.559499431171786, "grad_norm": 2.7188128183886104, "learning_rate": 2.390626011471838e-07, "loss": 0.0134, "step": 15644 }, { "epoch": 3.5597269624573378, "grad_norm": 1.5187467799106686, "learning_rate": 2.3899232392813223e-07, "loss": 0.0582, "step": 15645 }, { "epoch": 3.5599544937428895, "grad_norm": 1.4096211774136693, "learning_rate": 2.389220545983704e-07, "loss": 0.0119, "step": 15646 }, { "epoch": 3.5601820250284413, "grad_norm": 2.1000551606983024, "learning_rate": 2.3885179315933483e-07, "loss": 0.0491, "step": 15647 }, { "epoch": 3.560409556313993, "grad_norm": 1.4556493647541677, "learning_rate": 2.3878153961246125e-07, "loss": 0.0761, "step": 15648 }, { "epoch": 3.560637087599545, "grad_norm": 0.7009344226059434, "learning_rate": 2.387112939591857e-07, "loss": 0.0057, "step": 15649 }, { "epoch": 3.5608646188850965, "grad_norm": 1.2367456106236019, "learning_rate": 2.38641056200944e-07, "loss": 0.0224, "step": 15650 }, { "epoch": 3.5610921501706487, "grad_norm": 1.1349947205058692, "learning_rate": 2.385708263391714e-07, "loss": 0.0456, "step": 15651 }, { "epoch": 3.5613196814562, "grad_norm": 1.0513797725183076, "learning_rate": 2.385006043753035e-07, "loss": 0.0757, "step": 15652 }, { "epoch": 3.5615472127417522, "grad_norm": 1.7718158671924602, "learning_rate": 2.3843039031077526e-07, "loss": 0.0911, "step": 15653 }, { "epoch": 3.5617747440273035, "grad_norm": 0.9705147446534058, "learning_rate": 2.3836018414702205e-07, "loss": 0.0145, "step": 15654 }, { "epoch": 3.5620022753128557, "grad_norm": 0.9425562317266315, "learning_rate": 2.3828998588547842e-07, "loss": 0.0083, "step": 15655 }, { "epoch": 3.562229806598407, "grad_norm": 1.7602120752347583, "learning_rate": 2.3821979552757926e-07, "loss": 0.091, "step": 15656 }, { "epoch": 3.5624573378839592, "grad_norm": 1.3443532650281635, "learning_rate": 2.381496130747593e-07, "loss": 0.0188, "step": 15657 }, { "epoch": 3.5626848691695105, "grad_norm": 2.4375311158963937, "learning_rate": 2.3807943852845252e-07, "loss": 0.0341, "step": 15658 }, { "epoch": 3.5629124004550627, "grad_norm": 2.5142795705944576, "learning_rate": 2.3800927189009364e-07, "loss": 0.0106, "step": 15659 }, { "epoch": 3.5631399317406145, "grad_norm": 1.141656922868559, "learning_rate": 2.3793911316111632e-07, "loss": 0.0271, "step": 15660 }, { "epoch": 3.5633674630261662, "grad_norm": 1.5293825563312191, "learning_rate": 2.378689623429549e-07, "loss": 0.0266, "step": 15661 }, { "epoch": 3.563594994311718, "grad_norm": 1.6360850767968909, "learning_rate": 2.3779881943704259e-07, "loss": 0.0832, "step": 15662 }, { "epoch": 3.5638225255972698, "grad_norm": 4.176696317413638, "learning_rate": 2.3772868444481339e-07, "loss": 0.0176, "step": 15663 }, { "epoch": 3.5640500568828215, "grad_norm": 1.8658459580105484, "learning_rate": 2.3765855736770074e-07, "loss": 0.0227, "step": 15664 }, { "epoch": 3.5642775881683733, "grad_norm": 2.7574061840519892, "learning_rate": 2.3758843820713764e-07, "loss": 0.0287, "step": 15665 }, { "epoch": 3.564505119453925, "grad_norm": 1.5033788507859644, "learning_rate": 2.3751832696455749e-07, "loss": 0.033, "step": 15666 }, { "epoch": 3.5647326507394768, "grad_norm": 2.9817900152620354, "learning_rate": 2.3744822364139295e-07, "loss": 0.0358, "step": 15667 }, { "epoch": 3.5649601820250285, "grad_norm": 0.5460157399630654, "learning_rate": 2.3737812823907718e-07, "loss": 0.0077, "step": 15668 }, { "epoch": 3.5651877133105803, "grad_norm": 1.1475027241364446, "learning_rate": 2.3730804075904238e-07, "loss": 0.0744, "step": 15669 }, { "epoch": 3.565415244596132, "grad_norm": 1.298060869302903, "learning_rate": 2.3723796120272112e-07, "loss": 0.0282, "step": 15670 }, { "epoch": 3.5656427758816838, "grad_norm": 3.741928612417865, "learning_rate": 2.3716788957154603e-07, "loss": 0.0323, "step": 15671 }, { "epoch": 3.5658703071672355, "grad_norm": 1.9558892440804967, "learning_rate": 2.370978258669488e-07, "loss": 0.027, "step": 15672 }, { "epoch": 3.5660978384527873, "grad_norm": 3.068019624294311, "learning_rate": 2.3702777009036178e-07, "loss": 0.0232, "step": 15673 }, { "epoch": 3.566325369738339, "grad_norm": 1.1713431676039592, "learning_rate": 2.3695772224321643e-07, "loss": 0.0324, "step": 15674 }, { "epoch": 3.5665529010238908, "grad_norm": 1.717245300585007, "learning_rate": 2.3688768232694456e-07, "loss": 0.03, "step": 15675 }, { "epoch": 3.5667804323094425, "grad_norm": 0.9634803647786089, "learning_rate": 2.3681765034297783e-07, "loss": 0.0083, "step": 15676 }, { "epoch": 3.5670079635949943, "grad_norm": 1.073730759497232, "learning_rate": 2.3674762629274726e-07, "loss": 0.0248, "step": 15677 }, { "epoch": 3.567235494880546, "grad_norm": 2.7642425905890247, "learning_rate": 2.366776101776843e-07, "loss": 0.0324, "step": 15678 }, { "epoch": 3.567463026166098, "grad_norm": 0.6014724018384943, "learning_rate": 2.3660760199921965e-07, "loss": 0.0125, "step": 15679 }, { "epoch": 3.5676905574516495, "grad_norm": 1.067387301035098, "learning_rate": 2.3653760175878456e-07, "loss": 0.0459, "step": 15680 }, { "epoch": 3.5679180887372013, "grad_norm": 2.256534862573769, "learning_rate": 2.3646760945780917e-07, "loss": 0.0282, "step": 15681 }, { "epoch": 3.568145620022753, "grad_norm": 1.4777494741830344, "learning_rate": 2.363976250977243e-07, "loss": 0.0933, "step": 15682 }, { "epoch": 3.568373151308305, "grad_norm": 1.5051663051271433, "learning_rate": 2.3632764867996045e-07, "loss": 0.0355, "step": 15683 }, { "epoch": 3.5686006825938565, "grad_norm": 0.8099162962795442, "learning_rate": 2.3625768020594749e-07, "loss": 0.0071, "step": 15684 }, { "epoch": 3.5688282138794083, "grad_norm": 1.1607269800643365, "learning_rate": 2.3618771967711582e-07, "loss": 0.0163, "step": 15685 }, { "epoch": 3.56905574516496, "grad_norm": 1.0615652188043336, "learning_rate": 2.3611776709489485e-07, "loss": 0.0294, "step": 15686 }, { "epoch": 3.569283276450512, "grad_norm": 1.702806411570727, "learning_rate": 2.3604782246071476e-07, "loss": 0.0903, "step": 15687 }, { "epoch": 3.5695108077360636, "grad_norm": 0.9298000846232841, "learning_rate": 2.3597788577600458e-07, "loss": 0.008, "step": 15688 }, { "epoch": 3.5697383390216153, "grad_norm": 0.7305117464103524, "learning_rate": 2.3590795704219397e-07, "loss": 0.0098, "step": 15689 }, { "epoch": 3.5699658703071675, "grad_norm": 0.8342560405054796, "learning_rate": 2.3583803626071232e-07, "loss": 0.0076, "step": 15690 }, { "epoch": 3.570193401592719, "grad_norm": 1.1941173825531237, "learning_rate": 2.357681234329883e-07, "loss": 0.0454, "step": 15691 }, { "epoch": 3.570420932878271, "grad_norm": 1.1569611863707747, "learning_rate": 2.3569821856045123e-07, "loss": 0.0071, "step": 15692 }, { "epoch": 3.5706484641638223, "grad_norm": 1.080181310030607, "learning_rate": 2.356283216445294e-07, "loss": 0.0089, "step": 15693 }, { "epoch": 3.5708759954493745, "grad_norm": 1.6498273362977178, "learning_rate": 2.3555843268665176e-07, "loss": 0.0106, "step": 15694 }, { "epoch": 3.571103526734926, "grad_norm": 1.30638485664048, "learning_rate": 2.354885516882463e-07, "loss": 0.0165, "step": 15695 }, { "epoch": 3.571331058020478, "grad_norm": 0.9550920702439156, "learning_rate": 2.3541867865074147e-07, "loss": 0.0074, "step": 15696 }, { "epoch": 3.5715585893060293, "grad_norm": 0.8955529260137366, "learning_rate": 2.3534881357556562e-07, "loss": 0.015, "step": 15697 }, { "epoch": 3.5717861205915815, "grad_norm": 1.9339109939465882, "learning_rate": 2.3527895646414618e-07, "loss": 0.1291, "step": 15698 }, { "epoch": 3.5720136518771333, "grad_norm": 1.6375280094438234, "learning_rate": 2.3520910731791136e-07, "loss": 0.0258, "step": 15699 }, { "epoch": 3.572241183162685, "grad_norm": 0.8389955107627963, "learning_rate": 2.3513926613828828e-07, "loss": 0.0417, "step": 15700 }, { "epoch": 3.5724687144482368, "grad_norm": 1.2815564946303593, "learning_rate": 2.3506943292670482e-07, "loss": 0.0424, "step": 15701 }, { "epoch": 3.5726962457337885, "grad_norm": 1.5101332530706568, "learning_rate": 2.3499960768458778e-07, "loss": 0.0245, "step": 15702 }, { "epoch": 3.5729237770193403, "grad_norm": 1.5944081817798321, "learning_rate": 2.3492979041336455e-07, "loss": 0.0721, "step": 15703 }, { "epoch": 3.573151308304892, "grad_norm": 2.3997404680052448, "learning_rate": 2.3485998111446222e-07, "loss": 0.1325, "step": 15704 }, { "epoch": 3.573378839590444, "grad_norm": 0.7363390960059728, "learning_rate": 2.3479017978930722e-07, "loss": 0.0588, "step": 15705 }, { "epoch": 3.5736063708759955, "grad_norm": 1.5124523233220641, "learning_rate": 2.3472038643932645e-07, "loss": 0.0496, "step": 15706 }, { "epoch": 3.5738339021615473, "grad_norm": 1.6331005023035103, "learning_rate": 2.3465060106594626e-07, "loss": 0.0364, "step": 15707 }, { "epoch": 3.574061433447099, "grad_norm": 1.2347235364802116, "learning_rate": 2.3458082367059264e-07, "loss": 0.0134, "step": 15708 }, { "epoch": 3.574288964732651, "grad_norm": 1.6078795291942514, "learning_rate": 2.3451105425469197e-07, "loss": 0.0481, "step": 15709 }, { "epoch": 3.5745164960182025, "grad_norm": 1.3659145762974723, "learning_rate": 2.344412928196702e-07, "loss": 0.0295, "step": 15710 }, { "epoch": 3.5747440273037543, "grad_norm": 0.8622414851332375, "learning_rate": 2.3437153936695336e-07, "loss": 0.0062, "step": 15711 }, { "epoch": 3.574971558589306, "grad_norm": 1.667455960894293, "learning_rate": 2.3430179389796665e-07, "loss": 0.0644, "step": 15712 }, { "epoch": 3.575199089874858, "grad_norm": 1.9426895077398163, "learning_rate": 2.3423205641413592e-07, "loss": 0.0433, "step": 15713 }, { "epoch": 3.5754266211604095, "grad_norm": 1.9894621882163317, "learning_rate": 2.3416232691688635e-07, "loss": 0.0682, "step": 15714 }, { "epoch": 3.5756541524459613, "grad_norm": 1.3402369911701089, "learning_rate": 2.3409260540764284e-07, "loss": 0.0628, "step": 15715 }, { "epoch": 3.575881683731513, "grad_norm": 0.8972580000351204, "learning_rate": 2.3402289188783045e-07, "loss": 0.008, "step": 15716 }, { "epoch": 3.576109215017065, "grad_norm": 1.1181645603443808, "learning_rate": 2.339531863588742e-07, "loss": 0.0133, "step": 15717 }, { "epoch": 3.5763367463026166, "grad_norm": 0.7217035189324879, "learning_rate": 2.3388348882219887e-07, "loss": 0.0088, "step": 15718 }, { "epoch": 3.5765642775881683, "grad_norm": 1.3525814810809977, "learning_rate": 2.3381379927922845e-07, "loss": 0.0861, "step": 15719 }, { "epoch": 3.57679180887372, "grad_norm": 0.92084787881746, "learning_rate": 2.337441177313878e-07, "loss": 0.0148, "step": 15720 }, { "epoch": 3.577019340159272, "grad_norm": 1.9139238007131394, "learning_rate": 2.3367444418010088e-07, "loss": 0.0272, "step": 15721 }, { "epoch": 3.5772468714448236, "grad_norm": 1.8457352011474193, "learning_rate": 2.3360477862679135e-07, "loss": 0.1009, "step": 15722 }, { "epoch": 3.5774744027303753, "grad_norm": 0.9129509073473066, "learning_rate": 2.335351210728834e-07, "loss": 0.0067, "step": 15723 }, { "epoch": 3.577701934015927, "grad_norm": 1.3281692660610107, "learning_rate": 2.3346547151980058e-07, "loss": 0.0186, "step": 15724 }, { "epoch": 3.577929465301479, "grad_norm": 1.3304462042866587, "learning_rate": 2.3339582996896675e-07, "loss": 0.0293, "step": 15725 }, { "epoch": 3.5781569965870306, "grad_norm": 0.9421318625494641, "learning_rate": 2.333261964218049e-07, "loss": 0.0156, "step": 15726 }, { "epoch": 3.5783845278725823, "grad_norm": 0.811923186776626, "learning_rate": 2.332565708797381e-07, "loss": 0.0055, "step": 15727 }, { "epoch": 3.578612059158134, "grad_norm": 1.4235295423597205, "learning_rate": 2.3318695334418974e-07, "loss": 0.0319, "step": 15728 }, { "epoch": 3.5788395904436863, "grad_norm": 1.0308512607803437, "learning_rate": 2.3311734381658228e-07, "loss": 0.0101, "step": 15729 }, { "epoch": 3.5790671217292376, "grad_norm": 2.279504528321418, "learning_rate": 2.3304774229833864e-07, "loss": 0.0426, "step": 15730 }, { "epoch": 3.5792946530147898, "grad_norm": 2.1498433959785817, "learning_rate": 2.329781487908813e-07, "loss": 0.0371, "step": 15731 }, { "epoch": 3.579522184300341, "grad_norm": 1.0330446812572933, "learning_rate": 2.329085632956328e-07, "loss": 0.0271, "step": 15732 }, { "epoch": 3.5797497155858933, "grad_norm": 2.1899631352192146, "learning_rate": 2.3283898581401524e-07, "loss": 0.0266, "step": 15733 }, { "epoch": 3.5799772468714446, "grad_norm": 1.291850813334467, "learning_rate": 2.3276941634745039e-07, "loss": 0.0062, "step": 15734 }, { "epoch": 3.580204778156997, "grad_norm": 1.3533137943693119, "learning_rate": 2.3269985489736032e-07, "loss": 0.0067, "step": 15735 }, { "epoch": 3.580432309442548, "grad_norm": 0.6675920191253923, "learning_rate": 2.326303014651668e-07, "loss": 0.0081, "step": 15736 }, { "epoch": 3.5806598407281003, "grad_norm": 1.8921375013720578, "learning_rate": 2.3256075605229148e-07, "loss": 0.023, "step": 15737 }, { "epoch": 3.580887372013652, "grad_norm": 1.4956240124932, "learning_rate": 2.3249121866015557e-07, "loss": 0.0471, "step": 15738 }, { "epoch": 3.581114903299204, "grad_norm": 1.409705130643799, "learning_rate": 2.3242168929018017e-07, "loss": 0.0459, "step": 15739 }, { "epoch": 3.5813424345847555, "grad_norm": 2.070356641029637, "learning_rate": 2.3235216794378665e-07, "loss": 0.0264, "step": 15740 }, { "epoch": 3.5815699658703073, "grad_norm": 0.6787123268736301, "learning_rate": 2.3228265462239545e-07, "loss": 0.0622, "step": 15741 }, { "epoch": 3.581797497155859, "grad_norm": 1.4140809551718572, "learning_rate": 2.322131493274276e-07, "loss": 0.0204, "step": 15742 }, { "epoch": 3.582025028441411, "grad_norm": 1.1157020668206987, "learning_rate": 2.321436520603036e-07, "loss": 0.0087, "step": 15743 }, { "epoch": 3.5822525597269625, "grad_norm": 1.9076185800401906, "learning_rate": 2.3207416282244419e-07, "loss": 0.0475, "step": 15744 }, { "epoch": 3.5824800910125143, "grad_norm": 1.5236668583821356, "learning_rate": 2.320046816152692e-07, "loss": 0.027, "step": 15745 }, { "epoch": 3.582707622298066, "grad_norm": 2.537537987665066, "learning_rate": 2.319352084401985e-07, "loss": 0.0202, "step": 15746 }, { "epoch": 3.582935153583618, "grad_norm": 1.9412175508093148, "learning_rate": 2.3186574329865263e-07, "loss": 0.0437, "step": 15747 }, { "epoch": 3.5831626848691696, "grad_norm": 1.6416416349804588, "learning_rate": 2.3179628619205064e-07, "loss": 0.0556, "step": 15748 }, { "epoch": 3.5833902161547213, "grad_norm": 1.5291870129990601, "learning_rate": 2.3172683712181253e-07, "loss": 0.0186, "step": 15749 }, { "epoch": 3.583617747440273, "grad_norm": 1.0724274280310027, "learning_rate": 2.3165739608935756e-07, "loss": 0.0095, "step": 15750 }, { "epoch": 3.583845278725825, "grad_norm": 4.706993988079269, "learning_rate": 2.3158796309610528e-07, "loss": 0.0172, "step": 15751 }, { "epoch": 3.5840728100113766, "grad_norm": 1.211247244536206, "learning_rate": 2.3151853814347453e-07, "loss": 0.0336, "step": 15752 }, { "epoch": 3.5843003412969283, "grad_norm": 1.668884679440578, "learning_rate": 2.3144912123288407e-07, "loss": 0.0328, "step": 15753 }, { "epoch": 3.58452787258248, "grad_norm": 1.232087178151307, "learning_rate": 2.3137971236575297e-07, "loss": 0.0911, "step": 15754 }, { "epoch": 3.584755403868032, "grad_norm": 2.325772089387766, "learning_rate": 2.3131031154349947e-07, "loss": 0.0499, "step": 15755 }, { "epoch": 3.5849829351535836, "grad_norm": 1.5023314582265477, "learning_rate": 2.3124091876754218e-07, "loss": 0.0116, "step": 15756 }, { "epoch": 3.5852104664391353, "grad_norm": 1.4396462767732754, "learning_rate": 2.3117153403929963e-07, "loss": 0.0113, "step": 15757 }, { "epoch": 3.585437997724687, "grad_norm": 1.3615593185128565, "learning_rate": 2.311021573601894e-07, "loss": 0.0144, "step": 15758 }, { "epoch": 3.585665529010239, "grad_norm": 1.0389953180168925, "learning_rate": 2.3103278873162987e-07, "loss": 0.0186, "step": 15759 }, { "epoch": 3.5858930602957906, "grad_norm": 1.2487554951035553, "learning_rate": 2.3096342815503847e-07, "loss": 0.025, "step": 15760 }, { "epoch": 3.5861205915813423, "grad_norm": 1.5588129243317799, "learning_rate": 2.3089407563183315e-07, "loss": 0.0514, "step": 15761 }, { "epoch": 3.586348122866894, "grad_norm": 1.9157825482685915, "learning_rate": 2.3082473116343096e-07, "loss": 0.0145, "step": 15762 }, { "epoch": 3.586575654152446, "grad_norm": 1.375708200771742, "learning_rate": 2.3075539475124933e-07, "loss": 0.0355, "step": 15763 }, { "epoch": 3.5868031854379976, "grad_norm": 1.1006570775452575, "learning_rate": 2.3068606639670566e-07, "loss": 0.0712, "step": 15764 }, { "epoch": 3.5870307167235493, "grad_norm": 1.2468278039011385, "learning_rate": 2.306167461012164e-07, "loss": 0.0282, "step": 15765 }, { "epoch": 3.587258248009101, "grad_norm": 0.8368108752447663, "learning_rate": 2.305474338661988e-07, "loss": 0.0209, "step": 15766 }, { "epoch": 3.587485779294653, "grad_norm": 1.5098044875525596, "learning_rate": 2.30478129693069e-07, "loss": 0.0264, "step": 15767 }, { "epoch": 3.587713310580205, "grad_norm": 1.3902139364519686, "learning_rate": 2.304088335832439e-07, "loss": 0.027, "step": 15768 }, { "epoch": 3.5879408418657563, "grad_norm": 0.7871800592241073, "learning_rate": 2.3033954553813943e-07, "loss": 0.0193, "step": 15769 }, { "epoch": 3.5881683731513085, "grad_norm": 1.5948830158128167, "learning_rate": 2.302702655591718e-07, "loss": 0.0917, "step": 15770 }, { "epoch": 3.58839590443686, "grad_norm": 0.422354181756058, "learning_rate": 2.3020099364775734e-07, "loss": 0.0013, "step": 15771 }, { "epoch": 3.588623435722412, "grad_norm": 2.901796593405476, "learning_rate": 2.301317298053112e-07, "loss": 0.0643, "step": 15772 }, { "epoch": 3.5888509670079634, "grad_norm": 1.6317401407161827, "learning_rate": 2.3006247403324965e-07, "loss": 0.0122, "step": 15773 }, { "epoch": 3.5890784982935156, "grad_norm": 3.3447402457487554, "learning_rate": 2.299932263329876e-07, "loss": 0.15, "step": 15774 }, { "epoch": 3.589306029579067, "grad_norm": 1.1936542997156783, "learning_rate": 2.2992398670594073e-07, "loss": 0.0882, "step": 15775 }, { "epoch": 3.589533560864619, "grad_norm": 1.087300271742027, "learning_rate": 2.2985475515352385e-07, "loss": 0.0282, "step": 15776 }, { "epoch": 3.589761092150171, "grad_norm": 1.6162553128389705, "learning_rate": 2.297855316771521e-07, "loss": 0.0675, "step": 15777 }, { "epoch": 3.5899886234357226, "grad_norm": 1.5132938730068752, "learning_rate": 2.297163162782405e-07, "loss": 0.0268, "step": 15778 }, { "epoch": 3.5902161547212743, "grad_norm": 1.2646987121455369, "learning_rate": 2.2964710895820323e-07, "loss": 0.0214, "step": 15779 }, { "epoch": 3.590443686006826, "grad_norm": 1.2361717689178342, "learning_rate": 2.2957790971845528e-07, "loss": 0.0157, "step": 15780 }, { "epoch": 3.590671217292378, "grad_norm": 1.4510303914111646, "learning_rate": 2.2950871856041037e-07, "loss": 0.0266, "step": 15781 }, { "epoch": 3.5908987485779296, "grad_norm": 1.4443201925593514, "learning_rate": 2.2943953548548324e-07, "loss": 0.068, "step": 15782 }, { "epoch": 3.5911262798634813, "grad_norm": 1.8171446737565904, "learning_rate": 2.2937036049508727e-07, "loss": 0.0188, "step": 15783 }, { "epoch": 3.591353811149033, "grad_norm": 0.4017552263944133, "learning_rate": 2.293011935906366e-07, "loss": 0.0034, "step": 15784 }, { "epoch": 3.591581342434585, "grad_norm": 1.3254557316054183, "learning_rate": 2.2923203477354515e-07, "loss": 0.0623, "step": 15785 }, { "epoch": 3.5918088737201366, "grad_norm": 1.2652833284489169, "learning_rate": 2.2916288404522576e-07, "loss": 0.0461, "step": 15786 }, { "epoch": 3.5920364050056883, "grad_norm": 1.120686383070198, "learning_rate": 2.2909374140709233e-07, "loss": 0.0083, "step": 15787 }, { "epoch": 3.59226393629124, "grad_norm": 1.055716081900666, "learning_rate": 2.2902460686055755e-07, "loss": 0.0256, "step": 15788 }, { "epoch": 3.592491467576792, "grad_norm": 2.1908554958170323, "learning_rate": 2.2895548040703485e-07, "loss": 0.118, "step": 15789 }, { "epoch": 3.5927189988623436, "grad_norm": 2.1114355392536592, "learning_rate": 2.288863620479366e-07, "loss": 0.0899, "step": 15790 }, { "epoch": 3.5929465301478953, "grad_norm": 1.4562510005490534, "learning_rate": 2.288172517846756e-07, "loss": 0.0086, "step": 15791 }, { "epoch": 3.593174061433447, "grad_norm": 0.8835074008567946, "learning_rate": 2.2874814961866465e-07, "loss": 0.0584, "step": 15792 }, { "epoch": 3.593401592718999, "grad_norm": 1.684135791756441, "learning_rate": 2.286790555513156e-07, "loss": 0.0278, "step": 15793 }, { "epoch": 3.5936291240045506, "grad_norm": 1.0110390769929507, "learning_rate": 2.286099695840411e-07, "loss": 0.0101, "step": 15794 }, { "epoch": 3.5938566552901023, "grad_norm": 1.372750573345256, "learning_rate": 2.285408917182528e-07, "loss": 0.0139, "step": 15795 }, { "epoch": 3.594084186575654, "grad_norm": 1.4726889809184476, "learning_rate": 2.2847182195536216e-07, "loss": 0.0274, "step": 15796 }, { "epoch": 3.594311717861206, "grad_norm": 1.5155018663033135, "learning_rate": 2.2840276029678172e-07, "loss": 0.0663, "step": 15797 }, { "epoch": 3.5945392491467576, "grad_norm": 0.5610374165805729, "learning_rate": 2.283337067439223e-07, "loss": 0.0044, "step": 15798 }, { "epoch": 3.5947667804323093, "grad_norm": 0.8681616896804059, "learning_rate": 2.282646612981957e-07, "loss": 0.0525, "step": 15799 }, { "epoch": 3.594994311717861, "grad_norm": 1.0346033096358522, "learning_rate": 2.2819562396101258e-07, "loss": 0.0208, "step": 15800 }, { "epoch": 3.595221843003413, "grad_norm": 2.7759593247516445, "learning_rate": 2.281265947337844e-07, "loss": 0.0349, "step": 15801 }, { "epoch": 3.5954493742889646, "grad_norm": 1.655199933051034, "learning_rate": 2.2805757361792163e-07, "loss": 0.0889, "step": 15802 }, { "epoch": 3.5956769055745164, "grad_norm": 1.4513049847285613, "learning_rate": 2.2798856061483507e-07, "loss": 0.1078, "step": 15803 }, { "epoch": 3.595904436860068, "grad_norm": 2.064392854182548, "learning_rate": 2.279195557259354e-07, "loss": 0.0173, "step": 15804 }, { "epoch": 3.59613196814562, "grad_norm": 1.4315373204668924, "learning_rate": 2.2785055895263266e-07, "loss": 0.0158, "step": 15805 }, { "epoch": 3.5963594994311716, "grad_norm": 1.2060461299484972, "learning_rate": 2.2778157029633728e-07, "loss": 0.0093, "step": 15806 }, { "epoch": 3.596587030716724, "grad_norm": 0.990165858335863, "learning_rate": 2.2771258975845902e-07, "loss": 0.0867, "step": 15807 }, { "epoch": 3.596814562002275, "grad_norm": 1.646934923029914, "learning_rate": 2.27643617340408e-07, "loss": 0.0499, "step": 15808 }, { "epoch": 3.5970420932878273, "grad_norm": 2.1081433035685095, "learning_rate": 2.2757465304359343e-07, "loss": 0.0719, "step": 15809 }, { "epoch": 3.5972696245733786, "grad_norm": 1.4124952196318956, "learning_rate": 2.2750569686942517e-07, "loss": 0.0621, "step": 15810 }, { "epoch": 3.597497155858931, "grad_norm": 1.300288655766353, "learning_rate": 2.2743674881931272e-07, "loss": 0.0323, "step": 15811 }, { "epoch": 3.597724687144482, "grad_norm": 1.041255430916624, "learning_rate": 2.2736780889466473e-07, "loss": 0.0258, "step": 15812 }, { "epoch": 3.5979522184300343, "grad_norm": 0.7494503138720593, "learning_rate": 2.2729887709689078e-07, "loss": 0.0366, "step": 15813 }, { "epoch": 3.5981797497155856, "grad_norm": 1.5016343885568524, "learning_rate": 2.2722995342739945e-07, "loss": 0.0794, "step": 15814 }, { "epoch": 3.598407281001138, "grad_norm": 1.0101223393892782, "learning_rate": 2.2716103788759908e-07, "loss": 0.0434, "step": 15815 }, { "epoch": 3.5986348122866896, "grad_norm": 2.4558559495350427, "learning_rate": 2.2709213047889859e-07, "loss": 0.0269, "step": 15816 }, { "epoch": 3.5988623435722413, "grad_norm": 1.3260222281331118, "learning_rate": 2.2702323120270618e-07, "loss": 0.0453, "step": 15817 }, { "epoch": 3.599089874857793, "grad_norm": 2.5709822375615716, "learning_rate": 2.2695434006043021e-07, "loss": 0.0494, "step": 15818 }, { "epoch": 3.599317406143345, "grad_norm": 1.8077230203017662, "learning_rate": 2.2688545705347843e-07, "loss": 0.0673, "step": 15819 }, { "epoch": 3.5995449374288966, "grad_norm": 0.9428515440329154, "learning_rate": 2.2681658218325894e-07, "loss": 0.0087, "step": 15820 }, { "epoch": 3.5997724687144483, "grad_norm": 1.3747529476614981, "learning_rate": 2.2674771545117936e-07, "loss": 0.0204, "step": 15821 }, { "epoch": 3.6, "grad_norm": 1.5810345861237443, "learning_rate": 2.2667885685864677e-07, "loss": 0.0543, "step": 15822 }, { "epoch": 3.600227531285552, "grad_norm": 1.5044215229138658, "learning_rate": 2.2661000640706893e-07, "loss": 0.0747, "step": 15823 }, { "epoch": 3.6004550625711036, "grad_norm": 1.1690278367039766, "learning_rate": 2.2654116409785293e-07, "loss": 0.0136, "step": 15824 }, { "epoch": 3.6006825938566553, "grad_norm": 1.4275774604463405, "learning_rate": 2.2647232993240605e-07, "loss": 0.0848, "step": 15825 }, { "epoch": 3.600910125142207, "grad_norm": 0.9928683766757119, "learning_rate": 2.2640350391213462e-07, "loss": 0.013, "step": 15826 }, { "epoch": 3.601137656427759, "grad_norm": 2.4490970860621664, "learning_rate": 2.2633468603844576e-07, "loss": 0.0524, "step": 15827 }, { "epoch": 3.6013651877133106, "grad_norm": 0.911442992342187, "learning_rate": 2.2626587631274587e-07, "loss": 0.0131, "step": 15828 }, { "epoch": 3.6015927189988624, "grad_norm": 1.0467560338912785, "learning_rate": 2.2619707473644094e-07, "loss": 0.0171, "step": 15829 }, { "epoch": 3.601820250284414, "grad_norm": 1.691907032592351, "learning_rate": 2.261282813109375e-07, "loss": 0.0525, "step": 15830 }, { "epoch": 3.602047781569966, "grad_norm": 1.4074883291679834, "learning_rate": 2.2605949603764145e-07, "loss": 0.02, "step": 15831 }, { "epoch": 3.6022753128555176, "grad_norm": 3.201343460837881, "learning_rate": 2.2599071891795887e-07, "loss": 0.0214, "step": 15832 }, { "epoch": 3.6025028441410694, "grad_norm": 1.1868649455939482, "learning_rate": 2.2592194995329525e-07, "loss": 0.007, "step": 15833 }, { "epoch": 3.602730375426621, "grad_norm": 1.404083115342947, "learning_rate": 2.258531891450559e-07, "loss": 0.1054, "step": 15834 }, { "epoch": 3.602957906712173, "grad_norm": 1.339092696933157, "learning_rate": 2.2578443649464654e-07, "loss": 0.0847, "step": 15835 }, { "epoch": 3.6031854379977246, "grad_norm": 1.4107278785393036, "learning_rate": 2.25715692003472e-07, "loss": 0.01, "step": 15836 }, { "epoch": 3.6034129692832764, "grad_norm": 1.4642250501159866, "learning_rate": 2.2564695567293745e-07, "loss": 0.0816, "step": 15837 }, { "epoch": 3.603640500568828, "grad_norm": 2.072364172739147, "learning_rate": 2.2557822750444776e-07, "loss": 0.0153, "step": 15838 }, { "epoch": 3.60386803185438, "grad_norm": 2.259564511290655, "learning_rate": 2.255095074994078e-07, "loss": 0.008, "step": 15839 }, { "epoch": 3.6040955631399316, "grad_norm": 1.007338053007618, "learning_rate": 2.254407956592218e-07, "loss": 0.0077, "step": 15840 }, { "epoch": 3.6043230944254834, "grad_norm": 1.6614323250440728, "learning_rate": 2.2537209198529406e-07, "loss": 0.0937, "step": 15841 }, { "epoch": 3.604550625711035, "grad_norm": 0.955130281088151, "learning_rate": 2.2530339647902902e-07, "loss": 0.0575, "step": 15842 }, { "epoch": 3.604778156996587, "grad_norm": 2.098203214340266, "learning_rate": 2.252347091418304e-07, "loss": 0.103, "step": 15843 }, { "epoch": 3.6050056882821386, "grad_norm": 1.4655145202954598, "learning_rate": 2.2516602997510218e-07, "loss": 0.0251, "step": 15844 }, { "epoch": 3.6052332195676904, "grad_norm": 1.3992607173353875, "learning_rate": 2.2509735898024825e-07, "loss": 0.0467, "step": 15845 }, { "epoch": 3.6054607508532426, "grad_norm": 1.6964202984368544, "learning_rate": 2.2502869615867167e-07, "loss": 0.0333, "step": 15846 }, { "epoch": 3.605688282138794, "grad_norm": 1.6960087120088254, "learning_rate": 2.249600415117763e-07, "loss": 0.021, "step": 15847 }, { "epoch": 3.605915813424346, "grad_norm": 2.175421185808465, "learning_rate": 2.2489139504096474e-07, "loss": 0.0508, "step": 15848 }, { "epoch": 3.6061433447098974, "grad_norm": 1.6681125972462538, "learning_rate": 2.2482275674764056e-07, "loss": 0.0559, "step": 15849 }, { "epoch": 3.6063708759954496, "grad_norm": 1.4917865695728387, "learning_rate": 2.2475412663320616e-07, "loss": 0.0707, "step": 15850 }, { "epoch": 3.606598407281001, "grad_norm": 1.1322507193507025, "learning_rate": 2.246855046990644e-07, "loss": 0.0596, "step": 15851 }, { "epoch": 3.606825938566553, "grad_norm": 1.8603857994677808, "learning_rate": 2.2461689094661795e-07, "loss": 0.0356, "step": 15852 }, { "epoch": 3.6070534698521044, "grad_norm": 1.0034537729207138, "learning_rate": 2.2454828537726875e-07, "loss": 0.0276, "step": 15853 }, { "epoch": 3.6072810011376566, "grad_norm": 1.3355737611833545, "learning_rate": 2.2447968799241944e-07, "loss": 0.0106, "step": 15854 }, { "epoch": 3.6075085324232083, "grad_norm": 0.9571703097680564, "learning_rate": 2.244110987934716e-07, "loss": 0.0273, "step": 15855 }, { "epoch": 3.60773606370876, "grad_norm": 1.0451260768113149, "learning_rate": 2.2434251778182725e-07, "loss": 0.0136, "step": 15856 }, { "epoch": 3.607963594994312, "grad_norm": 1.7950789484387946, "learning_rate": 2.2427394495888806e-07, "loss": 0.0621, "step": 15857 }, { "epoch": 3.6081911262798636, "grad_norm": 1.7122911166966677, "learning_rate": 2.2420538032605577e-07, "loss": 0.0958, "step": 15858 }, { "epoch": 3.6084186575654154, "grad_norm": 0.7573305062255528, "learning_rate": 2.2413682388473148e-07, "loss": 0.0097, "step": 15859 }, { "epoch": 3.608646188850967, "grad_norm": 1.220148844510899, "learning_rate": 2.2406827563631612e-07, "loss": 0.0486, "step": 15860 }, { "epoch": 3.608873720136519, "grad_norm": 2.439019239988751, "learning_rate": 2.239997355822112e-07, "loss": 0.1323, "step": 15861 }, { "epoch": 3.6091012514220706, "grad_norm": 1.042603413611321, "learning_rate": 2.23931203723817e-07, "loss": 0.0098, "step": 15862 }, { "epoch": 3.6093287827076224, "grad_norm": 4.193347527407922, "learning_rate": 2.238626800625345e-07, "loss": 0.0225, "step": 15863 }, { "epoch": 3.609556313993174, "grad_norm": 0.9319338680057941, "learning_rate": 2.2379416459976448e-07, "loss": 0.0037, "step": 15864 }, { "epoch": 3.609783845278726, "grad_norm": 0.9881131116411489, "learning_rate": 2.2372565733690661e-07, "loss": 0.0359, "step": 15865 }, { "epoch": 3.6100113765642776, "grad_norm": 0.9301409271588362, "learning_rate": 2.2365715827536167e-07, "loss": 0.0133, "step": 15866 }, { "epoch": 3.6102389078498294, "grad_norm": 1.0591929657995842, "learning_rate": 2.235886674165292e-07, "loss": 0.0116, "step": 15867 }, { "epoch": 3.610466439135381, "grad_norm": 1.4876152719571123, "learning_rate": 2.2352018476180945e-07, "loss": 0.0457, "step": 15868 }, { "epoch": 3.610693970420933, "grad_norm": 2.305019098229327, "learning_rate": 2.234517103126016e-07, "loss": 0.1034, "step": 15869 }, { "epoch": 3.6109215017064846, "grad_norm": 0.8772177572583515, "learning_rate": 2.2338324407030537e-07, "loss": 0.0169, "step": 15870 }, { "epoch": 3.6111490329920364, "grad_norm": 1.9214599535234724, "learning_rate": 2.2331478603632038e-07, "loss": 0.0304, "step": 15871 }, { "epoch": 3.611376564277588, "grad_norm": 1.1182835727706315, "learning_rate": 2.2324633621204524e-07, "loss": 0.018, "step": 15872 }, { "epoch": 3.61160409556314, "grad_norm": 0.8442689281022293, "learning_rate": 2.231778945988794e-07, "loss": 0.0236, "step": 15873 }, { "epoch": 3.6118316268486916, "grad_norm": 2.1431735029547805, "learning_rate": 2.2310946119822132e-07, "loss": 0.1393, "step": 15874 }, { "epoch": 3.6120591581342434, "grad_norm": 1.152233956390708, "learning_rate": 2.230410360114701e-07, "loss": 0.0133, "step": 15875 }, { "epoch": 3.612286689419795, "grad_norm": 3.716797923436251, "learning_rate": 2.229726190400236e-07, "loss": 0.0473, "step": 15876 }, { "epoch": 3.612514220705347, "grad_norm": 0.7128765485772208, "learning_rate": 2.229042102852806e-07, "loss": 0.0031, "step": 15877 }, { "epoch": 3.6127417519908986, "grad_norm": 2.5331228231598644, "learning_rate": 2.2283580974863932e-07, "loss": 0.0291, "step": 15878 }, { "epoch": 3.6129692832764504, "grad_norm": 1.5078563094210653, "learning_rate": 2.2276741743149733e-07, "loss": 0.0211, "step": 15879 }, { "epoch": 3.613196814562002, "grad_norm": 1.8152449582910017, "learning_rate": 2.2269903333525285e-07, "loss": 0.078, "step": 15880 }, { "epoch": 3.613424345847554, "grad_norm": 1.178587665297939, "learning_rate": 2.2263065746130325e-07, "loss": 0.008, "step": 15881 }, { "epoch": 3.6136518771331056, "grad_norm": 1.5188257130487308, "learning_rate": 2.2256228981104627e-07, "loss": 0.0153, "step": 15882 }, { "epoch": 3.6138794084186574, "grad_norm": 0.7084819416468591, "learning_rate": 2.2249393038587877e-07, "loss": 0.0062, "step": 15883 }, { "epoch": 3.614106939704209, "grad_norm": 0.5787498829079859, "learning_rate": 2.2242557918719827e-07, "loss": 0.006, "step": 15884 }, { "epoch": 3.6143344709897613, "grad_norm": 1.191466565808107, "learning_rate": 2.2235723621640185e-07, "loss": 0.071, "step": 15885 }, { "epoch": 3.6145620022753127, "grad_norm": 1.5646860486306484, "learning_rate": 2.2228890147488587e-07, "loss": 0.0215, "step": 15886 }, { "epoch": 3.614789533560865, "grad_norm": 5.566741893544076, "learning_rate": 2.2222057496404743e-07, "loss": 0.088, "step": 15887 }, { "epoch": 3.615017064846416, "grad_norm": 0.9327186560743016, "learning_rate": 2.2215225668528255e-07, "loss": 0.0068, "step": 15888 }, { "epoch": 3.6152445961319684, "grad_norm": 1.8330504486422838, "learning_rate": 2.22083946639988e-07, "loss": 0.0947, "step": 15889 }, { "epoch": 3.6154721274175197, "grad_norm": 1.233269439030942, "learning_rate": 2.2201564482955948e-07, "loss": 0.0884, "step": 15890 }, { "epoch": 3.615699658703072, "grad_norm": 1.1945727934738732, "learning_rate": 2.2194735125539316e-07, "loss": 0.0399, "step": 15891 }, { "epoch": 3.615927189988623, "grad_norm": 1.252688299589172, "learning_rate": 2.2187906591888497e-07, "loss": 0.0267, "step": 15892 }, { "epoch": 3.6161547212741754, "grad_norm": 2.0868339663838524, "learning_rate": 2.2181078882143026e-07, "loss": 0.0733, "step": 15893 }, { "epoch": 3.616382252559727, "grad_norm": 1.0980574506085092, "learning_rate": 2.2174251996442478e-07, "loss": 0.0214, "step": 15894 }, { "epoch": 3.616609783845279, "grad_norm": 1.390664483843087, "learning_rate": 2.2167425934926344e-07, "loss": 0.0709, "step": 15895 }, { "epoch": 3.6168373151308306, "grad_norm": 0.7141609103967588, "learning_rate": 2.2160600697734187e-07, "loss": 0.0087, "step": 15896 }, { "epoch": 3.6170648464163824, "grad_norm": 2.2257368967555653, "learning_rate": 2.2153776285005444e-07, "loss": 0.0438, "step": 15897 }, { "epoch": 3.617292377701934, "grad_norm": 1.7240210259508535, "learning_rate": 2.2146952696879622e-07, "loss": 0.0242, "step": 15898 }, { "epoch": 3.617519908987486, "grad_norm": 1.5956623375628762, "learning_rate": 2.2140129933496215e-07, "loss": 0.0538, "step": 15899 }, { "epoch": 3.6177474402730376, "grad_norm": 1.5193827893829654, "learning_rate": 2.2133307994994608e-07, "loss": 0.0888, "step": 15900 }, { "epoch": 3.6179749715585894, "grad_norm": 1.1568145074029041, "learning_rate": 2.212648688151428e-07, "loss": 0.0148, "step": 15901 }, { "epoch": 3.618202502844141, "grad_norm": 1.4913587064482146, "learning_rate": 2.2119666593194617e-07, "loss": 0.1016, "step": 15902 }, { "epoch": 3.618430034129693, "grad_norm": 1.3189870284143048, "learning_rate": 2.2112847130175002e-07, "loss": 0.0041, "step": 15903 }, { "epoch": 3.6186575654152446, "grad_norm": 1.9681141185331301, "learning_rate": 2.210602849259482e-07, "loss": 0.0887, "step": 15904 }, { "epoch": 3.6188850967007964, "grad_norm": 1.2420658907375128, "learning_rate": 2.209921068059344e-07, "loss": 0.0155, "step": 15905 }, { "epoch": 3.619112627986348, "grad_norm": 3.413037793536414, "learning_rate": 2.2092393694310224e-07, "loss": 0.0286, "step": 15906 }, { "epoch": 3.6193401592719, "grad_norm": 2.1986868061854934, "learning_rate": 2.2085577533884455e-07, "loss": 0.1128, "step": 15907 }, { "epoch": 3.6195676905574516, "grad_norm": 0.9070844342781041, "learning_rate": 2.2078762199455483e-07, "loss": 0.0336, "step": 15908 }, { "epoch": 3.6197952218430034, "grad_norm": 0.3965107076075778, "learning_rate": 2.207194769116259e-07, "loss": 0.0026, "step": 15909 }, { "epoch": 3.620022753128555, "grad_norm": 1.4578662311747865, "learning_rate": 2.2065134009145028e-07, "loss": 0.0404, "step": 15910 }, { "epoch": 3.620250284414107, "grad_norm": 1.620364921842609, "learning_rate": 2.2058321153542068e-07, "loss": 0.0576, "step": 15911 }, { "epoch": 3.6204778156996587, "grad_norm": 1.6415696408328715, "learning_rate": 2.2051509124492955e-07, "loss": 0.0232, "step": 15912 }, { "epoch": 3.6207053469852104, "grad_norm": 1.2739672582219035, "learning_rate": 2.2044697922136946e-07, "loss": 0.0273, "step": 15913 }, { "epoch": 3.620932878270762, "grad_norm": 1.792206739518213, "learning_rate": 2.2037887546613193e-07, "loss": 0.0329, "step": 15914 }, { "epoch": 3.621160409556314, "grad_norm": 2.4597647067098123, "learning_rate": 2.2031077998060945e-07, "loss": 0.0427, "step": 15915 }, { "epoch": 3.6213879408418657, "grad_norm": 1.5733816176137194, "learning_rate": 2.2024269276619338e-07, "loss": 0.0113, "step": 15916 }, { "epoch": 3.6216154721274174, "grad_norm": 1.5520005198845204, "learning_rate": 2.2017461382427502e-07, "loss": 0.0678, "step": 15917 }, { "epoch": 3.621843003412969, "grad_norm": 0.42535160975918596, "learning_rate": 2.2010654315624657e-07, "loss": 0.0022, "step": 15918 }, { "epoch": 3.622070534698521, "grad_norm": 4.918710976059866, "learning_rate": 2.2003848076349866e-07, "loss": 0.0249, "step": 15919 }, { "epoch": 3.6222980659840727, "grad_norm": 1.70738539735778, "learning_rate": 2.199704266474228e-07, "loss": 0.0592, "step": 15920 }, { "epoch": 3.6225255972696244, "grad_norm": 1.546635780712711, "learning_rate": 2.1990238080940953e-07, "loss": 0.0098, "step": 15921 }, { "epoch": 3.622753128555176, "grad_norm": 6.140664297052045, "learning_rate": 2.1983434325084956e-07, "loss": 0.0229, "step": 15922 }, { "epoch": 3.622980659840728, "grad_norm": 0.9247239792568883, "learning_rate": 2.1976631397313353e-07, "loss": 0.0465, "step": 15923 }, { "epoch": 3.62320819112628, "grad_norm": 0.9299180075191263, "learning_rate": 2.1969829297765193e-07, "loss": 0.0153, "step": 15924 }, { "epoch": 3.6234357224118314, "grad_norm": 0.7413015966763356, "learning_rate": 2.196302802657952e-07, "loss": 0.0058, "step": 15925 }, { "epoch": 3.6236632536973836, "grad_norm": 1.5465880770248575, "learning_rate": 2.195622758389529e-07, "loss": 0.0269, "step": 15926 }, { "epoch": 3.623890784982935, "grad_norm": 0.9586903297546994, "learning_rate": 2.1949427969851535e-07, "loss": 0.0808, "step": 15927 }, { "epoch": 3.624118316268487, "grad_norm": 1.0322782673822957, "learning_rate": 2.1942629184587205e-07, "loss": 0.0127, "step": 15928 }, { "epoch": 3.6243458475540384, "grad_norm": 5.007743927355067, "learning_rate": 2.1935831228241242e-07, "loss": 0.0111, "step": 15929 }, { "epoch": 3.6245733788395906, "grad_norm": 0.9841935057950147, "learning_rate": 2.1929034100952596e-07, "loss": 0.0093, "step": 15930 }, { "epoch": 3.624800910125142, "grad_norm": 0.9559077012902057, "learning_rate": 2.1922237802860186e-07, "loss": 0.0128, "step": 15931 }, { "epoch": 3.625028441410694, "grad_norm": 1.1569771940208073, "learning_rate": 2.191544233410295e-07, "loss": 0.0137, "step": 15932 }, { "epoch": 3.625255972696246, "grad_norm": 1.0705522590276457, "learning_rate": 2.190864769481972e-07, "loss": 0.0126, "step": 15933 }, { "epoch": 3.6254835039817976, "grad_norm": 2.3287970006828447, "learning_rate": 2.1901853885149414e-07, "loss": 0.1652, "step": 15934 }, { "epoch": 3.6257110352673494, "grad_norm": 1.369462955580559, "learning_rate": 2.1895060905230865e-07, "loss": 0.0263, "step": 15935 }, { "epoch": 3.625938566552901, "grad_norm": 1.8648074787401672, "learning_rate": 2.1888268755202883e-07, "loss": 0.0128, "step": 15936 }, { "epoch": 3.626166097838453, "grad_norm": 1.6700129965762833, "learning_rate": 2.1881477435204308e-07, "loss": 0.027, "step": 15937 }, { "epoch": 3.6263936291240046, "grad_norm": 1.1587181775635913, "learning_rate": 2.1874686945373953e-07, "loss": 0.0094, "step": 15938 }, { "epoch": 3.6266211604095564, "grad_norm": 0.9707072102900801, "learning_rate": 2.1867897285850606e-07, "loss": 0.0323, "step": 15939 }, { "epoch": 3.626848691695108, "grad_norm": 1.2002126143002143, "learning_rate": 2.1861108456773028e-07, "loss": 0.0478, "step": 15940 }, { "epoch": 3.62707622298066, "grad_norm": 1.7207725204644129, "learning_rate": 2.1854320458279948e-07, "loss": 0.0681, "step": 15941 }, { "epoch": 3.6273037542662117, "grad_norm": 1.5701714235887643, "learning_rate": 2.184753329051014e-07, "loss": 0.0211, "step": 15942 }, { "epoch": 3.6275312855517634, "grad_norm": 0.9004695002675717, "learning_rate": 2.1840746953602276e-07, "loss": 0.0207, "step": 15943 }, { "epoch": 3.627758816837315, "grad_norm": 1.9159338088376798, "learning_rate": 2.1833961447695084e-07, "loss": 0.0249, "step": 15944 }, { "epoch": 3.627986348122867, "grad_norm": 0.7632812657606867, "learning_rate": 2.182717677292724e-07, "loss": 0.0274, "step": 15945 }, { "epoch": 3.6282138794084187, "grad_norm": 1.9079759786725872, "learning_rate": 2.1820392929437433e-07, "loss": 0.0472, "step": 15946 }, { "epoch": 3.6284414106939704, "grad_norm": 2.0018585336336203, "learning_rate": 2.1813609917364303e-07, "loss": 0.0126, "step": 15947 }, { "epoch": 3.628668941979522, "grad_norm": 2.0638808115779623, "learning_rate": 2.180682773684644e-07, "loss": 0.0966, "step": 15948 }, { "epoch": 3.628896473265074, "grad_norm": 0.907325304725754, "learning_rate": 2.180004638802252e-07, "loss": 0.0156, "step": 15949 }, { "epoch": 3.6291240045506257, "grad_norm": 1.2646846229965316, "learning_rate": 2.179326587103109e-07, "loss": 0.049, "step": 15950 }, { "epoch": 3.6293515358361774, "grad_norm": 2.273709351901189, "learning_rate": 2.1786486186010759e-07, "loss": 0.0967, "step": 15951 }, { "epoch": 3.629579067121729, "grad_norm": 1.4480099127550539, "learning_rate": 2.17797073331001e-07, "loss": 0.0233, "step": 15952 }, { "epoch": 3.629806598407281, "grad_norm": 2.8637448240416608, "learning_rate": 2.1772929312437637e-07, "loss": 0.0554, "step": 15953 }, { "epoch": 3.6300341296928327, "grad_norm": 0.7649263898892656, "learning_rate": 2.1766152124161924e-07, "loss": 0.0056, "step": 15954 }, { "epoch": 3.6302616609783844, "grad_norm": 2.0961674616063535, "learning_rate": 2.1759375768411445e-07, "loss": 0.0996, "step": 15955 }, { "epoch": 3.630489192263936, "grad_norm": 1.6577620514220697, "learning_rate": 2.1752600245324724e-07, "loss": 0.101, "step": 15956 }, { "epoch": 3.630716723549488, "grad_norm": 1.8646206743022127, "learning_rate": 2.174582555504022e-07, "loss": 0.0456, "step": 15957 }, { "epoch": 3.6309442548350397, "grad_norm": 0.9557105600113359, "learning_rate": 2.1739051697696397e-07, "loss": 0.0266, "step": 15958 }, { "epoch": 3.6311717861205914, "grad_norm": 2.187766761877558, "learning_rate": 2.173227867343173e-07, "loss": 0.1276, "step": 15959 }, { "epoch": 3.631399317406143, "grad_norm": 1.7618300595533976, "learning_rate": 2.17255064823846e-07, "loss": 0.0634, "step": 15960 }, { "epoch": 3.631626848691695, "grad_norm": 0.7735585616817597, "learning_rate": 2.171873512469347e-07, "loss": 0.0327, "step": 15961 }, { "epoch": 3.6318543799772467, "grad_norm": 1.3152056390442892, "learning_rate": 2.1711964600496675e-07, "loss": 0.0079, "step": 15962 }, { "epoch": 3.632081911262799, "grad_norm": 1.3961298119882206, "learning_rate": 2.1705194909932655e-07, "loss": 0.0362, "step": 15963 }, { "epoch": 3.63230944254835, "grad_norm": 1.674171103439447, "learning_rate": 2.169842605313971e-07, "loss": 0.0165, "step": 15964 }, { "epoch": 3.6325369738339024, "grad_norm": 0.9426455522930469, "learning_rate": 2.1691658030256218e-07, "loss": 0.0102, "step": 15965 }, { "epoch": 3.6327645051194537, "grad_norm": 1.1069377779209246, "learning_rate": 2.1684890841420517e-07, "loss": 0.0141, "step": 15966 }, { "epoch": 3.632992036405006, "grad_norm": 2.025086778062872, "learning_rate": 2.1678124486770884e-07, "loss": 0.0207, "step": 15967 }, { "epoch": 3.633219567690557, "grad_norm": 1.431514220198651, "learning_rate": 2.1671358966445635e-07, "loss": 0.0205, "step": 15968 }, { "epoch": 3.6334470989761094, "grad_norm": 0.825884857273945, "learning_rate": 2.166459428058302e-07, "loss": 0.0075, "step": 15969 }, { "epoch": 3.6336746302616607, "grad_norm": 2.1461179338797454, "learning_rate": 2.1657830429321333e-07, "loss": 0.0248, "step": 15970 }, { "epoch": 3.633902161547213, "grad_norm": 1.223174556241605, "learning_rate": 2.1651067412798767e-07, "loss": 0.024, "step": 15971 }, { "epoch": 3.6341296928327647, "grad_norm": 1.8869267388922768, "learning_rate": 2.1644305231153573e-07, "loss": 0.0265, "step": 15972 }, { "epoch": 3.6343572241183164, "grad_norm": 2.2739787790938206, "learning_rate": 2.1637543884523978e-07, "loss": 0.0201, "step": 15973 }, { "epoch": 3.634584755403868, "grad_norm": 1.4341291214984238, "learning_rate": 2.1630783373048136e-07, "loss": 0.0143, "step": 15974 }, { "epoch": 3.63481228668942, "grad_norm": 1.460175100227624, "learning_rate": 2.1624023696864247e-07, "loss": 0.0306, "step": 15975 }, { "epoch": 3.6350398179749717, "grad_norm": 1.9319647478343853, "learning_rate": 2.161726485611043e-07, "loss": 0.013, "step": 15976 }, { "epoch": 3.6352673492605234, "grad_norm": 1.5663853742954126, "learning_rate": 2.1610506850924875e-07, "loss": 0.0533, "step": 15977 }, { "epoch": 3.635494880546075, "grad_norm": 1.5867983993840484, "learning_rate": 2.1603749681445642e-07, "loss": 0.0408, "step": 15978 }, { "epoch": 3.635722411831627, "grad_norm": 2.085558999596661, "learning_rate": 2.1596993347810882e-07, "loss": 0.1206, "step": 15979 }, { "epoch": 3.6359499431171787, "grad_norm": 1.5779210329350057, "learning_rate": 2.159023785015868e-07, "loss": 0.0195, "step": 15980 }, { "epoch": 3.6361774744027304, "grad_norm": 1.9693715240513534, "learning_rate": 2.1583483188627074e-07, "loss": 0.0407, "step": 15981 }, { "epoch": 3.636405005688282, "grad_norm": 1.6936273839266196, "learning_rate": 2.1576729363354158e-07, "loss": 0.1144, "step": 15982 }, { "epoch": 3.636632536973834, "grad_norm": 1.1074311528121878, "learning_rate": 2.156997637447792e-07, "loss": 0.1009, "step": 15983 }, { "epoch": 3.6368600682593857, "grad_norm": 1.1303257511862375, "learning_rate": 2.1563224222136413e-07, "loss": 0.0325, "step": 15984 }, { "epoch": 3.6370875995449374, "grad_norm": 1.177990636673866, "learning_rate": 2.1556472906467645e-07, "loss": 0.0786, "step": 15985 }, { "epoch": 3.637315130830489, "grad_norm": 1.5267391047035272, "learning_rate": 2.154972242760957e-07, "loss": 0.0784, "step": 15986 }, { "epoch": 3.637542662116041, "grad_norm": 2.712715421369953, "learning_rate": 2.1542972785700195e-07, "loss": 0.04, "step": 15987 }, { "epoch": 3.6377701934015927, "grad_norm": 0.9724207911967908, "learning_rate": 2.1536223980877423e-07, "loss": 0.0127, "step": 15988 }, { "epoch": 3.6379977246871444, "grad_norm": 1.7610886929493417, "learning_rate": 2.1529476013279227e-07, "loss": 0.0214, "step": 15989 }, { "epoch": 3.638225255972696, "grad_norm": 1.286814474225823, "learning_rate": 2.1522728883043492e-07, "loss": 0.0283, "step": 15990 }, { "epoch": 3.638452787258248, "grad_norm": 1.1117109018703588, "learning_rate": 2.1515982590308133e-07, "loss": 0.049, "step": 15991 }, { "epoch": 3.6386803185437997, "grad_norm": 0.9326681272707151, "learning_rate": 2.1509237135211046e-07, "loss": 0.0583, "step": 15992 }, { "epoch": 3.6389078498293514, "grad_norm": 1.138481142483113, "learning_rate": 2.1502492517890064e-07, "loss": 0.0109, "step": 15993 }, { "epoch": 3.639135381114903, "grad_norm": 2.415071307518005, "learning_rate": 2.1495748738483076e-07, "loss": 0.0326, "step": 15994 }, { "epoch": 3.639362912400455, "grad_norm": 1.214620843835978, "learning_rate": 2.1489005797127863e-07, "loss": 0.0601, "step": 15995 }, { "epoch": 3.6395904436860067, "grad_norm": 1.0961671028739068, "learning_rate": 2.1482263693962285e-07, "loss": 0.0506, "step": 15996 }, { "epoch": 3.6398179749715585, "grad_norm": 1.2352152230341789, "learning_rate": 2.1475522429124096e-07, "loss": 0.0497, "step": 15997 }, { "epoch": 3.64004550625711, "grad_norm": 1.8249282150564774, "learning_rate": 2.1468782002751097e-07, "loss": 0.0605, "step": 15998 }, { "epoch": 3.640273037542662, "grad_norm": 1.4966866552407796, "learning_rate": 2.1462042414981072e-07, "loss": 0.0884, "step": 15999 }, { "epoch": 3.640500568828214, "grad_norm": 1.252816733585218, "learning_rate": 2.1455303665951714e-07, "loss": 0.0474, "step": 16000 }, { "epoch": 3.6407281001137655, "grad_norm": 1.033445274030682, "learning_rate": 2.1448565755800808e-07, "loss": 0.038, "step": 16001 }, { "epoch": 3.6409556313993177, "grad_norm": 2.4329751305680603, "learning_rate": 2.1441828684666008e-07, "loss": 0.0317, "step": 16002 }, { "epoch": 3.641183162684869, "grad_norm": 1.9399679706325712, "learning_rate": 2.143509245268506e-07, "loss": 0.0297, "step": 16003 }, { "epoch": 3.641410693970421, "grad_norm": 0.5148674943306966, "learning_rate": 2.1428357059995597e-07, "loss": 0.0155, "step": 16004 }, { "epoch": 3.6416382252559725, "grad_norm": 1.4834258144628054, "learning_rate": 2.142162250673529e-07, "loss": 0.0397, "step": 16005 }, { "epoch": 3.6418657565415247, "grad_norm": 2.497021159647285, "learning_rate": 2.1414888793041813e-07, "loss": 0.108, "step": 16006 }, { "epoch": 3.642093287827076, "grad_norm": 2.1937865426683065, "learning_rate": 2.1408155919052745e-07, "loss": 0.0282, "step": 16007 }, { "epoch": 3.642320819112628, "grad_norm": 1.2546677112102118, "learning_rate": 2.140142388490573e-07, "loss": 0.0421, "step": 16008 }, { "epoch": 3.6425483503981795, "grad_norm": 2.786413466491492, "learning_rate": 2.1394692690738344e-07, "loss": 0.0506, "step": 16009 }, { "epoch": 3.6427758816837317, "grad_norm": 1.2789874835736772, "learning_rate": 2.138796233668814e-07, "loss": 0.0056, "step": 16010 }, { "epoch": 3.6430034129692834, "grad_norm": 1.57758762995604, "learning_rate": 2.1381232822892694e-07, "loss": 0.0923, "step": 16011 }, { "epoch": 3.643230944254835, "grad_norm": 0.543118054450169, "learning_rate": 2.1374504149489536e-07, "loss": 0.0057, "step": 16012 }, { "epoch": 3.643458475540387, "grad_norm": 1.5350134452814215, "learning_rate": 2.1367776316616227e-07, "loss": 0.0818, "step": 16013 }, { "epoch": 3.6436860068259387, "grad_norm": 1.203933738537517, "learning_rate": 2.136104932441021e-07, "loss": 0.0139, "step": 16014 }, { "epoch": 3.6439135381114904, "grad_norm": 1.4708218271714892, "learning_rate": 2.1354323173009027e-07, "loss": 0.0074, "step": 16015 }, { "epoch": 3.644141069397042, "grad_norm": 1.484432608183271, "learning_rate": 2.1347597862550127e-07, "loss": 0.0591, "step": 16016 }, { "epoch": 3.644368600682594, "grad_norm": 1.7867463956989742, "learning_rate": 2.134087339317093e-07, "loss": 0.085, "step": 16017 }, { "epoch": 3.6445961319681457, "grad_norm": 1.6312811896527493, "learning_rate": 2.1334149765008909e-07, "loss": 0.0407, "step": 16018 }, { "epoch": 3.6448236632536974, "grad_norm": 1.2191284627848273, "learning_rate": 2.1327426978201476e-07, "loss": 0.1093, "step": 16019 }, { "epoch": 3.645051194539249, "grad_norm": 0.9811495883638827, "learning_rate": 2.1320705032886044e-07, "loss": 0.0159, "step": 16020 }, { "epoch": 3.645278725824801, "grad_norm": 0.9760616891296321, "learning_rate": 2.131398392919997e-07, "loss": 0.0076, "step": 16021 }, { "epoch": 3.6455062571103527, "grad_norm": 0.9477392318701482, "learning_rate": 2.1307263667280655e-07, "loss": 0.0141, "step": 16022 }, { "epoch": 3.6457337883959045, "grad_norm": 0.8915450455280841, "learning_rate": 2.1300544247265437e-07, "loss": 0.0225, "step": 16023 }, { "epoch": 3.645961319681456, "grad_norm": 1.0354966710226048, "learning_rate": 2.1293825669291612e-07, "loss": 0.01, "step": 16024 }, { "epoch": 3.646188850967008, "grad_norm": 2.5260603075796757, "learning_rate": 2.1287107933496524e-07, "loss": 0.0329, "step": 16025 }, { "epoch": 3.6464163822525597, "grad_norm": 1.3534772181988066, "learning_rate": 2.1280391040017473e-07, "loss": 0.0744, "step": 16026 }, { "epoch": 3.6466439135381115, "grad_norm": 1.0935572358577472, "learning_rate": 2.1273674988991764e-07, "loss": 0.0158, "step": 16027 }, { "epoch": 3.646871444823663, "grad_norm": 1.6114762391154271, "learning_rate": 2.1266959780556637e-07, "loss": 0.0395, "step": 16028 }, { "epoch": 3.647098976109215, "grad_norm": 0.9823280762715598, "learning_rate": 2.1260245414849313e-07, "loss": 0.0115, "step": 16029 }, { "epoch": 3.6473265073947667, "grad_norm": 1.6738685458880187, "learning_rate": 2.1253531892007067e-07, "loss": 0.0116, "step": 16030 }, { "epoch": 3.6475540386803185, "grad_norm": 1.7314428975718998, "learning_rate": 2.1246819212167065e-07, "loss": 0.077, "step": 16031 }, { "epoch": 3.64778156996587, "grad_norm": 2.6159899214339513, "learning_rate": 2.124010737546653e-07, "loss": 0.0106, "step": 16032 }, { "epoch": 3.648009101251422, "grad_norm": 1.252784886868959, "learning_rate": 2.1233396382042637e-07, "loss": 0.019, "step": 16033 }, { "epoch": 3.6482366325369737, "grad_norm": 0.8516724541526605, "learning_rate": 2.1226686232032563e-07, "loss": 0.0574, "step": 16034 }, { "epoch": 3.6484641638225255, "grad_norm": 1.0268558107309962, "learning_rate": 2.121997692557344e-07, "loss": 0.0662, "step": 16035 }, { "epoch": 3.6486916951080772, "grad_norm": 1.2104286435069374, "learning_rate": 2.121326846280236e-07, "loss": 0.0134, "step": 16036 }, { "epoch": 3.648919226393629, "grad_norm": 1.4653600349332354, "learning_rate": 2.1206560843856486e-07, "loss": 0.082, "step": 16037 }, { "epoch": 3.6491467576791807, "grad_norm": 1.2026521149795848, "learning_rate": 2.1199854068872844e-07, "loss": 0.0118, "step": 16038 }, { "epoch": 3.649374288964733, "grad_norm": 1.155575233540877, "learning_rate": 2.1193148137988582e-07, "loss": 0.0163, "step": 16039 }, { "epoch": 3.6496018202502842, "grad_norm": 1.099519555459795, "learning_rate": 2.1186443051340705e-07, "loss": 0.0613, "step": 16040 }, { "epoch": 3.6498293515358364, "grad_norm": 0.9642456004205318, "learning_rate": 2.1179738809066292e-07, "loss": 0.0116, "step": 16041 }, { "epoch": 3.6500568828213877, "grad_norm": 0.780370970420326, "learning_rate": 2.1173035411302333e-07, "loss": 0.0137, "step": 16042 }, { "epoch": 3.65028441410694, "grad_norm": 0.9830868759455271, "learning_rate": 2.1166332858185833e-07, "loss": 0.0156, "step": 16043 }, { "epoch": 3.6505119453924912, "grad_norm": 1.1891157314477747, "learning_rate": 2.115963114985379e-07, "loss": 0.07, "step": 16044 }, { "epoch": 3.6507394766780434, "grad_norm": 1.776940611871219, "learning_rate": 2.115293028644317e-07, "loss": 0.063, "step": 16045 }, { "epoch": 3.6509670079635947, "grad_norm": 1.287546219935416, "learning_rate": 2.1146230268090956e-07, "loss": 0.0364, "step": 16046 }, { "epoch": 3.651194539249147, "grad_norm": 1.3481451810507186, "learning_rate": 2.1139531094934059e-07, "loss": 0.0666, "step": 16047 }, { "epoch": 3.6514220705346982, "grad_norm": 1.5638892033323955, "learning_rate": 2.1132832767109374e-07, "loss": 0.0371, "step": 16048 }, { "epoch": 3.6516496018202504, "grad_norm": 1.9732214927327139, "learning_rate": 2.112613528475385e-07, "loss": 0.105, "step": 16049 }, { "epoch": 3.651877133105802, "grad_norm": 1.5290622848504127, "learning_rate": 2.111943864800433e-07, "loss": 0.1152, "step": 16050 }, { "epoch": 3.652104664391354, "grad_norm": 0.8996204783682167, "learning_rate": 2.11127428569977e-07, "loss": 0.0056, "step": 16051 }, { "epoch": 3.6523321956769057, "grad_norm": 1.4504100732443037, "learning_rate": 2.1106047911870804e-07, "loss": 0.0372, "step": 16052 }, { "epoch": 3.6525597269624575, "grad_norm": 1.9054107861290923, "learning_rate": 2.1099353812760502e-07, "loss": 0.0603, "step": 16053 }, { "epoch": 3.652787258248009, "grad_norm": 1.1814991875382865, "learning_rate": 2.1092660559803588e-07, "loss": 0.068, "step": 16054 }, { "epoch": 3.653014789533561, "grad_norm": 1.4469456908890101, "learning_rate": 2.1085968153136834e-07, "loss": 0.0679, "step": 16055 }, { "epoch": 3.6532423208191127, "grad_norm": 2.34658080053377, "learning_rate": 2.1079276592897067e-07, "loss": 0.0386, "step": 16056 }, { "epoch": 3.6534698521046645, "grad_norm": 1.3371167844361542, "learning_rate": 2.1072585879221008e-07, "loss": 0.027, "step": 16057 }, { "epoch": 3.653697383390216, "grad_norm": 1.2615634852966764, "learning_rate": 2.1065896012245422e-07, "loss": 0.0813, "step": 16058 }, { "epoch": 3.653924914675768, "grad_norm": 1.0215141364957319, "learning_rate": 2.1059206992107056e-07, "loss": 0.0147, "step": 16059 }, { "epoch": 3.6541524459613197, "grad_norm": 1.0695927549605435, "learning_rate": 2.1052518818942588e-07, "loss": 0.0204, "step": 16060 }, { "epoch": 3.6543799772468715, "grad_norm": 2.1307921049244403, "learning_rate": 2.1045831492888748e-07, "loss": 0.1244, "step": 16061 }, { "epoch": 3.654607508532423, "grad_norm": 1.6706789474265018, "learning_rate": 2.103914501408217e-07, "loss": 0.0193, "step": 16062 }, { "epoch": 3.654835039817975, "grad_norm": 1.7536086087696852, "learning_rate": 2.1032459382659556e-07, "loss": 0.0062, "step": 16063 }, { "epoch": 3.6550625711035267, "grad_norm": 1.3269693324922647, "learning_rate": 2.1025774598757507e-07, "loss": 0.021, "step": 16064 }, { "epoch": 3.6552901023890785, "grad_norm": 0.9329819525649443, "learning_rate": 2.1019090662512676e-07, "loss": 0.0073, "step": 16065 }, { "epoch": 3.6555176336746302, "grad_norm": 0.9959870393691284, "learning_rate": 2.1012407574061677e-07, "loss": 0.0251, "step": 16066 }, { "epoch": 3.655745164960182, "grad_norm": 1.7530074527898956, "learning_rate": 2.1005725333541068e-07, "loss": 0.1098, "step": 16067 }, { "epoch": 3.6559726962457337, "grad_norm": 1.7444252270878506, "learning_rate": 2.0999043941087455e-07, "loss": 0.0611, "step": 16068 }, { "epoch": 3.6562002275312855, "grad_norm": 1.0394794777965828, "learning_rate": 2.0992363396837363e-07, "loss": 0.0212, "step": 16069 }, { "epoch": 3.6564277588168372, "grad_norm": 1.8110530545487658, "learning_rate": 2.098568370092737e-07, "loss": 0.0254, "step": 16070 }, { "epoch": 3.656655290102389, "grad_norm": 1.1006506183062619, "learning_rate": 2.0979004853493947e-07, "loss": 0.01, "step": 16071 }, { "epoch": 3.6568828213879407, "grad_norm": 2.5411848022706964, "learning_rate": 2.0972326854673626e-07, "loss": 0.0178, "step": 16072 }, { "epoch": 3.6571103526734925, "grad_norm": 1.6555998874460274, "learning_rate": 2.0965649704602903e-07, "loss": 0.0451, "step": 16073 }, { "epoch": 3.6573378839590442, "grad_norm": 1.515786758410613, "learning_rate": 2.0958973403418215e-07, "loss": 0.0204, "step": 16074 }, { "epoch": 3.657565415244596, "grad_norm": 0.7967662589368925, "learning_rate": 2.0952297951256057e-07, "loss": 0.0076, "step": 16075 }, { "epoch": 3.6577929465301477, "grad_norm": 1.6202778114500607, "learning_rate": 2.0945623348252814e-07, "loss": 0.0484, "step": 16076 }, { "epoch": 3.6580204778156995, "grad_norm": 1.1547872930305008, "learning_rate": 2.0938949594544949e-07, "loss": 0.0752, "step": 16077 }, { "epoch": 3.6582480091012517, "grad_norm": 1.2563722110280544, "learning_rate": 2.0932276690268815e-07, "loss": 0.0411, "step": 16078 }, { "epoch": 3.658475540386803, "grad_norm": 1.8402962993725616, "learning_rate": 2.0925604635560821e-07, "loss": 0.0341, "step": 16079 }, { "epoch": 3.658703071672355, "grad_norm": 1.382722716781861, "learning_rate": 2.091893343055735e-07, "loss": 0.0083, "step": 16080 }, { "epoch": 3.6589306029579065, "grad_norm": 1.1409726988158242, "learning_rate": 2.0912263075394706e-07, "loss": 0.0158, "step": 16081 }, { "epoch": 3.6591581342434587, "grad_norm": 1.4782612810814466, "learning_rate": 2.0905593570209258e-07, "loss": 0.121, "step": 16082 }, { "epoch": 3.65938566552901, "grad_norm": 2.7003294662451767, "learning_rate": 2.089892491513728e-07, "loss": 0.0427, "step": 16083 }, { "epoch": 3.659613196814562, "grad_norm": 1.2120257289620309, "learning_rate": 2.089225711031511e-07, "loss": 0.0231, "step": 16084 }, { "epoch": 3.6598407281001135, "grad_norm": 2.22508720520357, "learning_rate": 2.0885590155878987e-07, "loss": 0.0596, "step": 16085 }, { "epoch": 3.6600682593856657, "grad_norm": 1.6015132801694436, "learning_rate": 2.0878924051965183e-07, "loss": 0.0098, "step": 16086 }, { "epoch": 3.660295790671217, "grad_norm": 1.7450455960731581, "learning_rate": 2.0872258798709974e-07, "loss": 0.0161, "step": 16087 }, { "epoch": 3.660523321956769, "grad_norm": 1.265902912585769, "learning_rate": 2.0865594396249528e-07, "loss": 0.0216, "step": 16088 }, { "epoch": 3.660750853242321, "grad_norm": 1.3938977317522656, "learning_rate": 2.0858930844720107e-07, "loss": 0.0238, "step": 16089 }, { "epoch": 3.6609783845278727, "grad_norm": 1.402091714202594, "learning_rate": 2.0852268144257858e-07, "loss": 0.0537, "step": 16090 }, { "epoch": 3.6612059158134245, "grad_norm": 1.1561015388182427, "learning_rate": 2.084560629499899e-07, "loss": 0.0385, "step": 16091 }, { "epoch": 3.6614334470989762, "grad_norm": 1.1752506211909477, "learning_rate": 2.0838945297079624e-07, "loss": 0.028, "step": 16092 }, { "epoch": 3.661660978384528, "grad_norm": 1.315017809796682, "learning_rate": 2.083228515063592e-07, "loss": 0.0406, "step": 16093 }, { "epoch": 3.6618885096700797, "grad_norm": 0.6148626849219472, "learning_rate": 2.082562585580402e-07, "loss": 0.0027, "step": 16094 }, { "epoch": 3.6621160409556315, "grad_norm": 1.9966554249634216, "learning_rate": 2.0818967412719978e-07, "loss": 0.0759, "step": 16095 }, { "epoch": 3.6623435722411832, "grad_norm": 1.3052701073471953, "learning_rate": 2.081230982151993e-07, "loss": 0.0113, "step": 16096 }, { "epoch": 3.662571103526735, "grad_norm": 3.5226809673533266, "learning_rate": 2.080565308233992e-07, "loss": 0.1841, "step": 16097 }, { "epoch": 3.6627986348122867, "grad_norm": 0.44124477998251055, "learning_rate": 2.079899719531598e-07, "loss": 0.0024, "step": 16098 }, { "epoch": 3.6630261660978385, "grad_norm": 1.541539347706413, "learning_rate": 2.079234216058417e-07, "loss": 0.0285, "step": 16099 }, { "epoch": 3.6632536973833902, "grad_norm": 1.0572249283268749, "learning_rate": 2.0785687978280505e-07, "loss": 0.1029, "step": 16100 }, { "epoch": 3.663481228668942, "grad_norm": 1.194864314130736, "learning_rate": 2.0779034648540996e-07, "loss": 0.0038, "step": 16101 }, { "epoch": 3.6637087599544937, "grad_norm": 3.476052543946188, "learning_rate": 2.077238217150159e-07, "loss": 0.0255, "step": 16102 }, { "epoch": 3.6639362912400455, "grad_norm": 1.057854327882877, "learning_rate": 2.0765730547298295e-07, "loss": 0.0164, "step": 16103 }, { "epoch": 3.6641638225255972, "grad_norm": 1.1163267166224755, "learning_rate": 2.075907977606701e-07, "loss": 0.0258, "step": 16104 }, { "epoch": 3.664391353811149, "grad_norm": 0.9725811249996598, "learning_rate": 2.0752429857943692e-07, "loss": 0.0062, "step": 16105 }, { "epoch": 3.6646188850967008, "grad_norm": 2.167046836877129, "learning_rate": 2.0745780793064274e-07, "loss": 0.0135, "step": 16106 }, { "epoch": 3.6648464163822525, "grad_norm": 1.0765438656225803, "learning_rate": 2.0739132581564603e-07, "loss": 0.0527, "step": 16107 }, { "epoch": 3.6650739476678043, "grad_norm": 1.3513201681756608, "learning_rate": 2.07324852235806e-07, "loss": 0.0143, "step": 16108 }, { "epoch": 3.665301478953356, "grad_norm": 1.6136316867491687, "learning_rate": 2.072583871924809e-07, "loss": 0.0868, "step": 16109 }, { "epoch": 3.6655290102389078, "grad_norm": 3.5613625066570793, "learning_rate": 2.071919306870295e-07, "loss": 0.045, "step": 16110 }, { "epoch": 3.6657565415244595, "grad_norm": 1.8004516946520017, "learning_rate": 2.071254827208096e-07, "loss": 0.0282, "step": 16111 }, { "epoch": 3.6659840728100113, "grad_norm": 0.5645190574841403, "learning_rate": 2.070590432951796e-07, "loss": 0.0036, "step": 16112 }, { "epoch": 3.666211604095563, "grad_norm": 1.8877997574829044, "learning_rate": 2.069926124114975e-07, "loss": 0.136, "step": 16113 }, { "epoch": 3.6664391353811148, "grad_norm": 1.0404969319792587, "learning_rate": 2.0692619007112066e-07, "loss": 0.0069, "step": 16114 }, { "epoch": 3.6666666666666665, "grad_norm": 1.4086304913728318, "learning_rate": 2.0685977627540704e-07, "loss": 0.0237, "step": 16115 }, { "epoch": 3.6668941979522183, "grad_norm": 1.4024163044678706, "learning_rate": 2.0679337102571382e-07, "loss": 0.1008, "step": 16116 }, { "epoch": 3.6671217292377705, "grad_norm": 1.4470893001830085, "learning_rate": 2.0672697432339795e-07, "loss": 0.0712, "step": 16117 }, { "epoch": 3.6673492605233218, "grad_norm": 3.460723816266517, "learning_rate": 2.066605861698167e-07, "loss": 0.0386, "step": 16118 }, { "epoch": 3.667576791808874, "grad_norm": 1.2288828976869792, "learning_rate": 2.0659420656632693e-07, "loss": 0.0099, "step": 16119 }, { "epoch": 3.6678043230944253, "grad_norm": 1.5986865233015135, "learning_rate": 2.0652783551428552e-07, "loss": 0.0862, "step": 16120 }, { "epoch": 3.6680318543799775, "grad_norm": 1.0819864140354518, "learning_rate": 2.0646147301504855e-07, "loss": 0.0637, "step": 16121 }, { "epoch": 3.668259385665529, "grad_norm": 1.9108310039221468, "learning_rate": 2.063951190699727e-07, "loss": 0.0788, "step": 16122 }, { "epoch": 3.668486916951081, "grad_norm": 1.8854524044925405, "learning_rate": 2.0632877368041407e-07, "loss": 0.0187, "step": 16123 }, { "epoch": 3.6687144482366323, "grad_norm": 1.175717269869301, "learning_rate": 2.0626243684772825e-07, "loss": 0.0146, "step": 16124 }, { "epoch": 3.6689419795221845, "grad_norm": 0.6856656353110822, "learning_rate": 2.061961085732714e-07, "loss": 0.0041, "step": 16125 }, { "epoch": 3.669169510807736, "grad_norm": 1.8503995832520386, "learning_rate": 2.0612978885839908e-07, "loss": 0.0264, "step": 16126 }, { "epoch": 3.669397042093288, "grad_norm": 2.3118012951069535, "learning_rate": 2.0606347770446692e-07, "loss": 0.0105, "step": 16127 }, { "epoch": 3.6696245733788397, "grad_norm": 1.436317575185019, "learning_rate": 2.0599717511282986e-07, "loss": 0.1173, "step": 16128 }, { "epoch": 3.6698521046643915, "grad_norm": 1.4687473025035573, "learning_rate": 2.0593088108484336e-07, "loss": 0.0086, "step": 16129 }, { "epoch": 3.6700796359499432, "grad_norm": 4.453116260787463, "learning_rate": 2.0586459562186217e-07, "loss": 0.0081, "step": 16130 }, { "epoch": 3.670307167235495, "grad_norm": 3.455251249006838, "learning_rate": 2.0579831872524082e-07, "loss": 0.0049, "step": 16131 }, { "epoch": 3.6705346985210467, "grad_norm": 1.8131316598369176, "learning_rate": 2.0573205039633413e-07, "loss": 0.0176, "step": 16132 }, { "epoch": 3.6707622298065985, "grad_norm": 1.097444551923757, "learning_rate": 2.0566579063649644e-07, "loss": 0.0206, "step": 16133 }, { "epoch": 3.6709897610921502, "grad_norm": 1.3477017591104694, "learning_rate": 2.055995394470823e-07, "loss": 0.0158, "step": 16134 }, { "epoch": 3.671217292377702, "grad_norm": 1.548703141631695, "learning_rate": 2.0553329682944537e-07, "loss": 0.0428, "step": 16135 }, { "epoch": 3.6714448236632538, "grad_norm": 1.5027820674840362, "learning_rate": 2.0546706278493949e-07, "loss": 0.0229, "step": 16136 }, { "epoch": 3.6716723549488055, "grad_norm": 1.5776867665654117, "learning_rate": 2.054008373149187e-07, "loss": 0.1594, "step": 16137 }, { "epoch": 3.6718998862343573, "grad_norm": 0.4070267456533952, "learning_rate": 2.0533462042073612e-07, "loss": 0.0025, "step": 16138 }, { "epoch": 3.672127417519909, "grad_norm": 0.9218797919319096, "learning_rate": 2.0526841210374536e-07, "loss": 0.004, "step": 16139 }, { "epoch": 3.6723549488054608, "grad_norm": 1.9175963162253091, "learning_rate": 2.0520221236529954e-07, "loss": 0.0497, "step": 16140 }, { "epoch": 3.6725824800910125, "grad_norm": 1.5979970460334334, "learning_rate": 2.0513602120675186e-07, "loss": 0.0182, "step": 16141 }, { "epoch": 3.6728100113765643, "grad_norm": 2.4791870810114927, "learning_rate": 2.0506983862945495e-07, "loss": 0.031, "step": 16142 }, { "epoch": 3.673037542662116, "grad_norm": 1.514183754420172, "learning_rate": 2.0500366463476133e-07, "loss": 0.091, "step": 16143 }, { "epoch": 3.6732650739476678, "grad_norm": 1.398780430384763, "learning_rate": 2.0493749922402376e-07, "loss": 0.0938, "step": 16144 }, { "epoch": 3.6734926052332195, "grad_norm": 1.457801767388455, "learning_rate": 2.0487134239859423e-07, "loss": 0.0236, "step": 16145 }, { "epoch": 3.6737201365187713, "grad_norm": 1.8790661856688964, "learning_rate": 2.0480519415982495e-07, "loss": 0.0237, "step": 16146 }, { "epoch": 3.673947667804323, "grad_norm": 1.9778547517202374, "learning_rate": 2.0473905450906807e-07, "loss": 0.0129, "step": 16147 }, { "epoch": 3.6741751990898748, "grad_norm": 2.605819598585238, "learning_rate": 2.046729234476754e-07, "loss": 0.0429, "step": 16148 }, { "epoch": 3.6744027303754265, "grad_norm": 1.1978881188762842, "learning_rate": 2.046068009769984e-07, "loss": 0.045, "step": 16149 }, { "epoch": 3.6746302616609783, "grad_norm": 1.447792354953659, "learning_rate": 2.045406870983882e-07, "loss": 0.0181, "step": 16150 }, { "epoch": 3.67485779294653, "grad_norm": 2.2277289038922516, "learning_rate": 2.0447458181319657e-07, "loss": 0.0261, "step": 16151 }, { "epoch": 3.675085324232082, "grad_norm": 1.8141200116216423, "learning_rate": 2.0440848512277414e-07, "loss": 0.0147, "step": 16152 }, { "epoch": 3.6753128555176335, "grad_norm": 1.661091298644261, "learning_rate": 2.0434239702847198e-07, "loss": 0.12, "step": 16153 }, { "epoch": 3.6755403868031853, "grad_norm": 1.0550976118412667, "learning_rate": 2.0427631753164103e-07, "loss": 0.024, "step": 16154 }, { "epoch": 3.675767918088737, "grad_norm": 2.4681836034831806, "learning_rate": 2.0421024663363146e-07, "loss": 0.1037, "step": 16155 }, { "epoch": 3.6759954493742892, "grad_norm": 2.292297592888241, "learning_rate": 2.04144184335794e-07, "loss": 0.0364, "step": 16156 }, { "epoch": 3.6762229806598405, "grad_norm": 1.7727993324362148, "learning_rate": 2.0407813063947838e-07, "loss": 0.1615, "step": 16157 }, { "epoch": 3.6764505119453927, "grad_norm": 1.4396347330382895, "learning_rate": 2.0401208554603514e-07, "loss": 0.0183, "step": 16158 }, { "epoch": 3.676678043230944, "grad_norm": 1.3799164418696408, "learning_rate": 2.0394604905681348e-07, "loss": 0.0264, "step": 16159 }, { "epoch": 3.6769055745164962, "grad_norm": 1.9111006867474638, "learning_rate": 2.038800211731639e-07, "loss": 0.0133, "step": 16160 }, { "epoch": 3.6771331058020476, "grad_norm": 1.1269973526221793, "learning_rate": 2.0381400189643533e-07, "loss": 0.017, "step": 16161 }, { "epoch": 3.6773606370875997, "grad_norm": 0.9461138491800098, "learning_rate": 2.037479912279771e-07, "loss": 0.0065, "step": 16162 }, { "epoch": 3.677588168373151, "grad_norm": 1.7012495873242508, "learning_rate": 2.0368198916913855e-07, "loss": 0.1035, "step": 16163 }, { "epoch": 3.6778156996587033, "grad_norm": 1.6950166221404543, "learning_rate": 2.036159957212684e-07, "loss": 0.0347, "step": 16164 }, { "epoch": 3.6780432309442546, "grad_norm": 1.5817675878082864, "learning_rate": 2.0355001088571552e-07, "loss": 0.0323, "step": 16165 }, { "epoch": 3.6782707622298068, "grad_norm": 4.711726685506363, "learning_rate": 2.034840346638288e-07, "loss": 0.0814, "step": 16166 }, { "epoch": 3.6784982935153585, "grad_norm": 1.0995643767508356, "learning_rate": 2.0341806705695625e-07, "loss": 0.0399, "step": 16167 }, { "epoch": 3.6787258248009103, "grad_norm": 1.4060949808884091, "learning_rate": 2.0335210806644656e-07, "loss": 0.0381, "step": 16168 }, { "epoch": 3.678953356086462, "grad_norm": 1.838254377714135, "learning_rate": 2.0328615769364727e-07, "loss": 0.0923, "step": 16169 }, { "epoch": 3.6791808873720138, "grad_norm": 1.4109378001505566, "learning_rate": 2.0322021593990688e-07, "loss": 0.0285, "step": 16170 }, { "epoch": 3.6794084186575655, "grad_norm": 0.5959431926235939, "learning_rate": 2.0315428280657252e-07, "loss": 0.0099, "step": 16171 }, { "epoch": 3.6796359499431173, "grad_norm": 1.1513333153023027, "learning_rate": 2.0308835829499211e-07, "loss": 0.0391, "step": 16172 }, { "epoch": 3.679863481228669, "grad_norm": 1.863385454255039, "learning_rate": 2.0302244240651318e-07, "loss": 0.1034, "step": 16173 }, { "epoch": 3.6800910125142208, "grad_norm": 1.336494423294062, "learning_rate": 2.0295653514248247e-07, "loss": 0.0412, "step": 16174 }, { "epoch": 3.6803185437997725, "grad_norm": 1.7862453007341947, "learning_rate": 2.0289063650424746e-07, "loss": 0.0688, "step": 16175 }, { "epoch": 3.6805460750853243, "grad_norm": 0.8848614526860062, "learning_rate": 2.028247464931546e-07, "loss": 0.012, "step": 16176 }, { "epoch": 3.680773606370876, "grad_norm": 0.7296249642706529, "learning_rate": 2.0275886511055088e-07, "loss": 0.011, "step": 16177 }, { "epoch": 3.681001137656428, "grad_norm": 1.9876829486202523, "learning_rate": 2.026929923577825e-07, "loss": 0.0254, "step": 16178 }, { "epoch": 3.6812286689419795, "grad_norm": 1.5332855556318958, "learning_rate": 2.0262712823619592e-07, "loss": 0.0275, "step": 16179 }, { "epoch": 3.6814562002275313, "grad_norm": 2.106868589606867, "learning_rate": 2.025612727471376e-07, "loss": 0.0423, "step": 16180 }, { "epoch": 3.681683731513083, "grad_norm": 1.5864708809081296, "learning_rate": 2.0249542589195287e-07, "loss": 0.1065, "step": 16181 }, { "epoch": 3.681911262798635, "grad_norm": 1.6494179022927609, "learning_rate": 2.0242958767198811e-07, "loss": 0.0288, "step": 16182 }, { "epoch": 3.6821387940841865, "grad_norm": 2.198251468127842, "learning_rate": 2.023637580885885e-07, "loss": 0.0404, "step": 16183 }, { "epoch": 3.6823663253697383, "grad_norm": 3.5858822938120563, "learning_rate": 2.0229793714309985e-07, "loss": 0.0267, "step": 16184 }, { "epoch": 3.68259385665529, "grad_norm": 1.381226915095224, "learning_rate": 2.02232124836867e-07, "loss": 0.0714, "step": 16185 }, { "epoch": 3.682821387940842, "grad_norm": 1.3606856873950604, "learning_rate": 2.0216632117123527e-07, "loss": 0.0466, "step": 16186 }, { "epoch": 3.6830489192263935, "grad_norm": 4.904004952753646, "learning_rate": 2.021005261475497e-07, "loss": 0.0267, "step": 16187 }, { "epoch": 3.6832764505119453, "grad_norm": 1.6730764804470213, "learning_rate": 2.0203473976715472e-07, "loss": 0.0361, "step": 16188 }, { "epoch": 3.683503981797497, "grad_norm": 1.4766849077631483, "learning_rate": 2.0196896203139527e-07, "loss": 0.0338, "step": 16189 }, { "epoch": 3.683731513083049, "grad_norm": 0.7877609757125155, "learning_rate": 2.0190319294161523e-07, "loss": 0.0069, "step": 16190 }, { "epoch": 3.6839590443686006, "grad_norm": 0.8304432205749673, "learning_rate": 2.0183743249915926e-07, "loss": 0.0214, "step": 16191 }, { "epoch": 3.6841865756541523, "grad_norm": 1.3890266748618223, "learning_rate": 2.0177168070537102e-07, "loss": 0.0911, "step": 16192 }, { "epoch": 3.684414106939704, "grad_norm": 4.5516767987470725, "learning_rate": 2.017059375615945e-07, "loss": 0.0205, "step": 16193 }, { "epoch": 3.684641638225256, "grad_norm": 1.2118569975285192, "learning_rate": 2.016402030691736e-07, "loss": 0.0189, "step": 16194 }, { "epoch": 3.684869169510808, "grad_norm": 1.6292551400655806, "learning_rate": 2.015744772294514e-07, "loss": 0.0783, "step": 16195 }, { "epoch": 3.6850967007963593, "grad_norm": 1.1069384345289033, "learning_rate": 2.0150876004377158e-07, "loss": 0.04, "step": 16196 }, { "epoch": 3.6853242320819115, "grad_norm": 1.7278724114829562, "learning_rate": 2.0144305151347694e-07, "loss": 0.0975, "step": 16197 }, { "epoch": 3.685551763367463, "grad_norm": 1.5230732700825553, "learning_rate": 2.0137735163991083e-07, "loss": 0.0197, "step": 16198 }, { "epoch": 3.685779294653015, "grad_norm": 1.0670797109452452, "learning_rate": 2.0131166042441566e-07, "loss": 0.0543, "step": 16199 }, { "epoch": 3.6860068259385663, "grad_norm": 1.7329705518863228, "learning_rate": 2.0124597786833413e-07, "loss": 0.1084, "step": 16200 }, { "epoch": 3.6862343572241185, "grad_norm": 1.7987808226996016, "learning_rate": 2.0118030397300905e-07, "loss": 0.1169, "step": 16201 }, { "epoch": 3.68646188850967, "grad_norm": 1.2213773549884435, "learning_rate": 2.0111463873978208e-07, "loss": 0.0435, "step": 16202 }, { "epoch": 3.686689419795222, "grad_norm": 1.738125284535498, "learning_rate": 2.0104898216999576e-07, "loss": 0.0946, "step": 16203 }, { "epoch": 3.6869169510807733, "grad_norm": 2.1736606517660633, "learning_rate": 2.009833342649919e-07, "loss": 0.0885, "step": 16204 }, { "epoch": 3.6871444823663255, "grad_norm": 1.8252664379452486, "learning_rate": 2.0091769502611186e-07, "loss": 0.0145, "step": 16205 }, { "epoch": 3.6873720136518773, "grad_norm": 1.0014058715321248, "learning_rate": 2.0085206445469755e-07, "loss": 0.0153, "step": 16206 }, { "epoch": 3.687599544937429, "grad_norm": 1.5704950912619313, "learning_rate": 2.0078644255209014e-07, "loss": 0.0439, "step": 16207 }, { "epoch": 3.687827076222981, "grad_norm": 1.3819731209269097, "learning_rate": 2.0072082931963125e-07, "loss": 0.1129, "step": 16208 }, { "epoch": 3.6880546075085325, "grad_norm": 1.4732207117393477, "learning_rate": 2.0065522475866127e-07, "loss": 0.0307, "step": 16209 }, { "epoch": 3.6882821387940843, "grad_norm": 1.392635294904281, "learning_rate": 2.0058962887052162e-07, "loss": 0.0259, "step": 16210 }, { "epoch": 3.688509670079636, "grad_norm": 0.9748506280990185, "learning_rate": 2.0052404165655265e-07, "loss": 0.0277, "step": 16211 }, { "epoch": 3.688737201365188, "grad_norm": 1.3245895640775305, "learning_rate": 2.0045846311809476e-07, "loss": 0.0276, "step": 16212 }, { "epoch": 3.6889647326507395, "grad_norm": 1.1800240541326752, "learning_rate": 2.0039289325648824e-07, "loss": 0.0136, "step": 16213 }, { "epoch": 3.6891922639362913, "grad_norm": 1.3028690099367304, "learning_rate": 2.0032733207307343e-07, "loss": 0.0359, "step": 16214 }, { "epoch": 3.689419795221843, "grad_norm": 1.417054799293204, "learning_rate": 2.0026177956919038e-07, "loss": 0.033, "step": 16215 }, { "epoch": 3.689647326507395, "grad_norm": 1.6716996293458588, "learning_rate": 2.001962357461785e-07, "loss": 0.0225, "step": 16216 }, { "epoch": 3.6898748577929465, "grad_norm": 1.013432748311509, "learning_rate": 2.0013070060537779e-07, "loss": 0.0206, "step": 16217 }, { "epoch": 3.6901023890784983, "grad_norm": 1.3307484304766304, "learning_rate": 2.0006517414812744e-07, "loss": 0.0368, "step": 16218 }, { "epoch": 3.69032992036405, "grad_norm": 0.5635411847323776, "learning_rate": 1.999996563757664e-07, "loss": 0.0088, "step": 16219 }, { "epoch": 3.690557451649602, "grad_norm": 1.2141764391746537, "learning_rate": 1.9993414728963413e-07, "loss": 0.0201, "step": 16220 }, { "epoch": 3.6907849829351536, "grad_norm": 2.281502411750932, "learning_rate": 1.998686468910694e-07, "loss": 0.029, "step": 16221 }, { "epoch": 3.6910125142207053, "grad_norm": 1.8261949233700565, "learning_rate": 1.9980315518141112e-07, "loss": 0.0406, "step": 16222 }, { "epoch": 3.691240045506257, "grad_norm": 1.9226224510916554, "learning_rate": 1.997376721619977e-07, "loss": 0.0936, "step": 16223 }, { "epoch": 3.691467576791809, "grad_norm": 1.3630968353485056, "learning_rate": 1.9967219783416717e-07, "loss": 0.0099, "step": 16224 }, { "epoch": 3.6916951080773606, "grad_norm": 2.109432824860569, "learning_rate": 1.99606732199258e-07, "loss": 0.051, "step": 16225 }, { "epoch": 3.6919226393629123, "grad_norm": 1.550576538107685, "learning_rate": 1.995412752586081e-07, "loss": 0.0578, "step": 16226 }, { "epoch": 3.692150170648464, "grad_norm": 1.4445806327136925, "learning_rate": 1.9947582701355556e-07, "loss": 0.042, "step": 16227 }, { "epoch": 3.692377701934016, "grad_norm": 1.6745908933498246, "learning_rate": 1.994103874654376e-07, "loss": 0.0549, "step": 16228 }, { "epoch": 3.6926052332195676, "grad_norm": 1.8272295257043325, "learning_rate": 1.9934495661559214e-07, "loss": 0.0721, "step": 16229 }, { "epoch": 3.6928327645051193, "grad_norm": 1.769761985890601, "learning_rate": 1.9927953446535622e-07, "loss": 0.0403, "step": 16230 }, { "epoch": 3.693060295790671, "grad_norm": 1.086752209192903, "learning_rate": 1.992141210160667e-07, "loss": 0.0149, "step": 16231 }, { "epoch": 3.693287827076223, "grad_norm": 1.3740381535191708, "learning_rate": 1.991487162690607e-07, "loss": 0.0874, "step": 16232 }, { "epoch": 3.6935153583617746, "grad_norm": 1.6918344298119825, "learning_rate": 1.9908332022567513e-07, "loss": 0.0219, "step": 16233 }, { "epoch": 3.6937428896473268, "grad_norm": 1.4210394650151046, "learning_rate": 1.9901793288724664e-07, "loss": 0.0285, "step": 16234 }, { "epoch": 3.693970420932878, "grad_norm": 1.2068175210379293, "learning_rate": 1.9895255425511123e-07, "loss": 0.0281, "step": 16235 }, { "epoch": 3.6941979522184303, "grad_norm": 1.0849729238534704, "learning_rate": 1.9888718433060554e-07, "loss": 0.0228, "step": 16236 }, { "epoch": 3.6944254835039816, "grad_norm": 1.1150773758214951, "learning_rate": 1.9882182311506543e-07, "loss": 0.0263, "step": 16237 }, { "epoch": 3.694653014789534, "grad_norm": 2.4928922535523297, "learning_rate": 1.9875647060982655e-07, "loss": 0.0143, "step": 16238 }, { "epoch": 3.694880546075085, "grad_norm": 0.8687143509960885, "learning_rate": 1.9869112681622477e-07, "loss": 0.0162, "step": 16239 }, { "epoch": 3.6951080773606373, "grad_norm": 1.5339523244450937, "learning_rate": 1.9862579173559563e-07, "loss": 0.0166, "step": 16240 }, { "epoch": 3.6953356086461886, "grad_norm": 1.363810537737465, "learning_rate": 1.9856046536927465e-07, "loss": 0.1348, "step": 16241 }, { "epoch": 3.695563139931741, "grad_norm": 1.4246612435506938, "learning_rate": 1.9849514771859674e-07, "loss": 0.0243, "step": 16242 }, { "epoch": 3.695790671217292, "grad_norm": 1.3819004885812893, "learning_rate": 1.9842983878489673e-07, "loss": 0.0422, "step": 16243 }, { "epoch": 3.6960182025028443, "grad_norm": 1.2998119972287856, "learning_rate": 1.9836453856950973e-07, "loss": 0.0171, "step": 16244 }, { "epoch": 3.696245733788396, "grad_norm": 1.193492104013778, "learning_rate": 1.9829924707377005e-07, "loss": 0.0247, "step": 16245 }, { "epoch": 3.696473265073948, "grad_norm": 2.049447374008907, "learning_rate": 1.9823396429901235e-07, "loss": 0.1463, "step": 16246 }, { "epoch": 3.6967007963594996, "grad_norm": 1.7807299851055705, "learning_rate": 1.9816869024657078e-07, "loss": 0.0707, "step": 16247 }, { "epoch": 3.6969283276450513, "grad_norm": 1.0383986745037566, "learning_rate": 1.981034249177797e-07, "loss": 0.0343, "step": 16248 }, { "epoch": 3.697155858930603, "grad_norm": 2.0198315163175766, "learning_rate": 1.9803816831397274e-07, "loss": 0.0703, "step": 16249 }, { "epoch": 3.697383390216155, "grad_norm": 1.8193365118418594, "learning_rate": 1.979729204364835e-07, "loss": 0.0318, "step": 16250 }, { "epoch": 3.6976109215017066, "grad_norm": 1.4512715006817347, "learning_rate": 1.9790768128664588e-07, "loss": 0.0548, "step": 16251 }, { "epoch": 3.6978384527872583, "grad_norm": 1.3388507118838329, "learning_rate": 1.9784245086579292e-07, "loss": 0.0446, "step": 16252 }, { "epoch": 3.69806598407281, "grad_norm": 1.0097029156844226, "learning_rate": 1.9777722917525797e-07, "loss": 0.0379, "step": 16253 }, { "epoch": 3.698293515358362, "grad_norm": 2.5970869583867633, "learning_rate": 1.9771201621637398e-07, "loss": 0.1375, "step": 16254 }, { "epoch": 3.6985210466439136, "grad_norm": 2.1682375403405905, "learning_rate": 1.976468119904741e-07, "loss": 0.0322, "step": 16255 }, { "epoch": 3.6987485779294653, "grad_norm": 1.5540878563816154, "learning_rate": 1.9758161649889072e-07, "loss": 0.0669, "step": 16256 }, { "epoch": 3.698976109215017, "grad_norm": 0.910872464521084, "learning_rate": 1.9751642974295605e-07, "loss": 0.0191, "step": 16257 }, { "epoch": 3.699203640500569, "grad_norm": 1.78899937710868, "learning_rate": 1.9745125172400292e-07, "loss": 0.0675, "step": 16258 }, { "epoch": 3.6994311717861206, "grad_norm": 0.7825799107672567, "learning_rate": 1.9738608244336293e-07, "loss": 0.0044, "step": 16259 }, { "epoch": 3.6996587030716723, "grad_norm": 0.9704085447937684, "learning_rate": 1.973209219023683e-07, "loss": 0.0225, "step": 16260 }, { "epoch": 3.699886234357224, "grad_norm": 2.0655220350539767, "learning_rate": 1.97255770102351e-07, "loss": 0.0508, "step": 16261 }, { "epoch": 3.700113765642776, "grad_norm": 1.3098080179541194, "learning_rate": 1.9719062704464214e-07, "loss": 0.0414, "step": 16262 }, { "epoch": 3.7003412969283276, "grad_norm": 2.5160336746482543, "learning_rate": 1.971254927305736e-07, "loss": 0.1166, "step": 16263 }, { "epoch": 3.7005688282138793, "grad_norm": 2.3911864808064336, "learning_rate": 1.970603671614762e-07, "loss": 0.0235, "step": 16264 }, { "epoch": 3.700796359499431, "grad_norm": 1.3557149496156315, "learning_rate": 1.9699525033868129e-07, "loss": 0.0437, "step": 16265 }, { "epoch": 3.701023890784983, "grad_norm": 1.6135860118076468, "learning_rate": 1.969301422635194e-07, "loss": 0.0067, "step": 16266 }, { "epoch": 3.7012514220705346, "grad_norm": 0.9075209711230305, "learning_rate": 1.9686504293732153e-07, "loss": 0.0079, "step": 16267 }, { "epoch": 3.7014789533560863, "grad_norm": 2.395837593365568, "learning_rate": 1.967999523614182e-07, "loss": 0.0249, "step": 16268 }, { "epoch": 3.701706484641638, "grad_norm": 1.7305019461120357, "learning_rate": 1.9673487053713948e-07, "loss": 0.0208, "step": 16269 }, { "epoch": 3.70193401592719, "grad_norm": 1.169553245497255, "learning_rate": 1.9666979746581585e-07, "loss": 0.0607, "step": 16270 }, { "epoch": 3.7021615472127416, "grad_norm": 2.3951284435215467, "learning_rate": 1.9660473314877693e-07, "loss": 0.0522, "step": 16271 }, { "epoch": 3.7023890784982934, "grad_norm": 0.8093977006747577, "learning_rate": 1.9653967758735285e-07, "loss": 0.013, "step": 16272 }, { "epoch": 3.7026166097838455, "grad_norm": 1.7436842522553373, "learning_rate": 1.9647463078287293e-07, "loss": 0.0194, "step": 16273 }, { "epoch": 3.702844141069397, "grad_norm": 0.43949584795359276, "learning_rate": 1.9640959273666675e-07, "loss": 0.0021, "step": 16274 }, { "epoch": 3.703071672354949, "grad_norm": 1.4662433913202455, "learning_rate": 1.9634456345006374e-07, "loss": 0.0222, "step": 16275 }, { "epoch": 3.7032992036405004, "grad_norm": 0.9360675118603309, "learning_rate": 1.9627954292439259e-07, "loss": 0.03, "step": 16276 }, { "epoch": 3.7035267349260526, "grad_norm": 1.5470683800796543, "learning_rate": 1.9621453116098261e-07, "loss": 0.1188, "step": 16277 }, { "epoch": 3.703754266211604, "grad_norm": 2.1917749191098466, "learning_rate": 1.9614952816116215e-07, "loss": 0.049, "step": 16278 }, { "epoch": 3.703981797497156, "grad_norm": 0.8938934839367547, "learning_rate": 1.9608453392626023e-07, "loss": 0.0106, "step": 16279 }, { "epoch": 3.7042093287827074, "grad_norm": 1.6178496207443887, "learning_rate": 1.960195484576046e-07, "loss": 0.0705, "step": 16280 }, { "epoch": 3.7044368600682596, "grad_norm": 1.462949098080958, "learning_rate": 1.9595457175652379e-07, "loss": 0.0745, "step": 16281 }, { "epoch": 3.7046643913538113, "grad_norm": 1.6503164686700391, "learning_rate": 1.9588960382434594e-07, "loss": 0.0506, "step": 16282 }, { "epoch": 3.704891922639363, "grad_norm": 1.3585259928203381, "learning_rate": 1.9582464466239856e-07, "loss": 0.0184, "step": 16283 }, { "epoch": 3.705119453924915, "grad_norm": 1.6773368861499833, "learning_rate": 1.9575969427200962e-07, "loss": 0.0184, "step": 16284 }, { "epoch": 3.7053469852104666, "grad_norm": 1.9180258382060715, "learning_rate": 1.9569475265450618e-07, "loss": 0.0177, "step": 16285 }, { "epoch": 3.7055745164960183, "grad_norm": 2.593149014159315, "learning_rate": 1.956298198112158e-07, "loss": 0.0437, "step": 16286 }, { "epoch": 3.70580204778157, "grad_norm": 1.393612459629699, "learning_rate": 1.9556489574346576e-07, "loss": 0.1035, "step": 16287 }, { "epoch": 3.706029579067122, "grad_norm": 2.5899347076454458, "learning_rate": 1.9549998045258257e-07, "loss": 0.0167, "step": 16288 }, { "epoch": 3.7062571103526736, "grad_norm": 1.5846257985241772, "learning_rate": 1.9543507393989338e-07, "loss": 0.0596, "step": 16289 }, { "epoch": 3.7064846416382253, "grad_norm": 2.052639550605012, "learning_rate": 1.9537017620672436e-07, "loss": 0.1646, "step": 16290 }, { "epoch": 3.706712172923777, "grad_norm": 1.4652480074831915, "learning_rate": 1.9530528725440235e-07, "loss": 0.0656, "step": 16291 }, { "epoch": 3.706939704209329, "grad_norm": 1.7194639779852183, "learning_rate": 1.9524040708425306e-07, "loss": 0.0358, "step": 16292 }, { "epoch": 3.7071672354948806, "grad_norm": 2.792178011206008, "learning_rate": 1.9517553569760282e-07, "loss": 0.0116, "step": 16293 }, { "epoch": 3.7073947667804323, "grad_norm": 1.5737728397650967, "learning_rate": 1.951106730957776e-07, "loss": 0.0765, "step": 16294 }, { "epoch": 3.707622298065984, "grad_norm": 3.8729239707921703, "learning_rate": 1.950458192801028e-07, "loss": 0.0406, "step": 16295 }, { "epoch": 3.707849829351536, "grad_norm": 0.7741102212081903, "learning_rate": 1.9498097425190419e-07, "loss": 0.005, "step": 16296 }, { "epoch": 3.7080773606370876, "grad_norm": 1.4241625869537367, "learning_rate": 1.949161380125067e-07, "loss": 0.0085, "step": 16297 }, { "epoch": 3.7083048919226393, "grad_norm": 1.1665690617753057, "learning_rate": 1.948513105632359e-07, "loss": 0.0777, "step": 16298 }, { "epoch": 3.708532423208191, "grad_norm": 1.5516899582390375, "learning_rate": 1.9478649190541632e-07, "loss": 0.0277, "step": 16299 }, { "epoch": 3.708759954493743, "grad_norm": 1.04735236862378, "learning_rate": 1.9472168204037292e-07, "loss": 0.0566, "step": 16300 }, { "epoch": 3.7089874857792946, "grad_norm": 1.6675444209779218, "learning_rate": 1.9465688096943062e-07, "loss": 0.0808, "step": 16301 }, { "epoch": 3.7092150170648464, "grad_norm": 3.263272435976284, "learning_rate": 1.9459208869391324e-07, "loss": 0.0305, "step": 16302 }, { "epoch": 3.709442548350398, "grad_norm": 0.5775765602970567, "learning_rate": 1.9452730521514557e-07, "loss": 0.0174, "step": 16303 }, { "epoch": 3.70967007963595, "grad_norm": 0.36510226149875324, "learning_rate": 1.9446253053445122e-07, "loss": 0.0019, "step": 16304 }, { "epoch": 3.7098976109215016, "grad_norm": 1.089894033776036, "learning_rate": 1.943977646531544e-07, "loss": 0.0511, "step": 16305 }, { "epoch": 3.7101251422070534, "grad_norm": 1.0034181178956745, "learning_rate": 1.943330075725785e-07, "loss": 0.0268, "step": 16306 }, { "epoch": 3.710352673492605, "grad_norm": 1.2128611286705437, "learning_rate": 1.9426825929404716e-07, "loss": 0.0611, "step": 16307 }, { "epoch": 3.710580204778157, "grad_norm": 1.6399025873650588, "learning_rate": 1.9420351981888394e-07, "loss": 0.0779, "step": 16308 }, { "epoch": 3.7108077360637086, "grad_norm": 0.5508182575533073, "learning_rate": 1.9413878914841165e-07, "loss": 0.0051, "step": 16309 }, { "epoch": 3.7110352673492604, "grad_norm": 2.0297029027746945, "learning_rate": 1.940740672839536e-07, "loss": 0.0369, "step": 16310 }, { "epoch": 3.711262798634812, "grad_norm": 1.891096625245776, "learning_rate": 1.9400935422683245e-07, "loss": 0.0229, "step": 16311 }, { "epoch": 3.7114903299203643, "grad_norm": 0.8790854257892741, "learning_rate": 1.9394464997837057e-07, "loss": 0.0466, "step": 16312 }, { "epoch": 3.7117178612059156, "grad_norm": 1.1943347195670813, "learning_rate": 1.9387995453989055e-07, "loss": 0.0186, "step": 16313 }, { "epoch": 3.711945392491468, "grad_norm": 2.3116400768682492, "learning_rate": 1.9381526791271477e-07, "loss": 0.044, "step": 16314 }, { "epoch": 3.712172923777019, "grad_norm": 2.7670347348683864, "learning_rate": 1.9375059009816537e-07, "loss": 0.0149, "step": 16315 }, { "epoch": 3.7124004550625713, "grad_norm": 0.8399922025540042, "learning_rate": 1.9368592109756396e-07, "loss": 0.0051, "step": 16316 }, { "epoch": 3.7126279863481226, "grad_norm": 2.0858734654768956, "learning_rate": 1.936212609122326e-07, "loss": 0.0635, "step": 16317 }, { "epoch": 3.712855517633675, "grad_norm": 1.6367036759864364, "learning_rate": 1.9355660954349258e-07, "loss": 0.0422, "step": 16318 }, { "epoch": 3.713083048919226, "grad_norm": 1.0435639167107305, "learning_rate": 1.934919669926652e-07, "loss": 0.0418, "step": 16319 }, { "epoch": 3.7133105802047783, "grad_norm": 1.7367949464329937, "learning_rate": 1.9342733326107172e-07, "loss": 0.0339, "step": 16320 }, { "epoch": 3.71353811149033, "grad_norm": 1.748463919683655, "learning_rate": 1.9336270835003314e-07, "loss": 0.0695, "step": 16321 }, { "epoch": 3.713765642775882, "grad_norm": 1.5016442251805646, "learning_rate": 1.932980922608705e-07, "loss": 0.0124, "step": 16322 }, { "epoch": 3.7139931740614336, "grad_norm": 2.1854797067691125, "learning_rate": 1.93233484994904e-07, "loss": 0.0555, "step": 16323 }, { "epoch": 3.7142207053469853, "grad_norm": 1.5715370419484433, "learning_rate": 1.9316888655345457e-07, "loss": 0.0134, "step": 16324 }, { "epoch": 3.714448236632537, "grad_norm": 1.7396486168469436, "learning_rate": 1.9310429693784215e-07, "loss": 0.0214, "step": 16325 }, { "epoch": 3.714675767918089, "grad_norm": 1.5957830440217333, "learning_rate": 1.9303971614938682e-07, "loss": 0.0658, "step": 16326 }, { "epoch": 3.7149032992036406, "grad_norm": 2.1814028278933084, "learning_rate": 1.9297514418940857e-07, "loss": 0.0428, "step": 16327 }, { "epoch": 3.7151308304891923, "grad_norm": 1.368469841748115, "learning_rate": 1.9291058105922715e-07, "loss": 0.0741, "step": 16328 }, { "epoch": 3.715358361774744, "grad_norm": 1.4031796575395772, "learning_rate": 1.9284602676016227e-07, "loss": 0.0573, "step": 16329 }, { "epoch": 3.715585893060296, "grad_norm": 1.893146885858898, "learning_rate": 1.9278148129353324e-07, "loss": 0.0436, "step": 16330 }, { "epoch": 3.7158134243458476, "grad_norm": 1.8296605503364198, "learning_rate": 1.9271694466065894e-07, "loss": 0.0587, "step": 16331 }, { "epoch": 3.7160409556313994, "grad_norm": 1.6681257047239928, "learning_rate": 1.926524168628588e-07, "loss": 0.0715, "step": 16332 }, { "epoch": 3.716268486916951, "grad_norm": 2.5128149628856145, "learning_rate": 1.9258789790145124e-07, "loss": 0.0174, "step": 16333 }, { "epoch": 3.716496018202503, "grad_norm": 1.683230885747149, "learning_rate": 1.9252338777775506e-07, "loss": 0.0225, "step": 16334 }, { "epoch": 3.7167235494880546, "grad_norm": 1.2779650839525567, "learning_rate": 1.9245888649308887e-07, "loss": 0.0608, "step": 16335 }, { "epoch": 3.7169510807736064, "grad_norm": 1.6553560656411144, "learning_rate": 1.9239439404877101e-07, "loss": 0.0819, "step": 16336 }, { "epoch": 3.717178612059158, "grad_norm": 1.5251765547000415, "learning_rate": 1.9232991044611951e-07, "loss": 0.055, "step": 16337 }, { "epoch": 3.71740614334471, "grad_norm": 1.519612842226098, "learning_rate": 1.9226543568645207e-07, "loss": 0.0233, "step": 16338 }, { "epoch": 3.7176336746302616, "grad_norm": 1.7460350506434505, "learning_rate": 1.922009697710867e-07, "loss": 0.0722, "step": 16339 }, { "epoch": 3.7178612059158134, "grad_norm": 1.57783220345399, "learning_rate": 1.9213651270134082e-07, "loss": 0.018, "step": 16340 }, { "epoch": 3.718088737201365, "grad_norm": 0.4908096504404262, "learning_rate": 1.9207206447853176e-07, "loss": 0.0021, "step": 16341 }, { "epoch": 3.718316268486917, "grad_norm": 0.7133917584488971, "learning_rate": 1.9200762510397687e-07, "loss": 0.0251, "step": 16342 }, { "epoch": 3.7185437997724686, "grad_norm": 1.0244065703686223, "learning_rate": 1.9194319457899332e-07, "loss": 0.0219, "step": 16343 }, { "epoch": 3.7187713310580204, "grad_norm": 1.6948324376234871, "learning_rate": 1.9187877290489778e-07, "loss": 0.1145, "step": 16344 }, { "epoch": 3.718998862343572, "grad_norm": 1.4302367806531469, "learning_rate": 1.918143600830067e-07, "loss": 0.0292, "step": 16345 }, { "epoch": 3.719226393629124, "grad_norm": 2.1635389265263285, "learning_rate": 1.9174995611463694e-07, "loss": 0.0163, "step": 16346 }, { "epoch": 3.7194539249146756, "grad_norm": 1.2180086634813472, "learning_rate": 1.9168556100110425e-07, "loss": 0.0126, "step": 16347 }, { "epoch": 3.7196814562002274, "grad_norm": 1.1204059228638064, "learning_rate": 1.916211747437255e-07, "loss": 0.0583, "step": 16348 }, { "epoch": 3.719908987485779, "grad_norm": 0.6587193841244309, "learning_rate": 1.915567973438162e-07, "loss": 0.0083, "step": 16349 }, { "epoch": 3.720136518771331, "grad_norm": 1.6323599857405555, "learning_rate": 1.9149242880269204e-07, "loss": 0.0849, "step": 16350 }, { "epoch": 3.720364050056883, "grad_norm": 1.817845680761316, "learning_rate": 1.9142806912166884e-07, "loss": 0.0806, "step": 16351 }, { "epoch": 3.7205915813424344, "grad_norm": 1.4553921207335219, "learning_rate": 1.913637183020617e-07, "loss": 0.0809, "step": 16352 }, { "epoch": 3.7208191126279866, "grad_norm": 1.5562556062506998, "learning_rate": 1.9129937634518592e-07, "loss": 0.0482, "step": 16353 }, { "epoch": 3.721046643913538, "grad_norm": 1.9180857708026118, "learning_rate": 1.9123504325235666e-07, "loss": 0.0233, "step": 16354 }, { "epoch": 3.72127417519909, "grad_norm": 1.5147670335901218, "learning_rate": 1.911707190248889e-07, "loss": 0.0878, "step": 16355 }, { "epoch": 3.7215017064846414, "grad_norm": 0.8455097371161419, "learning_rate": 1.9110640366409707e-07, "loss": 0.0141, "step": 16356 }, { "epoch": 3.7217292377701936, "grad_norm": 3.2935950031566428, "learning_rate": 1.9104209717129556e-07, "loss": 0.0485, "step": 16357 }, { "epoch": 3.721956769055745, "grad_norm": 1.5724928220288645, "learning_rate": 1.9097779954779904e-07, "loss": 0.0262, "step": 16358 }, { "epoch": 3.722184300341297, "grad_norm": 1.1455803479692672, "learning_rate": 1.909135107949212e-07, "loss": 0.0075, "step": 16359 }, { "epoch": 3.722411831626849, "grad_norm": 2.2503879042966233, "learning_rate": 1.9084923091397624e-07, "loss": 0.1502, "step": 16360 }, { "epoch": 3.7226393629124006, "grad_norm": 1.522665464835703, "learning_rate": 1.9078495990627783e-07, "loss": 0.0709, "step": 16361 }, { "epoch": 3.7228668941979524, "grad_norm": 1.726852931246412, "learning_rate": 1.9072069777313982e-07, "loss": 0.0147, "step": 16362 }, { "epoch": 3.723094425483504, "grad_norm": 1.7316937904559013, "learning_rate": 1.9065644451587547e-07, "loss": 0.0583, "step": 16363 }, { "epoch": 3.723321956769056, "grad_norm": 1.2441429229698748, "learning_rate": 1.9059220013579765e-07, "loss": 0.0183, "step": 16364 }, { "epoch": 3.7235494880546076, "grad_norm": 0.6593506706479412, "learning_rate": 1.9052796463421994e-07, "loss": 0.0029, "step": 16365 }, { "epoch": 3.7237770193401594, "grad_norm": 5.193773041805862, "learning_rate": 1.9046373801245473e-07, "loss": 0.0477, "step": 16366 }, { "epoch": 3.724004550625711, "grad_norm": 1.4530190195787804, "learning_rate": 1.9039952027181487e-07, "loss": 0.0278, "step": 16367 }, { "epoch": 3.724232081911263, "grad_norm": 1.0873989850284818, "learning_rate": 1.9033531141361313e-07, "loss": 0.0071, "step": 16368 }, { "epoch": 3.7244596131968146, "grad_norm": 1.367701239949284, "learning_rate": 1.902711114391613e-07, "loss": 0.0746, "step": 16369 }, { "epoch": 3.7246871444823664, "grad_norm": 2.2876592873378105, "learning_rate": 1.9020692034977202e-07, "loss": 0.0114, "step": 16370 }, { "epoch": 3.724914675767918, "grad_norm": 1.0190601682292246, "learning_rate": 1.901427381467568e-07, "loss": 0.0236, "step": 16371 }, { "epoch": 3.72514220705347, "grad_norm": 2.5758055093290237, "learning_rate": 1.9007856483142787e-07, "loss": 0.0214, "step": 16372 }, { "epoch": 3.7253697383390216, "grad_norm": 0.7585556147401666, "learning_rate": 1.9001440040509625e-07, "loss": 0.0067, "step": 16373 }, { "epoch": 3.7255972696245734, "grad_norm": 1.050184584679099, "learning_rate": 1.8995024486907369e-07, "loss": 0.0067, "step": 16374 }, { "epoch": 3.725824800910125, "grad_norm": 1.6969660481724227, "learning_rate": 1.8988609822467162e-07, "loss": 0.0171, "step": 16375 }, { "epoch": 3.726052332195677, "grad_norm": 1.6758946637822094, "learning_rate": 1.898219604732006e-07, "loss": 0.0492, "step": 16376 }, { "epoch": 3.7262798634812286, "grad_norm": 2.730939080708368, "learning_rate": 1.8975783161597186e-07, "loss": 0.042, "step": 16377 }, { "epoch": 3.7265073947667804, "grad_norm": 1.082517442011131, "learning_rate": 1.896937116542958e-07, "loss": 0.0105, "step": 16378 }, { "epoch": 3.726734926052332, "grad_norm": 1.5880069508493047, "learning_rate": 1.896296005894832e-07, "loss": 0.0146, "step": 16379 }, { "epoch": 3.726962457337884, "grad_norm": 1.7018133421195594, "learning_rate": 1.8956549842284406e-07, "loss": 0.0694, "step": 16380 }, { "epoch": 3.7271899886234356, "grad_norm": 1.6241644278728267, "learning_rate": 1.8950140515568864e-07, "loss": 0.0912, "step": 16381 }, { "epoch": 3.7274175199089874, "grad_norm": 0.8883119173435325, "learning_rate": 1.8943732078932713e-07, "loss": 0.0061, "step": 16382 }, { "epoch": 3.727645051194539, "grad_norm": 1.3558657689575326, "learning_rate": 1.8937324532506893e-07, "loss": 0.0209, "step": 16383 }, { "epoch": 3.727872582480091, "grad_norm": 1.0205668522626015, "learning_rate": 1.8930917876422394e-07, "loss": 0.0103, "step": 16384 }, { "epoch": 3.7281001137656427, "grad_norm": 1.0786311937368653, "learning_rate": 1.8924512110810116e-07, "loss": 0.0063, "step": 16385 }, { "epoch": 3.7283276450511944, "grad_norm": 0.8644291595930607, "learning_rate": 1.8918107235801036e-07, "loss": 0.0438, "step": 16386 }, { "epoch": 3.728555176336746, "grad_norm": 1.8049963260624726, "learning_rate": 1.8911703251525998e-07, "loss": 0.035, "step": 16387 }, { "epoch": 3.728782707622298, "grad_norm": 1.4154038752379499, "learning_rate": 1.8905300158115925e-07, "loss": 0.0567, "step": 16388 }, { "epoch": 3.7290102389078497, "grad_norm": 1.5641509176380182, "learning_rate": 1.8898897955701692e-07, "loss": 0.084, "step": 16389 }, { "epoch": 3.729237770193402, "grad_norm": 1.0862047719455896, "learning_rate": 1.8892496644414116e-07, "loss": 0.0228, "step": 16390 }, { "epoch": 3.729465301478953, "grad_norm": 2.1038314430711655, "learning_rate": 1.8886096224384068e-07, "loss": 0.01, "step": 16391 }, { "epoch": 3.7296928327645054, "grad_norm": 2.8653763222046127, "learning_rate": 1.8879696695742313e-07, "loss": 0.0508, "step": 16392 }, { "epoch": 3.7299203640500567, "grad_norm": 1.3698946405361077, "learning_rate": 1.8873298058619691e-07, "loss": 0.0229, "step": 16393 }, { "epoch": 3.730147895335609, "grad_norm": 2.5270484135734796, "learning_rate": 1.886690031314694e-07, "loss": 0.0112, "step": 16394 }, { "epoch": 3.73037542662116, "grad_norm": 1.0843879887294197, "learning_rate": 1.886050345945483e-07, "loss": 0.0299, "step": 16395 }, { "epoch": 3.7306029579067124, "grad_norm": 1.1535486687559608, "learning_rate": 1.8854107497674135e-07, "loss": 0.0182, "step": 16396 }, { "epoch": 3.7308304891922637, "grad_norm": 1.8807050916158943, "learning_rate": 1.8847712427935525e-07, "loss": 0.0917, "step": 16397 }, { "epoch": 3.731058020477816, "grad_norm": 2.7326019369842616, "learning_rate": 1.884131825036975e-07, "loss": 0.0159, "step": 16398 }, { "epoch": 3.7312855517633676, "grad_norm": 1.353809463757067, "learning_rate": 1.8834924965107478e-07, "loss": 0.0188, "step": 16399 }, { "epoch": 3.7315130830489194, "grad_norm": 1.9255945657530016, "learning_rate": 1.882853257227934e-07, "loss": 0.0545, "step": 16400 }, { "epoch": 3.731740614334471, "grad_norm": 1.007461420648493, "learning_rate": 1.882214107201602e-07, "loss": 0.015, "step": 16401 }, { "epoch": 3.731968145620023, "grad_norm": 1.2512259989176717, "learning_rate": 1.8815750464448147e-07, "loss": 0.0461, "step": 16402 }, { "epoch": 3.7321956769055746, "grad_norm": 2.1616138635147575, "learning_rate": 1.8809360749706352e-07, "loss": 0.0568, "step": 16403 }, { "epoch": 3.7324232081911264, "grad_norm": 1.3118180080200903, "learning_rate": 1.880297192792118e-07, "loss": 0.0783, "step": 16404 }, { "epoch": 3.732650739476678, "grad_norm": 2.017104615839942, "learning_rate": 1.879658399922326e-07, "loss": 0.0305, "step": 16405 }, { "epoch": 3.73287827076223, "grad_norm": 1.4406712312665555, "learning_rate": 1.8790196963743123e-07, "loss": 0.0504, "step": 16406 }, { "epoch": 3.7331058020477816, "grad_norm": 1.8003638437523615, "learning_rate": 1.878381082161127e-07, "loss": 0.0276, "step": 16407 }, { "epoch": 3.7333333333333334, "grad_norm": 1.5989260527038665, "learning_rate": 1.87774255729583e-07, "loss": 0.016, "step": 16408 }, { "epoch": 3.733560864618885, "grad_norm": 1.2076996439799679, "learning_rate": 1.8771041217914656e-07, "loss": 0.0465, "step": 16409 }, { "epoch": 3.733788395904437, "grad_norm": 1.712552164282314, "learning_rate": 1.876465775661087e-07, "loss": 0.0168, "step": 16410 }, { "epoch": 3.7340159271899886, "grad_norm": 1.955228714158796, "learning_rate": 1.8758275189177353e-07, "loss": 0.0805, "step": 16411 }, { "epoch": 3.7342434584755404, "grad_norm": 1.7373904924395185, "learning_rate": 1.8751893515744606e-07, "loss": 0.0299, "step": 16412 }, { "epoch": 3.734470989761092, "grad_norm": 1.6229721714549836, "learning_rate": 1.8745512736443017e-07, "loss": 0.0144, "step": 16413 }, { "epoch": 3.734698521046644, "grad_norm": 4.685965571511752, "learning_rate": 1.873913285140301e-07, "loss": 0.022, "step": 16414 }, { "epoch": 3.7349260523321957, "grad_norm": 1.4114686901509492, "learning_rate": 1.873275386075501e-07, "loss": 0.0252, "step": 16415 }, { "epoch": 3.7351535836177474, "grad_norm": 1.600732700002851, "learning_rate": 1.8726375764629344e-07, "loss": 0.0115, "step": 16416 }, { "epoch": 3.735381114903299, "grad_norm": 0.9445298782521521, "learning_rate": 1.8719998563156408e-07, "loss": 0.0182, "step": 16417 }, { "epoch": 3.735608646188851, "grad_norm": 2.268364557861303, "learning_rate": 1.8713622256466514e-07, "loss": 0.0313, "step": 16418 }, { "epoch": 3.7358361774744027, "grad_norm": 0.7146568960420685, "learning_rate": 1.8707246844689982e-07, "loss": 0.0043, "step": 16419 }, { "epoch": 3.7360637087599544, "grad_norm": 1.5412292324575005, "learning_rate": 1.8700872327957114e-07, "loss": 0.0491, "step": 16420 }, { "epoch": 3.736291240045506, "grad_norm": 1.7210151820662278, "learning_rate": 1.8694498706398208e-07, "loss": 0.0196, "step": 16421 }, { "epoch": 3.736518771331058, "grad_norm": 1.338524899129981, "learning_rate": 1.8688125980143537e-07, "loss": 0.0224, "step": 16422 }, { "epoch": 3.7367463026166097, "grad_norm": 1.9726076818519571, "learning_rate": 1.8681754149323322e-07, "loss": 0.1568, "step": 16423 }, { "epoch": 3.7369738339021614, "grad_norm": 1.400147055358001, "learning_rate": 1.867538321406781e-07, "loss": 0.0565, "step": 16424 }, { "epoch": 3.737201365187713, "grad_norm": 1.237853868268288, "learning_rate": 1.8669013174507214e-07, "loss": 0.0416, "step": 16425 }, { "epoch": 3.737428896473265, "grad_norm": 1.8312660595463712, "learning_rate": 1.866264403077169e-07, "loss": 0.1554, "step": 16426 }, { "epoch": 3.7376564277588167, "grad_norm": 1.2248539649361252, "learning_rate": 1.8656275782991443e-07, "loss": 0.0104, "step": 16427 }, { "epoch": 3.7378839590443684, "grad_norm": 1.54506730049772, "learning_rate": 1.8649908431296618e-07, "loss": 0.0747, "step": 16428 }, { "epoch": 3.7381114903299206, "grad_norm": 1.5521321577905682, "learning_rate": 1.864354197581738e-07, "loss": 0.0814, "step": 16429 }, { "epoch": 3.738339021615472, "grad_norm": 0.555221939032736, "learning_rate": 1.8637176416683806e-07, "loss": 0.0027, "step": 16430 }, { "epoch": 3.738566552901024, "grad_norm": 1.3304569165302305, "learning_rate": 1.8630811754026028e-07, "loss": 0.0108, "step": 16431 }, { "epoch": 3.7387940841865754, "grad_norm": 1.74121765981397, "learning_rate": 1.8624447987974123e-07, "loss": 0.0851, "step": 16432 }, { "epoch": 3.7390216154721276, "grad_norm": 0.9722614237269348, "learning_rate": 1.8618085118658125e-07, "loss": 0.0044, "step": 16433 }, { "epoch": 3.739249146757679, "grad_norm": 1.2887641872432098, "learning_rate": 1.86117231462081e-07, "loss": 0.0345, "step": 16434 }, { "epoch": 3.739476678043231, "grad_norm": 1.3769738960960036, "learning_rate": 1.8605362070754079e-07, "loss": 0.0206, "step": 16435 }, { "epoch": 3.7397042093287824, "grad_norm": 1.2497964962735393, "learning_rate": 1.8599001892426083e-07, "loss": 0.0254, "step": 16436 }, { "epoch": 3.7399317406143346, "grad_norm": 6.853236670255927, "learning_rate": 1.8592642611354085e-07, "loss": 0.0169, "step": 16437 }, { "epoch": 3.7401592718998864, "grad_norm": 0.7374511707271859, "learning_rate": 1.8586284227668046e-07, "loss": 0.0064, "step": 16438 }, { "epoch": 3.740386803185438, "grad_norm": 4.734460201401469, "learning_rate": 1.8579926741497952e-07, "loss": 0.066, "step": 16439 }, { "epoch": 3.74061433447099, "grad_norm": 0.7142732936026911, "learning_rate": 1.8573570152973702e-07, "loss": 0.0035, "step": 16440 }, { "epoch": 3.7408418657565417, "grad_norm": 1.532497301796238, "learning_rate": 1.8567214462225223e-07, "loss": 0.0059, "step": 16441 }, { "epoch": 3.7410693970420934, "grad_norm": 1.037220802248732, "learning_rate": 1.8560859669382432e-07, "loss": 0.0198, "step": 16442 }, { "epoch": 3.741296928327645, "grad_norm": 1.3047388507497821, "learning_rate": 1.855450577457521e-07, "loss": 0.0305, "step": 16443 }, { "epoch": 3.741524459613197, "grad_norm": 1.7222296503369314, "learning_rate": 1.8548152777933413e-07, "loss": 0.0491, "step": 16444 }, { "epoch": 3.7417519908987487, "grad_norm": 1.9099710018909284, "learning_rate": 1.8541800679586858e-07, "loss": 0.0575, "step": 16445 }, { "epoch": 3.7419795221843004, "grad_norm": 1.1125574961368538, "learning_rate": 1.8535449479665407e-07, "loss": 0.0573, "step": 16446 }, { "epoch": 3.742207053469852, "grad_norm": 1.6950830518622122, "learning_rate": 1.8529099178298837e-07, "loss": 0.0707, "step": 16447 }, { "epoch": 3.742434584755404, "grad_norm": 0.8864196550113566, "learning_rate": 1.8522749775616944e-07, "loss": 0.0063, "step": 16448 }, { "epoch": 3.7426621160409557, "grad_norm": 1.0036780333301694, "learning_rate": 1.8516401271749508e-07, "loss": 0.0146, "step": 16449 }, { "epoch": 3.7428896473265074, "grad_norm": 1.64115448140366, "learning_rate": 1.85100536668263e-07, "loss": 0.0126, "step": 16450 }, { "epoch": 3.743117178612059, "grad_norm": 2.130581369533378, "learning_rate": 1.8503706960977031e-07, "loss": 0.0121, "step": 16451 }, { "epoch": 3.743344709897611, "grad_norm": 1.1764845795097, "learning_rate": 1.84973611543314e-07, "loss": 0.0481, "step": 16452 }, { "epoch": 3.7435722411831627, "grad_norm": 1.675170396489072, "learning_rate": 1.8491016247019134e-07, "loss": 0.0572, "step": 16453 }, { "epoch": 3.7437997724687144, "grad_norm": 1.6468623452731723, "learning_rate": 1.848467223916988e-07, "loss": 0.0065, "step": 16454 }, { "epoch": 3.744027303754266, "grad_norm": 1.9397062422198683, "learning_rate": 1.847832913091331e-07, "loss": 0.0436, "step": 16455 }, { "epoch": 3.744254835039818, "grad_norm": 0.9223966194738429, "learning_rate": 1.84719869223791e-07, "loss": 0.0083, "step": 16456 }, { "epoch": 3.7444823663253697, "grad_norm": 1.4441448895005744, "learning_rate": 1.846564561369682e-07, "loss": 0.0129, "step": 16457 }, { "epoch": 3.7447098976109214, "grad_norm": 0.6879049467780861, "learning_rate": 1.8459305204996115e-07, "loss": 0.0052, "step": 16458 }, { "epoch": 3.744937428896473, "grad_norm": 1.2003952457864455, "learning_rate": 1.8452965696406534e-07, "loss": 0.0607, "step": 16459 }, { "epoch": 3.745164960182025, "grad_norm": 1.8203998901886724, "learning_rate": 1.844662708805769e-07, "loss": 0.0585, "step": 16460 }, { "epoch": 3.7453924914675767, "grad_norm": 2.1316526815972723, "learning_rate": 1.8440289380079087e-07, "loss": 0.0949, "step": 16461 }, { "epoch": 3.7456200227531284, "grad_norm": 1.33752281402842, "learning_rate": 1.8433952572600278e-07, "loss": 0.0315, "step": 16462 }, { "epoch": 3.74584755403868, "grad_norm": 1.366221509868287, "learning_rate": 1.8427616665750797e-07, "loss": 0.0157, "step": 16463 }, { "epoch": 3.746075085324232, "grad_norm": 1.5084063146076705, "learning_rate": 1.8421281659660096e-07, "loss": 0.1316, "step": 16464 }, { "epoch": 3.7463026166097837, "grad_norm": 1.7607643340066854, "learning_rate": 1.841494755445769e-07, "loss": 0.0295, "step": 16465 }, { "epoch": 3.7465301478953354, "grad_norm": 1.4061800629878223, "learning_rate": 1.8408614350272998e-07, "loss": 0.0642, "step": 16466 }, { "epoch": 3.746757679180887, "grad_norm": 2.147802101688334, "learning_rate": 1.84022820472355e-07, "loss": 0.1161, "step": 16467 }, { "epoch": 3.7469852104664394, "grad_norm": 2.9521423904492923, "learning_rate": 1.8395950645474552e-07, "loss": 0.0229, "step": 16468 }, { "epoch": 3.7472127417519907, "grad_norm": 1.832138639034187, "learning_rate": 1.8389620145119643e-07, "loss": 0.0397, "step": 16469 }, { "epoch": 3.747440273037543, "grad_norm": 1.5042678408264638, "learning_rate": 1.8383290546300115e-07, "loss": 0.0239, "step": 16470 }, { "epoch": 3.747667804323094, "grad_norm": 2.1420077358542913, "learning_rate": 1.8376961849145315e-07, "loss": 0.0301, "step": 16471 }, { "epoch": 3.7478953356086464, "grad_norm": 4.271125932789465, "learning_rate": 1.837063405378462e-07, "loss": 0.0778, "step": 16472 }, { "epoch": 3.7481228668941977, "grad_norm": 0.5530088931478009, "learning_rate": 1.836430716034733e-07, "loss": 0.0039, "step": 16473 }, { "epoch": 3.74835039817975, "grad_norm": 1.4765769575713052, "learning_rate": 1.835798116896277e-07, "loss": 0.0175, "step": 16474 }, { "epoch": 3.748577929465301, "grad_norm": 0.8182659211026184, "learning_rate": 1.8351656079760257e-07, "loss": 0.0061, "step": 16475 }, { "epoch": 3.7488054607508534, "grad_norm": 0.8286767072329587, "learning_rate": 1.8345331892869016e-07, "loss": 0.0443, "step": 16476 }, { "epoch": 3.749032992036405, "grad_norm": 1.6379757546841336, "learning_rate": 1.8339008608418343e-07, "loss": 0.0772, "step": 16477 }, { "epoch": 3.749260523321957, "grad_norm": 1.458483436824025, "learning_rate": 1.8332686226537438e-07, "loss": 0.0384, "step": 16478 }, { "epoch": 3.7494880546075087, "grad_norm": 1.1037100961525212, "learning_rate": 1.832636474735556e-07, "loss": 0.0492, "step": 16479 }, { "epoch": 3.7497155858930604, "grad_norm": 2.6605749865861834, "learning_rate": 1.8320044171001868e-07, "loss": 0.107, "step": 16480 }, { "epoch": 3.749943117178612, "grad_norm": 1.9640031609243, "learning_rate": 1.831372449760556e-07, "loss": 0.0827, "step": 16481 }, { "epoch": 3.750170648464164, "grad_norm": 1.4651626649615666, "learning_rate": 1.830740572729582e-07, "loss": 0.0442, "step": 16482 }, { "epoch": 3.7503981797497157, "grad_norm": 1.2418009384356974, "learning_rate": 1.8301087860201753e-07, "loss": 0.0311, "step": 16483 }, { "epoch": 3.7506257110352674, "grad_norm": 2.279668400959437, "learning_rate": 1.829477089645252e-07, "loss": 0.0156, "step": 16484 }, { "epoch": 3.750853242320819, "grad_norm": 2.1209160057208942, "learning_rate": 1.8288454836177194e-07, "loss": 0.068, "step": 16485 }, { "epoch": 3.751080773606371, "grad_norm": 0.9940233032015413, "learning_rate": 1.82821396795049e-07, "loss": 0.0073, "step": 16486 }, { "epoch": 3.7513083048919227, "grad_norm": 1.2813764521510078, "learning_rate": 1.827582542656467e-07, "loss": 0.0498, "step": 16487 }, { "epoch": 3.7515358361774744, "grad_norm": 1.87235572660881, "learning_rate": 1.8269512077485576e-07, "loss": 0.0459, "step": 16488 }, { "epoch": 3.751763367463026, "grad_norm": 2.437373674384991, "learning_rate": 1.8263199632396673e-07, "loss": 0.0214, "step": 16489 }, { "epoch": 3.751990898748578, "grad_norm": 0.2629112283288956, "learning_rate": 1.825688809142693e-07, "loss": 0.0011, "step": 16490 }, { "epoch": 3.7522184300341297, "grad_norm": 2.1694187855843503, "learning_rate": 1.825057745470539e-07, "loss": 0.1934, "step": 16491 }, { "epoch": 3.7524459613196814, "grad_norm": 0.9721965253604609, "learning_rate": 1.8244267722360988e-07, "loss": 0.048, "step": 16492 }, { "epoch": 3.752673492605233, "grad_norm": 2.106099539424266, "learning_rate": 1.823795889452272e-07, "loss": 0.0266, "step": 16493 }, { "epoch": 3.752901023890785, "grad_norm": 0.9292236048432315, "learning_rate": 1.8231650971319494e-07, "loss": 0.0147, "step": 16494 }, { "epoch": 3.7531285551763367, "grad_norm": 1.998688373382482, "learning_rate": 1.8225343952880247e-07, "loss": 0.0301, "step": 16495 }, { "epoch": 3.7533560864618885, "grad_norm": 1.5761001757011304, "learning_rate": 1.8219037839333903e-07, "loss": 0.0062, "step": 16496 }, { "epoch": 3.75358361774744, "grad_norm": 1.836684233024785, "learning_rate": 1.821273263080931e-07, "loss": 0.0684, "step": 16497 }, { "epoch": 3.753811149032992, "grad_norm": 1.062984975309816, "learning_rate": 1.8206428327435376e-07, "loss": 0.0755, "step": 16498 }, { "epoch": 3.7540386803185437, "grad_norm": 1.2981167278228207, "learning_rate": 1.8200124929340903e-07, "loss": 0.0138, "step": 16499 }, { "epoch": 3.7542662116040955, "grad_norm": 1.3898796228830388, "learning_rate": 1.8193822436654767e-07, "loss": 0.0109, "step": 16500 }, { "epoch": 3.754493742889647, "grad_norm": 3.090593088362132, "learning_rate": 1.8187520849505737e-07, "loss": 0.0476, "step": 16501 }, { "epoch": 3.754721274175199, "grad_norm": 1.0279610836375865, "learning_rate": 1.8181220168022622e-07, "loss": 0.0233, "step": 16502 }, { "epoch": 3.7549488054607507, "grad_norm": 1.6957011342234354, "learning_rate": 1.8174920392334227e-07, "loss": 0.0519, "step": 16503 }, { "epoch": 3.7551763367463025, "grad_norm": 2.197244918863224, "learning_rate": 1.8168621522569263e-07, "loss": 0.0374, "step": 16504 }, { "epoch": 3.755403868031854, "grad_norm": 1.579421970036612, "learning_rate": 1.8162323558856504e-07, "loss": 0.0295, "step": 16505 }, { "epoch": 3.755631399317406, "grad_norm": 2.0253500148794434, "learning_rate": 1.8156026501324648e-07, "loss": 0.0736, "step": 16506 }, { "epoch": 3.755858930602958, "grad_norm": 1.3316645657380441, "learning_rate": 1.8149730350102381e-07, "loss": 0.0227, "step": 16507 }, { "epoch": 3.7560864618885095, "grad_norm": 1.3926081186720658, "learning_rate": 1.8143435105318402e-07, "loss": 0.0612, "step": 16508 }, { "epoch": 3.7563139931740617, "grad_norm": 1.3146489625347855, "learning_rate": 1.8137140767101374e-07, "loss": 0.0177, "step": 16509 }, { "epoch": 3.756541524459613, "grad_norm": 2.270845443467212, "learning_rate": 1.813084733557996e-07, "loss": 0.08, "step": 16510 }, { "epoch": 3.756769055745165, "grad_norm": 0.8296685792618065, "learning_rate": 1.8124554810882746e-07, "loss": 0.0122, "step": 16511 }, { "epoch": 3.7569965870307165, "grad_norm": 1.7596444081970986, "learning_rate": 1.8118263193138385e-07, "loss": 0.0357, "step": 16512 }, { "epoch": 3.7572241183162687, "grad_norm": 1.313745670101045, "learning_rate": 1.8111972482475447e-07, "loss": 0.0066, "step": 16513 }, { "epoch": 3.75745164960182, "grad_norm": 1.250042508486122, "learning_rate": 1.810568267902247e-07, "loss": 0.0108, "step": 16514 }, { "epoch": 3.757679180887372, "grad_norm": 1.2607650515091788, "learning_rate": 1.8099393782908045e-07, "loss": 0.014, "step": 16515 }, { "epoch": 3.757906712172924, "grad_norm": 1.032587792758042, "learning_rate": 1.8093105794260684e-07, "loss": 0.0147, "step": 16516 }, { "epoch": 3.7581342434584757, "grad_norm": 1.2515159033713272, "learning_rate": 1.8086818713208943e-07, "loss": 0.0441, "step": 16517 }, { "epoch": 3.7583617747440274, "grad_norm": 0.9108453186507872, "learning_rate": 1.8080532539881262e-07, "loss": 0.0087, "step": 16518 }, { "epoch": 3.758589306029579, "grad_norm": 1.0126707748496662, "learning_rate": 1.8074247274406168e-07, "loss": 0.0314, "step": 16519 }, { "epoch": 3.758816837315131, "grad_norm": 1.3157586870564921, "learning_rate": 1.8067962916912096e-07, "loss": 0.0602, "step": 16520 }, { "epoch": 3.7590443686006827, "grad_norm": 1.2836890115631474, "learning_rate": 1.8061679467527473e-07, "loss": 0.0026, "step": 16521 }, { "epoch": 3.7592718998862344, "grad_norm": 1.9267606622050355, "learning_rate": 1.805539692638073e-07, "loss": 0.0259, "step": 16522 }, { "epoch": 3.759499431171786, "grad_norm": 1.6787153373935082, "learning_rate": 1.804911529360029e-07, "loss": 0.0343, "step": 16523 }, { "epoch": 3.759726962457338, "grad_norm": 1.2831970036854718, "learning_rate": 1.8042834569314534e-07, "loss": 0.0229, "step": 16524 }, { "epoch": 3.7599544937428897, "grad_norm": 2.2238118216605205, "learning_rate": 1.803655475365182e-07, "loss": 0.0146, "step": 16525 }, { "epoch": 3.7601820250284415, "grad_norm": 1.9388356785613658, "learning_rate": 1.8030275846740484e-07, "loss": 0.0251, "step": 16526 }, { "epoch": 3.760409556313993, "grad_norm": 2.2830313759021847, "learning_rate": 1.8023997848708882e-07, "loss": 0.1227, "step": 16527 }, { "epoch": 3.760637087599545, "grad_norm": 1.4099329981518511, "learning_rate": 1.8017720759685268e-07, "loss": 0.0895, "step": 16528 }, { "epoch": 3.7608646188850967, "grad_norm": 1.47033450254706, "learning_rate": 1.8011444579798018e-07, "loss": 0.0088, "step": 16529 }, { "epoch": 3.7610921501706485, "grad_norm": 1.333064092450277, "learning_rate": 1.8005169309175342e-07, "loss": 0.025, "step": 16530 }, { "epoch": 3.7613196814562, "grad_norm": 1.8190285337077066, "learning_rate": 1.7998894947945536e-07, "loss": 0.0431, "step": 16531 }, { "epoch": 3.761547212741752, "grad_norm": 2.8972657729315885, "learning_rate": 1.7992621496236824e-07, "loss": 0.0591, "step": 16532 }, { "epoch": 3.7617747440273037, "grad_norm": 1.5349361763135014, "learning_rate": 1.7986348954177397e-07, "loss": 0.0235, "step": 16533 }, { "epoch": 3.7620022753128555, "grad_norm": 1.7840057693762772, "learning_rate": 1.7980077321895466e-07, "loss": 0.0354, "step": 16534 }, { "epoch": 3.7622298065984072, "grad_norm": 1.67690787570581, "learning_rate": 1.7973806599519229e-07, "loss": 0.0182, "step": 16535 }, { "epoch": 3.762457337883959, "grad_norm": 1.7254063807036895, "learning_rate": 1.7967536787176851e-07, "loss": 0.0418, "step": 16536 }, { "epoch": 3.7626848691695107, "grad_norm": 0.33296886864187725, "learning_rate": 1.7961267884996448e-07, "loss": 0.0013, "step": 16537 }, { "epoch": 3.7629124004550625, "grad_norm": 1.5005414623660855, "learning_rate": 1.7954999893106188e-07, "loss": 0.0278, "step": 16538 }, { "epoch": 3.7631399317406142, "grad_norm": 1.9836516869083878, "learning_rate": 1.7948732811634137e-07, "loss": 0.024, "step": 16539 }, { "epoch": 3.763367463026166, "grad_norm": 3.1858321507071286, "learning_rate": 1.794246664070838e-07, "loss": 0.0331, "step": 16540 }, { "epoch": 3.7635949943117177, "grad_norm": 1.7054860866118495, "learning_rate": 1.7936201380457006e-07, "loss": 0.0145, "step": 16541 }, { "epoch": 3.7638225255972695, "grad_norm": 1.2663295434531392, "learning_rate": 1.792993703100806e-07, "loss": 0.0708, "step": 16542 }, { "epoch": 3.7640500568828212, "grad_norm": 1.9083361545854018, "learning_rate": 1.7923673592489587e-07, "loss": 0.1363, "step": 16543 }, { "epoch": 3.764277588168373, "grad_norm": 2.1506459450014037, "learning_rate": 1.7917411065029596e-07, "loss": 0.0671, "step": 16544 }, { "epoch": 3.7645051194539247, "grad_norm": 2.126598105358592, "learning_rate": 1.791114944875605e-07, "loss": 0.0191, "step": 16545 }, { "epoch": 3.764732650739477, "grad_norm": 2.196790337850218, "learning_rate": 1.790488874379697e-07, "loss": 0.0227, "step": 16546 }, { "epoch": 3.7649601820250282, "grad_norm": 1.4455733621062459, "learning_rate": 1.7898628950280273e-07, "loss": 0.0314, "step": 16547 }, { "epoch": 3.7651877133105804, "grad_norm": 1.686525057802413, "learning_rate": 1.7892370068333914e-07, "loss": 0.1233, "step": 16548 }, { "epoch": 3.7654152445961317, "grad_norm": 2.0592518766314885, "learning_rate": 1.788611209808582e-07, "loss": 0.0594, "step": 16549 }, { "epoch": 3.765642775881684, "grad_norm": 1.2572599740477097, "learning_rate": 1.7879855039663905e-07, "loss": 0.0239, "step": 16550 }, { "epoch": 3.7658703071672353, "grad_norm": 2.3681830151814114, "learning_rate": 1.7873598893196036e-07, "loss": 0.0512, "step": 16551 }, { "epoch": 3.7660978384527874, "grad_norm": 1.2130718134773064, "learning_rate": 1.7867343658810058e-07, "loss": 0.0256, "step": 16552 }, { "epoch": 3.7663253697383388, "grad_norm": 1.662873763903303, "learning_rate": 1.786108933663385e-07, "loss": 0.0356, "step": 16553 }, { "epoch": 3.766552901023891, "grad_norm": 1.1962812805367495, "learning_rate": 1.7854835926795212e-07, "loss": 0.0234, "step": 16554 }, { "epoch": 3.7667804323094427, "grad_norm": 1.674435852008205, "learning_rate": 1.7848583429421952e-07, "loss": 0.0512, "step": 16555 }, { "epoch": 3.7670079635949945, "grad_norm": 2.037516105531274, "learning_rate": 1.784233184464188e-07, "loss": 0.0667, "step": 16556 }, { "epoch": 3.767235494880546, "grad_norm": 1.0886354945349634, "learning_rate": 1.783608117258278e-07, "loss": 0.0101, "step": 16557 }, { "epoch": 3.767463026166098, "grad_norm": 1.103805298938299, "learning_rate": 1.7829831413372377e-07, "loss": 0.0166, "step": 16558 }, { "epoch": 3.7676905574516497, "grad_norm": 0.48491463665299595, "learning_rate": 1.7823582567138392e-07, "loss": 0.0022, "step": 16559 }, { "epoch": 3.7679180887372015, "grad_norm": 1.6152883734134584, "learning_rate": 1.781733463400858e-07, "loss": 0.0137, "step": 16560 }, { "epoch": 3.768145620022753, "grad_norm": 7.194667963032002, "learning_rate": 1.7811087614110595e-07, "loss": 0.0728, "step": 16561 }, { "epoch": 3.768373151308305, "grad_norm": 3.0842934918635554, "learning_rate": 1.7804841507572133e-07, "loss": 0.0635, "step": 16562 }, { "epoch": 3.7686006825938567, "grad_norm": 1.7041437787733587, "learning_rate": 1.7798596314520875e-07, "loss": 0.0603, "step": 16563 }, { "epoch": 3.7688282138794085, "grad_norm": 2.1560074480454228, "learning_rate": 1.7792352035084428e-07, "loss": 0.0176, "step": 16564 }, { "epoch": 3.7690557451649602, "grad_norm": 1.0325943011745855, "learning_rate": 1.7786108669390443e-07, "loss": 0.0715, "step": 16565 }, { "epoch": 3.769283276450512, "grad_norm": 5.0042248621731344, "learning_rate": 1.7779866217566485e-07, "loss": 0.0584, "step": 16566 }, { "epoch": 3.7695108077360637, "grad_norm": 0.979847927348563, "learning_rate": 1.7773624679740178e-07, "loss": 0.0149, "step": 16567 }, { "epoch": 3.7697383390216155, "grad_norm": 1.5287294598990515, "learning_rate": 1.7767384056039055e-07, "loss": 0.0692, "step": 16568 }, { "epoch": 3.7699658703071672, "grad_norm": 2.119985649309592, "learning_rate": 1.776114434659068e-07, "loss": 0.0549, "step": 16569 }, { "epoch": 3.770193401592719, "grad_norm": 0.6056021957031547, "learning_rate": 1.7754905551522593e-07, "loss": 0.0037, "step": 16570 }, { "epoch": 3.7704209328782707, "grad_norm": 1.3473171737957452, "learning_rate": 1.7748667670962276e-07, "loss": 0.0126, "step": 16571 }, { "epoch": 3.7706484641638225, "grad_norm": 1.3621827875318213, "learning_rate": 1.7742430705037248e-07, "loss": 0.0929, "step": 16572 }, { "epoch": 3.7708759954493742, "grad_norm": 0.7762423531449254, "learning_rate": 1.7736194653874952e-07, "loss": 0.0051, "step": 16573 }, { "epoch": 3.771103526734926, "grad_norm": 1.259593837250778, "learning_rate": 1.7729959517602877e-07, "loss": 0.0144, "step": 16574 }, { "epoch": 3.7713310580204777, "grad_norm": 3.0621530366179144, "learning_rate": 1.7723725296348415e-07, "loss": 0.0072, "step": 16575 }, { "epoch": 3.7715585893060295, "grad_norm": 1.4382637281437416, "learning_rate": 1.7717491990238999e-07, "loss": 0.0203, "step": 16576 }, { "epoch": 3.7717861205915812, "grad_norm": 1.237875343147657, "learning_rate": 1.7711259599402058e-07, "loss": 0.0839, "step": 16577 }, { "epoch": 3.772013651877133, "grad_norm": 0.5202404820464605, "learning_rate": 1.770502812396492e-07, "loss": 0.0203, "step": 16578 }, { "epoch": 3.7722411831626848, "grad_norm": 1.5944246319650381, "learning_rate": 1.7698797564054994e-07, "loss": 0.0466, "step": 16579 }, { "epoch": 3.7724687144482365, "grad_norm": 1.2992880316358626, "learning_rate": 1.769256791979957e-07, "loss": 0.0853, "step": 16580 }, { "epoch": 3.7726962457337883, "grad_norm": 0.6241315777261341, "learning_rate": 1.768633919132602e-07, "loss": 0.0026, "step": 16581 }, { "epoch": 3.77292377701934, "grad_norm": 1.2137588018143726, "learning_rate": 1.7680111378761606e-07, "loss": 0.0858, "step": 16582 }, { "epoch": 3.7731513083048918, "grad_norm": 2.4240984761792994, "learning_rate": 1.767388448223363e-07, "loss": 0.0939, "step": 16583 }, { "epoch": 3.7733788395904435, "grad_norm": 0.9266077010276297, "learning_rate": 1.7667658501869377e-07, "loss": 0.0123, "step": 16584 }, { "epoch": 3.7736063708759957, "grad_norm": 0.82318675477275, "learning_rate": 1.7661433437796062e-07, "loss": 0.0548, "step": 16585 }, { "epoch": 3.773833902161547, "grad_norm": 1.4540988325842057, "learning_rate": 1.7655209290140947e-07, "loss": 0.0703, "step": 16586 }, { "epoch": 3.774061433447099, "grad_norm": 1.458441298611516, "learning_rate": 1.7648986059031204e-07, "loss": 0.0281, "step": 16587 }, { "epoch": 3.7742889647326505, "grad_norm": 1.8900193790097435, "learning_rate": 1.7642763744594067e-07, "loss": 0.0725, "step": 16588 }, { "epoch": 3.7745164960182027, "grad_norm": 0.9790618672178605, "learning_rate": 1.7636542346956672e-07, "loss": 0.0089, "step": 16589 }, { "epoch": 3.774744027303754, "grad_norm": 1.4457523869587399, "learning_rate": 1.7630321866246178e-07, "loss": 0.0506, "step": 16590 }, { "epoch": 3.774971558589306, "grad_norm": 1.6110190094574524, "learning_rate": 1.7624102302589756e-07, "loss": 0.0806, "step": 16591 }, { "epoch": 3.7751990898748575, "grad_norm": 1.4219156525770313, "learning_rate": 1.761788365611448e-07, "loss": 0.0144, "step": 16592 }, { "epoch": 3.7754266211604097, "grad_norm": 1.3167381569742806, "learning_rate": 1.7611665926947477e-07, "loss": 0.0111, "step": 16593 }, { "epoch": 3.7756541524459615, "grad_norm": 1.6599090034459312, "learning_rate": 1.7605449115215798e-07, "loss": 0.0644, "step": 16594 }, { "epoch": 3.7758816837315132, "grad_norm": 1.793407266716782, "learning_rate": 1.7599233221046515e-07, "loss": 0.0208, "step": 16595 }, { "epoch": 3.776109215017065, "grad_norm": 3.6058180361038743, "learning_rate": 1.7593018244566695e-07, "loss": 0.0217, "step": 16596 }, { "epoch": 3.7763367463026167, "grad_norm": 0.9819034298846003, "learning_rate": 1.758680418590332e-07, "loss": 0.0116, "step": 16597 }, { "epoch": 3.7765642775881685, "grad_norm": 1.771192400591352, "learning_rate": 1.7580591045183426e-07, "loss": 0.027, "step": 16598 }, { "epoch": 3.7767918088737202, "grad_norm": 1.7287389699213576, "learning_rate": 1.7574378822533974e-07, "loss": 0.0978, "step": 16599 }, { "epoch": 3.777019340159272, "grad_norm": 1.5855032300120973, "learning_rate": 1.7568167518081957e-07, "loss": 0.0531, "step": 16600 }, { "epoch": 3.7772468714448237, "grad_norm": 2.418491178345022, "learning_rate": 1.7561957131954293e-07, "loss": 0.0303, "step": 16601 }, { "epoch": 3.7774744027303755, "grad_norm": 1.317933782763778, "learning_rate": 1.7555747664277923e-07, "loss": 0.0298, "step": 16602 }, { "epoch": 3.7777019340159272, "grad_norm": 1.4592569933316024, "learning_rate": 1.7549539115179774e-07, "loss": 0.0929, "step": 16603 }, { "epoch": 3.777929465301479, "grad_norm": 1.508087242389133, "learning_rate": 1.7543331484786702e-07, "loss": 0.054, "step": 16604 }, { "epoch": 3.7781569965870307, "grad_norm": 1.2517598947393669, "learning_rate": 1.7537124773225626e-07, "loss": 0.0434, "step": 16605 }, { "epoch": 3.7783845278725825, "grad_norm": 1.3438965714461941, "learning_rate": 1.7530918980623348e-07, "loss": 0.0601, "step": 16606 }, { "epoch": 3.7786120591581343, "grad_norm": 1.3002329568450286, "learning_rate": 1.752471410710675e-07, "loss": 0.0843, "step": 16607 }, { "epoch": 3.778839590443686, "grad_norm": 0.9104667590189974, "learning_rate": 1.7518510152802614e-07, "loss": 0.0217, "step": 16608 }, { "epoch": 3.7790671217292378, "grad_norm": 3.9069457051966614, "learning_rate": 1.7512307117837745e-07, "loss": 0.026, "step": 16609 }, { "epoch": 3.7792946530147895, "grad_norm": 2.9589733543987617, "learning_rate": 1.750610500233895e-07, "loss": 0.0731, "step": 16610 }, { "epoch": 3.7795221843003413, "grad_norm": 1.1381065225013183, "learning_rate": 1.7499903806432948e-07, "loss": 0.0104, "step": 16611 }, { "epoch": 3.779749715585893, "grad_norm": 2.034107166295403, "learning_rate": 1.7493703530246514e-07, "loss": 0.0255, "step": 16612 }, { "epoch": 3.7799772468714448, "grad_norm": 2.435133203049698, "learning_rate": 1.7487504173906356e-07, "loss": 0.0333, "step": 16613 }, { "epoch": 3.7802047781569965, "grad_norm": 1.0577422593330723, "learning_rate": 1.748130573753916e-07, "loss": 0.0358, "step": 16614 }, { "epoch": 3.7804323094425483, "grad_norm": 1.8949826385531028, "learning_rate": 1.7475108221271624e-07, "loss": 0.0318, "step": 16615 }, { "epoch": 3.7806598407281, "grad_norm": 1.4407767308708699, "learning_rate": 1.7468911625230415e-07, "loss": 0.0466, "step": 16616 }, { "epoch": 3.7808873720136518, "grad_norm": 1.446585939805878, "learning_rate": 1.74627159495422e-07, "loss": 0.0104, "step": 16617 }, { "epoch": 3.7811149032992035, "grad_norm": 1.2448136538628694, "learning_rate": 1.7456521194333575e-07, "loss": 0.0102, "step": 16618 }, { "epoch": 3.7813424345847553, "grad_norm": 1.1310088420680628, "learning_rate": 1.7450327359731177e-07, "loss": 0.0091, "step": 16619 }, { "epoch": 3.781569965870307, "grad_norm": 3.844265846983815, "learning_rate": 1.7444134445861593e-07, "loss": 0.0248, "step": 16620 }, { "epoch": 3.781797497155859, "grad_norm": 1.1976811870931476, "learning_rate": 1.7437942452851352e-07, "loss": 0.0343, "step": 16621 }, { "epoch": 3.7820250284414105, "grad_norm": 1.653416509487304, "learning_rate": 1.7431751380827054e-07, "loss": 0.0111, "step": 16622 }, { "epoch": 3.7822525597269623, "grad_norm": 1.7102652893029122, "learning_rate": 1.7425561229915212e-07, "loss": 0.0278, "step": 16623 }, { "epoch": 3.7824800910125145, "grad_norm": 2.0006236565291293, "learning_rate": 1.7419372000242365e-07, "loss": 0.1001, "step": 16624 }, { "epoch": 3.782707622298066, "grad_norm": 0.33085365333524225, "learning_rate": 1.7413183691934982e-07, "loss": 0.0032, "step": 16625 }, { "epoch": 3.782935153583618, "grad_norm": 1.2359588125754974, "learning_rate": 1.7406996305119565e-07, "loss": 0.0149, "step": 16626 }, { "epoch": 3.7831626848691693, "grad_norm": 0.6971953361547955, "learning_rate": 1.740080983992256e-07, "loss": 0.0364, "step": 16627 }, { "epoch": 3.7833902161547215, "grad_norm": 3.6548308045431126, "learning_rate": 1.739462429647039e-07, "loss": 0.034, "step": 16628 }, { "epoch": 3.783617747440273, "grad_norm": 1.450874666787685, "learning_rate": 1.7388439674889484e-07, "loss": 0.0824, "step": 16629 }, { "epoch": 3.783845278725825, "grad_norm": 1.4070480814210213, "learning_rate": 1.738225597530626e-07, "loss": 0.0702, "step": 16630 }, { "epoch": 3.7840728100113763, "grad_norm": 1.3311830390917534, "learning_rate": 1.7376073197847115e-07, "loss": 0.0322, "step": 16631 }, { "epoch": 3.7843003412969285, "grad_norm": 1.1338531603893822, "learning_rate": 1.7369891342638397e-07, "loss": 0.0303, "step": 16632 }, { "epoch": 3.7845278725824802, "grad_norm": 1.4266297028225858, "learning_rate": 1.7363710409806425e-07, "loss": 0.0736, "step": 16633 }, { "epoch": 3.784755403868032, "grad_norm": 3.2499668400335207, "learning_rate": 1.7357530399477567e-07, "loss": 0.0106, "step": 16634 }, { "epoch": 3.7849829351535837, "grad_norm": 0.719262388805332, "learning_rate": 1.7351351311778092e-07, "loss": 0.0293, "step": 16635 }, { "epoch": 3.7852104664391355, "grad_norm": 1.4508156006586364, "learning_rate": 1.7345173146834309e-07, "loss": 0.0247, "step": 16636 }, { "epoch": 3.7854379977246873, "grad_norm": 1.136104638612223, "learning_rate": 1.7338995904772496e-07, "loss": 0.0386, "step": 16637 }, { "epoch": 3.785665529010239, "grad_norm": 1.5694677592337158, "learning_rate": 1.733281958571891e-07, "loss": 0.0356, "step": 16638 }, { "epoch": 3.7858930602957908, "grad_norm": 1.2716865058670292, "learning_rate": 1.7326644189799778e-07, "loss": 0.0395, "step": 16639 }, { "epoch": 3.7861205915813425, "grad_norm": 1.724348079086112, "learning_rate": 1.732046971714129e-07, "loss": 0.0239, "step": 16640 }, { "epoch": 3.7863481228668943, "grad_norm": 1.0614986853668413, "learning_rate": 1.731429616786967e-07, "loss": 0.0142, "step": 16641 }, { "epoch": 3.786575654152446, "grad_norm": 1.2470023189440365, "learning_rate": 1.7308123542111073e-07, "loss": 0.0342, "step": 16642 }, { "epoch": 3.7868031854379978, "grad_norm": 1.4402433386273457, "learning_rate": 1.7301951839991657e-07, "loss": 0.0182, "step": 16643 }, { "epoch": 3.7870307167235495, "grad_norm": 1.8254365255380622, "learning_rate": 1.7295781061637574e-07, "loss": 0.0235, "step": 16644 }, { "epoch": 3.7872582480091013, "grad_norm": 0.995643157480854, "learning_rate": 1.7289611207174956e-07, "loss": 0.0236, "step": 16645 }, { "epoch": 3.787485779294653, "grad_norm": 1.3634833345449706, "learning_rate": 1.7283442276729887e-07, "loss": 0.0804, "step": 16646 }, { "epoch": 3.7877133105802048, "grad_norm": 1.8545048121741956, "learning_rate": 1.7277274270428434e-07, "loss": 0.0433, "step": 16647 }, { "epoch": 3.7879408418657565, "grad_norm": 1.0968685409356271, "learning_rate": 1.7271107188396682e-07, "loss": 0.0091, "step": 16648 }, { "epoch": 3.7881683731513083, "grad_norm": 1.9537548170201429, "learning_rate": 1.726494103076063e-07, "loss": 0.032, "step": 16649 }, { "epoch": 3.78839590443686, "grad_norm": 2.312620248657657, "learning_rate": 1.7258775797646385e-07, "loss": 0.0238, "step": 16650 }, { "epoch": 3.788623435722412, "grad_norm": 1.0497807588101498, "learning_rate": 1.72526114891799e-07, "loss": 0.0285, "step": 16651 }, { "epoch": 3.7888509670079635, "grad_norm": 2.184446034224271, "learning_rate": 1.7246448105487153e-07, "loss": 0.1725, "step": 16652 }, { "epoch": 3.7890784982935153, "grad_norm": 1.5789556355119092, "learning_rate": 1.7240285646694146e-07, "loss": 0.0809, "step": 16653 }, { "epoch": 3.789306029579067, "grad_norm": 1.7211277579958304, "learning_rate": 1.7234124112926797e-07, "loss": 0.0843, "step": 16654 }, { "epoch": 3.789533560864619, "grad_norm": 1.1366706485890474, "learning_rate": 1.7227963504311043e-07, "loss": 0.019, "step": 16655 }, { "epoch": 3.7897610921501705, "grad_norm": 1.4808506458035362, "learning_rate": 1.7221803820972813e-07, "loss": 0.0394, "step": 16656 }, { "epoch": 3.7899886234357223, "grad_norm": 1.2741083699775204, "learning_rate": 1.7215645063038e-07, "loss": 0.0645, "step": 16657 }, { "epoch": 3.790216154721274, "grad_norm": 3.891952119264927, "learning_rate": 1.7209487230632475e-07, "loss": 0.0953, "step": 16658 }, { "epoch": 3.790443686006826, "grad_norm": 2.2555267815303064, "learning_rate": 1.7203330323882057e-07, "loss": 0.0215, "step": 16659 }, { "epoch": 3.7906712172923775, "grad_norm": 1.0960737744629132, "learning_rate": 1.719717434291264e-07, "loss": 0.0386, "step": 16660 }, { "epoch": 3.7908987485779293, "grad_norm": 1.6226052143745529, "learning_rate": 1.7191019287849984e-07, "loss": 0.0213, "step": 16661 }, { "epoch": 3.791126279863481, "grad_norm": 2.294376491859402, "learning_rate": 1.7184865158819918e-07, "loss": 0.0201, "step": 16662 }, { "epoch": 3.7913538111490332, "grad_norm": 1.665018055151736, "learning_rate": 1.7178711955948216e-07, "loss": 0.0701, "step": 16663 }, { "epoch": 3.7915813424345846, "grad_norm": 1.2843795480561766, "learning_rate": 1.7172559679360656e-07, "loss": 0.0432, "step": 16664 }, { "epoch": 3.7918088737201368, "grad_norm": 0.9695055308732181, "learning_rate": 1.7166408329182967e-07, "loss": 0.0582, "step": 16665 }, { "epoch": 3.792036405005688, "grad_norm": 1.3812627489907494, "learning_rate": 1.7160257905540851e-07, "loss": 0.0212, "step": 16666 }, { "epoch": 3.7922639362912403, "grad_norm": 1.0251812394783038, "learning_rate": 1.7154108408560044e-07, "loss": 0.0193, "step": 16667 }, { "epoch": 3.7924914675767916, "grad_norm": 1.3593342451803578, "learning_rate": 1.7147959838366187e-07, "loss": 0.0293, "step": 16668 }, { "epoch": 3.7927189988623438, "grad_norm": 1.9160834746716404, "learning_rate": 1.7141812195084983e-07, "loss": 0.0381, "step": 16669 }, { "epoch": 3.792946530147895, "grad_norm": 1.0003548125830841, "learning_rate": 1.713566547884208e-07, "loss": 0.0092, "step": 16670 }, { "epoch": 3.7931740614334473, "grad_norm": 1.6494920181030812, "learning_rate": 1.7129519689763077e-07, "loss": 0.1293, "step": 16671 }, { "epoch": 3.793401592718999, "grad_norm": 1.7802333262677004, "learning_rate": 1.7123374827973625e-07, "loss": 0.0935, "step": 16672 }, { "epoch": 3.7936291240045508, "grad_norm": 0.8115663101743413, "learning_rate": 1.7117230893599263e-07, "loss": 0.0031, "step": 16673 }, { "epoch": 3.7938566552901025, "grad_norm": 1.4859973444066017, "learning_rate": 1.7111087886765604e-07, "loss": 0.0292, "step": 16674 }, { "epoch": 3.7940841865756543, "grad_norm": 2.458682570684622, "learning_rate": 1.7104945807598167e-07, "loss": 0.0513, "step": 16675 }, { "epoch": 3.794311717861206, "grad_norm": 1.7078107652579582, "learning_rate": 1.7098804656222498e-07, "loss": 0.135, "step": 16676 }, { "epoch": 3.7945392491467578, "grad_norm": 1.5664900211053567, "learning_rate": 1.7092664432764138e-07, "loss": 0.0221, "step": 16677 }, { "epoch": 3.7947667804323095, "grad_norm": 1.223824128265116, "learning_rate": 1.7086525137348536e-07, "loss": 0.0589, "step": 16678 }, { "epoch": 3.7949943117178613, "grad_norm": 3.807314304580491, "learning_rate": 1.7080386770101203e-07, "loss": 0.0147, "step": 16679 }, { "epoch": 3.795221843003413, "grad_norm": 1.3956559302852374, "learning_rate": 1.707424933114757e-07, "loss": 0.0402, "step": 16680 }, { "epoch": 3.795449374288965, "grad_norm": 0.9233239312400406, "learning_rate": 1.7068112820613103e-07, "loss": 0.0121, "step": 16681 }, { "epoch": 3.7956769055745165, "grad_norm": 1.06546300203474, "learning_rate": 1.7061977238623185e-07, "loss": 0.0083, "step": 16682 }, { "epoch": 3.7959044368600683, "grad_norm": 1.2776065101770533, "learning_rate": 1.705584258530324e-07, "loss": 0.0473, "step": 16683 }, { "epoch": 3.79613196814562, "grad_norm": 1.3117225844678355, "learning_rate": 1.704970886077866e-07, "loss": 0.0162, "step": 16684 }, { "epoch": 3.796359499431172, "grad_norm": 2.599030986197993, "learning_rate": 1.7043576065174782e-07, "loss": 0.0472, "step": 16685 }, { "epoch": 3.7965870307167235, "grad_norm": 1.6682764092685978, "learning_rate": 1.703744419861697e-07, "loss": 0.0141, "step": 16686 }, { "epoch": 3.7968145620022753, "grad_norm": 1.5018990398099457, "learning_rate": 1.7031313261230524e-07, "loss": 0.0509, "step": 16687 }, { "epoch": 3.797042093287827, "grad_norm": 1.431656629102805, "learning_rate": 1.702518325314078e-07, "loss": 0.0176, "step": 16688 }, { "epoch": 3.797269624573379, "grad_norm": 2.0702087618009473, "learning_rate": 1.7019054174472992e-07, "loss": 0.0066, "step": 16689 }, { "epoch": 3.7974971558589306, "grad_norm": 0.8189045470869522, "learning_rate": 1.7012926025352434e-07, "loss": 0.0089, "step": 16690 }, { "epoch": 3.7977246871444823, "grad_norm": 1.362194686917962, "learning_rate": 1.700679880590439e-07, "loss": 0.0497, "step": 16691 }, { "epoch": 3.797952218430034, "grad_norm": 2.322265663283642, "learning_rate": 1.7000672516254036e-07, "loss": 0.0923, "step": 16692 }, { "epoch": 3.798179749715586, "grad_norm": 1.9371707321453888, "learning_rate": 1.6994547156526632e-07, "loss": 0.0425, "step": 16693 }, { "epoch": 3.7984072810011376, "grad_norm": 1.3052787232965046, "learning_rate": 1.6988422726847322e-07, "loss": 0.017, "step": 16694 }, { "epoch": 3.7986348122866893, "grad_norm": 3.0057070099376264, "learning_rate": 1.698229922734132e-07, "loss": 0.0552, "step": 16695 }, { "epoch": 3.798862343572241, "grad_norm": 2.41595743908195, "learning_rate": 1.6976176658133738e-07, "loss": 0.0477, "step": 16696 }, { "epoch": 3.799089874857793, "grad_norm": 1.204964522117702, "learning_rate": 1.6970055019349737e-07, "loss": 0.0168, "step": 16697 }, { "epoch": 3.7993174061433446, "grad_norm": 1.12116426605569, "learning_rate": 1.696393431111445e-07, "loss": 0.0498, "step": 16698 }, { "epoch": 3.7995449374288963, "grad_norm": 0.8488104015838261, "learning_rate": 1.6957814533552927e-07, "loss": 0.0101, "step": 16699 }, { "epoch": 3.799772468714448, "grad_norm": 0.7212586005029933, "learning_rate": 1.6951695686790288e-07, "loss": 0.0066, "step": 16700 }, { "epoch": 3.8, "grad_norm": 1.360196184592972, "learning_rate": 1.694557777095157e-07, "loss": 0.0245, "step": 16701 }, { "epoch": 3.800227531285552, "grad_norm": 1.3441169911935007, "learning_rate": 1.6939460786161792e-07, "loss": 0.0665, "step": 16702 }, { "epoch": 3.8004550625711033, "grad_norm": 1.014217206708859, "learning_rate": 1.6933344732546e-07, "loss": 0.0192, "step": 16703 }, { "epoch": 3.8006825938566555, "grad_norm": 1.1874992540276108, "learning_rate": 1.692722961022919e-07, "loss": 0.02, "step": 16704 }, { "epoch": 3.800910125142207, "grad_norm": 1.948880300727949, "learning_rate": 1.692111541933636e-07, "loss": 0.0277, "step": 16705 }, { "epoch": 3.801137656427759, "grad_norm": 0.9163804294140202, "learning_rate": 1.6915002159992435e-07, "loss": 0.0358, "step": 16706 }, { "epoch": 3.8013651877133103, "grad_norm": 2.1039007639569505, "learning_rate": 1.690888983232241e-07, "loss": 0.0135, "step": 16707 }, { "epoch": 3.8015927189988625, "grad_norm": 1.9104340641766175, "learning_rate": 1.6902778436451174e-07, "loss": 0.0831, "step": 16708 }, { "epoch": 3.801820250284414, "grad_norm": 1.734939210683475, "learning_rate": 1.6896667972503625e-07, "loss": 0.0124, "step": 16709 }, { "epoch": 3.802047781569966, "grad_norm": 2.268681655743103, "learning_rate": 1.689055844060466e-07, "loss": 0.0216, "step": 16710 }, { "epoch": 3.802275312855518, "grad_norm": 1.4202882275962285, "learning_rate": 1.6884449840879147e-07, "loss": 0.045, "step": 16711 }, { "epoch": 3.8025028441410695, "grad_norm": 1.7025991744332352, "learning_rate": 1.6878342173451968e-07, "loss": 0.0437, "step": 16712 }, { "epoch": 3.8027303754266213, "grad_norm": 1.556163313546284, "learning_rate": 1.6872235438447896e-07, "loss": 0.0714, "step": 16713 }, { "epoch": 3.802957906712173, "grad_norm": 0.9402739878077112, "learning_rate": 1.686612963599179e-07, "loss": 0.0403, "step": 16714 }, { "epoch": 3.803185437997725, "grad_norm": 1.379435037463227, "learning_rate": 1.686002476620842e-07, "loss": 0.0694, "step": 16715 }, { "epoch": 3.8034129692832765, "grad_norm": 0.7889480667121839, "learning_rate": 1.6853920829222513e-07, "loss": 0.0084, "step": 16716 }, { "epoch": 3.8036405005688283, "grad_norm": 0.8039603698550585, "learning_rate": 1.6847817825158916e-07, "loss": 0.0262, "step": 16717 }, { "epoch": 3.80386803185438, "grad_norm": 3.5935973484330224, "learning_rate": 1.6841715754142286e-07, "loss": 0.095, "step": 16718 }, { "epoch": 3.804095563139932, "grad_norm": 1.0645205204119816, "learning_rate": 1.6835614616297396e-07, "loss": 0.0055, "step": 16719 }, { "epoch": 3.8043230944254836, "grad_norm": 1.9336377536006708, "learning_rate": 1.6829514411748903e-07, "loss": 0.0355, "step": 16720 }, { "epoch": 3.8045506257110353, "grad_norm": 1.7724660662332885, "learning_rate": 1.6823415140621483e-07, "loss": 0.0199, "step": 16721 }, { "epoch": 3.804778156996587, "grad_norm": 1.4138174573257125, "learning_rate": 1.68173168030398e-07, "loss": 0.1018, "step": 16722 }, { "epoch": 3.805005688282139, "grad_norm": 1.7825968700406738, "learning_rate": 1.68112193991285e-07, "loss": 0.0409, "step": 16723 }, { "epoch": 3.8052332195676906, "grad_norm": 1.0184787490151188, "learning_rate": 1.6805122929012214e-07, "loss": 0.0357, "step": 16724 }, { "epoch": 3.8054607508532423, "grad_norm": 0.3152357994894127, "learning_rate": 1.679902739281552e-07, "loss": 0.0018, "step": 16725 }, { "epoch": 3.805688282138794, "grad_norm": 1.4972844719676723, "learning_rate": 1.679293279066302e-07, "loss": 0.0124, "step": 16726 }, { "epoch": 3.805915813424346, "grad_norm": 1.5218033845948682, "learning_rate": 1.678683912267927e-07, "loss": 0.0375, "step": 16727 }, { "epoch": 3.8061433447098976, "grad_norm": 1.41811197087289, "learning_rate": 1.6780746388988786e-07, "loss": 0.0134, "step": 16728 }, { "epoch": 3.8063708759954493, "grad_norm": 1.1099435873533738, "learning_rate": 1.6774654589716112e-07, "loss": 0.0241, "step": 16729 }, { "epoch": 3.806598407281001, "grad_norm": 1.9717314751719157, "learning_rate": 1.676856372498576e-07, "loss": 0.0976, "step": 16730 }, { "epoch": 3.806825938566553, "grad_norm": 1.3821298186174453, "learning_rate": 1.6762473794922233e-07, "loss": 0.052, "step": 16731 }, { "epoch": 3.8070534698521046, "grad_norm": 1.7097980680077784, "learning_rate": 1.675638479964996e-07, "loss": 0.0099, "step": 16732 }, { "epoch": 3.8072810011376563, "grad_norm": 1.7273404070095468, "learning_rate": 1.6750296739293416e-07, "loss": 0.0889, "step": 16733 }, { "epoch": 3.807508532423208, "grad_norm": 1.6066441473814979, "learning_rate": 1.6744209613977026e-07, "loss": 0.1054, "step": 16734 }, { "epoch": 3.80773606370876, "grad_norm": 0.8972331117143727, "learning_rate": 1.6738123423825177e-07, "loss": 0.0268, "step": 16735 }, { "epoch": 3.8079635949943116, "grad_norm": 1.0592180109825071, "learning_rate": 1.6732038168962275e-07, "loss": 0.0418, "step": 16736 }, { "epoch": 3.8081911262798633, "grad_norm": 1.2348513125866456, "learning_rate": 1.6725953849512694e-07, "loss": 0.088, "step": 16737 }, { "epoch": 3.808418657565415, "grad_norm": 1.0997892451763167, "learning_rate": 1.67198704656008e-07, "loss": 0.0407, "step": 16738 }, { "epoch": 3.8086461888509673, "grad_norm": 1.5628627799874866, "learning_rate": 1.6713788017350915e-07, "loss": 0.0713, "step": 16739 }, { "epoch": 3.8088737201365186, "grad_norm": 1.7566426168379492, "learning_rate": 1.6707706504887322e-07, "loss": 0.0094, "step": 16740 }, { "epoch": 3.809101251422071, "grad_norm": 0.7137660341337054, "learning_rate": 1.6701625928334375e-07, "loss": 0.028, "step": 16741 }, { "epoch": 3.809328782707622, "grad_norm": 1.0563414780323863, "learning_rate": 1.6695546287816293e-07, "loss": 0.0096, "step": 16742 }, { "epoch": 3.8095563139931743, "grad_norm": 1.4099639253853238, "learning_rate": 1.668946758345736e-07, "loss": 0.043, "step": 16743 }, { "epoch": 3.8097838452787256, "grad_norm": 2.518032744734146, "learning_rate": 1.6683389815381806e-07, "loss": 0.0723, "step": 16744 }, { "epoch": 3.810011376564278, "grad_norm": 2.0354885524847828, "learning_rate": 1.6677312983713883e-07, "loss": 0.0316, "step": 16745 }, { "epoch": 3.810238907849829, "grad_norm": 2.116439807950361, "learning_rate": 1.6671237088577757e-07, "loss": 0.0659, "step": 16746 }, { "epoch": 3.8104664391353813, "grad_norm": 1.3971126817088413, "learning_rate": 1.666516213009759e-07, "loss": 0.0882, "step": 16747 }, { "epoch": 3.8106939704209326, "grad_norm": 1.9303919067835782, "learning_rate": 1.6659088108397594e-07, "loss": 0.1408, "step": 16748 }, { "epoch": 3.810921501706485, "grad_norm": 1.8097052830095133, "learning_rate": 1.6653015023601857e-07, "loss": 0.0335, "step": 16749 }, { "epoch": 3.8111490329920366, "grad_norm": 1.8118878469648312, "learning_rate": 1.6646942875834537e-07, "loss": 0.088, "step": 16750 }, { "epoch": 3.8113765642775883, "grad_norm": 1.4532367365744694, "learning_rate": 1.6640871665219723e-07, "loss": 0.1221, "step": 16751 }, { "epoch": 3.81160409556314, "grad_norm": 1.1000049043724043, "learning_rate": 1.663480139188152e-07, "loss": 0.0608, "step": 16752 }, { "epoch": 3.811831626848692, "grad_norm": 1.4487246442438098, "learning_rate": 1.6628732055943986e-07, "loss": 0.0123, "step": 16753 }, { "epoch": 3.8120591581342436, "grad_norm": 1.8510133049138089, "learning_rate": 1.6622663657531137e-07, "loss": 0.0193, "step": 16754 }, { "epoch": 3.8122866894197953, "grad_norm": 0.9324523997823728, "learning_rate": 1.661659619676704e-07, "loss": 0.0231, "step": 16755 }, { "epoch": 3.812514220705347, "grad_norm": 0.8478880217739795, "learning_rate": 1.6610529673775668e-07, "loss": 0.0086, "step": 16756 }, { "epoch": 3.812741751990899, "grad_norm": 1.8689033757889129, "learning_rate": 1.660446408868103e-07, "loss": 0.0218, "step": 16757 }, { "epoch": 3.8129692832764506, "grad_norm": 2.4132789836982362, "learning_rate": 1.6598399441607107e-07, "loss": 0.0289, "step": 16758 }, { "epoch": 3.8131968145620023, "grad_norm": 2.579290477521502, "learning_rate": 1.6592335732677816e-07, "loss": 0.0418, "step": 16759 }, { "epoch": 3.813424345847554, "grad_norm": 1.2368921502749586, "learning_rate": 1.6586272962017127e-07, "loss": 0.0182, "step": 16760 }, { "epoch": 3.813651877133106, "grad_norm": 1.1514030069287324, "learning_rate": 1.658021112974891e-07, "loss": 0.0376, "step": 16761 }, { "epoch": 3.8138794084186576, "grad_norm": 1.3038860675659565, "learning_rate": 1.65741502359971e-07, "loss": 0.0576, "step": 16762 }, { "epoch": 3.8141069397042093, "grad_norm": 1.56212429972274, "learning_rate": 1.656809028088554e-07, "loss": 0.0807, "step": 16763 }, { "epoch": 3.814334470989761, "grad_norm": 0.7814432582903414, "learning_rate": 1.6562031264538086e-07, "loss": 0.0044, "step": 16764 }, { "epoch": 3.814562002275313, "grad_norm": 2.0127848173330234, "learning_rate": 1.6555973187078599e-07, "loss": 0.0232, "step": 16765 }, { "epoch": 3.8147895335608646, "grad_norm": 1.144037163281027, "learning_rate": 1.6549916048630855e-07, "loss": 0.0301, "step": 16766 }, { "epoch": 3.8150170648464163, "grad_norm": 2.0229370751433597, "learning_rate": 1.65438598493187e-07, "loss": 0.045, "step": 16767 }, { "epoch": 3.815244596131968, "grad_norm": 3.3932410602598195, "learning_rate": 1.6537804589265863e-07, "loss": 0.0445, "step": 16768 }, { "epoch": 3.81547212741752, "grad_norm": 2.9806259805073516, "learning_rate": 1.6531750268596145e-07, "loss": 0.0422, "step": 16769 }, { "epoch": 3.8156996587030716, "grad_norm": 1.7450852308609837, "learning_rate": 1.6525696887433226e-07, "loss": 0.0175, "step": 16770 }, { "epoch": 3.8159271899886233, "grad_norm": 2.25321013591153, "learning_rate": 1.6519644445900902e-07, "loss": 0.013, "step": 16771 }, { "epoch": 3.816154721274175, "grad_norm": 1.445459751745715, "learning_rate": 1.6513592944122837e-07, "loss": 0.0816, "step": 16772 }, { "epoch": 3.816382252559727, "grad_norm": 0.8934849313988257, "learning_rate": 1.650754238222269e-07, "loss": 0.0099, "step": 16773 }, { "epoch": 3.8166097838452786, "grad_norm": 0.9756397240029294, "learning_rate": 1.6501492760324172e-07, "loss": 0.0074, "step": 16774 }, { "epoch": 3.8168373151308304, "grad_norm": 1.5918254742866227, "learning_rate": 1.6495444078550873e-07, "loss": 0.0526, "step": 16775 }, { "epoch": 3.817064846416382, "grad_norm": 1.6108593912366158, "learning_rate": 1.6489396337026446e-07, "loss": 0.04, "step": 16776 }, { "epoch": 3.817292377701934, "grad_norm": 1.150409303053751, "learning_rate": 1.6483349535874513e-07, "loss": 0.0212, "step": 16777 }, { "epoch": 3.817519908987486, "grad_norm": 2.1141971200241687, "learning_rate": 1.6477303675218624e-07, "loss": 0.032, "step": 16778 }, { "epoch": 3.8177474402730374, "grad_norm": 2.728273734156475, "learning_rate": 1.647125875518238e-07, "loss": 0.0189, "step": 16779 }, { "epoch": 3.8179749715585896, "grad_norm": 3.0632249609747, "learning_rate": 1.6465214775889283e-07, "loss": 0.0258, "step": 16780 }, { "epoch": 3.818202502844141, "grad_norm": 1.06766406583764, "learning_rate": 1.6459171737462912e-07, "loss": 0.0393, "step": 16781 }, { "epoch": 3.818430034129693, "grad_norm": 0.9056771348681865, "learning_rate": 1.6453129640026732e-07, "loss": 0.0104, "step": 16782 }, { "epoch": 3.8186575654152444, "grad_norm": 1.6087973772410136, "learning_rate": 1.644708848370425e-07, "loss": 0.0716, "step": 16783 }, { "epoch": 3.8188850967007966, "grad_norm": 1.2079965887952866, "learning_rate": 1.6441048268618955e-07, "loss": 0.1091, "step": 16784 }, { "epoch": 3.819112627986348, "grad_norm": 1.2915120844511556, "learning_rate": 1.6435008994894268e-07, "loss": 0.0551, "step": 16785 }, { "epoch": 3.8193401592719, "grad_norm": 4.224941404814917, "learning_rate": 1.6428970662653642e-07, "loss": 0.0194, "step": 16786 }, { "epoch": 3.8195676905574514, "grad_norm": 0.7069763611094887, "learning_rate": 1.642293327202047e-07, "loss": 0.0031, "step": 16787 }, { "epoch": 3.8197952218430036, "grad_norm": 1.0360879137996888, "learning_rate": 1.6416896823118172e-07, "loss": 0.0624, "step": 16788 }, { "epoch": 3.8200227531285553, "grad_norm": 2.3082149212515484, "learning_rate": 1.6410861316070087e-07, "loss": 0.0183, "step": 16789 }, { "epoch": 3.820250284414107, "grad_norm": 1.9248627880363953, "learning_rate": 1.6404826750999587e-07, "loss": 0.0249, "step": 16790 }, { "epoch": 3.820477815699659, "grad_norm": 4.1508578850627735, "learning_rate": 1.6398793128030026e-07, "loss": 0.0229, "step": 16791 }, { "epoch": 3.8207053469852106, "grad_norm": 1.077660118773367, "learning_rate": 1.6392760447284688e-07, "loss": 0.0481, "step": 16792 }, { "epoch": 3.8209328782707623, "grad_norm": 3.1373819311140205, "learning_rate": 1.6386728708886906e-07, "loss": 0.0144, "step": 16793 }, { "epoch": 3.821160409556314, "grad_norm": 1.549665719183275, "learning_rate": 1.638069791295991e-07, "loss": 0.0129, "step": 16794 }, { "epoch": 3.821387940841866, "grad_norm": 2.8201614893536724, "learning_rate": 1.6374668059627008e-07, "loss": 0.0333, "step": 16795 }, { "epoch": 3.8216154721274176, "grad_norm": 2.0880403193685493, "learning_rate": 1.6368639149011398e-07, "loss": 0.118, "step": 16796 }, { "epoch": 3.8218430034129693, "grad_norm": 3.5475939611617138, "learning_rate": 1.636261118123632e-07, "loss": 0.1296, "step": 16797 }, { "epoch": 3.822070534698521, "grad_norm": 1.4961270680616394, "learning_rate": 1.6356584156424986e-07, "loss": 0.025, "step": 16798 }, { "epoch": 3.822298065984073, "grad_norm": 1.4051129073842517, "learning_rate": 1.6350558074700555e-07, "loss": 0.0552, "step": 16799 }, { "epoch": 3.8225255972696246, "grad_norm": 2.3341459017992916, "learning_rate": 1.6344532936186208e-07, "loss": 0.0402, "step": 16800 }, { "epoch": 3.8227531285551763, "grad_norm": 1.5480192157779151, "learning_rate": 1.6338508741005058e-07, "loss": 0.0176, "step": 16801 }, { "epoch": 3.822980659840728, "grad_norm": 4.193954970313833, "learning_rate": 1.6332485489280278e-07, "loss": 0.0305, "step": 16802 }, { "epoch": 3.82320819112628, "grad_norm": 1.2014726407052752, "learning_rate": 1.6326463181134913e-07, "loss": 0.0449, "step": 16803 }, { "epoch": 3.8234357224118316, "grad_norm": 1.8644811335630926, "learning_rate": 1.6320441816692088e-07, "loss": 0.062, "step": 16804 }, { "epoch": 3.8236632536973834, "grad_norm": 1.1079815810439222, "learning_rate": 1.631442139607487e-07, "loss": 0.016, "step": 16805 }, { "epoch": 3.823890784982935, "grad_norm": 1.38510187405092, "learning_rate": 1.6308401919406274e-07, "loss": 0.0237, "step": 16806 }, { "epoch": 3.824118316268487, "grad_norm": 1.1960132501387017, "learning_rate": 1.6302383386809367e-07, "loss": 0.0667, "step": 16807 }, { "epoch": 3.8243458475540386, "grad_norm": 0.5797615303556929, "learning_rate": 1.629636579840712e-07, "loss": 0.0027, "step": 16808 }, { "epoch": 3.8245733788395904, "grad_norm": 1.5422008712938273, "learning_rate": 1.629034915432256e-07, "loss": 0.0252, "step": 16809 }, { "epoch": 3.824800910125142, "grad_norm": 1.6064759178735952, "learning_rate": 1.6284333454678607e-07, "loss": 0.0203, "step": 16810 }, { "epoch": 3.825028441410694, "grad_norm": 1.486126926338247, "learning_rate": 1.627831869959825e-07, "loss": 0.0373, "step": 16811 }, { "epoch": 3.8252559726962456, "grad_norm": 0.8547958632890771, "learning_rate": 1.6272304889204413e-07, "loss": 0.0321, "step": 16812 }, { "epoch": 3.8254835039817974, "grad_norm": 1.2351379629852808, "learning_rate": 1.626629202361999e-07, "loss": 0.0461, "step": 16813 }, { "epoch": 3.825711035267349, "grad_norm": 2.7812985790302323, "learning_rate": 1.626028010296791e-07, "loss": 0.0341, "step": 16814 }, { "epoch": 3.825938566552901, "grad_norm": 1.1129138955777569, "learning_rate": 1.6254269127371006e-07, "loss": 0.0429, "step": 16815 }, { "epoch": 3.8261660978384526, "grad_norm": 1.4874123116496283, "learning_rate": 1.6248259096952136e-07, "loss": 0.0727, "step": 16816 }, { "epoch": 3.826393629124005, "grad_norm": 1.0432089936667477, "learning_rate": 1.6242250011834145e-07, "loss": 0.0314, "step": 16817 }, { "epoch": 3.826621160409556, "grad_norm": 0.895033617943176, "learning_rate": 1.6236241872139847e-07, "loss": 0.0453, "step": 16818 }, { "epoch": 3.8268486916951083, "grad_norm": 2.539353418559393, "learning_rate": 1.623023467799205e-07, "loss": 0.09, "step": 16819 }, { "epoch": 3.8270762229806596, "grad_norm": 1.3667723612606575, "learning_rate": 1.6224228429513503e-07, "loss": 0.0185, "step": 16820 }, { "epoch": 3.827303754266212, "grad_norm": 1.1657708085725196, "learning_rate": 1.6218223126826998e-07, "loss": 0.1152, "step": 16821 }, { "epoch": 3.827531285551763, "grad_norm": 2.5512313237470376, "learning_rate": 1.6212218770055243e-07, "loss": 0.0213, "step": 16822 }, { "epoch": 3.8277588168373153, "grad_norm": 1.5754930027836271, "learning_rate": 1.6206215359320953e-07, "loss": 0.0712, "step": 16823 }, { "epoch": 3.8279863481228666, "grad_norm": 0.9366360171003841, "learning_rate": 1.6200212894746838e-07, "loss": 0.014, "step": 16824 }, { "epoch": 3.828213879408419, "grad_norm": 1.7550177772745257, "learning_rate": 1.619421137645557e-07, "loss": 0.0531, "step": 16825 }, { "epoch": 3.82844141069397, "grad_norm": 1.0772856785156093, "learning_rate": 1.6188210804569845e-07, "loss": 0.0118, "step": 16826 }, { "epoch": 3.8286689419795223, "grad_norm": 1.96552015885871, "learning_rate": 1.6182211179212267e-07, "loss": 0.0457, "step": 16827 }, { "epoch": 3.828896473265074, "grad_norm": 1.0246587841685786, "learning_rate": 1.6176212500505453e-07, "loss": 0.0246, "step": 16828 }, { "epoch": 3.829124004550626, "grad_norm": 7.27295269389441, "learning_rate": 1.617021476857203e-07, "loss": 0.0387, "step": 16829 }, { "epoch": 3.8293515358361776, "grad_norm": 1.4199804315316222, "learning_rate": 1.6164217983534556e-07, "loss": 0.0311, "step": 16830 }, { "epoch": 3.8295790671217294, "grad_norm": 1.6413235808054383, "learning_rate": 1.6158222145515602e-07, "loss": 0.058, "step": 16831 }, { "epoch": 3.829806598407281, "grad_norm": 1.3432974263330988, "learning_rate": 1.6152227254637714e-07, "loss": 0.0112, "step": 16832 }, { "epoch": 3.830034129692833, "grad_norm": 1.4496570861932117, "learning_rate": 1.6146233311023441e-07, "loss": 0.0321, "step": 16833 }, { "epoch": 3.8302616609783846, "grad_norm": 1.569760151549326, "learning_rate": 1.6140240314795269e-07, "loss": 0.0554, "step": 16834 }, { "epoch": 3.8304891922639364, "grad_norm": 0.9794939625548293, "learning_rate": 1.613424826607566e-07, "loss": 0.0131, "step": 16835 }, { "epoch": 3.830716723549488, "grad_norm": 2.601795778295144, "learning_rate": 1.6128257164987116e-07, "loss": 0.0668, "step": 16836 }, { "epoch": 3.83094425483504, "grad_norm": 1.2363273851548033, "learning_rate": 1.6122267011652037e-07, "loss": 0.0207, "step": 16837 }, { "epoch": 3.8311717861205916, "grad_norm": 1.3366556557678009, "learning_rate": 1.6116277806192919e-07, "loss": 0.0263, "step": 16838 }, { "epoch": 3.8313993174061434, "grad_norm": 1.5635731344803598, "learning_rate": 1.6110289548732116e-07, "loss": 0.039, "step": 16839 }, { "epoch": 3.831626848691695, "grad_norm": 2.11043350614016, "learning_rate": 1.6104302239392058e-07, "loss": 0.0231, "step": 16840 }, { "epoch": 3.831854379977247, "grad_norm": 1.493707115179156, "learning_rate": 1.6098315878295093e-07, "loss": 0.0129, "step": 16841 }, { "epoch": 3.8320819112627986, "grad_norm": 1.6361221172396028, "learning_rate": 1.6092330465563549e-07, "loss": 0.0244, "step": 16842 }, { "epoch": 3.8323094425483504, "grad_norm": 2.131976665671698, "learning_rate": 1.608634600131978e-07, "loss": 0.0335, "step": 16843 }, { "epoch": 3.832536973833902, "grad_norm": 3.869102026411126, "learning_rate": 1.608036248568609e-07, "loss": 0.0523, "step": 16844 }, { "epoch": 3.832764505119454, "grad_norm": 1.0074030002782117, "learning_rate": 1.6074379918784807e-07, "loss": 0.0112, "step": 16845 }, { "epoch": 3.8329920364050056, "grad_norm": 1.325068162982002, "learning_rate": 1.6068398300738163e-07, "loss": 0.0189, "step": 16846 }, { "epoch": 3.8332195676905574, "grad_norm": 1.9264494148668263, "learning_rate": 1.606241763166841e-07, "loss": 0.1014, "step": 16847 }, { "epoch": 3.833447098976109, "grad_norm": 1.5156756819351418, "learning_rate": 1.605643791169781e-07, "loss": 0.0242, "step": 16848 }, { "epoch": 3.833674630261661, "grad_norm": 1.2327368931631841, "learning_rate": 1.605045914094855e-07, "loss": 0.0324, "step": 16849 }, { "epoch": 3.8339021615472126, "grad_norm": 1.2996942769089002, "learning_rate": 1.604448131954283e-07, "loss": 0.0188, "step": 16850 }, { "epoch": 3.8341296928327644, "grad_norm": 1.5463669808327822, "learning_rate": 1.6038504447602832e-07, "loss": 0.0561, "step": 16851 }, { "epoch": 3.834357224118316, "grad_norm": 1.3219260401980464, "learning_rate": 1.603252852525073e-07, "loss": 0.033, "step": 16852 }, { "epoch": 3.834584755403868, "grad_norm": 1.437130561960783, "learning_rate": 1.6026553552608647e-07, "loss": 0.0954, "step": 16853 }, { "epoch": 3.8348122866894196, "grad_norm": 1.3990619403042548, "learning_rate": 1.602057952979868e-07, "loss": 0.0716, "step": 16854 }, { "epoch": 3.8350398179749714, "grad_norm": 2.128090187326744, "learning_rate": 1.6014606456942966e-07, "loss": 0.0156, "step": 16855 }, { "epoch": 3.8352673492605236, "grad_norm": 1.4093294511201238, "learning_rate": 1.6008634334163538e-07, "loss": 0.0366, "step": 16856 }, { "epoch": 3.835494880546075, "grad_norm": 1.1039626292570928, "learning_rate": 1.600266316158248e-07, "loss": 0.0256, "step": 16857 }, { "epoch": 3.835722411831627, "grad_norm": 1.9835334587599442, "learning_rate": 1.599669293932183e-07, "loss": 0.0652, "step": 16858 }, { "epoch": 3.8359499431171784, "grad_norm": 1.1859866834088924, "learning_rate": 1.5990723667503628e-07, "loss": 0.0365, "step": 16859 }, { "epoch": 3.8361774744027306, "grad_norm": 1.0474794272189476, "learning_rate": 1.598475534624986e-07, "loss": 0.0309, "step": 16860 }, { "epoch": 3.836405005688282, "grad_norm": 1.6123437511241607, "learning_rate": 1.5978787975682488e-07, "loss": 0.0476, "step": 16861 }, { "epoch": 3.836632536973834, "grad_norm": 1.846117220812804, "learning_rate": 1.59728215559235e-07, "loss": 0.0158, "step": 16862 }, { "epoch": 3.8368600682593854, "grad_norm": 0.8888043729403173, "learning_rate": 1.596685608709482e-07, "loss": 0.0103, "step": 16863 }, { "epoch": 3.8370875995449376, "grad_norm": 1.1010593085355198, "learning_rate": 1.596089156931837e-07, "loss": 0.0078, "step": 16864 }, { "epoch": 3.837315130830489, "grad_norm": 1.6176787986847652, "learning_rate": 1.5954928002716094e-07, "loss": 0.1129, "step": 16865 }, { "epoch": 3.837542662116041, "grad_norm": 1.4956980731673248, "learning_rate": 1.5948965387409816e-07, "loss": 0.0099, "step": 16866 }, { "epoch": 3.837770193401593, "grad_norm": 1.2316088985187623, "learning_rate": 1.5943003723521456e-07, "loss": 0.0498, "step": 16867 }, { "epoch": 3.8379977246871446, "grad_norm": 1.6640509833564998, "learning_rate": 1.593704301117282e-07, "loss": 0.1027, "step": 16868 }, { "epoch": 3.8382252559726964, "grad_norm": 1.4134189285277239, "learning_rate": 1.5931083250485757e-07, "loss": 0.1282, "step": 16869 }, { "epoch": 3.838452787258248, "grad_norm": 0.9131404189270361, "learning_rate": 1.5925124441582056e-07, "loss": 0.0332, "step": 16870 }, { "epoch": 3.8386803185438, "grad_norm": 1.4651225840776987, "learning_rate": 1.5919166584583507e-07, "loss": 0.0291, "step": 16871 }, { "epoch": 3.8389078498293516, "grad_norm": 1.042818053372846, "learning_rate": 1.5913209679611907e-07, "loss": 0.027, "step": 16872 }, { "epoch": 3.8391353811149034, "grad_norm": 1.0330546583922284, "learning_rate": 1.5907253726788968e-07, "loss": 0.0178, "step": 16873 }, { "epoch": 3.839362912400455, "grad_norm": 2.6242094662416218, "learning_rate": 1.590129872623644e-07, "loss": 0.0167, "step": 16874 }, { "epoch": 3.839590443686007, "grad_norm": 1.1416319561534913, "learning_rate": 1.5895344678076012e-07, "loss": 0.0172, "step": 16875 }, { "epoch": 3.8398179749715586, "grad_norm": 1.5368590595896043, "learning_rate": 1.58893915824294e-07, "loss": 0.0215, "step": 16876 }, { "epoch": 3.8400455062571104, "grad_norm": 1.4062780128369847, "learning_rate": 1.588343943941825e-07, "loss": 0.1136, "step": 16877 }, { "epoch": 3.840273037542662, "grad_norm": 1.2569508387667203, "learning_rate": 1.587748824916422e-07, "loss": 0.0444, "step": 16878 }, { "epoch": 3.840500568828214, "grad_norm": 1.6548693677178834, "learning_rate": 1.5871538011788965e-07, "loss": 0.027, "step": 16879 }, { "epoch": 3.8407281001137656, "grad_norm": 1.300017454791297, "learning_rate": 1.5865588727414055e-07, "loss": 0.0777, "step": 16880 }, { "epoch": 3.8409556313993174, "grad_norm": 1.7845633525932245, "learning_rate": 1.585964039616112e-07, "loss": 0.0785, "step": 16881 }, { "epoch": 3.841183162684869, "grad_norm": 1.4071136109357611, "learning_rate": 1.5853693018151707e-07, "loss": 0.0773, "step": 16882 }, { "epoch": 3.841410693970421, "grad_norm": 1.602317934891578, "learning_rate": 1.5847746593507394e-07, "loss": 0.0816, "step": 16883 }, { "epoch": 3.8416382252559726, "grad_norm": 2.0760972223544703, "learning_rate": 1.5841801122349684e-07, "loss": 0.02, "step": 16884 }, { "epoch": 3.8418657565415244, "grad_norm": 1.3324970387733983, "learning_rate": 1.583585660480011e-07, "loss": 0.0252, "step": 16885 }, { "epoch": 3.842093287827076, "grad_norm": 1.6611335725440244, "learning_rate": 1.582991304098018e-07, "loss": 0.0419, "step": 16886 }, { "epoch": 3.842320819112628, "grad_norm": 1.5905414537752836, "learning_rate": 1.5823970431011346e-07, "loss": 0.0225, "step": 16887 }, { "epoch": 3.8425483503981797, "grad_norm": 1.7166645015982107, "learning_rate": 1.581802877501508e-07, "loss": 0.0121, "step": 16888 }, { "epoch": 3.8427758816837314, "grad_norm": 1.4569398156727877, "learning_rate": 1.5812088073112803e-07, "loss": 0.1415, "step": 16889 }, { "epoch": 3.843003412969283, "grad_norm": 1.524608603072909, "learning_rate": 1.580614832542595e-07, "loss": 0.113, "step": 16890 }, { "epoch": 3.843230944254835, "grad_norm": 0.976757634795408, "learning_rate": 1.58002095320759e-07, "loss": 0.046, "step": 16891 }, { "epoch": 3.8434584755403867, "grad_norm": 0.9558580961912094, "learning_rate": 1.5794271693184038e-07, "loss": 0.0067, "step": 16892 }, { "epoch": 3.8436860068259384, "grad_norm": 1.2639897516358494, "learning_rate": 1.5788334808871736e-07, "loss": 0.0136, "step": 16893 }, { "epoch": 3.84391353811149, "grad_norm": 2.1665099794373974, "learning_rate": 1.5782398879260315e-07, "loss": 0.0212, "step": 16894 }, { "epoch": 3.8441410693970424, "grad_norm": 1.074296468106035, "learning_rate": 1.5776463904471116e-07, "loss": 0.0427, "step": 16895 }, { "epoch": 3.8443686006825937, "grad_norm": 12.230849726808257, "learning_rate": 1.57705298846254e-07, "loss": 0.0847, "step": 16896 }, { "epoch": 3.844596131968146, "grad_norm": 1.1926673754722692, "learning_rate": 1.5764596819844478e-07, "loss": 0.1329, "step": 16897 }, { "epoch": 3.844823663253697, "grad_norm": 1.5094915020880515, "learning_rate": 1.5758664710249624e-07, "loss": 0.0905, "step": 16898 }, { "epoch": 3.8450511945392494, "grad_norm": 2.9249871741188813, "learning_rate": 1.575273355596204e-07, "loss": 0.1622, "step": 16899 }, { "epoch": 3.8452787258248007, "grad_norm": 1.3324888744609484, "learning_rate": 1.574680335710299e-07, "loss": 0.0372, "step": 16900 }, { "epoch": 3.845506257110353, "grad_norm": 1.7437641396088857, "learning_rate": 1.5740874113793631e-07, "loss": 0.0454, "step": 16901 }, { "epoch": 3.845733788395904, "grad_norm": 2.043383798338887, "learning_rate": 1.5734945826155195e-07, "loss": 0.0358, "step": 16902 }, { "epoch": 3.8459613196814564, "grad_norm": 1.4241497985012017, "learning_rate": 1.57290184943088e-07, "loss": 0.0233, "step": 16903 }, { "epoch": 3.8461888509670077, "grad_norm": 1.2172162572778729, "learning_rate": 1.5723092118375603e-07, "loss": 0.0748, "step": 16904 }, { "epoch": 3.84641638225256, "grad_norm": 0.7417082274018395, "learning_rate": 1.571716669847676e-07, "loss": 0.043, "step": 16905 }, { "epoch": 3.8466439135381116, "grad_norm": 1.4521854511752939, "learning_rate": 1.571124223473333e-07, "loss": 0.025, "step": 16906 }, { "epoch": 3.8468714448236634, "grad_norm": 1.514835225877993, "learning_rate": 1.5705318727266445e-07, "loss": 0.0752, "step": 16907 }, { "epoch": 3.847098976109215, "grad_norm": 1.1397601855563395, "learning_rate": 1.5699396176197117e-07, "loss": 0.0185, "step": 16908 }, { "epoch": 3.847326507394767, "grad_norm": 2.341987129629369, "learning_rate": 1.5693474581646448e-07, "loss": 0.034, "step": 16909 }, { "epoch": 3.8475540386803186, "grad_norm": 1.2025218395480524, "learning_rate": 1.568755394373541e-07, "loss": 0.0091, "step": 16910 }, { "epoch": 3.8477815699658704, "grad_norm": 1.086267025287573, "learning_rate": 1.5681634262585037e-07, "loss": 0.0589, "step": 16911 }, { "epoch": 3.848009101251422, "grad_norm": 1.3881119907790131, "learning_rate": 1.5675715538316336e-07, "loss": 0.0853, "step": 16912 }, { "epoch": 3.848236632536974, "grad_norm": 2.4150895481728454, "learning_rate": 1.5669797771050237e-07, "loss": 0.0267, "step": 16913 }, { "epoch": 3.8484641638225257, "grad_norm": 1.927034118225448, "learning_rate": 1.566388096090772e-07, "loss": 0.0398, "step": 16914 }, { "epoch": 3.8486916951080774, "grad_norm": 1.4888355526393218, "learning_rate": 1.5657965108009689e-07, "loss": 0.0529, "step": 16915 }, { "epoch": 3.848919226393629, "grad_norm": 0.9279312813542301, "learning_rate": 1.5652050212477073e-07, "loss": 0.0484, "step": 16916 }, { "epoch": 3.849146757679181, "grad_norm": 1.2987692609137516, "learning_rate": 1.5646136274430742e-07, "loss": 0.0319, "step": 16917 }, { "epoch": 3.8493742889647327, "grad_norm": 1.6839700637317605, "learning_rate": 1.5640223293991577e-07, "loss": 0.0211, "step": 16918 }, { "epoch": 3.8496018202502844, "grad_norm": 1.2350667350943247, "learning_rate": 1.5634311271280443e-07, "loss": 0.0125, "step": 16919 }, { "epoch": 3.849829351535836, "grad_norm": 1.6103744022378788, "learning_rate": 1.562840020641814e-07, "loss": 0.0333, "step": 16920 }, { "epoch": 3.850056882821388, "grad_norm": 2.0869352958673226, "learning_rate": 1.562249009952551e-07, "loss": 0.0289, "step": 16921 }, { "epoch": 3.8502844141069397, "grad_norm": 2.0020829233087603, "learning_rate": 1.5616580950723334e-07, "loss": 0.0757, "step": 16922 }, { "epoch": 3.8505119453924914, "grad_norm": 0.9785561348869276, "learning_rate": 1.561067276013236e-07, "loss": 0.0512, "step": 16923 }, { "epoch": 3.850739476678043, "grad_norm": 1.1121674904607943, "learning_rate": 1.5604765527873365e-07, "loss": 0.0199, "step": 16924 }, { "epoch": 3.850967007963595, "grad_norm": 3.8456882142855844, "learning_rate": 1.5598859254067075e-07, "loss": 0.0243, "step": 16925 }, { "epoch": 3.8511945392491467, "grad_norm": 1.2404598300605034, "learning_rate": 1.5592953938834226e-07, "loss": 0.0186, "step": 16926 }, { "epoch": 3.8514220705346984, "grad_norm": 1.2993569205492175, "learning_rate": 1.558704958229547e-07, "loss": 0.0338, "step": 16927 }, { "epoch": 3.85164960182025, "grad_norm": 0.7180670739415422, "learning_rate": 1.5581146184571522e-07, "loss": 0.0388, "step": 16928 }, { "epoch": 3.851877133105802, "grad_norm": 1.612953923056621, "learning_rate": 1.5575243745783023e-07, "loss": 0.0697, "step": 16929 }, { "epoch": 3.8521046643913537, "grad_norm": 1.578013613946795, "learning_rate": 1.5569342266050585e-07, "loss": 0.0509, "step": 16930 }, { "epoch": 3.8523321956769054, "grad_norm": 1.8941235027255259, "learning_rate": 1.556344174549484e-07, "loss": 0.022, "step": 16931 }, { "epoch": 3.852559726962457, "grad_norm": 1.153543322611633, "learning_rate": 1.5557542184236384e-07, "loss": 0.0177, "step": 16932 }, { "epoch": 3.852787258248009, "grad_norm": 0.9524617490446291, "learning_rate": 1.5551643582395817e-07, "loss": 0.0588, "step": 16933 }, { "epoch": 3.853014789533561, "grad_norm": 1.8228879716729451, "learning_rate": 1.554574594009367e-07, "loss": 0.0209, "step": 16934 }, { "epoch": 3.8532423208191124, "grad_norm": 2.04983965745323, "learning_rate": 1.5539849257450466e-07, "loss": 0.104, "step": 16935 }, { "epoch": 3.8534698521046646, "grad_norm": 2.6723978760784406, "learning_rate": 1.5533953534586755e-07, "loss": 0.0398, "step": 16936 }, { "epoch": 3.853697383390216, "grad_norm": 1.2658723353054555, "learning_rate": 1.5528058771623e-07, "loss": 0.024, "step": 16937 }, { "epoch": 3.853924914675768, "grad_norm": 1.6252750711478032, "learning_rate": 1.5522164968679706e-07, "loss": 0.0538, "step": 16938 }, { "epoch": 3.8541524459613195, "grad_norm": 1.0987345262971289, "learning_rate": 1.5516272125877322e-07, "loss": 0.0152, "step": 16939 }, { "epoch": 3.8543799772468716, "grad_norm": 1.3909079005256293, "learning_rate": 1.55103802433363e-07, "loss": 0.0558, "step": 16940 }, { "epoch": 3.854607508532423, "grad_norm": 1.8349881573225975, "learning_rate": 1.5504489321177063e-07, "loss": 0.0303, "step": 16941 }, { "epoch": 3.854835039817975, "grad_norm": 5.180993924386574, "learning_rate": 1.5498599359519966e-07, "loss": 0.0912, "step": 16942 }, { "epoch": 3.8550625711035265, "grad_norm": 0.8201551044538862, "learning_rate": 1.5492710358485436e-07, "loss": 0.0032, "step": 16943 }, { "epoch": 3.8552901023890787, "grad_norm": 1.9677767231006935, "learning_rate": 1.5486822318193804e-07, "loss": 0.0184, "step": 16944 }, { "epoch": 3.8555176336746304, "grad_norm": 1.6933448459716192, "learning_rate": 1.5480935238765426e-07, "loss": 0.0613, "step": 16945 }, { "epoch": 3.855745164960182, "grad_norm": 1.5734994931637485, "learning_rate": 1.5475049120320613e-07, "loss": 0.0229, "step": 16946 }, { "epoch": 3.855972696245734, "grad_norm": 1.4349147295837181, "learning_rate": 1.5469163962979698e-07, "loss": 0.0357, "step": 16947 }, { "epoch": 3.8562002275312857, "grad_norm": 1.3945109574470216, "learning_rate": 1.5463279766862932e-07, "loss": 0.0354, "step": 16948 }, { "epoch": 3.8564277588168374, "grad_norm": 3.509663984749326, "learning_rate": 1.5457396532090573e-07, "loss": 0.0354, "step": 16949 }, { "epoch": 3.856655290102389, "grad_norm": 4.099871323122945, "learning_rate": 1.5451514258782892e-07, "loss": 0.0142, "step": 16950 }, { "epoch": 3.856882821387941, "grad_norm": 1.039505462129786, "learning_rate": 1.5445632947060072e-07, "loss": 0.0057, "step": 16951 }, { "epoch": 3.8571103526734927, "grad_norm": 2.0121792235952163, "learning_rate": 1.5439752597042341e-07, "loss": 0.0471, "step": 16952 }, { "epoch": 3.8573378839590444, "grad_norm": 1.2545722008797924, "learning_rate": 1.5433873208849898e-07, "loss": 0.0139, "step": 16953 }, { "epoch": 3.857565415244596, "grad_norm": 1.6075824956639602, "learning_rate": 1.5427994782602867e-07, "loss": 0.075, "step": 16954 }, { "epoch": 3.857792946530148, "grad_norm": 0.7539914648798185, "learning_rate": 1.5422117318421435e-07, "loss": 0.0103, "step": 16955 }, { "epoch": 3.8580204778156997, "grad_norm": 1.5592809664150205, "learning_rate": 1.541624081642569e-07, "loss": 0.1317, "step": 16956 }, { "epoch": 3.8582480091012514, "grad_norm": 0.9475706692946039, "learning_rate": 1.541036527673577e-07, "loss": 0.0556, "step": 16957 }, { "epoch": 3.858475540386803, "grad_norm": 2.503561102146258, "learning_rate": 1.5404490699471704e-07, "loss": 0.0172, "step": 16958 }, { "epoch": 3.858703071672355, "grad_norm": 1.3575288908194996, "learning_rate": 1.539861708475364e-07, "loss": 0.0249, "step": 16959 }, { "epoch": 3.8589306029579067, "grad_norm": 1.049691932407562, "learning_rate": 1.539274443270157e-07, "loss": 0.0298, "step": 16960 }, { "epoch": 3.8591581342434584, "grad_norm": 1.6236298081763656, "learning_rate": 1.538687274343552e-07, "loss": 0.0107, "step": 16961 }, { "epoch": 3.85938566552901, "grad_norm": 1.838043535562507, "learning_rate": 1.5381002017075527e-07, "loss": 0.0143, "step": 16962 }, { "epoch": 3.859613196814562, "grad_norm": 0.8677497391055548, "learning_rate": 1.5375132253741537e-07, "loss": 0.008, "step": 16963 }, { "epoch": 3.8598407281001137, "grad_norm": 1.643579670211497, "learning_rate": 1.5369263453553538e-07, "loss": 0.0224, "step": 16964 }, { "epoch": 3.8600682593856654, "grad_norm": 1.1739429333558178, "learning_rate": 1.5363395616631477e-07, "loss": 0.0076, "step": 16965 }, { "epoch": 3.860295790671217, "grad_norm": 1.4931031970588742, "learning_rate": 1.535752874309531e-07, "loss": 0.037, "step": 16966 }, { "epoch": 3.860523321956769, "grad_norm": 2.779282927344376, "learning_rate": 1.5351662833064912e-07, "loss": 0.0393, "step": 16967 }, { "epoch": 3.8607508532423207, "grad_norm": 1.4826744855220768, "learning_rate": 1.5345797886660158e-07, "loss": 0.0174, "step": 16968 }, { "epoch": 3.8609783845278725, "grad_norm": 1.174302801803085, "learning_rate": 1.533993390400096e-07, "loss": 0.0166, "step": 16969 }, { "epoch": 3.861205915813424, "grad_norm": 1.2142954840582463, "learning_rate": 1.5334070885207115e-07, "loss": 0.0651, "step": 16970 }, { "epoch": 3.861433447098976, "grad_norm": 1.796762775696323, "learning_rate": 1.5328208830398492e-07, "loss": 0.1474, "step": 16971 }, { "epoch": 3.8616609783845277, "grad_norm": 2.9834848275571844, "learning_rate": 1.5322347739694897e-07, "loss": 0.017, "step": 16972 }, { "epoch": 3.86188850967008, "grad_norm": 0.6403917413281598, "learning_rate": 1.53164876132161e-07, "loss": 0.0048, "step": 16973 }, { "epoch": 3.862116040955631, "grad_norm": 0.8939745063746485, "learning_rate": 1.5310628451081901e-07, "loss": 0.0411, "step": 16974 }, { "epoch": 3.8623435722411834, "grad_norm": 1.7026903722715374, "learning_rate": 1.5304770253412017e-07, "loss": 0.0443, "step": 16975 }, { "epoch": 3.8625711035267347, "grad_norm": 1.8164917376259786, "learning_rate": 1.5298913020326212e-07, "loss": 0.0178, "step": 16976 }, { "epoch": 3.862798634812287, "grad_norm": 1.4201506806575535, "learning_rate": 1.529305675194416e-07, "loss": 0.0466, "step": 16977 }, { "epoch": 3.863026166097838, "grad_norm": 1.2223873065875999, "learning_rate": 1.528720144838558e-07, "loss": 0.0951, "step": 16978 }, { "epoch": 3.8632536973833904, "grad_norm": 3.033204542832765, "learning_rate": 1.5281347109770146e-07, "loss": 0.0359, "step": 16979 }, { "epoch": 3.8634812286689417, "grad_norm": 2.0494740227673356, "learning_rate": 1.5275493736217485e-07, "loss": 0.0138, "step": 16980 }, { "epoch": 3.863708759954494, "grad_norm": 1.6710088358680508, "learning_rate": 1.5269641327847264e-07, "loss": 0.1194, "step": 16981 }, { "epoch": 3.8639362912400452, "grad_norm": 2.133189866546891, "learning_rate": 1.5263789884779056e-07, "loss": 0.0561, "step": 16982 }, { "epoch": 3.8641638225255974, "grad_norm": 1.5093375081218128, "learning_rate": 1.5257939407132487e-07, "loss": 0.0393, "step": 16983 }, { "epoch": 3.864391353811149, "grad_norm": 4.046415030732674, "learning_rate": 1.5252089895027106e-07, "loss": 0.0417, "step": 16984 }, { "epoch": 3.864618885096701, "grad_norm": 1.3884562614740954, "learning_rate": 1.5246241348582477e-07, "loss": 0.075, "step": 16985 }, { "epoch": 3.8648464163822527, "grad_norm": 1.2511419485344135, "learning_rate": 1.5240393767918146e-07, "loss": 0.0266, "step": 16986 }, { "epoch": 3.8650739476678044, "grad_norm": 2.18872071822384, "learning_rate": 1.5234547153153604e-07, "loss": 0.0231, "step": 16987 }, { "epoch": 3.865301478953356, "grad_norm": 0.7446030479047754, "learning_rate": 1.5228701504408366e-07, "loss": 0.0038, "step": 16988 }, { "epoch": 3.865529010238908, "grad_norm": 1.5743974913330328, "learning_rate": 1.5222856821801884e-07, "loss": 0.0693, "step": 16989 }, { "epoch": 3.8657565415244597, "grad_norm": 1.1894463055669846, "learning_rate": 1.5217013105453642e-07, "loss": 0.0799, "step": 16990 }, { "epoch": 3.8659840728100114, "grad_norm": 1.6115234123400597, "learning_rate": 1.5211170355483036e-07, "loss": 0.0274, "step": 16991 }, { "epoch": 3.866211604095563, "grad_norm": 1.1934457348245575, "learning_rate": 1.52053285720095e-07, "loss": 0.0114, "step": 16992 }, { "epoch": 3.866439135381115, "grad_norm": 0.9587924714758593, "learning_rate": 1.519948775515246e-07, "loss": 0.0606, "step": 16993 }, { "epoch": 3.8666666666666667, "grad_norm": 4.609345944586875, "learning_rate": 1.5193647905031236e-07, "loss": 0.0195, "step": 16994 }, { "epoch": 3.8668941979522184, "grad_norm": 2.0374098837711108, "learning_rate": 1.5187809021765233e-07, "loss": 0.0495, "step": 16995 }, { "epoch": 3.86712172923777, "grad_norm": 1.4915767216794635, "learning_rate": 1.5181971105473744e-07, "loss": 0.0618, "step": 16996 }, { "epoch": 3.867349260523322, "grad_norm": 2.240791211305632, "learning_rate": 1.5176134156276133e-07, "loss": 0.0746, "step": 16997 }, { "epoch": 3.8675767918088737, "grad_norm": 1.0109217139942668, "learning_rate": 1.5170298174291643e-07, "loss": 0.0081, "step": 16998 }, { "epoch": 3.8678043230944255, "grad_norm": 3.781923880569875, "learning_rate": 1.5164463159639584e-07, "loss": 0.1122, "step": 16999 }, { "epoch": 3.868031854379977, "grad_norm": 1.1151380497605483, "learning_rate": 1.5158629112439226e-07, "loss": 0.0168, "step": 17000 }, { "epoch": 3.868259385665529, "grad_norm": 1.5064739755332581, "learning_rate": 1.5152796032809765e-07, "loss": 0.0224, "step": 17001 }, { "epoch": 3.8684869169510807, "grad_norm": 1.1983210370709882, "learning_rate": 1.5146963920870464e-07, "loss": 0.0396, "step": 17002 }, { "epoch": 3.8687144482366325, "grad_norm": 1.1463716417663175, "learning_rate": 1.5141132776740473e-07, "loss": 0.0525, "step": 17003 }, { "epoch": 3.868941979522184, "grad_norm": 0.9385757184630417, "learning_rate": 1.5135302600539013e-07, "loss": 0.0074, "step": 17004 }, { "epoch": 3.869169510807736, "grad_norm": 1.305486653560655, "learning_rate": 1.5129473392385208e-07, "loss": 0.0762, "step": 17005 }, { "epoch": 3.8693970420932877, "grad_norm": 1.5582639671237843, "learning_rate": 1.512364515239821e-07, "loss": 0.0923, "step": 17006 }, { "epoch": 3.8696245733788395, "grad_norm": 1.4700526545699382, "learning_rate": 1.5117817880697161e-07, "loss": 0.0961, "step": 17007 }, { "epoch": 3.8698521046643912, "grad_norm": 1.5672285237026264, "learning_rate": 1.511199157740112e-07, "loss": 0.0374, "step": 17008 }, { "epoch": 3.870079635949943, "grad_norm": 1.6299366472528434, "learning_rate": 1.5106166242629199e-07, "loss": 0.1167, "step": 17009 }, { "epoch": 3.8703071672354947, "grad_norm": 2.0105248623339165, "learning_rate": 1.5100341876500445e-07, "loss": 0.0685, "step": 17010 }, { "epoch": 3.8705346985210465, "grad_norm": 0.9957182693195173, "learning_rate": 1.5094518479133874e-07, "loss": 0.016, "step": 17011 }, { "epoch": 3.8707622298065987, "grad_norm": 1.1719169034822943, "learning_rate": 1.5088696050648526e-07, "loss": 0.0458, "step": 17012 }, { "epoch": 3.87098976109215, "grad_norm": 1.780429702531272, "learning_rate": 1.5082874591163407e-07, "loss": 0.1654, "step": 17013 }, { "epoch": 3.871217292377702, "grad_norm": 1.4828643261786665, "learning_rate": 1.5077054100797502e-07, "loss": 0.0521, "step": 17014 }, { "epoch": 3.8714448236632535, "grad_norm": 1.4858592355514715, "learning_rate": 1.5071234579669747e-07, "loss": 0.0157, "step": 17015 }, { "epoch": 3.8716723549488057, "grad_norm": 1.3356370114725464, "learning_rate": 1.5065416027899107e-07, "loss": 0.0289, "step": 17016 }, { "epoch": 3.871899886234357, "grad_norm": 0.8445765048898649, "learning_rate": 1.50595984456045e-07, "loss": 0.0047, "step": 17017 }, { "epoch": 3.872127417519909, "grad_norm": 1.9653291027785456, "learning_rate": 1.5053781832904776e-07, "loss": 0.1687, "step": 17018 }, { "epoch": 3.8723549488054605, "grad_norm": 1.1551778049136041, "learning_rate": 1.504796618991889e-07, "loss": 0.0555, "step": 17019 }, { "epoch": 3.8725824800910127, "grad_norm": 1.4390484057062323, "learning_rate": 1.5042151516765663e-07, "loss": 0.041, "step": 17020 }, { "epoch": 3.8728100113765644, "grad_norm": 1.2271089175978374, "learning_rate": 1.503633781356395e-07, "loss": 0.0117, "step": 17021 }, { "epoch": 3.873037542662116, "grad_norm": 0.7624245701773512, "learning_rate": 1.503052508043256e-07, "loss": 0.0065, "step": 17022 }, { "epoch": 3.873265073947668, "grad_norm": 1.0158770726025887, "learning_rate": 1.5024713317490316e-07, "loss": 0.0575, "step": 17023 }, { "epoch": 3.8734926052332197, "grad_norm": 2.3436178744541327, "learning_rate": 1.501890252485596e-07, "loss": 0.0323, "step": 17024 }, { "epoch": 3.8737201365187715, "grad_norm": 2.903763968050673, "learning_rate": 1.5013092702648286e-07, "loss": 0.0283, "step": 17025 }, { "epoch": 3.873947667804323, "grad_norm": 1.1209792785264905, "learning_rate": 1.5007283850986044e-07, "loss": 0.0405, "step": 17026 }, { "epoch": 3.874175199089875, "grad_norm": 1.6354051218325527, "learning_rate": 1.5001475969987925e-07, "loss": 0.0423, "step": 17027 }, { "epoch": 3.8744027303754267, "grad_norm": 1.1935201369644408, "learning_rate": 1.4995669059772662e-07, "loss": 0.0122, "step": 17028 }, { "epoch": 3.8746302616609785, "grad_norm": 0.5523853694365732, "learning_rate": 1.498986312045893e-07, "loss": 0.0014, "step": 17029 }, { "epoch": 3.87485779294653, "grad_norm": 1.3724629115155582, "learning_rate": 1.498405815216536e-07, "loss": 0.0772, "step": 17030 }, { "epoch": 3.875085324232082, "grad_norm": 1.2209110500987725, "learning_rate": 1.4978254155010628e-07, "loss": 0.0504, "step": 17031 }, { "epoch": 3.8753128555176337, "grad_norm": 2.259407334777458, "learning_rate": 1.4972451129113343e-07, "loss": 0.112, "step": 17032 }, { "epoch": 3.8755403868031855, "grad_norm": 1.2262409925714886, "learning_rate": 1.4966649074592132e-07, "loss": 0.023, "step": 17033 }, { "epoch": 3.875767918088737, "grad_norm": 2.6432487905310103, "learning_rate": 1.4960847991565544e-07, "loss": 0.0167, "step": 17034 }, { "epoch": 3.875995449374289, "grad_norm": 1.6886813598868282, "learning_rate": 1.4955047880152181e-07, "loss": 0.0392, "step": 17035 }, { "epoch": 3.8762229806598407, "grad_norm": 2.0237295122172605, "learning_rate": 1.4949248740470559e-07, "loss": 0.0158, "step": 17036 }, { "epoch": 3.8764505119453925, "grad_norm": 1.3693058439184373, "learning_rate": 1.4943450572639192e-07, "loss": 0.0331, "step": 17037 }, { "epoch": 3.8766780432309442, "grad_norm": 2.7724532525054473, "learning_rate": 1.49376533767766e-07, "loss": 0.0296, "step": 17038 }, { "epoch": 3.876905574516496, "grad_norm": 2.997756592210061, "learning_rate": 1.4931857153001265e-07, "loss": 0.1345, "step": 17039 }, { "epoch": 3.8771331058020477, "grad_norm": 1.1648622382447609, "learning_rate": 1.4926061901431679e-07, "loss": 0.0474, "step": 17040 }, { "epoch": 3.8773606370875995, "grad_norm": 1.047596506687292, "learning_rate": 1.492026762218625e-07, "loss": 0.0086, "step": 17041 }, { "epoch": 3.8775881683731512, "grad_norm": 0.5863846820061691, "learning_rate": 1.49144743153834e-07, "loss": 0.0035, "step": 17042 }, { "epoch": 3.877815699658703, "grad_norm": 1.8465885289189494, "learning_rate": 1.4908681981141558e-07, "loss": 0.0114, "step": 17043 }, { "epoch": 3.8780432309442547, "grad_norm": 1.2972589268308188, "learning_rate": 1.4902890619579085e-07, "loss": 0.0913, "step": 17044 }, { "epoch": 3.8782707622298065, "grad_norm": 1.0449303579651292, "learning_rate": 1.489710023081436e-07, "loss": 0.0425, "step": 17045 }, { "epoch": 3.8784982935153582, "grad_norm": 2.3237703644298553, "learning_rate": 1.489131081496572e-07, "loss": 0.0332, "step": 17046 }, { "epoch": 3.87872582480091, "grad_norm": 1.320510306836443, "learning_rate": 1.4885522372151516e-07, "loss": 0.0099, "step": 17047 }, { "epoch": 3.8789533560864617, "grad_norm": 2.759570560927932, "learning_rate": 1.4879734902490033e-07, "loss": 0.0307, "step": 17048 }, { "epoch": 3.8791808873720135, "grad_norm": 2.2930137455264608, "learning_rate": 1.4873948406099535e-07, "loss": 0.0619, "step": 17049 }, { "epoch": 3.8794084186575652, "grad_norm": 2.520560772271782, "learning_rate": 1.486816288309833e-07, "loss": 0.0409, "step": 17050 }, { "epoch": 3.8796359499431174, "grad_norm": 2.1666321467821317, "learning_rate": 1.4862378333604627e-07, "loss": 0.0379, "step": 17051 }, { "epoch": 3.8798634812286688, "grad_norm": 1.2443236992453346, "learning_rate": 1.485659475773666e-07, "loss": 0.0541, "step": 17052 }, { "epoch": 3.880091012514221, "grad_norm": 1.2022646335742588, "learning_rate": 1.4850812155612648e-07, "loss": 0.0768, "step": 17053 }, { "epoch": 3.8803185437997723, "grad_norm": 1.0702687830771944, "learning_rate": 1.4845030527350776e-07, "loss": 0.0081, "step": 17054 }, { "epoch": 3.8805460750853245, "grad_norm": 1.4196561413792446, "learning_rate": 1.4839249873069215e-07, "loss": 0.0168, "step": 17055 }, { "epoch": 3.8807736063708758, "grad_norm": 3.615142243959072, "learning_rate": 1.4833470192886084e-07, "loss": 0.0375, "step": 17056 }, { "epoch": 3.881001137656428, "grad_norm": 1.641812357249254, "learning_rate": 1.4827691486919538e-07, "loss": 0.0171, "step": 17057 }, { "epoch": 3.8812286689419793, "grad_norm": 4.136329487558453, "learning_rate": 1.4821913755287652e-07, "loss": 0.0523, "step": 17058 }, { "epoch": 3.8814562002275315, "grad_norm": 1.6502922269555536, "learning_rate": 1.4816136998108527e-07, "loss": 0.0359, "step": 17059 }, { "epoch": 3.881683731513083, "grad_norm": 1.5350761068641532, "learning_rate": 1.481036121550026e-07, "loss": 0.0517, "step": 17060 }, { "epoch": 3.881911262798635, "grad_norm": 1.9396639221788805, "learning_rate": 1.4804586407580848e-07, "loss": 0.0662, "step": 17061 }, { "epoch": 3.8821387940841867, "grad_norm": 1.0653912954851859, "learning_rate": 1.4798812574468357e-07, "loss": 0.0245, "step": 17062 }, { "epoch": 3.8823663253697385, "grad_norm": 1.795950738924393, "learning_rate": 1.4793039716280758e-07, "loss": 0.0652, "step": 17063 }, { "epoch": 3.88259385665529, "grad_norm": 0.8110121958280269, "learning_rate": 1.4787267833136076e-07, "loss": 0.0318, "step": 17064 }, { "epoch": 3.882821387940842, "grad_norm": 1.1720620540914102, "learning_rate": 1.4781496925152238e-07, "loss": 0.0484, "step": 17065 }, { "epoch": 3.8830489192263937, "grad_norm": 2.331093434970467, "learning_rate": 1.4775726992447213e-07, "loss": 0.0654, "step": 17066 }, { "epoch": 3.8832764505119455, "grad_norm": 1.293267424171297, "learning_rate": 1.476995803513894e-07, "loss": 0.0278, "step": 17067 }, { "epoch": 3.8835039817974972, "grad_norm": 0.7566786341184336, "learning_rate": 1.4764190053345299e-07, "loss": 0.0095, "step": 17068 }, { "epoch": 3.883731513083049, "grad_norm": 2.450282289913112, "learning_rate": 1.4758423047184203e-07, "loss": 0.0404, "step": 17069 }, { "epoch": 3.8839590443686007, "grad_norm": 2.1452607310183898, "learning_rate": 1.475265701677349e-07, "loss": 0.0305, "step": 17070 }, { "epoch": 3.8841865756541525, "grad_norm": 1.7155738663335849, "learning_rate": 1.4746891962231037e-07, "loss": 0.0595, "step": 17071 }, { "epoch": 3.8844141069397042, "grad_norm": 2.107006410548398, "learning_rate": 1.4741127883674634e-07, "loss": 0.1108, "step": 17072 }, { "epoch": 3.884641638225256, "grad_norm": 1.218804968882839, "learning_rate": 1.4735364781222116e-07, "loss": 0.0945, "step": 17073 }, { "epoch": 3.8848691695108077, "grad_norm": 0.8453230757711131, "learning_rate": 1.4729602654991286e-07, "loss": 0.0251, "step": 17074 }, { "epoch": 3.8850967007963595, "grad_norm": 0.7679235431326168, "learning_rate": 1.4723841505099875e-07, "loss": 0.0074, "step": 17075 }, { "epoch": 3.8853242320819112, "grad_norm": 1.105819552938013, "learning_rate": 1.4718081331665655e-07, "loss": 0.0525, "step": 17076 }, { "epoch": 3.885551763367463, "grad_norm": 2.475937888988641, "learning_rate": 1.4712322134806328e-07, "loss": 0.0224, "step": 17077 }, { "epoch": 3.8857792946530147, "grad_norm": 2.274123556968982, "learning_rate": 1.4706563914639643e-07, "loss": 0.0121, "step": 17078 }, { "epoch": 3.8860068259385665, "grad_norm": 1.3942724455855113, "learning_rate": 1.4700806671283235e-07, "loss": 0.0664, "step": 17079 }, { "epoch": 3.8862343572241183, "grad_norm": 1.517710829074852, "learning_rate": 1.4695050404854804e-07, "loss": 0.0477, "step": 17080 }, { "epoch": 3.88646188850967, "grad_norm": 1.5803331516905859, "learning_rate": 1.4689295115472007e-07, "loss": 0.0404, "step": 17081 }, { "epoch": 3.8866894197952218, "grad_norm": 1.6460762717451076, "learning_rate": 1.468354080325244e-07, "loss": 0.0603, "step": 17082 }, { "epoch": 3.8869169510807735, "grad_norm": 1.4272397625608466, "learning_rate": 1.467778746831374e-07, "loss": 0.0395, "step": 17083 }, { "epoch": 3.8871444823663253, "grad_norm": 1.5207816032454322, "learning_rate": 1.4672035110773474e-07, "loss": 0.0815, "step": 17084 }, { "epoch": 3.887372013651877, "grad_norm": 1.1351237507032945, "learning_rate": 1.4666283730749214e-07, "loss": 0.0098, "step": 17085 }, { "epoch": 3.8875995449374288, "grad_norm": 1.335519160258612, "learning_rate": 1.4660533328358525e-07, "loss": 0.0148, "step": 17086 }, { "epoch": 3.8878270762229805, "grad_norm": 1.365571857566174, "learning_rate": 1.4654783903718903e-07, "loss": 0.0192, "step": 17087 }, { "epoch": 3.8880546075085323, "grad_norm": 1.495543379416661, "learning_rate": 1.4649035456947896e-07, "loss": 0.0607, "step": 17088 }, { "epoch": 3.888282138794084, "grad_norm": 1.6858563914795697, "learning_rate": 1.4643287988162954e-07, "loss": 0.0673, "step": 17089 }, { "epoch": 3.888509670079636, "grad_norm": 2.0672347193040386, "learning_rate": 1.4637541497481568e-07, "loss": 0.0278, "step": 17090 }, { "epoch": 3.8887372013651875, "grad_norm": 1.0586697336455964, "learning_rate": 1.4631795985021166e-07, "loss": 0.0314, "step": 17091 }, { "epoch": 3.8889647326507397, "grad_norm": 1.4788185739865267, "learning_rate": 1.462605145089919e-07, "loss": 0.0543, "step": 17092 }, { "epoch": 3.889192263936291, "grad_norm": 0.808748185262478, "learning_rate": 1.4620307895233062e-07, "loss": 0.0469, "step": 17093 }, { "epoch": 3.8894197952218432, "grad_norm": 0.7050073929383736, "learning_rate": 1.461456531814013e-07, "loss": 0.0047, "step": 17094 }, { "epoch": 3.8896473265073945, "grad_norm": 0.6727763966642678, "learning_rate": 1.4608823719737812e-07, "loss": 0.0085, "step": 17095 }, { "epoch": 3.8898748577929467, "grad_norm": 0.785638635026637, "learning_rate": 1.4603083100143404e-07, "loss": 0.0379, "step": 17096 }, { "epoch": 3.890102389078498, "grad_norm": 1.0301109849708239, "learning_rate": 1.4597343459474277e-07, "loss": 0.0208, "step": 17097 }, { "epoch": 3.8903299203640502, "grad_norm": 1.2857957238733881, "learning_rate": 1.45916047978477e-07, "loss": 0.0119, "step": 17098 }, { "epoch": 3.890557451649602, "grad_norm": 1.444177862593212, "learning_rate": 1.4585867115380986e-07, "loss": 0.0216, "step": 17099 }, { "epoch": 3.8907849829351537, "grad_norm": 0.8352871003272329, "learning_rate": 1.458013041219141e-07, "loss": 0.0151, "step": 17100 }, { "epoch": 3.8910125142207055, "grad_norm": 1.0240667182656602, "learning_rate": 1.4574394688396192e-07, "loss": 0.0248, "step": 17101 }, { "epoch": 3.8912400455062572, "grad_norm": 1.0384935731952474, "learning_rate": 1.4568659944112592e-07, "loss": 0.059, "step": 17102 }, { "epoch": 3.891467576791809, "grad_norm": 1.5820853658791028, "learning_rate": 1.4562926179457787e-07, "loss": 0.1132, "step": 17103 }, { "epoch": 3.8916951080773607, "grad_norm": 1.237761018667376, "learning_rate": 1.4557193394548994e-07, "loss": 0.035, "step": 17104 }, { "epoch": 3.8919226393629125, "grad_norm": 1.42690409628255, "learning_rate": 1.4551461589503345e-07, "loss": 0.0615, "step": 17105 }, { "epoch": 3.8921501706484642, "grad_norm": 1.0582526171775295, "learning_rate": 1.4545730764438008e-07, "loss": 0.0082, "step": 17106 }, { "epoch": 3.892377701934016, "grad_norm": 1.2461739841588306, "learning_rate": 1.454000091947013e-07, "loss": 0.0345, "step": 17107 }, { "epoch": 3.8926052332195678, "grad_norm": 0.7298878377715212, "learning_rate": 1.4534272054716782e-07, "loss": 0.0071, "step": 17108 }, { "epoch": 3.8928327645051195, "grad_norm": 1.4849357942713335, "learning_rate": 1.4528544170295082e-07, "loss": 0.022, "step": 17109 }, { "epoch": 3.8930602957906713, "grad_norm": 1.3954937562552072, "learning_rate": 1.4522817266322063e-07, "loss": 0.0635, "step": 17110 }, { "epoch": 3.893287827076223, "grad_norm": 1.5751883392315034, "learning_rate": 1.4517091342914818e-07, "loss": 0.0242, "step": 17111 }, { "epoch": 3.8935153583617748, "grad_norm": 1.9174523557552885, "learning_rate": 1.4511366400190332e-07, "loss": 0.0481, "step": 17112 }, { "epoch": 3.8937428896473265, "grad_norm": 1.0997896921309334, "learning_rate": 1.4505642438265627e-07, "loss": 0.0432, "step": 17113 }, { "epoch": 3.8939704209328783, "grad_norm": 1.0075618596680014, "learning_rate": 1.4499919457257704e-07, "loss": 0.0208, "step": 17114 }, { "epoch": 3.89419795221843, "grad_norm": 1.3953057530622524, "learning_rate": 1.449419745728351e-07, "loss": 0.0166, "step": 17115 }, { "epoch": 3.8944254835039818, "grad_norm": 1.0973290826031306, "learning_rate": 1.448847643846002e-07, "loss": 0.0305, "step": 17116 }, { "epoch": 3.8946530147895335, "grad_norm": 2.5552127416762307, "learning_rate": 1.4482756400904137e-07, "loss": 0.1133, "step": 17117 }, { "epoch": 3.8948805460750853, "grad_norm": 1.41166495508508, "learning_rate": 1.4477037344732754e-07, "loss": 0.0194, "step": 17118 }, { "epoch": 3.895108077360637, "grad_norm": 2.621529729272415, "learning_rate": 1.447131927006279e-07, "loss": 0.0327, "step": 17119 }, { "epoch": 3.8953356086461888, "grad_norm": 1.512406957905578, "learning_rate": 1.4465602177011089e-07, "loss": 0.0282, "step": 17120 }, { "epoch": 3.8955631399317405, "grad_norm": 1.3649037359849667, "learning_rate": 1.445988606569453e-07, "loss": 0.0134, "step": 17121 }, { "epoch": 3.8957906712172923, "grad_norm": 1.011170974709487, "learning_rate": 1.4454170936229902e-07, "loss": 0.0115, "step": 17122 }, { "epoch": 3.896018202502844, "grad_norm": 1.4432058587568608, "learning_rate": 1.4448456788734042e-07, "loss": 0.0396, "step": 17123 }, { "epoch": 3.896245733788396, "grad_norm": 1.7543584353263741, "learning_rate": 1.444274362332373e-07, "loss": 0.0952, "step": 17124 }, { "epoch": 3.8964732650739475, "grad_norm": 1.2311924544326984, "learning_rate": 1.44370314401157e-07, "loss": 0.1133, "step": 17125 }, { "epoch": 3.8967007963594993, "grad_norm": 1.1096683155041407, "learning_rate": 1.4431320239226728e-07, "loss": 0.0891, "step": 17126 }, { "epoch": 3.896928327645051, "grad_norm": 1.2010763771105355, "learning_rate": 1.4425610020773534e-07, "loss": 0.0128, "step": 17127 }, { "epoch": 3.897155858930603, "grad_norm": 2.315317033726398, "learning_rate": 1.4419900784872847e-07, "loss": 0.0454, "step": 17128 }, { "epoch": 3.897383390216155, "grad_norm": 2.625100976473711, "learning_rate": 1.4414192531641314e-07, "loss": 0.0538, "step": 17129 }, { "epoch": 3.8976109215017063, "grad_norm": 2.18046900604808, "learning_rate": 1.4408485261195642e-07, "loss": 0.0536, "step": 17130 }, { "epoch": 3.8978384527872585, "grad_norm": 1.6669481195624605, "learning_rate": 1.4402778973652456e-07, "loss": 0.0065, "step": 17131 }, { "epoch": 3.89806598407281, "grad_norm": 0.9704329343169763, "learning_rate": 1.439707366912836e-07, "loss": 0.0095, "step": 17132 }, { "epoch": 3.898293515358362, "grad_norm": 1.6719503903385942, "learning_rate": 1.4391369347739984e-07, "loss": 0.0953, "step": 17133 }, { "epoch": 3.8985210466439133, "grad_norm": 1.6690113938373274, "learning_rate": 1.4385666009603908e-07, "loss": 0.076, "step": 17134 }, { "epoch": 3.8987485779294655, "grad_norm": 2.265427276394839, "learning_rate": 1.4379963654836723e-07, "loss": 0.0564, "step": 17135 }, { "epoch": 3.898976109215017, "grad_norm": 1.0929515391840807, "learning_rate": 1.4374262283554956e-07, "loss": 0.0232, "step": 17136 }, { "epoch": 3.899203640500569, "grad_norm": 1.9483502494560085, "learning_rate": 1.4368561895875106e-07, "loss": 0.0113, "step": 17137 }, { "epoch": 3.8994311717861208, "grad_norm": 3.816412820231194, "learning_rate": 1.4362862491913724e-07, "loss": 0.0624, "step": 17138 }, { "epoch": 3.8996587030716725, "grad_norm": 2.3928106762864614, "learning_rate": 1.4357164071787236e-07, "loss": 0.0462, "step": 17139 }, { "epoch": 3.8998862343572243, "grad_norm": 2.8050844811683024, "learning_rate": 1.4351466635612183e-07, "loss": 0.0417, "step": 17140 }, { "epoch": 3.900113765642776, "grad_norm": 1.5219477443511846, "learning_rate": 1.4345770183504956e-07, "loss": 0.0604, "step": 17141 }, { "epoch": 3.9003412969283278, "grad_norm": 1.5272849401548436, "learning_rate": 1.4340074715582014e-07, "loss": 0.1378, "step": 17142 }, { "epoch": 3.9005688282138795, "grad_norm": 2.028644542994032, "learning_rate": 1.4334380231959738e-07, "loss": 0.0711, "step": 17143 }, { "epoch": 3.9007963594994313, "grad_norm": 2.493483178210126, "learning_rate": 1.4328686732754505e-07, "loss": 0.0448, "step": 17144 }, { "epoch": 3.901023890784983, "grad_norm": 2.027575698642228, "learning_rate": 1.4322994218082692e-07, "loss": 0.0662, "step": 17145 }, { "epoch": 3.9012514220705348, "grad_norm": 1.766250557433546, "learning_rate": 1.4317302688060642e-07, "loss": 0.0264, "step": 17146 }, { "epoch": 3.9014789533560865, "grad_norm": 1.3227817461881273, "learning_rate": 1.4311612142804708e-07, "loss": 0.0682, "step": 17147 }, { "epoch": 3.9017064846416383, "grad_norm": 1.9449197974078145, "learning_rate": 1.4305922582431167e-07, "loss": 0.0147, "step": 17148 }, { "epoch": 3.90193401592719, "grad_norm": 1.2180899044236768, "learning_rate": 1.4300234007056284e-07, "loss": 0.0425, "step": 17149 }, { "epoch": 3.9021615472127418, "grad_norm": 0.9038230552677609, "learning_rate": 1.4294546416796362e-07, "loss": 0.0597, "step": 17150 }, { "epoch": 3.9023890784982935, "grad_norm": 1.5002277520910414, "learning_rate": 1.4288859811767614e-07, "loss": 0.065, "step": 17151 }, { "epoch": 3.9026166097838453, "grad_norm": 1.8416558603326068, "learning_rate": 1.4283174192086267e-07, "loss": 0.0156, "step": 17152 }, { "epoch": 3.902844141069397, "grad_norm": 1.6110282610167095, "learning_rate": 1.4277489557868541e-07, "loss": 0.0282, "step": 17153 }, { "epoch": 3.903071672354949, "grad_norm": 1.2054809903083743, "learning_rate": 1.4271805909230634e-07, "loss": 0.061, "step": 17154 }, { "epoch": 3.9032992036405005, "grad_norm": 1.0056785503056402, "learning_rate": 1.4266123246288683e-07, "loss": 0.0092, "step": 17155 }, { "epoch": 3.9035267349260523, "grad_norm": 1.2764924293460695, "learning_rate": 1.4260441569158823e-07, "loss": 0.009, "step": 17156 }, { "epoch": 3.903754266211604, "grad_norm": 1.8561087770714313, "learning_rate": 1.4254760877957206e-07, "loss": 0.1109, "step": 17157 }, { "epoch": 3.903981797497156, "grad_norm": 1.8151497502185279, "learning_rate": 1.4249081172799904e-07, "loss": 0.0779, "step": 17158 }, { "epoch": 3.9042093287827075, "grad_norm": 1.7217734539717064, "learning_rate": 1.424340245380302e-07, "loss": 0.0363, "step": 17159 }, { "epoch": 3.9044368600682593, "grad_norm": 1.2725568467895465, "learning_rate": 1.4237724721082605e-07, "loss": 0.0507, "step": 17160 }, { "epoch": 3.904664391353811, "grad_norm": 4.588097294586528, "learning_rate": 1.4232047974754727e-07, "loss": 0.0759, "step": 17161 }, { "epoch": 3.904891922639363, "grad_norm": 2.185642563201324, "learning_rate": 1.422637221493539e-07, "loss": 0.1092, "step": 17162 }, { "epoch": 3.9051194539249146, "grad_norm": 2.9374551642307027, "learning_rate": 1.422069744174058e-07, "loss": 0.1106, "step": 17163 }, { "epoch": 3.9053469852104663, "grad_norm": 1.262883085143067, "learning_rate": 1.4215023655286314e-07, "loss": 0.0776, "step": 17164 }, { "epoch": 3.905574516496018, "grad_norm": 0.3478659478956447, "learning_rate": 1.420935085568851e-07, "loss": 0.0037, "step": 17165 }, { "epoch": 3.90580204778157, "grad_norm": 2.444523936209404, "learning_rate": 1.4203679043063141e-07, "loss": 0.0211, "step": 17166 }, { "epoch": 3.9060295790671216, "grad_norm": 1.2470667657891032, "learning_rate": 1.4198008217526135e-07, "loss": 0.0589, "step": 17167 }, { "epoch": 3.9062571103526738, "grad_norm": 1.1835418512655034, "learning_rate": 1.4192338379193365e-07, "loss": 0.0992, "step": 17168 }, { "epoch": 3.906484641638225, "grad_norm": 1.3003259988574771, "learning_rate": 1.4186669528180743e-07, "loss": 0.0125, "step": 17169 }, { "epoch": 3.9067121729237773, "grad_norm": 1.359717282207203, "learning_rate": 1.41810016646041e-07, "loss": 0.0189, "step": 17170 }, { "epoch": 3.9069397042093286, "grad_norm": 1.850516390629431, "learning_rate": 1.4175334788579304e-07, "loss": 0.1009, "step": 17171 }, { "epoch": 3.9071672354948808, "grad_norm": 1.6789695087841356, "learning_rate": 1.4169668900222151e-07, "loss": 0.0236, "step": 17172 }, { "epoch": 3.907394766780432, "grad_norm": 1.8005941659288516, "learning_rate": 1.416400399964845e-07, "loss": 0.1487, "step": 17173 }, { "epoch": 3.9076222980659843, "grad_norm": 1.147133015731603, "learning_rate": 1.4158340086973997e-07, "loss": 0.0106, "step": 17174 }, { "epoch": 3.9078498293515356, "grad_norm": 1.3476633641478784, "learning_rate": 1.4152677162314526e-07, "loss": 0.0366, "step": 17175 }, { "epoch": 3.9080773606370878, "grad_norm": 1.2543748311338505, "learning_rate": 1.4147015225785806e-07, "loss": 0.0079, "step": 17176 }, { "epoch": 3.9083048919226395, "grad_norm": 1.328445304255163, "learning_rate": 1.414135427750353e-07, "loss": 0.0182, "step": 17177 }, { "epoch": 3.9085324232081913, "grad_norm": 1.1583873086197622, "learning_rate": 1.4135694317583416e-07, "loss": 0.0152, "step": 17178 }, { "epoch": 3.908759954493743, "grad_norm": 0.9398610312140154, "learning_rate": 1.4130035346141123e-07, "loss": 0.0299, "step": 17179 }, { "epoch": 3.908987485779295, "grad_norm": 0.9141723509344224, "learning_rate": 1.412437736329232e-07, "loss": 0.0427, "step": 17180 }, { "epoch": 3.9092150170648465, "grad_norm": 2.19165414836094, "learning_rate": 1.4118720369152662e-07, "loss": 0.0834, "step": 17181 }, { "epoch": 3.9094425483503983, "grad_norm": 1.2395633788676943, "learning_rate": 1.4113064363837742e-07, "loss": 0.0106, "step": 17182 }, { "epoch": 3.90967007963595, "grad_norm": 3.188149116844447, "learning_rate": 1.4107409347463184e-07, "loss": 0.0648, "step": 17183 }, { "epoch": 3.909897610921502, "grad_norm": 2.198389723786086, "learning_rate": 1.4101755320144536e-07, "loss": 0.0331, "step": 17184 }, { "epoch": 3.9101251422070535, "grad_norm": 0.6706039140854361, "learning_rate": 1.409610228199739e-07, "loss": 0.0063, "step": 17185 }, { "epoch": 3.9103526734926053, "grad_norm": 1.5274379599608146, "learning_rate": 1.4090450233137244e-07, "loss": 0.0354, "step": 17186 }, { "epoch": 3.910580204778157, "grad_norm": 1.693869705250836, "learning_rate": 1.4084799173679644e-07, "loss": 0.0744, "step": 17187 }, { "epoch": 3.910807736063709, "grad_norm": 2.417508460264038, "learning_rate": 1.4079149103740094e-07, "loss": 0.0159, "step": 17188 }, { "epoch": 3.9110352673492605, "grad_norm": 1.785081950287186, "learning_rate": 1.4073500023434042e-07, "loss": 0.0103, "step": 17189 }, { "epoch": 3.9112627986348123, "grad_norm": 1.2455907733121, "learning_rate": 1.4067851932876976e-07, "loss": 0.0235, "step": 17190 }, { "epoch": 3.911490329920364, "grad_norm": 1.846064904868319, "learning_rate": 1.4062204832184305e-07, "loss": 0.015, "step": 17191 }, { "epoch": 3.911717861205916, "grad_norm": 1.5701719223528807, "learning_rate": 1.4056558721471465e-07, "loss": 0.0262, "step": 17192 }, { "epoch": 3.9119453924914676, "grad_norm": 0.5643851669423207, "learning_rate": 1.4050913600853838e-07, "loss": 0.0141, "step": 17193 }, { "epoch": 3.9121729237770193, "grad_norm": 1.4544684412152997, "learning_rate": 1.40452694704468e-07, "loss": 0.0083, "step": 17194 }, { "epoch": 3.912400455062571, "grad_norm": 2.6309328350246486, "learning_rate": 1.403962633036573e-07, "loss": 0.0569, "step": 17195 }, { "epoch": 3.912627986348123, "grad_norm": 0.6749585374910001, "learning_rate": 1.4033984180725935e-07, "loss": 0.0129, "step": 17196 }, { "epoch": 3.9128555176336746, "grad_norm": 0.6695916142594626, "learning_rate": 1.4028343021642757e-07, "loss": 0.0043, "step": 17197 }, { "epoch": 3.9130830489192263, "grad_norm": 1.8257935089804438, "learning_rate": 1.4022702853231457e-07, "loss": 0.0226, "step": 17198 }, { "epoch": 3.913310580204778, "grad_norm": 1.9186978240176475, "learning_rate": 1.4017063675607346e-07, "loss": 0.0263, "step": 17199 }, { "epoch": 3.91353811149033, "grad_norm": 1.5371678769446084, "learning_rate": 1.4011425488885642e-07, "loss": 0.0817, "step": 17200 }, { "epoch": 3.9137656427758816, "grad_norm": 0.9691466178371826, "learning_rate": 1.4005788293181604e-07, "loss": 0.0186, "step": 17201 }, { "epoch": 3.9139931740614333, "grad_norm": 0.847504467777277, "learning_rate": 1.4000152088610456e-07, "loss": 0.0204, "step": 17202 }, { "epoch": 3.914220705346985, "grad_norm": 1.175321665037465, "learning_rate": 1.3994516875287356e-07, "loss": 0.1114, "step": 17203 }, { "epoch": 3.914448236632537, "grad_norm": 0.43504580193291326, "learning_rate": 1.3988882653327518e-07, "loss": 0.0018, "step": 17204 }, { "epoch": 3.9146757679180886, "grad_norm": 1.643380793181983, "learning_rate": 1.3983249422846074e-07, "loss": 0.0159, "step": 17205 }, { "epoch": 3.9149032992036403, "grad_norm": 2.3357511044737733, "learning_rate": 1.3977617183958126e-07, "loss": 0.0319, "step": 17206 }, { "epoch": 3.9151308304891925, "grad_norm": 1.239755515053777, "learning_rate": 1.3971985936778843e-07, "loss": 0.0193, "step": 17207 }, { "epoch": 3.915358361774744, "grad_norm": 1.8512016923437151, "learning_rate": 1.396635568142328e-07, "loss": 0.0248, "step": 17208 }, { "epoch": 3.915585893060296, "grad_norm": 1.2710259430747008, "learning_rate": 1.3960726418006546e-07, "loss": 0.0353, "step": 17209 }, { "epoch": 3.9158134243458473, "grad_norm": 1.601346237211267, "learning_rate": 1.3955098146643641e-07, "loss": 0.0193, "step": 17210 }, { "epoch": 3.9160409556313995, "grad_norm": 1.1403416063832748, "learning_rate": 1.3949470867449646e-07, "loss": 0.041, "step": 17211 }, { "epoch": 3.916268486916951, "grad_norm": 2.046598010952856, "learning_rate": 1.3943844580539538e-07, "loss": 0.0597, "step": 17212 }, { "epoch": 3.916496018202503, "grad_norm": 1.2915225906441885, "learning_rate": 1.3938219286028314e-07, "loss": 0.0129, "step": 17213 }, { "epoch": 3.9167235494880543, "grad_norm": 1.1859127413534925, "learning_rate": 1.393259498403097e-07, "loss": 0.0073, "step": 17214 }, { "epoch": 3.9169510807736065, "grad_norm": 1.2032843659954862, "learning_rate": 1.392697167466242e-07, "loss": 0.0075, "step": 17215 }, { "epoch": 3.9171786120591583, "grad_norm": 1.1693725723930328, "learning_rate": 1.392134935803763e-07, "loss": 0.0579, "step": 17216 }, { "epoch": 3.91740614334471, "grad_norm": 2.372979578394685, "learning_rate": 1.391572803427147e-07, "loss": 0.0221, "step": 17217 }, { "epoch": 3.917633674630262, "grad_norm": 1.5222423655379678, "learning_rate": 1.391010770347887e-07, "loss": 0.0186, "step": 17218 }, { "epoch": 3.9178612059158135, "grad_norm": 4.051546803685135, "learning_rate": 1.390448836577467e-07, "loss": 0.022, "step": 17219 }, { "epoch": 3.9180887372013653, "grad_norm": 2.722115043048687, "learning_rate": 1.3898870021273722e-07, "loss": 0.0448, "step": 17220 }, { "epoch": 3.918316268486917, "grad_norm": 1.9822603037763293, "learning_rate": 1.3893252670090882e-07, "loss": 0.0517, "step": 17221 }, { "epoch": 3.918543799772469, "grad_norm": 0.815920277564396, "learning_rate": 1.388763631234092e-07, "loss": 0.012, "step": 17222 }, { "epoch": 3.9187713310580206, "grad_norm": 0.8400345185640246, "learning_rate": 1.388202094813866e-07, "loss": 0.0351, "step": 17223 }, { "epoch": 3.9189988623435723, "grad_norm": 1.3393766795734623, "learning_rate": 1.3876406577598852e-07, "loss": 0.1111, "step": 17224 }, { "epoch": 3.919226393629124, "grad_norm": 0.9484815384918542, "learning_rate": 1.3870793200836222e-07, "loss": 0.0087, "step": 17225 }, { "epoch": 3.919453924914676, "grad_norm": 1.12621239762288, "learning_rate": 1.3865180817965527e-07, "loss": 0.0138, "step": 17226 }, { "epoch": 3.9196814562002276, "grad_norm": 0.9775693757338475, "learning_rate": 1.385956942910146e-07, "loss": 0.0105, "step": 17227 }, { "epoch": 3.9199089874857793, "grad_norm": 1.2408741906176421, "learning_rate": 1.385395903435873e-07, "loss": 0.075, "step": 17228 }, { "epoch": 3.920136518771331, "grad_norm": 4.178004420269688, "learning_rate": 1.3848349633851974e-07, "loss": 0.0606, "step": 17229 }, { "epoch": 3.920364050056883, "grad_norm": 1.838415364028637, "learning_rate": 1.384274122769587e-07, "loss": 0.0088, "step": 17230 }, { "epoch": 3.9205915813424346, "grad_norm": 2.62838432567276, "learning_rate": 1.3837133816005015e-07, "loss": 0.0301, "step": 17231 }, { "epoch": 3.9208191126279863, "grad_norm": 1.0737541141157207, "learning_rate": 1.3831527398894012e-07, "loss": 0.0554, "step": 17232 }, { "epoch": 3.921046643913538, "grad_norm": 1.844531840043007, "learning_rate": 1.3825921976477453e-07, "loss": 0.0448, "step": 17233 }, { "epoch": 3.92127417519909, "grad_norm": 1.7949275034577985, "learning_rate": 1.3820317548869908e-07, "loss": 0.0137, "step": 17234 }, { "epoch": 3.9215017064846416, "grad_norm": 1.063133369183518, "learning_rate": 1.3814714116185935e-07, "loss": 0.0333, "step": 17235 }, { "epoch": 3.9217292377701933, "grad_norm": 0.7642686368663021, "learning_rate": 1.380911167854003e-07, "loss": 0.0097, "step": 17236 }, { "epoch": 3.921956769055745, "grad_norm": 1.890360307676006, "learning_rate": 1.3803510236046724e-07, "loss": 0.0396, "step": 17237 }, { "epoch": 3.922184300341297, "grad_norm": 1.2506704595408946, "learning_rate": 1.3797909788820479e-07, "loss": 0.0871, "step": 17238 }, { "epoch": 3.9224118316268486, "grad_norm": 1.122929635513107, "learning_rate": 1.3792310336975756e-07, "loss": 0.0517, "step": 17239 }, { "epoch": 3.9226393629124003, "grad_norm": 1.7082491210290023, "learning_rate": 1.3786711880627002e-07, "loss": 0.0289, "step": 17240 }, { "epoch": 3.922866894197952, "grad_norm": 1.0062555519250627, "learning_rate": 1.3781114419888644e-07, "loss": 0.0392, "step": 17241 }, { "epoch": 3.923094425483504, "grad_norm": 1.3923345391256892, "learning_rate": 1.3775517954875098e-07, "loss": 0.0882, "step": 17242 }, { "epoch": 3.9233219567690556, "grad_norm": 1.0661730153196212, "learning_rate": 1.3769922485700736e-07, "loss": 0.0206, "step": 17243 }, { "epoch": 3.9235494880546073, "grad_norm": 1.6622800827570015, "learning_rate": 1.3764328012479886e-07, "loss": 0.0114, "step": 17244 }, { "epoch": 3.923777019340159, "grad_norm": 1.1885819099925619, "learning_rate": 1.3758734535326944e-07, "loss": 0.036, "step": 17245 }, { "epoch": 3.9240045506257113, "grad_norm": 3.392762882594372, "learning_rate": 1.3753142054356177e-07, "loss": 0.0287, "step": 17246 }, { "epoch": 3.9242320819112626, "grad_norm": 2.2184981540280186, "learning_rate": 1.3747550569681918e-07, "loss": 0.0192, "step": 17247 }, { "epoch": 3.924459613196815, "grad_norm": 0.8380365727155968, "learning_rate": 1.3741960081418432e-07, "loss": 0.0359, "step": 17248 }, { "epoch": 3.924687144482366, "grad_norm": 1.055146399274025, "learning_rate": 1.3736370589680013e-07, "loss": 0.0876, "step": 17249 }, { "epoch": 3.9249146757679183, "grad_norm": 1.8443063095902925, "learning_rate": 1.3730782094580862e-07, "loss": 0.039, "step": 17250 }, { "epoch": 3.9251422070534696, "grad_norm": 2.40650582925355, "learning_rate": 1.3725194596235199e-07, "loss": 0.0242, "step": 17251 }, { "epoch": 3.925369738339022, "grad_norm": 0.9729129598893421, "learning_rate": 1.3719608094757245e-07, "loss": 0.0408, "step": 17252 }, { "epoch": 3.925597269624573, "grad_norm": 1.3868046950816575, "learning_rate": 1.3714022590261153e-07, "loss": 0.0079, "step": 17253 }, { "epoch": 3.9258248009101253, "grad_norm": 0.7140400852969495, "learning_rate": 1.3708438082861084e-07, "loss": 0.0042, "step": 17254 }, { "epoch": 3.926052332195677, "grad_norm": 1.3624943955771698, "learning_rate": 1.3702854572671204e-07, "loss": 0.0319, "step": 17255 }, { "epoch": 3.926279863481229, "grad_norm": 1.6562571388307843, "learning_rate": 1.3697272059805588e-07, "loss": 0.0619, "step": 17256 }, { "epoch": 3.9265073947667806, "grad_norm": 3.127878577053254, "learning_rate": 1.3691690544378376e-07, "loss": 0.0173, "step": 17257 }, { "epoch": 3.9267349260523323, "grad_norm": 2.3931890031979917, "learning_rate": 1.3686110026503602e-07, "loss": 0.0338, "step": 17258 }, { "epoch": 3.926962457337884, "grad_norm": 1.334146226622999, "learning_rate": 1.3680530506295356e-07, "loss": 0.0343, "step": 17259 }, { "epoch": 3.927189988623436, "grad_norm": 1.2556931692214843, "learning_rate": 1.3674951983867626e-07, "loss": 0.0444, "step": 17260 }, { "epoch": 3.9274175199089876, "grad_norm": 1.4321032863115297, "learning_rate": 1.3669374459334493e-07, "loss": 0.0088, "step": 17261 }, { "epoch": 3.9276450511945393, "grad_norm": 2.4131803462127372, "learning_rate": 1.3663797932809912e-07, "loss": 0.0336, "step": 17262 }, { "epoch": 3.927872582480091, "grad_norm": 0.8589431435386023, "learning_rate": 1.3658222404407853e-07, "loss": 0.0134, "step": 17263 }, { "epoch": 3.928100113765643, "grad_norm": 1.3187440502780519, "learning_rate": 1.3652647874242287e-07, "loss": 0.007, "step": 17264 }, { "epoch": 3.9283276450511946, "grad_norm": 0.9289347364797845, "learning_rate": 1.3647074342427126e-07, "loss": 0.0551, "step": 17265 }, { "epoch": 3.9285551763367463, "grad_norm": 3.4278458684595297, "learning_rate": 1.3641501809076292e-07, "loss": 0.0307, "step": 17266 }, { "epoch": 3.928782707622298, "grad_norm": 0.7805414286582564, "learning_rate": 1.3635930274303688e-07, "loss": 0.019, "step": 17267 }, { "epoch": 3.92901023890785, "grad_norm": 2.2828956549274255, "learning_rate": 1.3630359738223192e-07, "loss": 0.0918, "step": 17268 }, { "epoch": 3.9292377701934016, "grad_norm": 2.9454084665529257, "learning_rate": 1.3624790200948646e-07, "loss": 0.0196, "step": 17269 }, { "epoch": 3.9294653014789533, "grad_norm": 1.1906997128509078, "learning_rate": 1.3619221662593858e-07, "loss": 0.0579, "step": 17270 }, { "epoch": 3.929692832764505, "grad_norm": 6.057199722549506, "learning_rate": 1.3613654123272675e-07, "loss": 0.0486, "step": 17271 }, { "epoch": 3.929920364050057, "grad_norm": 1.314029487388204, "learning_rate": 1.3608087583098846e-07, "loss": 0.1234, "step": 17272 }, { "epoch": 3.9301478953356086, "grad_norm": 1.2440252644218146, "learning_rate": 1.3602522042186177e-07, "loss": 0.0851, "step": 17273 }, { "epoch": 3.9303754266211604, "grad_norm": 1.5390836267162207, "learning_rate": 1.3596957500648418e-07, "loss": 0.017, "step": 17274 }, { "epoch": 3.930602957906712, "grad_norm": 1.3906425254296357, "learning_rate": 1.3591393958599272e-07, "loss": 0.0172, "step": 17275 }, { "epoch": 3.930830489192264, "grad_norm": 1.4518494669605941, "learning_rate": 1.3585831416152473e-07, "loss": 0.019, "step": 17276 }, { "epoch": 3.9310580204778156, "grad_norm": 1.1882875348261899, "learning_rate": 1.3580269873421682e-07, "loss": 0.0174, "step": 17277 }, { "epoch": 3.9312855517633674, "grad_norm": 1.4820059766261402, "learning_rate": 1.3574709330520603e-07, "loss": 0.0648, "step": 17278 }, { "epoch": 3.931513083048919, "grad_norm": 0.898748674785513, "learning_rate": 1.3569149787562846e-07, "loss": 0.0279, "step": 17279 }, { "epoch": 3.931740614334471, "grad_norm": 1.9904121253152647, "learning_rate": 1.356359124466205e-07, "loss": 0.0292, "step": 17280 }, { "epoch": 3.9319681456200226, "grad_norm": 2.8234197414190034, "learning_rate": 1.3558033701931844e-07, "loss": 0.1291, "step": 17281 }, { "epoch": 3.9321956769055744, "grad_norm": 1.2608825587093444, "learning_rate": 1.3552477159485788e-07, "loss": 0.0752, "step": 17282 }, { "epoch": 3.932423208191126, "grad_norm": 1.0469639027695121, "learning_rate": 1.354692161743746e-07, "loss": 0.0238, "step": 17283 }, { "epoch": 3.932650739476678, "grad_norm": 1.0964526172013982, "learning_rate": 1.354136707590039e-07, "loss": 0.0253, "step": 17284 }, { "epoch": 3.93287827076223, "grad_norm": 1.0137520589955102, "learning_rate": 1.3535813534988133e-07, "loss": 0.0329, "step": 17285 }, { "epoch": 3.9331058020477814, "grad_norm": 2.016325953244981, "learning_rate": 1.353026099481415e-07, "loss": 0.0431, "step": 17286 }, { "epoch": 3.9333333333333336, "grad_norm": 1.1142928408583217, "learning_rate": 1.3524709455491954e-07, "loss": 0.0337, "step": 17287 }, { "epoch": 3.933560864618885, "grad_norm": 1.9759099361637877, "learning_rate": 1.351915891713502e-07, "loss": 0.1124, "step": 17288 }, { "epoch": 3.933788395904437, "grad_norm": 1.6168843762417024, "learning_rate": 1.3513609379856754e-07, "loss": 0.0285, "step": 17289 }, { "epoch": 3.9340159271899884, "grad_norm": 1.1559172978397936, "learning_rate": 1.350806084377062e-07, "loss": 0.0244, "step": 17290 }, { "epoch": 3.9342434584755406, "grad_norm": 1.6442556917471485, "learning_rate": 1.3502513308989975e-07, "loss": 0.0246, "step": 17291 }, { "epoch": 3.934470989761092, "grad_norm": 1.0472853057932152, "learning_rate": 1.3496966775628243e-07, "loss": 0.0236, "step": 17292 }, { "epoch": 3.934698521046644, "grad_norm": 0.8186530031160278, "learning_rate": 1.3491421243798744e-07, "loss": 0.024, "step": 17293 }, { "epoch": 3.934926052332196, "grad_norm": 1.147555921553288, "learning_rate": 1.3485876713614843e-07, "loss": 0.0401, "step": 17294 }, { "epoch": 3.9351535836177476, "grad_norm": 1.3683820739105956, "learning_rate": 1.3480333185189876e-07, "loss": 0.01, "step": 17295 }, { "epoch": 3.9353811149032993, "grad_norm": 1.9419522711238506, "learning_rate": 1.3474790658637104e-07, "loss": 0.0597, "step": 17296 }, { "epoch": 3.935608646188851, "grad_norm": 1.2316567917034487, "learning_rate": 1.3469249134069842e-07, "loss": 0.024, "step": 17297 }, { "epoch": 3.935836177474403, "grad_norm": 1.9878477723991208, "learning_rate": 1.3463708611601316e-07, "loss": 0.0602, "step": 17298 }, { "epoch": 3.9360637087599546, "grad_norm": 1.7838554420334007, "learning_rate": 1.3458169091344792e-07, "loss": 0.1374, "step": 17299 }, { "epoch": 3.9362912400455063, "grad_norm": 1.8473087012422613, "learning_rate": 1.3452630573413464e-07, "loss": 0.0811, "step": 17300 }, { "epoch": 3.936518771331058, "grad_norm": 3.152343715600976, "learning_rate": 1.3447093057920544e-07, "loss": 0.0642, "step": 17301 }, { "epoch": 3.93674630261661, "grad_norm": 1.5705759264489887, "learning_rate": 1.3441556544979216e-07, "loss": 0.0253, "step": 17302 }, { "epoch": 3.9369738339021616, "grad_norm": 1.8457171616099464, "learning_rate": 1.3436021034702616e-07, "loss": 0.0514, "step": 17303 }, { "epoch": 3.9372013651877134, "grad_norm": 1.7044314149945181, "learning_rate": 1.3430486527203905e-07, "loss": 0.1059, "step": 17304 }, { "epoch": 3.937428896473265, "grad_norm": 1.781770541547597, "learning_rate": 1.3424953022596165e-07, "loss": 0.0448, "step": 17305 }, { "epoch": 3.937656427758817, "grad_norm": 1.444265908034335, "learning_rate": 1.3419420520992522e-07, "loss": 0.0506, "step": 17306 }, { "epoch": 3.9378839590443686, "grad_norm": 1.1969285664666311, "learning_rate": 1.3413889022506023e-07, "loss": 0.0062, "step": 17307 }, { "epoch": 3.9381114903299204, "grad_norm": 1.3948164522646593, "learning_rate": 1.3408358527249733e-07, "loss": 0.034, "step": 17308 }, { "epoch": 3.938339021615472, "grad_norm": 1.481467826076583, "learning_rate": 1.3402829035336704e-07, "loss": 0.0334, "step": 17309 }, { "epoch": 3.938566552901024, "grad_norm": 1.3469597383600107, "learning_rate": 1.339730054687992e-07, "loss": 0.0112, "step": 17310 }, { "epoch": 3.9387940841865756, "grad_norm": 2.101210659962643, "learning_rate": 1.33917730619924e-07, "loss": 0.0169, "step": 17311 }, { "epoch": 3.9390216154721274, "grad_norm": 1.3230547763942784, "learning_rate": 1.3386246580787104e-07, "loss": 0.0352, "step": 17312 }, { "epoch": 3.939249146757679, "grad_norm": 1.6192379740482348, "learning_rate": 1.338072110337696e-07, "loss": 0.0572, "step": 17313 }, { "epoch": 3.939476678043231, "grad_norm": 0.8377672021463274, "learning_rate": 1.3375196629874916e-07, "loss": 0.0167, "step": 17314 }, { "epoch": 3.9397042093287826, "grad_norm": 1.2739017609670815, "learning_rate": 1.3369673160393892e-07, "loss": 0.0177, "step": 17315 }, { "epoch": 3.9399317406143344, "grad_norm": 1.7699996960438533, "learning_rate": 1.3364150695046783e-07, "loss": 0.1088, "step": 17316 }, { "epoch": 3.940159271899886, "grad_norm": 3.2486556876041432, "learning_rate": 1.335862923394643e-07, "loss": 0.029, "step": 17317 }, { "epoch": 3.940386803185438, "grad_norm": 2.0667849797216773, "learning_rate": 1.3353108777205714e-07, "loss": 0.1477, "step": 17318 }, { "epoch": 3.9406143344709896, "grad_norm": 1.0753351759853162, "learning_rate": 1.3347589324937447e-07, "loss": 0.042, "step": 17319 }, { "epoch": 3.9408418657565414, "grad_norm": 1.4568185275442296, "learning_rate": 1.334207087725442e-07, "loss": 0.0182, "step": 17320 }, { "epoch": 3.941069397042093, "grad_norm": 1.718070077864023, "learning_rate": 1.3336553434269434e-07, "loss": 0.0624, "step": 17321 }, { "epoch": 3.941296928327645, "grad_norm": 2.3255775109971517, "learning_rate": 1.3331036996095253e-07, "loss": 0.0704, "step": 17322 }, { "epoch": 3.9415244596131966, "grad_norm": 1.8097496168106122, "learning_rate": 1.3325521562844654e-07, "loss": 0.0927, "step": 17323 }, { "epoch": 3.941751990898749, "grad_norm": 1.2072392590809606, "learning_rate": 1.332000713463031e-07, "loss": 0.0261, "step": 17324 }, { "epoch": 3.9419795221843, "grad_norm": 2.4306725215716973, "learning_rate": 1.3314493711564972e-07, "loss": 0.026, "step": 17325 }, { "epoch": 3.9422070534698523, "grad_norm": 1.0556426788455948, "learning_rate": 1.33089812937613e-07, "loss": 0.0289, "step": 17326 }, { "epoch": 3.9424345847554036, "grad_norm": 0.8898028052238124, "learning_rate": 1.330346988133193e-07, "loss": 0.0186, "step": 17327 }, { "epoch": 3.942662116040956, "grad_norm": 1.198396629556553, "learning_rate": 1.3297959474389567e-07, "loss": 0.0265, "step": 17328 }, { "epoch": 3.942889647326507, "grad_norm": 1.5917869328782834, "learning_rate": 1.3292450073046797e-07, "loss": 0.0231, "step": 17329 }, { "epoch": 3.9431171786120593, "grad_norm": 3.8437573526176534, "learning_rate": 1.3286941677416233e-07, "loss": 0.0728, "step": 17330 }, { "epoch": 3.9433447098976107, "grad_norm": 1.2858249883990607, "learning_rate": 1.3281434287610458e-07, "loss": 0.0844, "step": 17331 }, { "epoch": 3.943572241183163, "grad_norm": 1.8565258518443697, "learning_rate": 1.3275927903742005e-07, "loss": 0.0087, "step": 17332 }, { "epoch": 3.9437997724687146, "grad_norm": 1.4418424403283865, "learning_rate": 1.3270422525923442e-07, "loss": 0.037, "step": 17333 }, { "epoch": 3.9440273037542664, "grad_norm": 1.5037238272149758, "learning_rate": 1.3264918154267281e-07, "loss": 0.0108, "step": 17334 }, { "epoch": 3.944254835039818, "grad_norm": 2.0309784252297036, "learning_rate": 1.325941478888604e-07, "loss": 0.0189, "step": 17335 }, { "epoch": 3.94448236632537, "grad_norm": 0.8998624733242878, "learning_rate": 1.3253912429892157e-07, "loss": 0.012, "step": 17336 }, { "epoch": 3.9447098976109216, "grad_norm": 1.419463227005536, "learning_rate": 1.324841107739814e-07, "loss": 0.0298, "step": 17337 }, { "epoch": 3.9449374288964734, "grad_norm": 0.6313424515914078, "learning_rate": 1.3242910731516393e-07, "loss": 0.0253, "step": 17338 }, { "epoch": 3.945164960182025, "grad_norm": 2.241879549859087, "learning_rate": 1.3237411392359332e-07, "loss": 0.1128, "step": 17339 }, { "epoch": 3.945392491467577, "grad_norm": 1.8270387112435924, "learning_rate": 1.3231913060039355e-07, "loss": 0.0103, "step": 17340 }, { "epoch": 3.9456200227531286, "grad_norm": 1.405498130856383, "learning_rate": 1.3226415734668845e-07, "loss": 0.024, "step": 17341 }, { "epoch": 3.9458475540386804, "grad_norm": 0.859813113774081, "learning_rate": 1.3220919416360182e-07, "loss": 0.0159, "step": 17342 }, { "epoch": 3.946075085324232, "grad_norm": 2.1284709389310064, "learning_rate": 1.3215424105225651e-07, "loss": 0.0603, "step": 17343 }, { "epoch": 3.946302616609784, "grad_norm": 1.261414442449971, "learning_rate": 1.3209929801377614e-07, "loss": 0.0072, "step": 17344 }, { "epoch": 3.9465301478953356, "grad_norm": 0.9350949118363375, "learning_rate": 1.3204436504928336e-07, "loss": 0.0396, "step": 17345 }, { "epoch": 3.9467576791808874, "grad_norm": 1.2486803154182204, "learning_rate": 1.3198944215990078e-07, "loss": 0.0112, "step": 17346 }, { "epoch": 3.946985210466439, "grad_norm": 1.2585393360602577, "learning_rate": 1.319345293467511e-07, "loss": 0.072, "step": 17347 }, { "epoch": 3.947212741751991, "grad_norm": 1.258814229597693, "learning_rate": 1.318796266109567e-07, "loss": 0.0251, "step": 17348 }, { "epoch": 3.9474402730375426, "grad_norm": 2.3329637209022955, "learning_rate": 1.318247339536397e-07, "loss": 0.0727, "step": 17349 }, { "epoch": 3.9476678043230944, "grad_norm": 1.1412823100165017, "learning_rate": 1.3176985137592197e-07, "loss": 0.032, "step": 17350 }, { "epoch": 3.947895335608646, "grad_norm": 1.7906289750093853, "learning_rate": 1.3171497887892502e-07, "loss": 0.0582, "step": 17351 }, { "epoch": 3.948122866894198, "grad_norm": 1.445210016452402, "learning_rate": 1.3166011646377063e-07, "loss": 0.0158, "step": 17352 }, { "epoch": 3.9483503981797496, "grad_norm": 0.8391758168300611, "learning_rate": 1.316052641315798e-07, "loss": 0.0586, "step": 17353 }, { "epoch": 3.9485779294653014, "grad_norm": 1.88758840056977, "learning_rate": 1.315504218834738e-07, "loss": 0.0612, "step": 17354 }, { "epoch": 3.948805460750853, "grad_norm": 1.3631251212254123, "learning_rate": 1.3149558972057338e-07, "loss": 0.0118, "step": 17355 }, { "epoch": 3.949032992036405, "grad_norm": 2.0235627055128775, "learning_rate": 1.3144076764399951e-07, "loss": 0.0982, "step": 17356 }, { "epoch": 3.9492605233219567, "grad_norm": 0.8821989616095355, "learning_rate": 1.3138595565487244e-07, "loss": 0.0465, "step": 17357 }, { "epoch": 3.9494880546075084, "grad_norm": 1.0291220274931778, "learning_rate": 1.3133115375431222e-07, "loss": 0.0374, "step": 17358 }, { "epoch": 3.94971558589306, "grad_norm": 1.8407859013489276, "learning_rate": 1.3127636194343936e-07, "loss": 0.1205, "step": 17359 }, { "epoch": 3.949943117178612, "grad_norm": 1.0231606390205825, "learning_rate": 1.3122158022337322e-07, "loss": 0.0118, "step": 17360 }, { "epoch": 3.9501706484641637, "grad_norm": 2.241519510547751, "learning_rate": 1.3116680859523368e-07, "loss": 0.0146, "step": 17361 }, { "epoch": 3.9503981797497154, "grad_norm": 0.7882202024865825, "learning_rate": 1.3111204706014035e-07, "loss": 0.0079, "step": 17362 }, { "epoch": 3.9506257110352676, "grad_norm": 1.7774188842377838, "learning_rate": 1.3105729561921202e-07, "loss": 0.1335, "step": 17363 }, { "epoch": 3.950853242320819, "grad_norm": 1.869569518800184, "learning_rate": 1.3100255427356816e-07, "loss": 0.1095, "step": 17364 }, { "epoch": 3.951080773606371, "grad_norm": 1.1428746774697176, "learning_rate": 1.3094782302432725e-07, "loss": 0.018, "step": 17365 }, { "epoch": 3.9513083048919224, "grad_norm": 2.1590955421213582, "learning_rate": 1.3089310187260818e-07, "loss": 0.0261, "step": 17366 }, { "epoch": 3.9515358361774746, "grad_norm": 1.2892907112812446, "learning_rate": 1.3083839081952898e-07, "loss": 0.0608, "step": 17367 }, { "epoch": 3.951763367463026, "grad_norm": 1.4693823605319383, "learning_rate": 1.3078368986620808e-07, "loss": 0.0998, "step": 17368 }, { "epoch": 3.951990898748578, "grad_norm": 1.1589787989751914, "learning_rate": 1.3072899901376363e-07, "loss": 0.022, "step": 17369 }, { "epoch": 3.9522184300341294, "grad_norm": 1.081735374171112, "learning_rate": 1.3067431826331307e-07, "loss": 0.0429, "step": 17370 }, { "epoch": 3.9524459613196816, "grad_norm": 1.3258586465971414, "learning_rate": 1.3061964761597427e-07, "loss": 0.0566, "step": 17371 }, { "epoch": 3.9526734926052334, "grad_norm": 1.082891501503934, "learning_rate": 1.3056498707286433e-07, "loss": 0.0391, "step": 17372 }, { "epoch": 3.952901023890785, "grad_norm": 1.4098784393637551, "learning_rate": 1.3051033663510064e-07, "loss": 0.0848, "step": 17373 }, { "epoch": 3.953128555176337, "grad_norm": 0.6973638339409973, "learning_rate": 1.3045569630379993e-07, "loss": 0.0126, "step": 17374 }, { "epoch": 3.9533560864618886, "grad_norm": 0.9807684827312043, "learning_rate": 1.3040106608007906e-07, "loss": 0.0723, "step": 17375 }, { "epoch": 3.9535836177474404, "grad_norm": 0.7850297682651005, "learning_rate": 1.3034644596505475e-07, "loss": 0.0039, "step": 17376 }, { "epoch": 3.953811149032992, "grad_norm": 2.0681022561532427, "learning_rate": 1.3029183595984296e-07, "loss": 0.0539, "step": 17377 }, { "epoch": 3.954038680318544, "grad_norm": 1.3076786754019223, "learning_rate": 1.3023723606556028e-07, "loss": 0.0158, "step": 17378 }, { "epoch": 3.9542662116040956, "grad_norm": 1.35642825915365, "learning_rate": 1.3018264628332215e-07, "loss": 0.0119, "step": 17379 }, { "epoch": 3.9544937428896474, "grad_norm": 1.7478786710080858, "learning_rate": 1.3012806661424475e-07, "loss": 0.063, "step": 17380 }, { "epoch": 3.954721274175199, "grad_norm": 1.0823070115652766, "learning_rate": 1.3007349705944314e-07, "loss": 0.0618, "step": 17381 }, { "epoch": 3.954948805460751, "grad_norm": 0.8167940207092858, "learning_rate": 1.300189376200328e-07, "loss": 0.0054, "step": 17382 }, { "epoch": 3.9551763367463026, "grad_norm": 1.0118433717147854, "learning_rate": 1.2996438829712904e-07, "loss": 0.0027, "step": 17383 }, { "epoch": 3.9554038680318544, "grad_norm": 2.189264201237235, "learning_rate": 1.2990984909184641e-07, "loss": 0.111, "step": 17384 }, { "epoch": 3.955631399317406, "grad_norm": 1.5175509579508053, "learning_rate": 1.2985532000529995e-07, "loss": 0.0181, "step": 17385 }, { "epoch": 3.955858930602958, "grad_norm": 1.8461223080543883, "learning_rate": 1.2980080103860377e-07, "loss": 0.02, "step": 17386 }, { "epoch": 3.9560864618885097, "grad_norm": 2.1845299751452707, "learning_rate": 1.297462921928723e-07, "loss": 0.0162, "step": 17387 }, { "epoch": 3.9563139931740614, "grad_norm": 1.079115880732195, "learning_rate": 1.2969179346921968e-07, "loss": 0.0793, "step": 17388 }, { "epoch": 3.956541524459613, "grad_norm": 1.3702618493144139, "learning_rate": 1.2963730486875964e-07, "loss": 0.0882, "step": 17389 }, { "epoch": 3.956769055745165, "grad_norm": 1.3337991911066458, "learning_rate": 1.2958282639260597e-07, "loss": 0.0666, "step": 17390 }, { "epoch": 3.9569965870307167, "grad_norm": 1.8144842793907614, "learning_rate": 1.2952835804187185e-07, "loss": 0.0155, "step": 17391 }, { "epoch": 3.9572241183162684, "grad_norm": 1.3100598517147204, "learning_rate": 1.2947389981767088e-07, "loss": 0.019, "step": 17392 }, { "epoch": 3.95745164960182, "grad_norm": 8.329812534268365, "learning_rate": 1.2941945172111573e-07, "loss": 0.0659, "step": 17393 }, { "epoch": 3.957679180887372, "grad_norm": 1.878599757963684, "learning_rate": 1.2936501375331935e-07, "loss": 0.0121, "step": 17394 }, { "epoch": 3.9579067121729237, "grad_norm": 1.4828670999331335, "learning_rate": 1.2931058591539453e-07, "loss": 0.0096, "step": 17395 }, { "epoch": 3.9581342434584754, "grad_norm": 1.2408563538581954, "learning_rate": 1.2925616820845338e-07, "loss": 0.0301, "step": 17396 }, { "epoch": 3.958361774744027, "grad_norm": 1.0377651202059408, "learning_rate": 1.2920176063360838e-07, "loss": 0.044, "step": 17397 }, { "epoch": 3.958589306029579, "grad_norm": 2.3388699672583684, "learning_rate": 1.2914736319197123e-07, "loss": 0.0311, "step": 17398 }, { "epoch": 3.9588168373151307, "grad_norm": 1.3316580521071049, "learning_rate": 1.2909297588465401e-07, "loss": 0.1052, "step": 17399 }, { "epoch": 3.9590443686006824, "grad_norm": 2.755483762942289, "learning_rate": 1.2903859871276797e-07, "loss": 0.0263, "step": 17400 }, { "epoch": 3.959271899886234, "grad_norm": 0.9850722400186287, "learning_rate": 1.2898423167742474e-07, "loss": 0.0185, "step": 17401 }, { "epoch": 3.9594994311717864, "grad_norm": 0.9761473679406193, "learning_rate": 1.2892987477973551e-07, "loss": 0.0606, "step": 17402 }, { "epoch": 3.9597269624573377, "grad_norm": 1.6063259666054053, "learning_rate": 1.28875528020811e-07, "loss": 0.0942, "step": 17403 }, { "epoch": 3.95995449374289, "grad_norm": 1.3665369519867658, "learning_rate": 1.2882119140176222e-07, "loss": 0.0764, "step": 17404 }, { "epoch": 3.960182025028441, "grad_norm": 3.221377920829245, "learning_rate": 1.2876686492369942e-07, "loss": 0.0702, "step": 17405 }, { "epoch": 3.9604095563139934, "grad_norm": 1.2865338364002115, "learning_rate": 1.287125485877333e-07, "loss": 0.0088, "step": 17406 }, { "epoch": 3.9606370875995447, "grad_norm": 0.4255463194261964, "learning_rate": 1.2865824239497352e-07, "loss": 0.003, "step": 17407 }, { "epoch": 3.960864618885097, "grad_norm": 1.0227147389118114, "learning_rate": 1.2860394634653036e-07, "loss": 0.0052, "step": 17408 }, { "epoch": 3.961092150170648, "grad_norm": 1.043360991708512, "learning_rate": 1.2854966044351353e-07, "loss": 0.0123, "step": 17409 }, { "epoch": 3.9613196814562004, "grad_norm": 2.6017001600996585, "learning_rate": 1.2849538468703232e-07, "loss": 0.0482, "step": 17410 }, { "epoch": 3.961547212741752, "grad_norm": 1.1045826211655974, "learning_rate": 1.2844111907819624e-07, "loss": 0.0546, "step": 17411 }, { "epoch": 3.961774744027304, "grad_norm": 1.2717678008477638, "learning_rate": 1.2838686361811417e-07, "loss": 0.0505, "step": 17412 }, { "epoch": 3.9620022753128556, "grad_norm": 2.256504025981248, "learning_rate": 1.2833261830789526e-07, "loss": 0.1032, "step": 17413 }, { "epoch": 3.9622298065984074, "grad_norm": 1.1163421373783846, "learning_rate": 1.282783831486479e-07, "loss": 0.0083, "step": 17414 }, { "epoch": 3.962457337883959, "grad_norm": 1.852145577920321, "learning_rate": 1.282241581414806e-07, "loss": 0.2232, "step": 17415 }, { "epoch": 3.962684869169511, "grad_norm": 1.9590505524393254, "learning_rate": 1.2816994328750193e-07, "loss": 0.1395, "step": 17416 }, { "epoch": 3.9629124004550627, "grad_norm": 1.478168486227001, "learning_rate": 1.281157385878195e-07, "loss": 0.0401, "step": 17417 }, { "epoch": 3.9631399317406144, "grad_norm": 1.3796125371894818, "learning_rate": 1.2806154404354164e-07, "loss": 0.0149, "step": 17418 }, { "epoch": 3.963367463026166, "grad_norm": 2.2189934313937005, "learning_rate": 1.2800735965577563e-07, "loss": 0.0132, "step": 17419 }, { "epoch": 3.963594994311718, "grad_norm": 0.8773202961140246, "learning_rate": 1.2795318542562886e-07, "loss": 0.0099, "step": 17420 }, { "epoch": 3.9638225255972697, "grad_norm": 1.6571615921767826, "learning_rate": 1.278990213542086e-07, "loss": 0.0258, "step": 17421 }, { "epoch": 3.9640500568828214, "grad_norm": 0.8175519289257359, "learning_rate": 1.2784486744262205e-07, "loss": 0.0119, "step": 17422 }, { "epoch": 3.964277588168373, "grad_norm": 1.4256433481918178, "learning_rate": 1.2779072369197606e-07, "loss": 0.0256, "step": 17423 }, { "epoch": 3.964505119453925, "grad_norm": 0.7764289203714202, "learning_rate": 1.277365901033769e-07, "loss": 0.0395, "step": 17424 }, { "epoch": 3.9647326507394767, "grad_norm": 1.2345394369978415, "learning_rate": 1.276824666779313e-07, "loss": 0.0437, "step": 17425 }, { "epoch": 3.9649601820250284, "grad_norm": 0.7814308886514265, "learning_rate": 1.276283534167453e-07, "loss": 0.0287, "step": 17426 }, { "epoch": 3.96518771331058, "grad_norm": 1.9472584167162739, "learning_rate": 1.2757425032092472e-07, "loss": 0.0829, "step": 17427 }, { "epoch": 3.965415244596132, "grad_norm": 1.1363754645942015, "learning_rate": 1.275201573915754e-07, "loss": 0.0099, "step": 17428 }, { "epoch": 3.9656427758816837, "grad_norm": 1.4021888225222743, "learning_rate": 1.2746607462980308e-07, "loss": 0.0878, "step": 17429 }, { "epoch": 3.9658703071672354, "grad_norm": 2.209823975870244, "learning_rate": 1.2741200203671312e-07, "loss": 0.0836, "step": 17430 }, { "epoch": 3.966097838452787, "grad_norm": 1.3818328728092004, "learning_rate": 1.2735793961341031e-07, "loss": 0.0059, "step": 17431 }, { "epoch": 3.966325369738339, "grad_norm": 1.6601150546054, "learning_rate": 1.273038873610001e-07, "loss": 0.0239, "step": 17432 }, { "epoch": 3.9665529010238907, "grad_norm": 1.8632334450137322, "learning_rate": 1.272498452805869e-07, "loss": 0.0259, "step": 17433 }, { "epoch": 3.9667804323094424, "grad_norm": 1.4781180954556699, "learning_rate": 1.271958133732751e-07, "loss": 0.0325, "step": 17434 }, { "epoch": 3.967007963594994, "grad_norm": 1.5208794899510145, "learning_rate": 1.2714179164016918e-07, "loss": 0.0462, "step": 17435 }, { "epoch": 3.967235494880546, "grad_norm": 1.3801437914555996, "learning_rate": 1.2708778008237327e-07, "loss": 0.0774, "step": 17436 }, { "epoch": 3.9674630261660977, "grad_norm": 1.6755164257984123, "learning_rate": 1.270337787009914e-07, "loss": 0.023, "step": 17437 }, { "epoch": 3.9676905574516494, "grad_norm": 1.7812254460250057, "learning_rate": 1.2697978749712714e-07, "loss": 0.0185, "step": 17438 }, { "epoch": 3.967918088737201, "grad_norm": 2.046465688199126, "learning_rate": 1.2692580647188374e-07, "loss": 0.0361, "step": 17439 }, { "epoch": 3.968145620022753, "grad_norm": 1.01576070116025, "learning_rate": 1.268718356263649e-07, "loss": 0.0321, "step": 17440 }, { "epoch": 3.968373151308305, "grad_norm": 3.455393957793571, "learning_rate": 1.268178749616733e-07, "loss": 0.0281, "step": 17441 }, { "epoch": 3.9686006825938565, "grad_norm": 1.516977523060208, "learning_rate": 1.2676392447891192e-07, "loss": 0.0387, "step": 17442 }, { "epoch": 3.9688282138794087, "grad_norm": 1.4535259992075393, "learning_rate": 1.2670998417918344e-07, "loss": 0.0489, "step": 17443 }, { "epoch": 3.96905574516496, "grad_norm": 1.1835859092260865, "learning_rate": 1.2665605406359047e-07, "loss": 0.0185, "step": 17444 }, { "epoch": 3.969283276450512, "grad_norm": 1.4549805576335375, "learning_rate": 1.2660213413323513e-07, "loss": 0.0285, "step": 17445 }, { "epoch": 3.9695108077360635, "grad_norm": 1.1288649771835457, "learning_rate": 1.2654822438921918e-07, "loss": 0.0192, "step": 17446 }, { "epoch": 3.9697383390216157, "grad_norm": 1.2710530364910853, "learning_rate": 1.264943248326448e-07, "loss": 0.0515, "step": 17447 }, { "epoch": 3.969965870307167, "grad_norm": 1.5522257138478948, "learning_rate": 1.2644043546461316e-07, "loss": 0.0599, "step": 17448 }, { "epoch": 3.970193401592719, "grad_norm": 1.0916377277272589, "learning_rate": 1.2638655628622626e-07, "loss": 0.0055, "step": 17449 }, { "epoch": 3.970420932878271, "grad_norm": 1.2570407695915142, "learning_rate": 1.2633268729858483e-07, "loss": 0.0412, "step": 17450 }, { "epoch": 3.9706484641638227, "grad_norm": 2.8445928702367005, "learning_rate": 1.262788285027902e-07, "loss": 0.0353, "step": 17451 }, { "epoch": 3.9708759954493744, "grad_norm": 1.6430889048201722, "learning_rate": 1.2622497989994292e-07, "loss": 0.018, "step": 17452 }, { "epoch": 3.971103526734926, "grad_norm": 1.2500412113798696, "learning_rate": 1.2617114149114346e-07, "loss": 0.0339, "step": 17453 }, { "epoch": 3.971331058020478, "grad_norm": 0.9149489032634037, "learning_rate": 1.2611731327749227e-07, "loss": 0.0119, "step": 17454 }, { "epoch": 3.9715585893060297, "grad_norm": 4.760910081993258, "learning_rate": 1.260634952600895e-07, "loss": 0.0408, "step": 17455 }, { "epoch": 3.9717861205915814, "grad_norm": 1.2301482667446315, "learning_rate": 1.2600968744003531e-07, "loss": 0.0136, "step": 17456 }, { "epoch": 3.972013651877133, "grad_norm": 2.2671747062104814, "learning_rate": 1.2595588981842932e-07, "loss": 0.157, "step": 17457 }, { "epoch": 3.972241183162685, "grad_norm": 1.4881380690306936, "learning_rate": 1.2590210239637072e-07, "loss": 0.0696, "step": 17458 }, { "epoch": 3.9724687144482367, "grad_norm": 4.4705749988147145, "learning_rate": 1.2584832517495933e-07, "loss": 0.0292, "step": 17459 }, { "epoch": 3.9726962457337884, "grad_norm": 2.259242056248824, "learning_rate": 1.2579455815529384e-07, "loss": 0.027, "step": 17460 }, { "epoch": 3.97292377701934, "grad_norm": 0.8169963951429502, "learning_rate": 1.2574080133847331e-07, "loss": 0.013, "step": 17461 }, { "epoch": 3.973151308304892, "grad_norm": 1.462868850875589, "learning_rate": 1.2568705472559652e-07, "loss": 0.036, "step": 17462 }, { "epoch": 3.9733788395904437, "grad_norm": 1.0268677816170169, "learning_rate": 1.2563331831776193e-07, "loss": 0.0351, "step": 17463 }, { "epoch": 3.9736063708759954, "grad_norm": 1.602260616594539, "learning_rate": 1.2557959211606789e-07, "loss": 0.0309, "step": 17464 }, { "epoch": 3.973833902161547, "grad_norm": 0.7733798817320852, "learning_rate": 1.2552587612161208e-07, "loss": 0.0294, "step": 17465 }, { "epoch": 3.974061433447099, "grad_norm": 1.813553040355812, "learning_rate": 1.2547217033549282e-07, "loss": 0.0175, "step": 17466 }, { "epoch": 3.9742889647326507, "grad_norm": 1.8598364261211457, "learning_rate": 1.2541847475880742e-07, "loss": 0.0205, "step": 17467 }, { "epoch": 3.9745164960182024, "grad_norm": 1.3157370446536822, "learning_rate": 1.2536478939265344e-07, "loss": 0.0822, "step": 17468 }, { "epoch": 3.974744027303754, "grad_norm": 1.7809393925355832, "learning_rate": 1.2531111423812824e-07, "loss": 0.0556, "step": 17469 }, { "epoch": 3.974971558589306, "grad_norm": 6.755487241417287, "learning_rate": 1.2525744929632864e-07, "loss": 0.0548, "step": 17470 }, { "epoch": 3.9751990898748577, "grad_norm": 1.957811673664015, "learning_rate": 1.2520379456835165e-07, "loss": 0.0202, "step": 17471 }, { "epoch": 3.9754266211604095, "grad_norm": 1.3632626613289127, "learning_rate": 1.251501500552936e-07, "loss": 0.0125, "step": 17472 }, { "epoch": 3.975654152445961, "grad_norm": 1.7276539283309478, "learning_rate": 1.250965157582512e-07, "loss": 0.0238, "step": 17473 }, { "epoch": 3.975881683731513, "grad_norm": 1.1226076474255342, "learning_rate": 1.2504289167832044e-07, "loss": 0.0042, "step": 17474 }, { "epoch": 3.9761092150170647, "grad_norm": 0.9781031555044777, "learning_rate": 1.2498927781659723e-07, "loss": 0.0678, "step": 17475 }, { "epoch": 3.9763367463026165, "grad_norm": 1.5148149599443648, "learning_rate": 1.2493567417417764e-07, "loss": 0.0431, "step": 17476 }, { "epoch": 3.976564277588168, "grad_norm": 2.1704869633886843, "learning_rate": 1.2488208075215688e-07, "loss": 0.0264, "step": 17477 }, { "epoch": 3.9767918088737204, "grad_norm": 2.4062272741247113, "learning_rate": 1.248284975516307e-07, "loss": 0.0384, "step": 17478 }, { "epoch": 3.9770193401592717, "grad_norm": 1.2085590079311461, "learning_rate": 1.2477492457369371e-07, "loss": 0.0051, "step": 17479 }, { "epoch": 3.977246871444824, "grad_norm": 1.1321437801519234, "learning_rate": 1.2472136181944144e-07, "loss": 0.0232, "step": 17480 }, { "epoch": 3.9774744027303752, "grad_norm": 2.5393313599941796, "learning_rate": 1.2466780928996808e-07, "loss": 0.0326, "step": 17481 }, { "epoch": 3.9777019340159274, "grad_norm": 1.457428160020947, "learning_rate": 1.246142669863684e-07, "loss": 0.0327, "step": 17482 }, { "epoch": 3.9779294653014787, "grad_norm": 1.6812542670895718, "learning_rate": 1.2456073490973685e-07, "loss": 0.0725, "step": 17483 }, { "epoch": 3.978156996587031, "grad_norm": 1.0963296009537056, "learning_rate": 1.245072130611672e-07, "loss": 0.0204, "step": 17484 }, { "epoch": 3.9783845278725822, "grad_norm": 1.0894262555747978, "learning_rate": 1.244537014417537e-07, "loss": 0.0114, "step": 17485 }, { "epoch": 3.9786120591581344, "grad_norm": 1.2923025913518316, "learning_rate": 1.2440020005258964e-07, "loss": 0.0714, "step": 17486 }, { "epoch": 3.9788395904436857, "grad_norm": 0.9633350564319675, "learning_rate": 1.243467088947689e-07, "loss": 0.0885, "step": 17487 }, { "epoch": 3.979067121729238, "grad_norm": 1.6521154321896039, "learning_rate": 1.2429322796938433e-07, "loss": 0.0218, "step": 17488 }, { "epoch": 3.9792946530147897, "grad_norm": 3.785541535203269, "learning_rate": 1.2423975727752913e-07, "loss": 0.0323, "step": 17489 }, { "epoch": 3.9795221843003414, "grad_norm": 1.5595799435111473, "learning_rate": 1.2418629682029642e-07, "loss": 0.066, "step": 17490 }, { "epoch": 3.979749715585893, "grad_norm": 1.2182959029146174, "learning_rate": 1.2413284659877846e-07, "loss": 0.0102, "step": 17491 }, { "epoch": 3.979977246871445, "grad_norm": 1.1458637484131784, "learning_rate": 1.2407940661406792e-07, "loss": 0.0068, "step": 17492 }, { "epoch": 3.9802047781569967, "grad_norm": 1.5398483428561016, "learning_rate": 1.2402597686725678e-07, "loss": 0.0362, "step": 17493 }, { "epoch": 3.9804323094425484, "grad_norm": 1.1576687890639898, "learning_rate": 1.2397255735943732e-07, "loss": 0.0253, "step": 17494 }, { "epoch": 3.9806598407281, "grad_norm": 3.3118341285812596, "learning_rate": 1.2391914809170105e-07, "loss": 0.1296, "step": 17495 }, { "epoch": 3.980887372013652, "grad_norm": 0.9402971767212225, "learning_rate": 1.2386574906513974e-07, "loss": 0.037, "step": 17496 }, { "epoch": 3.9811149032992037, "grad_norm": 1.4594592420796144, "learning_rate": 1.2381236028084484e-07, "loss": 0.1081, "step": 17497 }, { "epoch": 3.9813424345847555, "grad_norm": 2.0603181423942436, "learning_rate": 1.237589817399073e-07, "loss": 0.0525, "step": 17498 }, { "epoch": 3.981569965870307, "grad_norm": 3.590979747529372, "learning_rate": 1.2370561344341837e-07, "loss": 0.0248, "step": 17499 }, { "epoch": 3.981797497155859, "grad_norm": 1.6091073796377438, "learning_rate": 1.2365225539246843e-07, "loss": 0.0319, "step": 17500 }, { "epoch": 3.9820250284414107, "grad_norm": 1.0593276907831535, "learning_rate": 1.2359890758814837e-07, "loss": 0.0166, "step": 17501 }, { "epoch": 3.9822525597269625, "grad_norm": 1.2054214834200214, "learning_rate": 1.235455700315482e-07, "loss": 0.0109, "step": 17502 }, { "epoch": 3.982480091012514, "grad_norm": 1.1729970665487202, "learning_rate": 1.2349224272375824e-07, "loss": 0.0828, "step": 17503 }, { "epoch": 3.982707622298066, "grad_norm": 1.6291210316182203, "learning_rate": 1.234389256658685e-07, "loss": 0.0227, "step": 17504 }, { "epoch": 3.9829351535836177, "grad_norm": 1.93810933094655, "learning_rate": 1.2338561885896842e-07, "loss": 0.0156, "step": 17505 }, { "epoch": 3.9831626848691695, "grad_norm": 0.974986379216525, "learning_rate": 1.2333232230414779e-07, "loss": 0.0119, "step": 17506 }, { "epoch": 3.983390216154721, "grad_norm": 1.8125587724190262, "learning_rate": 1.2327903600249568e-07, "loss": 0.0773, "step": 17507 }, { "epoch": 3.983617747440273, "grad_norm": 1.3612121444927976, "learning_rate": 1.2322575995510095e-07, "loss": 0.013, "step": 17508 }, { "epoch": 3.9838452787258247, "grad_norm": 0.9631068074484834, "learning_rate": 1.2317249416305305e-07, "loss": 0.0368, "step": 17509 }, { "epoch": 3.9840728100113765, "grad_norm": 1.118225678796317, "learning_rate": 1.2311923862744013e-07, "loss": 0.0509, "step": 17510 }, { "epoch": 3.9843003412969282, "grad_norm": 0.8697383631581728, "learning_rate": 1.2306599334935096e-07, "loss": 0.0098, "step": 17511 }, { "epoch": 3.98452787258248, "grad_norm": 2.299553443886373, "learning_rate": 1.2301275832987355e-07, "loss": 0.008, "step": 17512 }, { "epoch": 3.9847554038680317, "grad_norm": 0.7215358730606009, "learning_rate": 1.229595335700961e-07, "loss": 0.0044, "step": 17513 }, { "epoch": 3.9849829351535835, "grad_norm": 0.947835086122089, "learning_rate": 1.2290631907110621e-07, "loss": 0.0168, "step": 17514 }, { "epoch": 3.9852104664391352, "grad_norm": 1.9463822758702067, "learning_rate": 1.2285311483399162e-07, "loss": 0.0218, "step": 17515 }, { "epoch": 3.985437997724687, "grad_norm": 1.7996528463447998, "learning_rate": 1.227999208598399e-07, "loss": 0.0253, "step": 17516 }, { "epoch": 3.985665529010239, "grad_norm": 1.4411168990813055, "learning_rate": 1.2274673714973788e-07, "loss": 0.0218, "step": 17517 }, { "epoch": 3.9858930602957905, "grad_norm": 1.734902034124956, "learning_rate": 1.2269356370477284e-07, "loss": 0.1282, "step": 17518 }, { "epoch": 3.9861205915813427, "grad_norm": 0.8744925037225678, "learning_rate": 1.2264040052603135e-07, "loss": 0.0413, "step": 17519 }, { "epoch": 3.986348122866894, "grad_norm": 0.7348532100571107, "learning_rate": 1.2258724761460013e-07, "loss": 0.0205, "step": 17520 }, { "epoch": 3.986575654152446, "grad_norm": 1.7352424768965682, "learning_rate": 1.225341049715653e-07, "loss": 0.0031, "step": 17521 }, { "epoch": 3.9868031854379975, "grad_norm": 1.3111850128443474, "learning_rate": 1.224809725980132e-07, "loss": 0.0069, "step": 17522 }, { "epoch": 3.9870307167235497, "grad_norm": 1.5124291475301284, "learning_rate": 1.2242785049502977e-07, "loss": 0.018, "step": 17523 }, { "epoch": 3.987258248009101, "grad_norm": 1.0635293110777913, "learning_rate": 1.223747386637006e-07, "loss": 0.0055, "step": 17524 }, { "epoch": 3.987485779294653, "grad_norm": 1.6127619524981291, "learning_rate": 1.2232163710511134e-07, "loss": 0.0102, "step": 17525 }, { "epoch": 3.9877133105802045, "grad_norm": 1.4200173678584134, "learning_rate": 1.222685458203472e-07, "loss": 0.0348, "step": 17526 }, { "epoch": 3.9879408418657567, "grad_norm": 1.1637924867221243, "learning_rate": 1.222154648104932e-07, "loss": 0.0426, "step": 17527 }, { "epoch": 3.9881683731513085, "grad_norm": 1.5666161252885533, "learning_rate": 1.2216239407663423e-07, "loss": 0.0272, "step": 17528 }, { "epoch": 3.98839590443686, "grad_norm": 1.695707062509678, "learning_rate": 1.2210933361985505e-07, "loss": 0.029, "step": 17529 }, { "epoch": 3.988623435722412, "grad_norm": 1.5214599397917938, "learning_rate": 1.2205628344124017e-07, "loss": 0.0853, "step": 17530 }, { "epoch": 3.9888509670079637, "grad_norm": 2.399219441587077, "learning_rate": 1.2200324354187368e-07, "loss": 0.0267, "step": 17531 }, { "epoch": 3.9890784982935155, "grad_norm": 1.3857649114200794, "learning_rate": 1.2195021392283984e-07, "loss": 0.0336, "step": 17532 }, { "epoch": 3.989306029579067, "grad_norm": 1.9325645525857051, "learning_rate": 1.2189719458522235e-07, "loss": 0.0193, "step": 17533 }, { "epoch": 3.989533560864619, "grad_norm": 1.325134899077989, "learning_rate": 1.218441855301046e-07, "loss": 0.0591, "step": 17534 }, { "epoch": 3.9897610921501707, "grad_norm": 1.454567870724177, "learning_rate": 1.2179118675857028e-07, "loss": 0.0553, "step": 17535 }, { "epoch": 3.9899886234357225, "grad_norm": 1.2606422993460262, "learning_rate": 1.2173819827170245e-07, "loss": 0.0229, "step": 17536 }, { "epoch": 3.9902161547212742, "grad_norm": 1.3456635999489608, "learning_rate": 1.216852200705844e-07, "loss": 0.0166, "step": 17537 }, { "epoch": 3.990443686006826, "grad_norm": 1.2190375766657209, "learning_rate": 1.216322521562985e-07, "loss": 0.0225, "step": 17538 }, { "epoch": 3.9906712172923777, "grad_norm": 1.8101160216885523, "learning_rate": 1.215792945299276e-07, "loss": 0.0874, "step": 17539 }, { "epoch": 3.9908987485779295, "grad_norm": 1.1523283051959026, "learning_rate": 1.2152634719255395e-07, "loss": 0.0774, "step": 17540 }, { "epoch": 3.9911262798634812, "grad_norm": 1.043736683133158, "learning_rate": 1.2147341014525956e-07, "loss": 0.0087, "step": 17541 }, { "epoch": 3.991353811149033, "grad_norm": 0.616172899421579, "learning_rate": 1.214204833891265e-07, "loss": 0.0394, "step": 17542 }, { "epoch": 3.9915813424345847, "grad_norm": 2.415156171775591, "learning_rate": 1.213675669252365e-07, "loss": 0.012, "step": 17543 }, { "epoch": 3.9918088737201365, "grad_norm": 0.8510070780442802, "learning_rate": 1.213146607546712e-07, "loss": 0.011, "step": 17544 }, { "epoch": 3.9920364050056882, "grad_norm": 1.9106737135162577, "learning_rate": 1.2126176487851188e-07, "loss": 0.1107, "step": 17545 }, { "epoch": 3.99226393629124, "grad_norm": 0.8569346775133895, "learning_rate": 1.2120887929783928e-07, "loss": 0.005, "step": 17546 }, { "epoch": 3.9924914675767917, "grad_norm": 1.5961502118203121, "learning_rate": 1.2115600401373468e-07, "loss": 0.1255, "step": 17547 }, { "epoch": 3.9927189988623435, "grad_norm": 1.9162282349049988, "learning_rate": 1.2110313902727848e-07, "loss": 0.0248, "step": 17548 }, { "epoch": 3.9929465301478952, "grad_norm": 2.046375374329796, "learning_rate": 1.2105028433955133e-07, "loss": 0.034, "step": 17549 }, { "epoch": 3.993174061433447, "grad_norm": 1.497584690575713, "learning_rate": 1.2099743995163338e-07, "loss": 0.0849, "step": 17550 }, { "epoch": 3.9934015927189987, "grad_norm": 3.5324603547799533, "learning_rate": 1.2094460586460486e-07, "loss": 0.0149, "step": 17551 }, { "epoch": 3.9936291240045505, "grad_norm": 1.291050027612159, "learning_rate": 1.208917820795455e-07, "loss": 0.0195, "step": 17552 }, { "epoch": 3.9938566552901023, "grad_norm": 0.8951670550670242, "learning_rate": 1.2083896859753469e-07, "loss": 0.0145, "step": 17553 }, { "epoch": 3.994084186575654, "grad_norm": 1.2979066708541436, "learning_rate": 1.2078616541965225e-07, "loss": 0.083, "step": 17554 }, { "epoch": 3.9943117178612058, "grad_norm": 1.2078456778928917, "learning_rate": 1.2073337254697698e-07, "loss": 0.0618, "step": 17555 }, { "epoch": 3.994539249146758, "grad_norm": 2.8045266084143305, "learning_rate": 1.2068058998058803e-07, "loss": 0.0252, "step": 17556 }, { "epoch": 3.9947667804323093, "grad_norm": 1.3965950950831982, "learning_rate": 1.2062781772156416e-07, "loss": 0.041, "step": 17557 }, { "epoch": 3.9949943117178615, "grad_norm": 1.0633210659327859, "learning_rate": 1.205750557709842e-07, "loss": 0.0151, "step": 17558 }, { "epoch": 3.9952218430034128, "grad_norm": 1.6315785283417714, "learning_rate": 1.2052230412992632e-07, "loss": 0.0132, "step": 17559 }, { "epoch": 3.995449374288965, "grad_norm": 2.2108424842124124, "learning_rate": 1.204695627994684e-07, "loss": 0.0305, "step": 17560 }, { "epoch": 3.9956769055745163, "grad_norm": 1.1466185369565065, "learning_rate": 1.2041683178068883e-07, "loss": 0.0162, "step": 17561 }, { "epoch": 3.9959044368600685, "grad_norm": 1.4009441859380178, "learning_rate": 1.2036411107466499e-07, "loss": 0.0166, "step": 17562 }, { "epoch": 3.9961319681456198, "grad_norm": 1.028793839936148, "learning_rate": 1.2031140068247445e-07, "loss": 0.0107, "step": 17563 }, { "epoch": 3.996359499431172, "grad_norm": 1.3188424946725195, "learning_rate": 1.202587006051948e-07, "loss": 0.0097, "step": 17564 }, { "epoch": 3.9965870307167233, "grad_norm": 2.0563391537169946, "learning_rate": 1.2020601084390273e-07, "loss": 0.0236, "step": 17565 }, { "epoch": 3.9968145620022755, "grad_norm": 1.408462856395183, "learning_rate": 1.2015333139967547e-07, "loss": 0.0408, "step": 17566 }, { "epoch": 3.9970420932878272, "grad_norm": 1.5387214396691131, "learning_rate": 1.2010066227358938e-07, "loss": 0.0118, "step": 17567 }, { "epoch": 3.997269624573379, "grad_norm": 0.9677307726081278, "learning_rate": 1.2004800346672124e-07, "loss": 0.0152, "step": 17568 }, { "epoch": 3.9974971558589307, "grad_norm": 1.689552048992298, "learning_rate": 1.1999535498014677e-07, "loss": 0.0744, "step": 17569 }, { "epoch": 3.9977246871444825, "grad_norm": 1.5273193082270093, "learning_rate": 1.199427168149427e-07, "loss": 0.0396, "step": 17570 }, { "epoch": 3.9979522184300342, "grad_norm": 1.4786117185966168, "learning_rate": 1.1989008897218463e-07, "loss": 0.07, "step": 17571 }, { "epoch": 3.998179749715586, "grad_norm": 1.5566012122106916, "learning_rate": 1.198374714529478e-07, "loss": 0.1054, "step": 17572 }, { "epoch": 3.9984072810011377, "grad_norm": 0.8832266207927764, "learning_rate": 1.1978486425830812e-07, "loss": 0.0377, "step": 17573 }, { "epoch": 3.9986348122866895, "grad_norm": 1.2116237534095144, "learning_rate": 1.1973226738934035e-07, "loss": 0.0587, "step": 17574 }, { "epoch": 3.9988623435722412, "grad_norm": 0.9358083581102714, "learning_rate": 1.196796808471197e-07, "loss": 0.0092, "step": 17575 }, { "epoch": 3.999089874857793, "grad_norm": 1.5695744785566712, "learning_rate": 1.1962710463272111e-07, "loss": 0.0085, "step": 17576 }, { "epoch": 3.9993174061433447, "grad_norm": 1.6764500026628666, "learning_rate": 1.195745387472188e-07, "loss": 0.0219, "step": 17577 }, { "epoch": 3.9995449374288965, "grad_norm": 1.1245396657095872, "learning_rate": 1.195219831916874e-07, "loss": 0.0086, "step": 17578 }, { "epoch": 3.9997724687144482, "grad_norm": 1.0682898424158809, "learning_rate": 1.1946943796720074e-07, "loss": 0.0542, "step": 17579 }, { "epoch": 4.0, "grad_norm": 1.5395240214133907, "learning_rate": 1.194169030748331e-07, "loss": 0.014, "step": 17580 }, { "epoch": 4.000227531285552, "grad_norm": 1.0571334079912167, "learning_rate": 1.193643785156579e-07, "loss": 0.0098, "step": 17581 }, { "epoch": 4.0004550625711035, "grad_norm": 0.9624445306320226, "learning_rate": 1.1931186429074878e-07, "loss": 0.039, "step": 17582 }, { "epoch": 4.000682593856656, "grad_norm": 1.1176222880093825, "learning_rate": 1.1925936040117916e-07, "loss": 0.0132, "step": 17583 }, { "epoch": 4.000910125142207, "grad_norm": 0.7035514485343167, "learning_rate": 1.1920686684802188e-07, "loss": 0.0079, "step": 17584 }, { "epoch": 4.001137656427759, "grad_norm": 0.8438694392904499, "learning_rate": 1.1915438363235006e-07, "loss": 0.0172, "step": 17585 }, { "epoch": 4.0013651877133105, "grad_norm": 0.5063157923365902, "learning_rate": 1.1910191075523602e-07, "loss": 0.0035, "step": 17586 }, { "epoch": 4.001592718998863, "grad_norm": 1.2447867049151733, "learning_rate": 1.190494482177526e-07, "loss": 0.0124, "step": 17587 }, { "epoch": 4.001820250284414, "grad_norm": 0.5680643518626523, "learning_rate": 1.1899699602097172e-07, "loss": 0.005, "step": 17588 }, { "epoch": 4.002047781569966, "grad_norm": 0.7190138151338809, "learning_rate": 1.1894455416596555e-07, "loss": 0.0054, "step": 17589 }, { "epoch": 4.0022753128555175, "grad_norm": 1.7722345774293897, "learning_rate": 1.1889212265380604e-07, "loss": 0.1021, "step": 17590 }, { "epoch": 4.00250284414107, "grad_norm": 0.9091327818168273, "learning_rate": 1.1883970148556447e-07, "loss": 0.0207, "step": 17591 }, { "epoch": 4.002730375426621, "grad_norm": 0.717359968186265, "learning_rate": 1.1878729066231262e-07, "loss": 0.0136, "step": 17592 }, { "epoch": 4.002957906712173, "grad_norm": 0.8273631282236071, "learning_rate": 1.1873489018512124e-07, "loss": 0.0066, "step": 17593 }, { "epoch": 4.0031854379977245, "grad_norm": 1.308538246114031, "learning_rate": 1.186825000550617e-07, "loss": 0.005, "step": 17594 }, { "epoch": 4.003412969283277, "grad_norm": 0.8183260547360695, "learning_rate": 1.1863012027320445e-07, "loss": 0.0067, "step": 17595 }, { "epoch": 4.003640500568828, "grad_norm": 0.613055369946934, "learning_rate": 1.1857775084062016e-07, "loss": 0.006, "step": 17596 }, { "epoch": 4.00386803185438, "grad_norm": 0.9857233444515943, "learning_rate": 1.1852539175837938e-07, "loss": 0.0612, "step": 17597 }, { "epoch": 4.0040955631399315, "grad_norm": 0.3667593589602403, "learning_rate": 1.1847304302755179e-07, "loss": 0.0026, "step": 17598 }, { "epoch": 4.004323094425484, "grad_norm": 1.2722399378571283, "learning_rate": 1.1842070464920772e-07, "loss": 0.0822, "step": 17599 }, { "epoch": 4.004550625711035, "grad_norm": 1.0402831703411202, "learning_rate": 1.1836837662441653e-07, "loss": 0.0417, "step": 17600 }, { "epoch": 4.004778156996587, "grad_norm": 0.8382023383384775, "learning_rate": 1.1831605895424807e-07, "loss": 0.0347, "step": 17601 }, { "epoch": 4.0050056882821385, "grad_norm": 0.8919481831931207, "learning_rate": 1.1826375163977124e-07, "loss": 0.0199, "step": 17602 }, { "epoch": 4.005233219567691, "grad_norm": 1.080894003881803, "learning_rate": 1.1821145468205522e-07, "loss": 0.041, "step": 17603 }, { "epoch": 4.005460750853242, "grad_norm": 0.990129461334136, "learning_rate": 1.181591680821692e-07, "loss": 0.0481, "step": 17604 }, { "epoch": 4.005688282138794, "grad_norm": 1.6997089126353844, "learning_rate": 1.181068918411813e-07, "loss": 0.0193, "step": 17605 }, { "epoch": 4.0059158134243456, "grad_norm": 3.1942620121026803, "learning_rate": 1.1805462596016039e-07, "loss": 0.0317, "step": 17606 }, { "epoch": 4.006143344709898, "grad_norm": 0.9493398195216897, "learning_rate": 1.1800237044017435e-07, "loss": 0.0181, "step": 17607 }, { "epoch": 4.006370875995449, "grad_norm": 1.099875797825517, "learning_rate": 1.1795012528229149e-07, "loss": 0.0038, "step": 17608 }, { "epoch": 4.006598407281001, "grad_norm": 1.173495008229192, "learning_rate": 1.1789789048757924e-07, "loss": 0.0453, "step": 17609 }, { "epoch": 4.006825938566553, "grad_norm": 1.1363960472384433, "learning_rate": 1.1784566605710548e-07, "loss": 0.0523, "step": 17610 }, { "epoch": 4.007053469852105, "grad_norm": 0.9616035726584296, "learning_rate": 1.1779345199193756e-07, "loss": 0.0058, "step": 17611 }, { "epoch": 4.007281001137656, "grad_norm": 1.2477129891318461, "learning_rate": 1.1774124829314255e-07, "loss": 0.055, "step": 17612 }, { "epoch": 4.007508532423208, "grad_norm": 1.1205604211917224, "learning_rate": 1.1768905496178748e-07, "loss": 0.0147, "step": 17613 }, { "epoch": 4.00773606370876, "grad_norm": 1.4522416677509637, "learning_rate": 1.1763687199893903e-07, "loss": 0.0407, "step": 17614 }, { "epoch": 4.007963594994312, "grad_norm": 1.198465218921505, "learning_rate": 1.1758469940566362e-07, "loss": 0.0572, "step": 17615 }, { "epoch": 4.008191126279863, "grad_norm": 3.484382268583334, "learning_rate": 1.1753253718302763e-07, "loss": 0.0216, "step": 17616 }, { "epoch": 4.008418657565415, "grad_norm": 1.7377758432270554, "learning_rate": 1.1748038533209716e-07, "loss": 0.0283, "step": 17617 }, { "epoch": 4.008646188850967, "grad_norm": 1.174267558431606, "learning_rate": 1.1742824385393835e-07, "loss": 0.0235, "step": 17618 }, { "epoch": 4.008873720136519, "grad_norm": 0.8773348997663771, "learning_rate": 1.1737611274961654e-07, "loss": 0.0213, "step": 17619 }, { "epoch": 4.009101251422071, "grad_norm": 1.7836214756922373, "learning_rate": 1.1732399202019739e-07, "loss": 0.1038, "step": 17620 }, { "epoch": 4.009328782707622, "grad_norm": 1.8048385617830567, "learning_rate": 1.1727188166674612e-07, "loss": 0.0121, "step": 17621 }, { "epoch": 4.0095563139931745, "grad_norm": 1.574125187646925, "learning_rate": 1.1721978169032755e-07, "loss": 0.0143, "step": 17622 }, { "epoch": 4.009783845278726, "grad_norm": 0.9252998964105739, "learning_rate": 1.1716769209200673e-07, "loss": 0.066, "step": 17623 }, { "epoch": 4.010011376564278, "grad_norm": 0.6391540319069499, "learning_rate": 1.1711561287284818e-07, "loss": 0.0032, "step": 17624 }, { "epoch": 4.010238907849829, "grad_norm": 1.1409603946562579, "learning_rate": 1.1706354403391657e-07, "loss": 0.0138, "step": 17625 }, { "epoch": 4.0104664391353815, "grad_norm": 0.5528911041341994, "learning_rate": 1.1701148557627572e-07, "loss": 0.0024, "step": 17626 }, { "epoch": 4.010693970420933, "grad_norm": 0.6392411860270091, "learning_rate": 1.1695943750098984e-07, "loss": 0.005, "step": 17627 }, { "epoch": 4.010921501706485, "grad_norm": 1.5610887171715773, "learning_rate": 1.1690739980912267e-07, "loss": 0.0102, "step": 17628 }, { "epoch": 4.011149032992036, "grad_norm": 1.367791222068332, "learning_rate": 1.1685537250173746e-07, "loss": 0.0632, "step": 17629 }, { "epoch": 4.0113765642775885, "grad_norm": 1.1153171174661403, "learning_rate": 1.1680335557989809e-07, "loss": 0.0145, "step": 17630 }, { "epoch": 4.01160409556314, "grad_norm": 0.5939257195043021, "learning_rate": 1.1675134904466722e-07, "loss": 0.0035, "step": 17631 }, { "epoch": 4.011831626848692, "grad_norm": 1.2001748961858993, "learning_rate": 1.1669935289710818e-07, "loss": 0.0196, "step": 17632 }, { "epoch": 4.012059158134243, "grad_norm": 0.9777006184863581, "learning_rate": 1.1664736713828339e-07, "loss": 0.0168, "step": 17633 }, { "epoch": 4.0122866894197955, "grad_norm": 5.481817617119042, "learning_rate": 1.1659539176925535e-07, "loss": 0.0036, "step": 17634 }, { "epoch": 4.012514220705347, "grad_norm": 1.0156187428730126, "learning_rate": 1.165434267910863e-07, "loss": 0.0137, "step": 17635 }, { "epoch": 4.012741751990899, "grad_norm": 1.1321324498676768, "learning_rate": 1.1649147220483847e-07, "loss": 0.0952, "step": 17636 }, { "epoch": 4.01296928327645, "grad_norm": 1.221791132054507, "learning_rate": 1.1643952801157383e-07, "loss": 0.0096, "step": 17637 }, { "epoch": 4.0131968145620025, "grad_norm": 0.7836909743737849, "learning_rate": 1.1638759421235365e-07, "loss": 0.0136, "step": 17638 }, { "epoch": 4.013424345847554, "grad_norm": 1.1312514566335505, "learning_rate": 1.1633567080823971e-07, "loss": 0.0107, "step": 17639 }, { "epoch": 4.013651877133106, "grad_norm": 2.1088658219844243, "learning_rate": 1.1628375780029316e-07, "loss": 0.0421, "step": 17640 }, { "epoch": 4.013879408418657, "grad_norm": 0.8406010229644675, "learning_rate": 1.1623185518957469e-07, "loss": 0.0107, "step": 17641 }, { "epoch": 4.0141069397042095, "grad_norm": 0.2011705093949806, "learning_rate": 1.161799629771454e-07, "loss": 0.0008, "step": 17642 }, { "epoch": 4.014334470989761, "grad_norm": 0.9206908860467881, "learning_rate": 1.1612808116406575e-07, "loss": 0.0062, "step": 17643 }, { "epoch": 4.014562002275313, "grad_norm": 1.5817593410217157, "learning_rate": 1.1607620975139635e-07, "loss": 0.1187, "step": 17644 }, { "epoch": 4.014789533560864, "grad_norm": 0.3975974740908195, "learning_rate": 1.1602434874019706e-07, "loss": 0.0016, "step": 17645 }, { "epoch": 4.0150170648464165, "grad_norm": 1.0225423769590525, "learning_rate": 1.1597249813152806e-07, "loss": 0.0106, "step": 17646 }, { "epoch": 4.015244596131968, "grad_norm": 1.7890766247746916, "learning_rate": 1.1592065792644894e-07, "loss": 0.0581, "step": 17647 }, { "epoch": 4.01547212741752, "grad_norm": 0.8663950938534116, "learning_rate": 1.158688281260191e-07, "loss": 0.0456, "step": 17648 }, { "epoch": 4.015699658703071, "grad_norm": 1.4004556020866825, "learning_rate": 1.1581700873129799e-07, "loss": 0.0062, "step": 17649 }, { "epoch": 4.0159271899886235, "grad_norm": 1.3321024855524801, "learning_rate": 1.1576519974334469e-07, "loss": 0.0738, "step": 17650 }, { "epoch": 4.016154721274175, "grad_norm": 1.0846089415533704, "learning_rate": 1.157134011632182e-07, "loss": 0.0272, "step": 17651 }, { "epoch": 4.016382252559727, "grad_norm": 1.5729592803689596, "learning_rate": 1.1566161299197715e-07, "loss": 0.0569, "step": 17652 }, { "epoch": 4.016609783845278, "grad_norm": 0.9691931314499261, "learning_rate": 1.156098352306797e-07, "loss": 0.0765, "step": 17653 }, { "epoch": 4.0168373151308305, "grad_norm": 0.7062113223179699, "learning_rate": 1.155580678803845e-07, "loss": 0.0055, "step": 17654 }, { "epoch": 4.017064846416382, "grad_norm": 1.2355755652692513, "learning_rate": 1.1550631094214927e-07, "loss": 0.0176, "step": 17655 }, { "epoch": 4.017292377701934, "grad_norm": 1.6743855859713996, "learning_rate": 1.1545456441703191e-07, "loss": 0.0693, "step": 17656 }, { "epoch": 4.017519908987485, "grad_norm": 1.88066308905507, "learning_rate": 1.1540282830609005e-07, "loss": 0.0646, "step": 17657 }, { "epoch": 4.0177474402730375, "grad_norm": 0.944756390635904, "learning_rate": 1.1535110261038124e-07, "loss": 0.065, "step": 17658 }, { "epoch": 4.01797497155859, "grad_norm": 0.5480778168493167, "learning_rate": 1.1529938733096255e-07, "loss": 0.0014, "step": 17659 }, { "epoch": 4.018202502844141, "grad_norm": 1.0565391528511252, "learning_rate": 1.1524768246889077e-07, "loss": 0.0555, "step": 17660 }, { "epoch": 4.018430034129693, "grad_norm": 1.596314781856497, "learning_rate": 1.1519598802522291e-07, "loss": 0.0484, "step": 17661 }, { "epoch": 4.0186575654152445, "grad_norm": 6.048657860017804, "learning_rate": 1.1514430400101528e-07, "loss": 0.0078, "step": 17662 }, { "epoch": 4.018885096700797, "grad_norm": 1.0962488962537595, "learning_rate": 1.1509263039732433e-07, "loss": 0.0097, "step": 17663 }, { "epoch": 4.019112627986348, "grad_norm": 0.7854612285523758, "learning_rate": 1.1504096721520613e-07, "loss": 0.0099, "step": 17664 }, { "epoch": 4.0193401592719, "grad_norm": 1.4755413974747906, "learning_rate": 1.1498931445571675e-07, "loss": 0.1334, "step": 17665 }, { "epoch": 4.0195676905574516, "grad_norm": 0.11122629787742627, "learning_rate": 1.1493767211991178e-07, "loss": 0.0004, "step": 17666 }, { "epoch": 4.019795221843004, "grad_norm": 1.6602104324568576, "learning_rate": 1.1488604020884648e-07, "loss": 0.034, "step": 17667 }, { "epoch": 4.020022753128555, "grad_norm": 0.8864872131101048, "learning_rate": 1.1483441872357643e-07, "loss": 0.0092, "step": 17668 }, { "epoch": 4.020250284414107, "grad_norm": 1.4700842243515864, "learning_rate": 1.1478280766515644e-07, "loss": 0.1052, "step": 17669 }, { "epoch": 4.020477815699659, "grad_norm": 1.2796924006246668, "learning_rate": 1.1473120703464138e-07, "loss": 0.086, "step": 17670 }, { "epoch": 4.020705346985211, "grad_norm": 2.085631496891512, "learning_rate": 1.146796168330861e-07, "loss": 0.0787, "step": 17671 }, { "epoch": 4.020932878270762, "grad_norm": 1.2739016763501765, "learning_rate": 1.146280370615447e-07, "loss": 0.0377, "step": 17672 }, { "epoch": 4.021160409556314, "grad_norm": 2.4824615244749126, "learning_rate": 1.1457646772107173e-07, "loss": 0.0064, "step": 17673 }, { "epoch": 4.021387940841866, "grad_norm": 0.576322105638251, "learning_rate": 1.1452490881272073e-07, "loss": 0.0079, "step": 17674 }, { "epoch": 4.021615472127418, "grad_norm": 2.5072073060960287, "learning_rate": 1.1447336033754586e-07, "loss": 0.0333, "step": 17675 }, { "epoch": 4.021843003412969, "grad_norm": 2.492320488912514, "learning_rate": 1.1442182229660044e-07, "loss": 0.0142, "step": 17676 }, { "epoch": 4.022070534698521, "grad_norm": 0.8066309813057573, "learning_rate": 1.143702946909378e-07, "loss": 0.0044, "step": 17677 }, { "epoch": 4.022298065984073, "grad_norm": 0.6916374702487017, "learning_rate": 1.143187775216114e-07, "loss": 0.0035, "step": 17678 }, { "epoch": 4.022525597269625, "grad_norm": 1.1756478752328448, "learning_rate": 1.1426727078967366e-07, "loss": 0.0551, "step": 17679 }, { "epoch": 4.022753128555176, "grad_norm": 0.6185243829532452, "learning_rate": 1.1421577449617776e-07, "loss": 0.0065, "step": 17680 }, { "epoch": 4.022980659840728, "grad_norm": 1.2989248729868352, "learning_rate": 1.1416428864217586e-07, "loss": 0.0187, "step": 17681 }, { "epoch": 4.02320819112628, "grad_norm": 0.7853857616138913, "learning_rate": 1.1411281322872045e-07, "loss": 0.0193, "step": 17682 }, { "epoch": 4.023435722411832, "grad_norm": 1.6956190714539259, "learning_rate": 1.1406134825686339e-07, "loss": 0.056, "step": 17683 }, { "epoch": 4.023663253697383, "grad_norm": 1.3686721959694828, "learning_rate": 1.1400989372765653e-07, "loss": 0.0548, "step": 17684 }, { "epoch": 4.023890784982935, "grad_norm": 1.2918852215646446, "learning_rate": 1.1395844964215183e-07, "loss": 0.0387, "step": 17685 }, { "epoch": 4.024118316268487, "grad_norm": 0.8397108333451235, "learning_rate": 1.1390701600140032e-07, "loss": 0.0036, "step": 17686 }, { "epoch": 4.024345847554039, "grad_norm": 1.2509361189736141, "learning_rate": 1.1385559280645358e-07, "loss": 0.021, "step": 17687 }, { "epoch": 4.02457337883959, "grad_norm": 1.8481802493342834, "learning_rate": 1.1380418005836218e-07, "loss": 0.0872, "step": 17688 }, { "epoch": 4.024800910125142, "grad_norm": 2.3446365263483444, "learning_rate": 1.1375277775817734e-07, "loss": 0.0062, "step": 17689 }, { "epoch": 4.025028441410694, "grad_norm": 0.5806140750985324, "learning_rate": 1.1370138590694927e-07, "loss": 0.002, "step": 17690 }, { "epoch": 4.025255972696246, "grad_norm": 0.8262123508200857, "learning_rate": 1.1365000450572844e-07, "loss": 0.0293, "step": 17691 }, { "epoch": 4.025483503981797, "grad_norm": 0.8542610490388911, "learning_rate": 1.1359863355556514e-07, "loss": 0.008, "step": 17692 }, { "epoch": 4.025711035267349, "grad_norm": 1.3727077143236466, "learning_rate": 1.1354727305750907e-07, "loss": 0.0062, "step": 17693 }, { "epoch": 4.025938566552901, "grad_norm": 0.5957789824666445, "learning_rate": 1.1349592301261023e-07, "loss": 0.0036, "step": 17694 }, { "epoch": 4.026166097838453, "grad_norm": 1.9313353785564855, "learning_rate": 1.1344458342191777e-07, "loss": 0.1033, "step": 17695 }, { "epoch": 4.026393629124004, "grad_norm": 2.6992112058186972, "learning_rate": 1.1339325428648115e-07, "loss": 0.0604, "step": 17696 }, { "epoch": 4.026621160409556, "grad_norm": 1.2435942526032817, "learning_rate": 1.1334193560734966e-07, "loss": 0.0846, "step": 17697 }, { "epoch": 4.0268486916951085, "grad_norm": 1.2501288241856328, "learning_rate": 1.1329062738557176e-07, "loss": 0.0076, "step": 17698 }, { "epoch": 4.02707622298066, "grad_norm": 0.8370531825004323, "learning_rate": 1.132393296221964e-07, "loss": 0.0231, "step": 17699 }, { "epoch": 4.027303754266212, "grad_norm": 1.4624349786139064, "learning_rate": 1.1318804231827178e-07, "loss": 0.0633, "step": 17700 }, { "epoch": 4.027531285551763, "grad_norm": 2.5720657781918708, "learning_rate": 1.1313676547484636e-07, "loss": 0.0112, "step": 17701 }, { "epoch": 4.0277588168373155, "grad_norm": 0.7621269793509923, "learning_rate": 1.1308549909296792e-07, "loss": 0.0273, "step": 17702 }, { "epoch": 4.027986348122867, "grad_norm": 0.8860518538698643, "learning_rate": 1.130342431736843e-07, "loss": 0.0586, "step": 17703 }, { "epoch": 4.028213879408419, "grad_norm": 2.20768124560104, "learning_rate": 1.1298299771804334e-07, "loss": 0.13, "step": 17704 }, { "epoch": 4.02844141069397, "grad_norm": 0.6311718252512913, "learning_rate": 1.1293176272709199e-07, "loss": 0.0026, "step": 17705 }, { "epoch": 4.0286689419795225, "grad_norm": 2.0889308968518803, "learning_rate": 1.1288053820187772e-07, "loss": 0.0363, "step": 17706 }, { "epoch": 4.028896473265074, "grad_norm": 2.138390409000319, "learning_rate": 1.1282932414344729e-07, "loss": 0.0874, "step": 17707 }, { "epoch": 4.029124004550626, "grad_norm": 1.4607851078986323, "learning_rate": 1.1277812055284756e-07, "loss": 0.0136, "step": 17708 }, { "epoch": 4.029351535836177, "grad_norm": 0.7622105097108781, "learning_rate": 1.1272692743112483e-07, "loss": 0.0054, "step": 17709 }, { "epoch": 4.0295790671217295, "grad_norm": 0.46893734923350855, "learning_rate": 1.1267574477932551e-07, "loss": 0.0136, "step": 17710 }, { "epoch": 4.029806598407281, "grad_norm": 0.7120895976926754, "learning_rate": 1.1262457259849584e-07, "loss": 0.0068, "step": 17711 }, { "epoch": 4.030034129692833, "grad_norm": 1.752540370355385, "learning_rate": 1.1257341088968136e-07, "loss": 0.0249, "step": 17712 }, { "epoch": 4.030261660978384, "grad_norm": 1.1282218143809397, "learning_rate": 1.1252225965392813e-07, "loss": 0.0827, "step": 17713 }, { "epoch": 4.0304891922639365, "grad_norm": 0.7797615541970444, "learning_rate": 1.1247111889228112e-07, "loss": 0.0047, "step": 17714 }, { "epoch": 4.030716723549488, "grad_norm": 1.1993355853485497, "learning_rate": 1.1241998860578598e-07, "loss": 0.0171, "step": 17715 }, { "epoch": 4.03094425483504, "grad_norm": 0.9452734371700335, "learning_rate": 1.1236886879548734e-07, "loss": 0.0101, "step": 17716 }, { "epoch": 4.031171786120591, "grad_norm": 0.8085449127232492, "learning_rate": 1.1231775946243021e-07, "loss": 0.0083, "step": 17717 }, { "epoch": 4.0313993174061435, "grad_norm": 0.3905971456606157, "learning_rate": 1.1226666060765931e-07, "loss": 0.0022, "step": 17718 }, { "epoch": 4.031626848691695, "grad_norm": 1.1764093447510922, "learning_rate": 1.1221557223221866e-07, "loss": 0.0178, "step": 17719 }, { "epoch": 4.031854379977247, "grad_norm": 1.6960375074528107, "learning_rate": 1.1216449433715276e-07, "loss": 0.0905, "step": 17720 }, { "epoch": 4.032081911262798, "grad_norm": 1.3212572546345167, "learning_rate": 1.1211342692350537e-07, "loss": 0.0188, "step": 17721 }, { "epoch": 4.0323094425483506, "grad_norm": 0.9523744761951703, "learning_rate": 1.1206236999232005e-07, "loss": 0.0137, "step": 17722 }, { "epoch": 4.032536973833902, "grad_norm": 0.7248391037706122, "learning_rate": 1.1201132354464052e-07, "loss": 0.0176, "step": 17723 }, { "epoch": 4.032764505119454, "grad_norm": 1.231328894695138, "learning_rate": 1.1196028758151005e-07, "loss": 0.0169, "step": 17724 }, { "epoch": 4.032992036405005, "grad_norm": 1.6737768209836088, "learning_rate": 1.1190926210397184e-07, "loss": 0.0683, "step": 17725 }, { "epoch": 4.033219567690558, "grad_norm": 0.7231647043607078, "learning_rate": 1.1185824711306851e-07, "loss": 0.0043, "step": 17726 }, { "epoch": 4.033447098976109, "grad_norm": 0.478856522316905, "learning_rate": 1.1180724260984294e-07, "loss": 0.0038, "step": 17727 }, { "epoch": 4.033674630261661, "grad_norm": 0.8305683421075072, "learning_rate": 1.1175624859533753e-07, "loss": 0.0225, "step": 17728 }, { "epoch": 4.033902161547212, "grad_norm": 0.691028502302993, "learning_rate": 1.1170526507059423e-07, "loss": 0.0076, "step": 17729 }, { "epoch": 4.034129692832765, "grad_norm": 0.9489487636444688, "learning_rate": 1.1165429203665527e-07, "loss": 0.0051, "step": 17730 }, { "epoch": 4.034357224118316, "grad_norm": 1.4616018869897855, "learning_rate": 1.1160332949456246e-07, "loss": 0.0371, "step": 17731 }, { "epoch": 4.034584755403868, "grad_norm": 3.1358393270136755, "learning_rate": 1.1155237744535745e-07, "loss": 0.0221, "step": 17732 }, { "epoch": 4.034812286689419, "grad_norm": 1.3656426178549155, "learning_rate": 1.1150143589008143e-07, "loss": 0.0107, "step": 17733 }, { "epoch": 4.035039817974972, "grad_norm": 1.2425720226166015, "learning_rate": 1.1145050482977572e-07, "loss": 0.0329, "step": 17734 }, { "epoch": 4.035267349260523, "grad_norm": 1.8916113547946796, "learning_rate": 1.1139958426548123e-07, "loss": 0.0411, "step": 17735 }, { "epoch": 4.035494880546075, "grad_norm": 0.9434934380331651, "learning_rate": 1.1134867419823843e-07, "loss": 0.0204, "step": 17736 }, { "epoch": 4.035722411831627, "grad_norm": 2.33446662168472, "learning_rate": 1.1129777462908803e-07, "loss": 0.0314, "step": 17737 }, { "epoch": 4.035949943117179, "grad_norm": 0.44546829860031345, "learning_rate": 1.112468855590703e-07, "loss": 0.0039, "step": 17738 }, { "epoch": 4.036177474402731, "grad_norm": 0.9038217567613691, "learning_rate": 1.1119600698922546e-07, "loss": 0.0223, "step": 17739 }, { "epoch": 4.036405005688282, "grad_norm": 1.210011380237789, "learning_rate": 1.1114513892059329e-07, "loss": 0.025, "step": 17740 }, { "epoch": 4.036632536973834, "grad_norm": 1.1200955663745586, "learning_rate": 1.1109428135421326e-07, "loss": 0.0077, "step": 17741 }, { "epoch": 4.036860068259386, "grad_norm": 0.6367905349718476, "learning_rate": 1.1104343429112513e-07, "loss": 0.0034, "step": 17742 }, { "epoch": 4.037087599544938, "grad_norm": 1.2519417581448573, "learning_rate": 1.1099259773236768e-07, "loss": 0.0653, "step": 17743 }, { "epoch": 4.037315130830489, "grad_norm": 1.8518205271360764, "learning_rate": 1.1094177167898027e-07, "loss": 0.0297, "step": 17744 }, { "epoch": 4.037542662116041, "grad_norm": 1.2092089655152651, "learning_rate": 1.1089095613200152e-07, "loss": 0.0539, "step": 17745 }, { "epoch": 4.037770193401593, "grad_norm": 1.0557938834804164, "learning_rate": 1.1084015109247032e-07, "loss": 0.0345, "step": 17746 }, { "epoch": 4.037997724687145, "grad_norm": 2.200564361369353, "learning_rate": 1.1078935656142472e-07, "loss": 0.0847, "step": 17747 }, { "epoch": 4.038225255972696, "grad_norm": 1.8423505304732024, "learning_rate": 1.107385725399028e-07, "loss": 0.0492, "step": 17748 }, { "epoch": 4.038452787258248, "grad_norm": 0.552680736655265, "learning_rate": 1.1068779902894282e-07, "loss": 0.0022, "step": 17749 }, { "epoch": 4.0386803185438, "grad_norm": 0.9546278333465631, "learning_rate": 1.1063703602958192e-07, "loss": 0.0203, "step": 17750 }, { "epoch": 4.038907849829352, "grad_norm": 1.777160397521106, "learning_rate": 1.1058628354285839e-07, "loss": 0.0532, "step": 17751 }, { "epoch": 4.039135381114903, "grad_norm": 1.0086576422751938, "learning_rate": 1.1053554156980888e-07, "loss": 0.0134, "step": 17752 }, { "epoch": 4.039362912400455, "grad_norm": 0.8955851143346794, "learning_rate": 1.1048481011147096e-07, "loss": 0.0388, "step": 17753 }, { "epoch": 4.039590443686007, "grad_norm": 0.7154834416815623, "learning_rate": 1.1043408916888112e-07, "loss": 0.0057, "step": 17754 }, { "epoch": 4.039817974971559, "grad_norm": 0.9604620877154276, "learning_rate": 1.1038337874307609e-07, "loss": 0.0178, "step": 17755 }, { "epoch": 4.04004550625711, "grad_norm": 0.5530260399342334, "learning_rate": 1.1033267883509224e-07, "loss": 0.0022, "step": 17756 }, { "epoch": 4.040273037542662, "grad_norm": 0.6237624015689472, "learning_rate": 1.1028198944596586e-07, "loss": 0.0045, "step": 17757 }, { "epoch": 4.040500568828214, "grad_norm": 1.0446275111792858, "learning_rate": 1.102313105767331e-07, "loss": 0.0049, "step": 17758 }, { "epoch": 4.040728100113766, "grad_norm": 0.7053871222344081, "learning_rate": 1.1018064222842962e-07, "loss": 0.0051, "step": 17759 }, { "epoch": 4.040955631399317, "grad_norm": 1.9103838253614398, "learning_rate": 1.1012998440209082e-07, "loss": 0.059, "step": 17760 }, { "epoch": 4.041183162684869, "grad_norm": 1.1798299918160666, "learning_rate": 1.1007933709875234e-07, "loss": 0.0166, "step": 17761 }, { "epoch": 4.041410693970421, "grad_norm": 1.2354552684211535, "learning_rate": 1.1002870031944904e-07, "loss": 0.0579, "step": 17762 }, { "epoch": 4.041638225255973, "grad_norm": 1.1408311157446738, "learning_rate": 1.09978074065216e-07, "loss": 0.025, "step": 17763 }, { "epoch": 4.041865756541524, "grad_norm": 1.2403441956419412, "learning_rate": 1.0992745833708789e-07, "loss": 0.0911, "step": 17764 }, { "epoch": 4.042093287827076, "grad_norm": 2.25367088157972, "learning_rate": 1.0987685313609934e-07, "loss": 0.0766, "step": 17765 }, { "epoch": 4.042320819112628, "grad_norm": 0.9122833232882369, "learning_rate": 1.0982625846328455e-07, "loss": 0.0137, "step": 17766 }, { "epoch": 4.04254835039818, "grad_norm": 0.7387355697713722, "learning_rate": 1.0977567431967734e-07, "loss": 0.0066, "step": 17767 }, { "epoch": 4.042775881683731, "grad_norm": 1.1041523035354963, "learning_rate": 1.0972510070631194e-07, "loss": 0.0469, "step": 17768 }, { "epoch": 4.043003412969283, "grad_norm": 1.4850313675789772, "learning_rate": 1.0967453762422163e-07, "loss": 0.0215, "step": 17769 }, { "epoch": 4.043230944254835, "grad_norm": 0.6836619767280068, "learning_rate": 1.0962398507444002e-07, "loss": 0.0074, "step": 17770 }, { "epoch": 4.043458475540387, "grad_norm": 1.9958639431159153, "learning_rate": 1.0957344305800033e-07, "loss": 0.006, "step": 17771 }, { "epoch": 4.043686006825938, "grad_norm": 1.0065536226294012, "learning_rate": 1.095229115759356e-07, "loss": 0.0204, "step": 17772 }, { "epoch": 4.04391353811149, "grad_norm": 1.7179781101761271, "learning_rate": 1.0947239062927849e-07, "loss": 0.0316, "step": 17773 }, { "epoch": 4.044141069397042, "grad_norm": 1.03727472606269, "learning_rate": 1.0942188021906143e-07, "loss": 0.0105, "step": 17774 }, { "epoch": 4.044368600682594, "grad_norm": 1.638087795229217, "learning_rate": 1.0937138034631707e-07, "loss": 0.0091, "step": 17775 }, { "epoch": 4.044596131968146, "grad_norm": 1.278674648786237, "learning_rate": 1.0932089101207723e-07, "loss": 0.0792, "step": 17776 }, { "epoch": 4.044823663253697, "grad_norm": 1.1288932953398363, "learning_rate": 1.0927041221737392e-07, "loss": 0.021, "step": 17777 }, { "epoch": 4.0450511945392496, "grad_norm": 0.7759097220816846, "learning_rate": 1.0921994396323909e-07, "loss": 0.0298, "step": 17778 }, { "epoch": 4.045278725824801, "grad_norm": 1.2875620496903326, "learning_rate": 1.0916948625070375e-07, "loss": 0.0327, "step": 17779 }, { "epoch": 4.045506257110353, "grad_norm": 1.1054125774694699, "learning_rate": 1.0911903908079967e-07, "loss": 0.0186, "step": 17780 }, { "epoch": 4.045733788395904, "grad_norm": 5.515843662988748, "learning_rate": 1.0906860245455738e-07, "loss": 0.0158, "step": 17781 }, { "epoch": 4.045961319681457, "grad_norm": 1.7366509953064182, "learning_rate": 1.0901817637300818e-07, "loss": 0.1181, "step": 17782 }, { "epoch": 4.046188850967008, "grad_norm": 1.0743083239292675, "learning_rate": 1.0896776083718231e-07, "loss": 0.0131, "step": 17783 }, { "epoch": 4.04641638225256, "grad_norm": 0.768878454340582, "learning_rate": 1.089173558481104e-07, "loss": 0.0029, "step": 17784 }, { "epoch": 4.046643913538111, "grad_norm": 1.319718634974289, "learning_rate": 1.0886696140682267e-07, "loss": 0.0195, "step": 17785 }, { "epoch": 4.046871444823664, "grad_norm": 1.3222745683862769, "learning_rate": 1.0881657751434883e-07, "loss": 0.016, "step": 17786 }, { "epoch": 4.047098976109215, "grad_norm": 1.3094453914172894, "learning_rate": 1.0876620417171898e-07, "loss": 0.0767, "step": 17787 }, { "epoch": 4.047326507394767, "grad_norm": 1.6117837073346692, "learning_rate": 1.087158413799623e-07, "loss": 0.0881, "step": 17788 }, { "epoch": 4.047554038680318, "grad_norm": 1.4549388023043701, "learning_rate": 1.086654891401085e-07, "loss": 0.0719, "step": 17789 }, { "epoch": 4.047781569965871, "grad_norm": 0.9522579501941167, "learning_rate": 1.086151474531863e-07, "loss": 0.0207, "step": 17790 }, { "epoch": 4.048009101251422, "grad_norm": 1.5603632296450076, "learning_rate": 1.0856481632022478e-07, "loss": 0.0169, "step": 17791 }, { "epoch": 4.048236632536974, "grad_norm": 1.377084140917742, "learning_rate": 1.0851449574225272e-07, "loss": 0.0145, "step": 17792 }, { "epoch": 4.048464163822525, "grad_norm": 0.9702403844175964, "learning_rate": 1.0846418572029831e-07, "loss": 0.0102, "step": 17793 }, { "epoch": 4.048691695108078, "grad_norm": 1.1325131241413577, "learning_rate": 1.0841388625539018e-07, "loss": 0.0072, "step": 17794 }, { "epoch": 4.048919226393629, "grad_norm": 0.8733483775999983, "learning_rate": 1.0836359734855591e-07, "loss": 0.0068, "step": 17795 }, { "epoch": 4.049146757679181, "grad_norm": 0.6244884443291712, "learning_rate": 1.083133190008237e-07, "loss": 0.0029, "step": 17796 }, { "epoch": 4.049374288964732, "grad_norm": 0.5102457169527927, "learning_rate": 1.0826305121322087e-07, "loss": 0.0026, "step": 17797 }, { "epoch": 4.049601820250285, "grad_norm": 1.4184130400158612, "learning_rate": 1.082127939867749e-07, "loss": 0.0135, "step": 17798 }, { "epoch": 4.049829351535836, "grad_norm": 0.6523002417641088, "learning_rate": 1.0816254732251314e-07, "loss": 0.027, "step": 17799 }, { "epoch": 4.050056882821388, "grad_norm": 1.0159688861089575, "learning_rate": 1.0811231122146215e-07, "loss": 0.0083, "step": 17800 }, { "epoch": 4.050284414106939, "grad_norm": 1.2477034621110916, "learning_rate": 1.0806208568464905e-07, "loss": 0.0257, "step": 17801 }, { "epoch": 4.050511945392492, "grad_norm": 1.0377992360658157, "learning_rate": 1.0801187071310012e-07, "loss": 0.0219, "step": 17802 }, { "epoch": 4.050739476678043, "grad_norm": 1.083301940986508, "learning_rate": 1.0796166630784178e-07, "loss": 0.0098, "step": 17803 }, { "epoch": 4.050967007963595, "grad_norm": 1.3218435116778569, "learning_rate": 1.079114724699e-07, "loss": 0.0168, "step": 17804 }, { "epoch": 4.051194539249146, "grad_norm": 1.284271897299535, "learning_rate": 1.0786128920030067e-07, "loss": 0.0933, "step": 17805 }, { "epoch": 4.051422070534699, "grad_norm": 1.6446776537552465, "learning_rate": 1.0781111650006969e-07, "loss": 0.0412, "step": 17806 }, { "epoch": 4.05164960182025, "grad_norm": 1.0936717044289146, "learning_rate": 1.0776095437023209e-07, "loss": 0.0341, "step": 17807 }, { "epoch": 4.051877133105802, "grad_norm": 1.2728377858160622, "learning_rate": 1.0771080281181352e-07, "loss": 0.0183, "step": 17808 }, { "epoch": 4.052104664391353, "grad_norm": 0.8424467332054607, "learning_rate": 1.0766066182583877e-07, "loss": 0.0116, "step": 17809 }, { "epoch": 4.052332195676906, "grad_norm": 0.9191780618094041, "learning_rate": 1.0761053141333244e-07, "loss": 0.0078, "step": 17810 }, { "epoch": 4.052559726962457, "grad_norm": 0.6568960896388525, "learning_rate": 1.0756041157531927e-07, "loss": 0.0074, "step": 17811 }, { "epoch": 4.052787258248009, "grad_norm": 1.0025134950660513, "learning_rate": 1.075103023128237e-07, "loss": 0.0032, "step": 17812 }, { "epoch": 4.05301478953356, "grad_norm": 1.036655447845105, "learning_rate": 1.0746020362686994e-07, "loss": 0.0136, "step": 17813 }, { "epoch": 4.053242320819113, "grad_norm": 0.990677388430804, "learning_rate": 1.074101155184816e-07, "loss": 0.0678, "step": 17814 }, { "epoch": 4.053469852104665, "grad_norm": 1.130856989419854, "learning_rate": 1.0736003798868279e-07, "loss": 0.0105, "step": 17815 }, { "epoch": 4.053697383390216, "grad_norm": 0.6384590672347978, "learning_rate": 1.0730997103849679e-07, "loss": 0.0027, "step": 17816 }, { "epoch": 4.053924914675768, "grad_norm": 1.8525104554104028, "learning_rate": 1.0725991466894653e-07, "loss": 0.0753, "step": 17817 }, { "epoch": 4.05415244596132, "grad_norm": 0.7100992653672858, "learning_rate": 1.0720986888105571e-07, "loss": 0.009, "step": 17818 }, { "epoch": 4.054379977246872, "grad_norm": 0.9274031737709287, "learning_rate": 1.0715983367584677e-07, "loss": 0.0533, "step": 17819 }, { "epoch": 4.054607508532423, "grad_norm": 0.8786220512398047, "learning_rate": 1.0710980905434258e-07, "loss": 0.0079, "step": 17820 }, { "epoch": 4.054835039817975, "grad_norm": 1.2852694066135404, "learning_rate": 1.0705979501756531e-07, "loss": 0.0633, "step": 17821 }, { "epoch": 4.055062571103527, "grad_norm": 0.8509973040436096, "learning_rate": 1.0700979156653739e-07, "loss": 0.0585, "step": 17822 }, { "epoch": 4.055290102389079, "grad_norm": 1.4428587307522485, "learning_rate": 1.0695979870228052e-07, "loss": 0.0724, "step": 17823 }, { "epoch": 4.05551763367463, "grad_norm": 1.7524933790956565, "learning_rate": 1.0690981642581665e-07, "loss": 0.0115, "step": 17824 }, { "epoch": 4.055745164960182, "grad_norm": 2.5901800895682703, "learning_rate": 1.068598447381674e-07, "loss": 0.0156, "step": 17825 }, { "epoch": 4.055972696245734, "grad_norm": 1.1350473845200717, "learning_rate": 1.0680988364035388e-07, "loss": 0.0854, "step": 17826 }, { "epoch": 4.056200227531286, "grad_norm": 0.6186334483501811, "learning_rate": 1.0675993313339745e-07, "loss": 0.0038, "step": 17827 }, { "epoch": 4.056427758816837, "grad_norm": 1.1079307172042738, "learning_rate": 1.0670999321831889e-07, "loss": 0.0245, "step": 17828 }, { "epoch": 4.056655290102389, "grad_norm": 0.6779164739441222, "learning_rate": 1.0666006389613868e-07, "loss": 0.0108, "step": 17829 }, { "epoch": 4.056882821387941, "grad_norm": 1.1706483690271612, "learning_rate": 1.0661014516787749e-07, "loss": 0.0597, "step": 17830 }, { "epoch": 4.057110352673493, "grad_norm": 1.1449400789656652, "learning_rate": 1.0656023703455559e-07, "loss": 0.0172, "step": 17831 }, { "epoch": 4.057337883959044, "grad_norm": 0.5976085853641901, "learning_rate": 1.0651033949719308e-07, "loss": 0.0145, "step": 17832 }, { "epoch": 4.057565415244596, "grad_norm": 0.9945605820144422, "learning_rate": 1.064604525568095e-07, "loss": 0.0306, "step": 17833 }, { "epoch": 4.057792946530148, "grad_norm": 0.6479788132157002, "learning_rate": 1.0641057621442477e-07, "loss": 0.0041, "step": 17834 }, { "epoch": 4.0580204778157, "grad_norm": 0.7229885080527947, "learning_rate": 1.063607104710581e-07, "loss": 0.0075, "step": 17835 }, { "epoch": 4.058248009101251, "grad_norm": 0.941587839627253, "learning_rate": 1.0631085532772853e-07, "loss": 0.0103, "step": 17836 }, { "epoch": 4.058475540386803, "grad_norm": 1.5603377607984013, "learning_rate": 1.062610107854551e-07, "loss": 0.0317, "step": 17837 }, { "epoch": 4.058703071672355, "grad_norm": 0.3823904799005061, "learning_rate": 1.0621117684525665e-07, "loss": 0.0015, "step": 17838 }, { "epoch": 4.058930602957907, "grad_norm": 1.0058963518828683, "learning_rate": 1.0616135350815176e-07, "loss": 0.0053, "step": 17839 }, { "epoch": 4.059158134243458, "grad_norm": 0.9611659490036095, "learning_rate": 1.0611154077515839e-07, "loss": 0.0095, "step": 17840 }, { "epoch": 4.05938566552901, "grad_norm": 1.0903000791813695, "learning_rate": 1.0606173864729508e-07, "loss": 0.0255, "step": 17841 }, { "epoch": 4.059613196814562, "grad_norm": 1.1266772618412237, "learning_rate": 1.0601194712557939e-07, "loss": 0.0842, "step": 17842 }, { "epoch": 4.059840728100114, "grad_norm": 0.935997905208159, "learning_rate": 1.0596216621102884e-07, "loss": 0.0447, "step": 17843 }, { "epoch": 4.060068259385665, "grad_norm": 0.9062428087280867, "learning_rate": 1.0591239590466102e-07, "loss": 0.021, "step": 17844 }, { "epoch": 4.060295790671217, "grad_norm": 1.011050213743479, "learning_rate": 1.0586263620749327e-07, "loss": 0.0375, "step": 17845 }, { "epoch": 4.060523321956769, "grad_norm": 3.654792862632505, "learning_rate": 1.0581288712054251e-07, "loss": 0.0214, "step": 17846 }, { "epoch": 4.060750853242321, "grad_norm": 0.9031484792451855, "learning_rate": 1.0576314864482559e-07, "loss": 0.0153, "step": 17847 }, { "epoch": 4.060978384527872, "grad_norm": 1.6612631389735752, "learning_rate": 1.0571342078135878e-07, "loss": 0.054, "step": 17848 }, { "epoch": 4.061205915813424, "grad_norm": 1.3735932468270746, "learning_rate": 1.0566370353115876e-07, "loss": 0.0175, "step": 17849 }, { "epoch": 4.061433447098976, "grad_norm": 1.3795497887641655, "learning_rate": 1.0561399689524138e-07, "loss": 0.0104, "step": 17850 }, { "epoch": 4.061660978384528, "grad_norm": 0.8869279336503397, "learning_rate": 1.0556430087462266e-07, "loss": 0.0449, "step": 17851 }, { "epoch": 4.06188850967008, "grad_norm": 1.3647366820649798, "learning_rate": 1.0551461547031843e-07, "loss": 0.0346, "step": 17852 }, { "epoch": 4.062116040955631, "grad_norm": 0.4789279960665667, "learning_rate": 1.0546494068334411e-07, "loss": 0.0019, "step": 17853 }, { "epoch": 4.062343572241184, "grad_norm": 2.658531188677953, "learning_rate": 1.0541527651471498e-07, "loss": 0.0537, "step": 17854 }, { "epoch": 4.062571103526735, "grad_norm": 0.6090359160370868, "learning_rate": 1.0536562296544587e-07, "loss": 0.0096, "step": 17855 }, { "epoch": 4.062798634812287, "grad_norm": 2.5171101663974147, "learning_rate": 1.0531598003655192e-07, "loss": 0.0363, "step": 17856 }, { "epoch": 4.063026166097838, "grad_norm": 1.201722323007972, "learning_rate": 1.0526634772904734e-07, "loss": 0.0442, "step": 17857 }, { "epoch": 4.063253697383391, "grad_norm": 1.7388173162411853, "learning_rate": 1.0521672604394684e-07, "loss": 0.0473, "step": 17858 }, { "epoch": 4.063481228668942, "grad_norm": 0.902265260737337, "learning_rate": 1.0516711498226452e-07, "loss": 0.006, "step": 17859 }, { "epoch": 4.063708759954494, "grad_norm": 0.8123574758078983, "learning_rate": 1.0511751454501451e-07, "loss": 0.0047, "step": 17860 }, { "epoch": 4.063936291240045, "grad_norm": 0.704497086021004, "learning_rate": 1.0506792473321037e-07, "loss": 0.0059, "step": 17861 }, { "epoch": 4.064163822525598, "grad_norm": 1.7853145743582302, "learning_rate": 1.0501834554786545e-07, "loss": 0.1484, "step": 17862 }, { "epoch": 4.064391353811149, "grad_norm": 0.3299001243568171, "learning_rate": 1.0496877698999341e-07, "loss": 0.0016, "step": 17863 }, { "epoch": 4.064618885096701, "grad_norm": 0.977426419448408, "learning_rate": 1.0491921906060708e-07, "loss": 0.0482, "step": 17864 }, { "epoch": 4.064846416382252, "grad_norm": 1.1949567050467196, "learning_rate": 1.0486967176071936e-07, "loss": 0.021, "step": 17865 }, { "epoch": 4.065073947667805, "grad_norm": 0.4023395209173142, "learning_rate": 1.0482013509134317e-07, "loss": 0.0016, "step": 17866 }, { "epoch": 4.065301478953356, "grad_norm": 1.3788245797282679, "learning_rate": 1.0477060905349057e-07, "loss": 0.0097, "step": 17867 }, { "epoch": 4.065529010238908, "grad_norm": 1.4703436632927267, "learning_rate": 1.0472109364817413e-07, "loss": 0.058, "step": 17868 }, { "epoch": 4.065756541524459, "grad_norm": 1.6266379458918416, "learning_rate": 1.0467158887640556e-07, "loss": 0.0094, "step": 17869 }, { "epoch": 4.065984072810012, "grad_norm": 1.4604983428901792, "learning_rate": 1.046220947391969e-07, "loss": 0.1001, "step": 17870 }, { "epoch": 4.066211604095563, "grad_norm": 1.352031069317672, "learning_rate": 1.0457261123755947e-07, "loss": 0.0111, "step": 17871 }, { "epoch": 4.066439135381115, "grad_norm": 1.6758830486649432, "learning_rate": 1.045231383725047e-07, "loss": 0.0465, "step": 17872 }, { "epoch": 4.066666666666666, "grad_norm": 1.0773984772132772, "learning_rate": 1.0447367614504399e-07, "loss": 0.0145, "step": 17873 }, { "epoch": 4.066894197952219, "grad_norm": 1.1455690752672245, "learning_rate": 1.0442422455618787e-07, "loss": 0.056, "step": 17874 }, { "epoch": 4.06712172923777, "grad_norm": 1.7738387401305173, "learning_rate": 1.0437478360694732e-07, "loss": 0.025, "step": 17875 }, { "epoch": 4.067349260523322, "grad_norm": 1.1718310495043067, "learning_rate": 1.0432535329833259e-07, "loss": 0.04, "step": 17876 }, { "epoch": 4.067576791808873, "grad_norm": 1.1362728108105022, "learning_rate": 1.0427593363135411e-07, "loss": 0.0497, "step": 17877 }, { "epoch": 4.067804323094426, "grad_norm": 1.076536223189081, "learning_rate": 1.0422652460702183e-07, "loss": 0.0365, "step": 17878 }, { "epoch": 4.068031854379977, "grad_norm": 1.376113279392039, "learning_rate": 1.0417712622634579e-07, "loss": 0.0914, "step": 17879 }, { "epoch": 4.068259385665529, "grad_norm": 1.2334153463261226, "learning_rate": 1.0412773849033546e-07, "loss": 0.0487, "step": 17880 }, { "epoch": 4.0684869169510804, "grad_norm": 1.7560090173138314, "learning_rate": 1.0407836140000009e-07, "loss": 0.0545, "step": 17881 }, { "epoch": 4.068714448236633, "grad_norm": 2.7377112337335103, "learning_rate": 1.0402899495634909e-07, "loss": 0.1222, "step": 17882 }, { "epoch": 4.068941979522184, "grad_norm": 0.19038372211709875, "learning_rate": 1.0397963916039123e-07, "loss": 0.0006, "step": 17883 }, { "epoch": 4.069169510807736, "grad_norm": 1.2733053845878108, "learning_rate": 1.039302940131353e-07, "loss": 0.0439, "step": 17884 }, { "epoch": 4.0693970420932875, "grad_norm": 0.8216509853927865, "learning_rate": 1.0388095951559007e-07, "loss": 0.0047, "step": 17885 }, { "epoch": 4.06962457337884, "grad_norm": 2.279633590131751, "learning_rate": 1.0383163566876339e-07, "loss": 0.0168, "step": 17886 }, { "epoch": 4.069852104664391, "grad_norm": 1.4274440263369697, "learning_rate": 1.0378232247366387e-07, "loss": 0.0533, "step": 17887 }, { "epoch": 4.070079635949943, "grad_norm": 0.3135280066668031, "learning_rate": 1.0373301993129886e-07, "loss": 0.0012, "step": 17888 }, { "epoch": 4.0703071672354945, "grad_norm": 0.890748616762057, "learning_rate": 1.0368372804267647e-07, "loss": 0.0215, "step": 17889 }, { "epoch": 4.070534698521047, "grad_norm": 1.3858729201733135, "learning_rate": 1.036344468088038e-07, "loss": 0.0575, "step": 17890 }, { "epoch": 4.070762229806599, "grad_norm": 1.5980825935905185, "learning_rate": 1.0358517623068817e-07, "loss": 0.0711, "step": 17891 }, { "epoch": 4.07098976109215, "grad_norm": 0.7747449253709726, "learning_rate": 1.0353591630933677e-07, "loss": 0.0301, "step": 17892 }, { "epoch": 4.071217292377702, "grad_norm": 0.9195407811870486, "learning_rate": 1.0348666704575606e-07, "loss": 0.0076, "step": 17893 }, { "epoch": 4.071444823663254, "grad_norm": 0.9470476608065559, "learning_rate": 1.0343742844095297e-07, "loss": 0.0377, "step": 17894 }, { "epoch": 4.071672354948806, "grad_norm": 0.7719486523531424, "learning_rate": 1.0338820049593343e-07, "loss": 0.0044, "step": 17895 }, { "epoch": 4.071899886234357, "grad_norm": 1.4853223776009261, "learning_rate": 1.0333898321170397e-07, "loss": 0.1257, "step": 17896 }, { "epoch": 4.072127417519909, "grad_norm": 2.2233071958411657, "learning_rate": 1.0328977658927014e-07, "loss": 0.0448, "step": 17897 }, { "epoch": 4.072354948805461, "grad_norm": 1.0522522083738164, "learning_rate": 1.0324058062963785e-07, "loss": 0.0112, "step": 17898 }, { "epoch": 4.072582480091013, "grad_norm": 1.574161314885063, "learning_rate": 1.0319139533381268e-07, "loss": 0.0823, "step": 17899 }, { "epoch": 4.072810011376564, "grad_norm": 1.7077097336882163, "learning_rate": 1.0314222070279961e-07, "loss": 0.0841, "step": 17900 }, { "epoch": 4.073037542662116, "grad_norm": 0.7549391695117332, "learning_rate": 1.0309305673760393e-07, "loss": 0.0074, "step": 17901 }, { "epoch": 4.073265073947668, "grad_norm": 1.9058444817688882, "learning_rate": 1.0304390343923019e-07, "loss": 0.0115, "step": 17902 }, { "epoch": 4.07349260523322, "grad_norm": 1.3519608374240724, "learning_rate": 1.0299476080868329e-07, "loss": 0.0176, "step": 17903 }, { "epoch": 4.073720136518771, "grad_norm": 1.1357038239147355, "learning_rate": 1.0294562884696733e-07, "loss": 0.0091, "step": 17904 }, { "epoch": 4.073947667804323, "grad_norm": 0.8465303100949455, "learning_rate": 1.0289650755508661e-07, "loss": 0.0069, "step": 17905 }, { "epoch": 4.074175199089875, "grad_norm": 0.9149882159128289, "learning_rate": 1.0284739693404523e-07, "loss": 0.0084, "step": 17906 }, { "epoch": 4.074402730375427, "grad_norm": 1.2456878539417091, "learning_rate": 1.0279829698484664e-07, "loss": 0.0108, "step": 17907 }, { "epoch": 4.074630261660978, "grad_norm": 0.9632870317194674, "learning_rate": 1.0274920770849469e-07, "loss": 0.0466, "step": 17908 }, { "epoch": 4.07485779294653, "grad_norm": 1.3875419827506719, "learning_rate": 1.0270012910599224e-07, "loss": 0.0063, "step": 17909 }, { "epoch": 4.075085324232082, "grad_norm": 1.343148156361176, "learning_rate": 1.0265106117834281e-07, "loss": 0.0367, "step": 17910 }, { "epoch": 4.075312855517634, "grad_norm": 1.3039498408599162, "learning_rate": 1.0260200392654893e-07, "loss": 0.0412, "step": 17911 }, { "epoch": 4.075540386803185, "grad_norm": 0.6354168921591018, "learning_rate": 1.0255295735161332e-07, "loss": 0.0019, "step": 17912 }, { "epoch": 4.075767918088737, "grad_norm": 5.866216526335787, "learning_rate": 1.0250392145453863e-07, "loss": 0.1019, "step": 17913 }, { "epoch": 4.075995449374289, "grad_norm": 1.2169037492283878, "learning_rate": 1.0245489623632663e-07, "loss": 0.0476, "step": 17914 }, { "epoch": 4.076222980659841, "grad_norm": 0.9664348145119588, "learning_rate": 1.0240588169797983e-07, "loss": 0.0373, "step": 17915 }, { "epoch": 4.076450511945392, "grad_norm": 1.4418238100649399, "learning_rate": 1.0235687784049965e-07, "loss": 0.0185, "step": 17916 }, { "epoch": 4.076678043230944, "grad_norm": 1.5893333421542992, "learning_rate": 1.0230788466488756e-07, "loss": 0.0659, "step": 17917 }, { "epoch": 4.076905574516496, "grad_norm": 0.8587703095234741, "learning_rate": 1.0225890217214505e-07, "loss": 0.0274, "step": 17918 }, { "epoch": 4.077133105802048, "grad_norm": 1.0726875324904652, "learning_rate": 1.0220993036327329e-07, "loss": 0.0135, "step": 17919 }, { "epoch": 4.077360637087599, "grad_norm": 1.3859235975614055, "learning_rate": 1.0216096923927318e-07, "loss": 0.0706, "step": 17920 }, { "epoch": 4.077588168373151, "grad_norm": 1.1867290722578727, "learning_rate": 1.021120188011452e-07, "loss": 0.0588, "step": 17921 }, { "epoch": 4.077815699658703, "grad_norm": 1.3435436852480653, "learning_rate": 1.0206307904989006e-07, "loss": 0.069, "step": 17922 }, { "epoch": 4.078043230944255, "grad_norm": 3.5869506954033783, "learning_rate": 1.0201414998650793e-07, "loss": 0.0658, "step": 17923 }, { "epoch": 4.078270762229806, "grad_norm": 1.2738570265802924, "learning_rate": 1.019652316119986e-07, "loss": 0.0125, "step": 17924 }, { "epoch": 4.078498293515358, "grad_norm": 0.9227725679468154, "learning_rate": 1.0191632392736197e-07, "loss": 0.0211, "step": 17925 }, { "epoch": 4.07872582480091, "grad_norm": 1.5022707590631537, "learning_rate": 1.0186742693359779e-07, "loss": 0.0208, "step": 17926 }, { "epoch": 4.078953356086462, "grad_norm": 0.598491687477446, "learning_rate": 1.0181854063170543e-07, "loss": 0.0046, "step": 17927 }, { "epoch": 4.079180887372013, "grad_norm": 0.9440004545804108, "learning_rate": 1.0176966502268376e-07, "loss": 0.007, "step": 17928 }, { "epoch": 4.079408418657565, "grad_norm": 0.5114182799238967, "learning_rate": 1.0172080010753212e-07, "loss": 0.0034, "step": 17929 }, { "epoch": 4.079635949943118, "grad_norm": 1.3669220125133144, "learning_rate": 1.0167194588724893e-07, "loss": 0.0732, "step": 17930 }, { "epoch": 4.079863481228669, "grad_norm": 0.8801073230543675, "learning_rate": 1.0162310236283258e-07, "loss": 0.0336, "step": 17931 }, { "epoch": 4.080091012514221, "grad_norm": 0.8739015703207645, "learning_rate": 1.0157426953528151e-07, "loss": 0.007, "step": 17932 }, { "epoch": 4.080318543799772, "grad_norm": 1.348566823734545, "learning_rate": 1.0152544740559378e-07, "loss": 0.0086, "step": 17933 }, { "epoch": 4.080546075085325, "grad_norm": 1.3611087312741847, "learning_rate": 1.0147663597476737e-07, "loss": 0.0109, "step": 17934 }, { "epoch": 4.080773606370876, "grad_norm": 0.8507417065664921, "learning_rate": 1.0142783524379966e-07, "loss": 0.0121, "step": 17935 }, { "epoch": 4.081001137656428, "grad_norm": 1.0892324315008006, "learning_rate": 1.0137904521368806e-07, "loss": 0.0506, "step": 17936 }, { "epoch": 4.081228668941979, "grad_norm": 1.154832684743039, "learning_rate": 1.013302658854299e-07, "loss": 0.0067, "step": 17937 }, { "epoch": 4.081456200227532, "grad_norm": 1.4302628596649232, "learning_rate": 1.0128149726002179e-07, "loss": 0.0134, "step": 17938 }, { "epoch": 4.081683731513083, "grad_norm": 0.7617606644533511, "learning_rate": 1.0123273933846101e-07, "loss": 0.0048, "step": 17939 }, { "epoch": 4.081911262798635, "grad_norm": 0.980940036117552, "learning_rate": 1.0118399212174362e-07, "loss": 0.0108, "step": 17940 }, { "epoch": 4.0821387940841865, "grad_norm": 1.5554441558769252, "learning_rate": 1.0113525561086628e-07, "loss": 0.0267, "step": 17941 }, { "epoch": 4.082366325369739, "grad_norm": 1.130929429857344, "learning_rate": 1.010865298068249e-07, "loss": 0.0716, "step": 17942 }, { "epoch": 4.08259385665529, "grad_norm": 0.8421351196391298, "learning_rate": 1.0103781471061523e-07, "loss": 0.0173, "step": 17943 }, { "epoch": 4.082821387940842, "grad_norm": 0.8419501177248709, "learning_rate": 1.00989110323233e-07, "loss": 0.0241, "step": 17944 }, { "epoch": 4.0830489192263935, "grad_norm": 0.6355695051633136, "learning_rate": 1.0094041664567372e-07, "loss": 0.0031, "step": 17945 }, { "epoch": 4.083276450511946, "grad_norm": 0.9301700384174404, "learning_rate": 1.0089173367893265e-07, "loss": 0.0085, "step": 17946 }, { "epoch": 4.083503981797497, "grad_norm": 1.320748342719137, "learning_rate": 1.0084306142400454e-07, "loss": 0.0251, "step": 17947 }, { "epoch": 4.083731513083049, "grad_norm": 1.0689877037275994, "learning_rate": 1.0079439988188452e-07, "loss": 0.0087, "step": 17948 }, { "epoch": 4.0839590443686005, "grad_norm": 0.7308518286313753, "learning_rate": 1.0074574905356689e-07, "loss": 0.0055, "step": 17949 }, { "epoch": 4.084186575654153, "grad_norm": 0.8304694822400084, "learning_rate": 1.0069710894004582e-07, "loss": 0.0316, "step": 17950 }, { "epoch": 4.084414106939704, "grad_norm": 1.051764677496109, "learning_rate": 1.0064847954231571e-07, "loss": 0.0315, "step": 17951 }, { "epoch": 4.084641638225256, "grad_norm": 0.4081987151464875, "learning_rate": 1.0059986086137035e-07, "loss": 0.0026, "step": 17952 }, { "epoch": 4.0848691695108075, "grad_norm": 1.0801863239061997, "learning_rate": 1.0055125289820354e-07, "loss": 0.0098, "step": 17953 }, { "epoch": 4.08509670079636, "grad_norm": 1.0738208403793628, "learning_rate": 1.0050265565380871e-07, "loss": 0.0101, "step": 17954 }, { "epoch": 4.085324232081911, "grad_norm": 1.0550857178639612, "learning_rate": 1.004540691291788e-07, "loss": 0.0072, "step": 17955 }, { "epoch": 4.085551763367463, "grad_norm": 1.5788245096149796, "learning_rate": 1.0040549332530728e-07, "loss": 0.0103, "step": 17956 }, { "epoch": 4.0857792946530145, "grad_norm": 1.4831344599494964, "learning_rate": 1.0035692824318654e-07, "loss": 0.006, "step": 17957 }, { "epoch": 4.086006825938567, "grad_norm": 1.657714993712265, "learning_rate": 1.0030837388380933e-07, "loss": 0.077, "step": 17958 }, { "epoch": 4.086234357224118, "grad_norm": 2.2537565607371177, "learning_rate": 1.0025983024816811e-07, "loss": 0.0725, "step": 17959 }, { "epoch": 4.08646188850967, "grad_norm": 0.3607891300064617, "learning_rate": 1.0021129733725501e-07, "loss": 0.0018, "step": 17960 }, { "epoch": 4.0866894197952215, "grad_norm": 0.7848337797909744, "learning_rate": 1.0016277515206195e-07, "loss": 0.0055, "step": 17961 }, { "epoch": 4.086916951080774, "grad_norm": 1.507250695425442, "learning_rate": 1.0011426369358035e-07, "loss": 0.0316, "step": 17962 }, { "epoch": 4.087144482366325, "grad_norm": 1.1750325422441334, "learning_rate": 1.0006576296280213e-07, "loss": 0.0238, "step": 17963 }, { "epoch": 4.087372013651877, "grad_norm": 0.8021953297542286, "learning_rate": 1.0001727296071816e-07, "loss": 0.0195, "step": 17964 }, { "epoch": 4.0875995449374285, "grad_norm": 1.7531777036400922, "learning_rate": 9.996879368831966e-08, "loss": 0.0723, "step": 17965 }, { "epoch": 4.087827076222981, "grad_norm": 1.0349430754758553, "learning_rate": 9.992032514659751e-08, "loss": 0.0162, "step": 17966 }, { "epoch": 4.088054607508532, "grad_norm": 2.0137710238842117, "learning_rate": 9.987186733654238e-08, "loss": 0.0109, "step": 17967 }, { "epoch": 4.088282138794084, "grad_norm": 1.0256002196880305, "learning_rate": 9.982342025914458e-08, "loss": 0.0355, "step": 17968 }, { "epoch": 4.088509670079636, "grad_norm": 1.140621165828426, "learning_rate": 9.977498391539414e-08, "loss": 0.0109, "step": 17969 }, { "epoch": 4.088737201365188, "grad_norm": 1.5669233746662488, "learning_rate": 9.97265583062812e-08, "loss": 0.0585, "step": 17970 }, { "epoch": 4.08896473265074, "grad_norm": 1.2186699167358344, "learning_rate": 9.967814343279529e-08, "loss": 0.0347, "step": 17971 }, { "epoch": 4.089192263936291, "grad_norm": 0.8149037748865293, "learning_rate": 9.962973929592612e-08, "loss": 0.008, "step": 17972 }, { "epoch": 4.089419795221843, "grad_norm": 0.9988556251099353, "learning_rate": 9.958134589666297e-08, "loss": 0.0296, "step": 17973 }, { "epoch": 4.089647326507395, "grad_norm": 0.653081556908374, "learning_rate": 9.953296323599474e-08, "loss": 0.0129, "step": 17974 }, { "epoch": 4.089874857792947, "grad_norm": 1.1840256976833754, "learning_rate": 9.948459131491055e-08, "loss": 0.0097, "step": 17975 }, { "epoch": 4.090102389078498, "grad_norm": 1.5319429861903997, "learning_rate": 9.943623013439867e-08, "loss": 0.1011, "step": 17976 }, { "epoch": 4.09032992036405, "grad_norm": 1.4325497008744947, "learning_rate": 9.93878796954479e-08, "loss": 0.0745, "step": 17977 }, { "epoch": 4.090557451649602, "grad_norm": 1.4973600429817322, "learning_rate": 9.933953999904614e-08, "loss": 0.0459, "step": 17978 }, { "epoch": 4.090784982935154, "grad_norm": 2.1299450138804623, "learning_rate": 9.929121104618148e-08, "loss": 0.0728, "step": 17979 }, { "epoch": 4.091012514220705, "grad_norm": 1.0871775169381304, "learning_rate": 9.924289283784181e-08, "loss": 0.0107, "step": 17980 }, { "epoch": 4.091240045506257, "grad_norm": 1.3764471273167553, "learning_rate": 9.919458537501432e-08, "loss": 0.0228, "step": 17981 }, { "epoch": 4.091467576791809, "grad_norm": 1.81240109423337, "learning_rate": 9.914628865868677e-08, "loss": 0.0992, "step": 17982 }, { "epoch": 4.091695108077361, "grad_norm": 1.2396675617010526, "learning_rate": 9.909800268984581e-08, "loss": 0.0768, "step": 17983 }, { "epoch": 4.091922639362912, "grad_norm": 1.3011855300854922, "learning_rate": 9.904972746947878e-08, "loss": 0.0203, "step": 17984 }, { "epoch": 4.092150170648464, "grad_norm": 2.0689378367376405, "learning_rate": 9.900146299857188e-08, "loss": 0.1055, "step": 17985 }, { "epoch": 4.092377701934016, "grad_norm": 3.427939814243696, "learning_rate": 9.895320927811174e-08, "loss": 0.1237, "step": 17986 }, { "epoch": 4.092605233219568, "grad_norm": 0.9125435776064373, "learning_rate": 9.890496630908478e-08, "loss": 0.0104, "step": 17987 }, { "epoch": 4.092832764505119, "grad_norm": 0.5125954805470897, "learning_rate": 9.885673409247662e-08, "loss": 0.0016, "step": 17988 }, { "epoch": 4.093060295790671, "grad_norm": 1.1569906861384653, "learning_rate": 9.880851262927336e-08, "loss": 0.0104, "step": 17989 }, { "epoch": 4.093287827076223, "grad_norm": 1.0630247919746347, "learning_rate": 9.876030192046034e-08, "loss": 0.0199, "step": 17990 }, { "epoch": 4.093515358361775, "grad_norm": 1.0599037695409264, "learning_rate": 9.871210196702313e-08, "loss": 0.0452, "step": 17991 }, { "epoch": 4.093742889647326, "grad_norm": 1.271293998480983, "learning_rate": 9.866391276994652e-08, "loss": 0.0123, "step": 17992 }, { "epoch": 4.093970420932878, "grad_norm": 1.542489387765818, "learning_rate": 9.861573433021562e-08, "loss": 0.0841, "step": 17993 }, { "epoch": 4.09419795221843, "grad_norm": 2.0494385158713717, "learning_rate": 9.85675666488152e-08, "loss": 0.1746, "step": 17994 }, { "epoch": 4.094425483503982, "grad_norm": 1.3824074892146, "learning_rate": 9.851940972672946e-08, "loss": 0.0131, "step": 17995 }, { "epoch": 4.094653014789533, "grad_norm": 0.9323023664568114, "learning_rate": 9.847126356494298e-08, "loss": 0.0317, "step": 17996 }, { "epoch": 4.0948805460750854, "grad_norm": 1.092044686250523, "learning_rate": 9.84231281644394e-08, "loss": 0.0304, "step": 17997 }, { "epoch": 4.095108077360637, "grad_norm": 0.8715857921100381, "learning_rate": 9.837500352620267e-08, "loss": 0.0039, "step": 17998 }, { "epoch": 4.095335608646189, "grad_norm": 1.7052471876496182, "learning_rate": 9.832688965121656e-08, "loss": 0.0353, "step": 17999 }, { "epoch": 4.09556313993174, "grad_norm": 0.9330077473960893, "learning_rate": 9.827878654046414e-08, "loss": 0.0224, "step": 18000 }, { "epoch": 4.0957906712172925, "grad_norm": 3.373370514514379, "learning_rate": 9.823069419492882e-08, "loss": 0.0177, "step": 18001 }, { "epoch": 4.096018202502844, "grad_norm": 0.9733564964962587, "learning_rate": 9.818261261559326e-08, "loss": 0.0205, "step": 18002 }, { "epoch": 4.096245733788396, "grad_norm": 1.1476889558279602, "learning_rate": 9.813454180344038e-08, "loss": 0.0068, "step": 18003 }, { "epoch": 4.096473265073947, "grad_norm": 1.559838377230793, "learning_rate": 9.808648175945249e-08, "loss": 0.0736, "step": 18004 }, { "epoch": 4.0967007963594995, "grad_norm": 1.3291948342303879, "learning_rate": 9.803843248461182e-08, "loss": 0.0336, "step": 18005 }, { "epoch": 4.096928327645051, "grad_norm": 0.7169365032699758, "learning_rate": 9.799039397990068e-08, "loss": 0.0091, "step": 18006 }, { "epoch": 4.097155858930603, "grad_norm": 1.1991453540445187, "learning_rate": 9.794236624630062e-08, "loss": 0.058, "step": 18007 }, { "epoch": 4.097383390216155, "grad_norm": 1.582642297506143, "learning_rate": 9.789434928479343e-08, "loss": 0.0126, "step": 18008 }, { "epoch": 4.0976109215017065, "grad_norm": 0.5402319315646525, "learning_rate": 9.784634309636019e-08, "loss": 0.0112, "step": 18009 }, { "epoch": 4.097838452787259, "grad_norm": 1.41579708545338, "learning_rate": 9.77983476819825e-08, "loss": 0.0357, "step": 18010 }, { "epoch": 4.09806598407281, "grad_norm": 0.9709711341091735, "learning_rate": 9.775036304264087e-08, "loss": 0.0371, "step": 18011 }, { "epoch": 4.098293515358362, "grad_norm": 1.2433271895520783, "learning_rate": 9.77023891793162e-08, "loss": 0.0249, "step": 18012 }, { "epoch": 4.0985210466439135, "grad_norm": 6.5121808804324814, "learning_rate": 9.765442609298918e-08, "loss": 0.0217, "step": 18013 }, { "epoch": 4.098748577929466, "grad_norm": 2.2748506042957497, "learning_rate": 9.760647378463974e-08, "loss": 0.0167, "step": 18014 }, { "epoch": 4.098976109215017, "grad_norm": 0.8046327927651605, "learning_rate": 9.755853225524823e-08, "loss": 0.0467, "step": 18015 }, { "epoch": 4.099203640500569, "grad_norm": 0.9240429742063583, "learning_rate": 9.751060150579424e-08, "loss": 0.0161, "step": 18016 }, { "epoch": 4.0994311717861205, "grad_norm": 1.091852563686734, "learning_rate": 9.746268153725764e-08, "loss": 0.0461, "step": 18017 }, { "epoch": 4.099658703071673, "grad_norm": 0.7899287379401222, "learning_rate": 9.741477235061755e-08, "loss": 0.0135, "step": 18018 }, { "epoch": 4.099886234357224, "grad_norm": 1.2675388927925964, "learning_rate": 9.736687394685328e-08, "loss": 0.1058, "step": 18019 }, { "epoch": 4.100113765642776, "grad_norm": 0.7544514896223706, "learning_rate": 9.731898632694393e-08, "loss": 0.0039, "step": 18020 }, { "epoch": 4.1003412969283275, "grad_norm": 0.8147281614375641, "learning_rate": 9.727110949186797e-08, "loss": 0.0034, "step": 18021 }, { "epoch": 4.10056882821388, "grad_norm": 0.4816456390783287, "learning_rate": 9.722324344260417e-08, "loss": 0.0022, "step": 18022 }, { "epoch": 4.100796359499431, "grad_norm": 0.8926612697019127, "learning_rate": 9.717538818013066e-08, "loss": 0.0172, "step": 18023 }, { "epoch": 4.101023890784983, "grad_norm": 1.26893051307476, "learning_rate": 9.712754370542545e-08, "loss": 0.009, "step": 18024 }, { "epoch": 4.1012514220705345, "grad_norm": 1.9661181187999126, "learning_rate": 9.707971001946647e-08, "loss": 0.0677, "step": 18025 }, { "epoch": 4.101478953356087, "grad_norm": 1.0922671983152592, "learning_rate": 9.70318871232314e-08, "loss": 0.0091, "step": 18026 }, { "epoch": 4.101706484641638, "grad_norm": 0.7191016823334612, "learning_rate": 9.698407501769778e-08, "loss": 0.0037, "step": 18027 }, { "epoch": 4.10193401592719, "grad_norm": 1.353497622854553, "learning_rate": 9.693627370384256e-08, "loss": 0.0203, "step": 18028 }, { "epoch": 4.1021615472127415, "grad_norm": 0.9121395944052006, "learning_rate": 9.688848318264283e-08, "loss": 0.0569, "step": 18029 }, { "epoch": 4.102389078498294, "grad_norm": 1.27385136481969, "learning_rate": 9.684070345507535e-08, "loss": 0.0646, "step": 18030 }, { "epoch": 4.102616609783845, "grad_norm": 1.306366840868002, "learning_rate": 9.679293452211649e-08, "loss": 0.0115, "step": 18031 }, { "epoch": 4.102844141069397, "grad_norm": 1.0095739241977226, "learning_rate": 9.674517638474268e-08, "loss": 0.0241, "step": 18032 }, { "epoch": 4.1030716723549485, "grad_norm": 0.2825243388448537, "learning_rate": 9.669742904393e-08, "loss": 0.0013, "step": 18033 }, { "epoch": 4.103299203640501, "grad_norm": 0.7858053383692557, "learning_rate": 9.664969250065451e-08, "loss": 0.0078, "step": 18034 }, { "epoch": 4.103526734926052, "grad_norm": 3.489237767139496, "learning_rate": 9.66019667558915e-08, "loss": 0.0427, "step": 18035 }, { "epoch": 4.103754266211604, "grad_norm": 1.609732844580629, "learning_rate": 9.655425181061675e-08, "loss": 0.0211, "step": 18036 }, { "epoch": 4.1039817974971555, "grad_norm": 0.635843340131873, "learning_rate": 9.650654766580524e-08, "loss": 0.0044, "step": 18037 }, { "epoch": 4.104209328782708, "grad_norm": 1.2909186135119064, "learning_rate": 9.645885432243194e-08, "loss": 0.0109, "step": 18038 }, { "epoch": 4.104436860068259, "grad_norm": 2.036403142177265, "learning_rate": 9.641117178147162e-08, "loss": 0.1047, "step": 18039 }, { "epoch": 4.104664391353811, "grad_norm": 0.6366169629340517, "learning_rate": 9.636350004389887e-08, "loss": 0.0059, "step": 18040 }, { "epoch": 4.1048919226393625, "grad_norm": 1.3603490416770223, "learning_rate": 9.631583911068817e-08, "loss": 0.0241, "step": 18041 }, { "epoch": 4.105119453924915, "grad_norm": 1.321532418326707, "learning_rate": 9.626818898281355e-08, "loss": 0.0454, "step": 18042 }, { "epoch": 4.105346985210466, "grad_norm": 0.9071503921335757, "learning_rate": 9.62205496612486e-08, "loss": 0.0332, "step": 18043 }, { "epoch": 4.105574516496018, "grad_norm": 0.866716159841621, "learning_rate": 9.617292114696739e-08, "loss": 0.0041, "step": 18044 }, { "epoch": 4.1058020477815695, "grad_norm": 0.7416399604251772, "learning_rate": 9.612530344094297e-08, "loss": 0.0253, "step": 18045 }, { "epoch": 4.106029579067122, "grad_norm": 0.7763437835589604, "learning_rate": 9.607769654414879e-08, "loss": 0.0211, "step": 18046 }, { "epoch": 4.106257110352674, "grad_norm": 1.956870095031686, "learning_rate": 9.603010045755784e-08, "loss": 0.0511, "step": 18047 }, { "epoch": 4.106484641638225, "grad_norm": 0.49661729552495637, "learning_rate": 9.598251518214301e-08, "loss": 0.0018, "step": 18048 }, { "epoch": 4.106712172923777, "grad_norm": 1.5434335460843795, "learning_rate": 9.593494071887672e-08, "loss": 0.1106, "step": 18049 }, { "epoch": 4.106939704209329, "grad_norm": 1.2530310118036274, "learning_rate": 9.588737706873116e-08, "loss": 0.0146, "step": 18050 }, { "epoch": 4.107167235494881, "grad_norm": 0.8663165650108859, "learning_rate": 9.583982423267877e-08, "loss": 0.0034, "step": 18051 }, { "epoch": 4.107394766780432, "grad_norm": 1.8087965912505763, "learning_rate": 9.579228221169106e-08, "loss": 0.0543, "step": 18052 }, { "epoch": 4.1076222980659844, "grad_norm": 1.6422468365521408, "learning_rate": 9.574475100673989e-08, "loss": 0.1204, "step": 18053 }, { "epoch": 4.107849829351536, "grad_norm": 1.2120704952498658, "learning_rate": 9.569723061879678e-08, "loss": 0.0251, "step": 18054 }, { "epoch": 4.108077360637088, "grad_norm": 2.0504961265244015, "learning_rate": 9.564972104883302e-08, "loss": 0.0525, "step": 18055 }, { "epoch": 4.108304891922639, "grad_norm": 1.589666921354047, "learning_rate": 9.560222229781952e-08, "loss": 0.0551, "step": 18056 }, { "epoch": 4.1085324232081915, "grad_norm": 1.9273290541937598, "learning_rate": 9.555473436672687e-08, "loss": 0.0267, "step": 18057 }, { "epoch": 4.108759954493743, "grad_norm": 1.3357695502862432, "learning_rate": 9.550725725652597e-08, "loss": 0.0249, "step": 18058 }, { "epoch": 4.108987485779295, "grad_norm": 0.7681219501039132, "learning_rate": 9.545979096818674e-08, "loss": 0.0051, "step": 18059 }, { "epoch": 4.109215017064846, "grad_norm": 1.645263545605852, "learning_rate": 9.541233550267987e-08, "loss": 0.006, "step": 18060 }, { "epoch": 4.1094425483503985, "grad_norm": 0.9575269876553336, "learning_rate": 9.536489086097493e-08, "loss": 0.0083, "step": 18061 }, { "epoch": 4.10967007963595, "grad_norm": 1.3713798697673203, "learning_rate": 9.531745704404155e-08, "loss": 0.0515, "step": 18062 }, { "epoch": 4.109897610921502, "grad_norm": 0.7204949686235169, "learning_rate": 9.527003405284937e-08, "loss": 0.0034, "step": 18063 }, { "epoch": 4.110125142207053, "grad_norm": 1.6109188931156275, "learning_rate": 9.522262188836742e-08, "loss": 0.0881, "step": 18064 }, { "epoch": 4.1103526734926055, "grad_norm": 1.7171774068921843, "learning_rate": 9.517522055156485e-08, "loss": 0.0879, "step": 18065 }, { "epoch": 4.110580204778157, "grad_norm": 1.4186948976432925, "learning_rate": 9.512783004341046e-08, "loss": 0.1078, "step": 18066 }, { "epoch": 4.110807736063709, "grad_norm": 0.7828747556966982, "learning_rate": 9.508045036487293e-08, "loss": 0.0065, "step": 18067 }, { "epoch": 4.11103526734926, "grad_norm": 0.8239672299750732, "learning_rate": 9.503308151692044e-08, "loss": 0.0255, "step": 18068 }, { "epoch": 4.1112627986348125, "grad_norm": 0.4393061346913466, "learning_rate": 9.49857235005211e-08, "loss": 0.0024, "step": 18069 }, { "epoch": 4.111490329920364, "grad_norm": 0.9511761759902982, "learning_rate": 9.493837631664303e-08, "loss": 0.0074, "step": 18070 }, { "epoch": 4.111717861205916, "grad_norm": 0.9323668518832005, "learning_rate": 9.489103996625365e-08, "loss": 0.0096, "step": 18071 }, { "epoch": 4.111945392491467, "grad_norm": 1.3987914240379633, "learning_rate": 9.484371445032052e-08, "loss": 0.078, "step": 18072 }, { "epoch": 4.1121729237770195, "grad_norm": 2.6185467836100065, "learning_rate": 9.4796399769811e-08, "loss": 0.0224, "step": 18073 }, { "epoch": 4.112400455062571, "grad_norm": 1.418192138452374, "learning_rate": 9.474909592569213e-08, "loss": 0.0683, "step": 18074 }, { "epoch": 4.112627986348123, "grad_norm": 1.6117298228863197, "learning_rate": 9.47018029189306e-08, "loss": 0.1054, "step": 18075 }, { "epoch": 4.112855517633674, "grad_norm": 1.798366243660924, "learning_rate": 9.465452075049293e-08, "loss": 0.0762, "step": 18076 }, { "epoch": 4.1130830489192265, "grad_norm": 0.9055082559851524, "learning_rate": 9.460724942134576e-08, "loss": 0.0055, "step": 18077 }, { "epoch": 4.113310580204778, "grad_norm": 2.303980032936961, "learning_rate": 9.455998893245486e-08, "loss": 0.0084, "step": 18078 }, { "epoch": 4.11353811149033, "grad_norm": 1.4808376288140976, "learning_rate": 9.451273928478631e-08, "loss": 0.0466, "step": 18079 }, { "epoch": 4.113765642775881, "grad_norm": 1.1870454594613948, "learning_rate": 9.446550047930596e-08, "loss": 0.0574, "step": 18080 }, { "epoch": 4.1139931740614335, "grad_norm": 2.4564768093397267, "learning_rate": 9.441827251697907e-08, "loss": 0.0028, "step": 18081 }, { "epoch": 4.114220705346985, "grad_norm": 1.1677930183133942, "learning_rate": 9.43710553987711e-08, "loss": 0.0085, "step": 18082 }, { "epoch": 4.114448236632537, "grad_norm": 1.7208864128903854, "learning_rate": 9.432384912564679e-08, "loss": 0.0505, "step": 18083 }, { "epoch": 4.114675767918088, "grad_norm": 1.2005947215010508, "learning_rate": 9.427665369857126e-08, "loss": 0.0609, "step": 18084 }, { "epoch": 4.1149032992036405, "grad_norm": 1.676676006324908, "learning_rate": 9.422946911850881e-08, "loss": 0.0034, "step": 18085 }, { "epoch": 4.115130830489193, "grad_norm": 1.4025498859223506, "learning_rate": 9.418229538642396e-08, "loss": 0.0177, "step": 18086 }, { "epoch": 4.115358361774744, "grad_norm": 1.2737569753053617, "learning_rate": 9.413513250328098e-08, "loss": 0.0069, "step": 18087 }, { "epoch": 4.115585893060296, "grad_norm": 1.6072450791341244, "learning_rate": 9.408798047004355e-08, "loss": 0.074, "step": 18088 }, { "epoch": 4.1158134243458475, "grad_norm": 1.5184443415598392, "learning_rate": 9.404083928767558e-08, "loss": 0.0905, "step": 18089 }, { "epoch": 4.1160409556314, "grad_norm": 1.0338885030371394, "learning_rate": 9.399370895714036e-08, "loss": 0.0157, "step": 18090 }, { "epoch": 4.116268486916951, "grad_norm": 2.2228758754716553, "learning_rate": 9.39465894794013e-08, "loss": 0.0775, "step": 18091 }, { "epoch": 4.116496018202503, "grad_norm": 0.7284408635665377, "learning_rate": 9.389948085542125e-08, "loss": 0.0065, "step": 18092 }, { "epoch": 4.1167235494880545, "grad_norm": 1.7143411245199478, "learning_rate": 9.385238308616318e-08, "loss": 0.041, "step": 18093 }, { "epoch": 4.116951080773607, "grad_norm": 0.7399220676174549, "learning_rate": 9.380529617258979e-08, "loss": 0.0112, "step": 18094 }, { "epoch": 4.117178612059158, "grad_norm": 1.3984134325218351, "learning_rate": 9.375822011566317e-08, "loss": 0.064, "step": 18095 }, { "epoch": 4.11740614334471, "grad_norm": 0.8892062923127612, "learning_rate": 9.371115491634573e-08, "loss": 0.0265, "step": 18096 }, { "epoch": 4.1176336746302615, "grad_norm": 1.1661993590602626, "learning_rate": 9.366410057559915e-08, "loss": 0.0435, "step": 18097 }, { "epoch": 4.117861205915814, "grad_norm": 1.3426733388825371, "learning_rate": 9.361705709438542e-08, "loss": 0.0256, "step": 18098 }, { "epoch": 4.118088737201365, "grad_norm": 1.5575396302866726, "learning_rate": 9.357002447366565e-08, "loss": 0.0259, "step": 18099 }, { "epoch": 4.118316268486917, "grad_norm": 0.44863780651252727, "learning_rate": 9.352300271440136e-08, "loss": 0.0022, "step": 18100 }, { "epoch": 4.1185437997724685, "grad_norm": 1.3413191570599563, "learning_rate": 9.347599181755366e-08, "loss": 0.0493, "step": 18101 }, { "epoch": 4.118771331058021, "grad_norm": 0.8782327930549733, "learning_rate": 9.342899178408312e-08, "loss": 0.0145, "step": 18102 }, { "epoch": 4.118998862343572, "grad_norm": 0.8626034093393949, "learning_rate": 9.338200261495061e-08, "loss": 0.0082, "step": 18103 }, { "epoch": 4.119226393629124, "grad_norm": 1.877020295455659, "learning_rate": 9.33350243111162e-08, "loss": 0.1511, "step": 18104 }, { "epoch": 4.1194539249146755, "grad_norm": 2.357158601859884, "learning_rate": 9.328805687354031e-08, "loss": 0.0877, "step": 18105 }, { "epoch": 4.119681456200228, "grad_norm": 1.1684920926024815, "learning_rate": 9.324110030318266e-08, "loss": 0.0111, "step": 18106 }, { "epoch": 4.119908987485779, "grad_norm": 1.2113772272501544, "learning_rate": 9.319415460100304e-08, "loss": 0.0289, "step": 18107 }, { "epoch": 4.120136518771331, "grad_norm": 0.19027672000097356, "learning_rate": 9.314721976796108e-08, "loss": 0.0004, "step": 18108 }, { "epoch": 4.1203640500568826, "grad_norm": 1.6698808009875432, "learning_rate": 9.310029580501575e-08, "loss": 0.0083, "step": 18109 }, { "epoch": 4.120591581342435, "grad_norm": 1.161303482227358, "learning_rate": 9.305338271312642e-08, "loss": 0.0162, "step": 18110 }, { "epoch": 4.120819112627986, "grad_norm": 1.1162254911643383, "learning_rate": 9.300648049325157e-08, "loss": 0.0255, "step": 18111 }, { "epoch": 4.121046643913538, "grad_norm": 2.0511304416630685, "learning_rate": 9.295958914635007e-08, "loss": 0.0096, "step": 18112 }, { "epoch": 4.12127417519909, "grad_norm": 2.0333111867805758, "learning_rate": 9.291270867338006e-08, "loss": 0.1005, "step": 18113 }, { "epoch": 4.121501706484642, "grad_norm": 0.5042098498285044, "learning_rate": 9.286583907529983e-08, "loss": 0.0026, "step": 18114 }, { "epoch": 4.121729237770193, "grad_norm": 1.0834390848260942, "learning_rate": 9.28189803530674e-08, "loss": 0.0585, "step": 18115 }, { "epoch": 4.121956769055745, "grad_norm": 1.1332822323640561, "learning_rate": 9.277213250764027e-08, "loss": 0.0621, "step": 18116 }, { "epoch": 4.122184300341297, "grad_norm": 0.9349572091842074, "learning_rate": 9.272529553997613e-08, "loss": 0.0061, "step": 18117 }, { "epoch": 4.122411831626849, "grad_norm": 1.7090747299458167, "learning_rate": 9.267846945103222e-08, "loss": 0.0177, "step": 18118 }, { "epoch": 4.1226393629124, "grad_norm": 1.2810645635823517, "learning_rate": 9.263165424176511e-08, "loss": 0.0678, "step": 18119 }, { "epoch": 4.122866894197952, "grad_norm": 2.118443569565964, "learning_rate": 9.258484991313231e-08, "loss": 0.0519, "step": 18120 }, { "epoch": 4.123094425483504, "grad_norm": 0.42942063847053963, "learning_rate": 9.253805646609e-08, "loss": 0.0084, "step": 18121 }, { "epoch": 4.123321956769056, "grad_norm": 2.0722476883130247, "learning_rate": 9.249127390159471e-08, "loss": 0.0221, "step": 18122 }, { "epoch": 4.123549488054607, "grad_norm": 2.6648445985033495, "learning_rate": 9.244450222060248e-08, "loss": 0.0147, "step": 18123 }, { "epoch": 4.123777019340159, "grad_norm": 0.3599736521189349, "learning_rate": 9.23977414240694e-08, "loss": 0.0014, "step": 18124 }, { "epoch": 4.1240045506257115, "grad_norm": 1.0856053717704324, "learning_rate": 9.235099151295092e-08, "loss": 0.0095, "step": 18125 }, { "epoch": 4.124232081911263, "grad_norm": 1.094709326525311, "learning_rate": 9.230425248820272e-08, "loss": 0.0365, "step": 18126 }, { "epoch": 4.124459613196815, "grad_norm": 0.9488797890726599, "learning_rate": 9.225752435078013e-08, "loss": 0.0117, "step": 18127 }, { "epoch": 4.124687144482366, "grad_norm": 0.9507319346344374, "learning_rate": 9.22108071016379e-08, "loss": 0.0508, "step": 18128 }, { "epoch": 4.1249146757679185, "grad_norm": 0.7229120637314826, "learning_rate": 9.216410074173121e-08, "loss": 0.0159, "step": 18129 }, { "epoch": 4.12514220705347, "grad_norm": 0.754006532224796, "learning_rate": 9.211740527201446e-08, "loss": 0.0035, "step": 18130 }, { "epoch": 4.125369738339022, "grad_norm": 1.3349746731877474, "learning_rate": 9.207072069344181e-08, "loss": 0.0567, "step": 18131 }, { "epoch": 4.125597269624573, "grad_norm": 1.1904438201761347, "learning_rate": 9.202404700696763e-08, "loss": 0.0177, "step": 18132 }, { "epoch": 4.1258248009101255, "grad_norm": 1.102935343782153, "learning_rate": 9.197738421354588e-08, "loss": 0.031, "step": 18133 }, { "epoch": 4.126052332195677, "grad_norm": 1.0706641617862205, "learning_rate": 9.193073231413032e-08, "loss": 0.0241, "step": 18134 }, { "epoch": 4.126279863481229, "grad_norm": 0.7447153728303589, "learning_rate": 9.18840913096742e-08, "loss": 0.0011, "step": 18135 }, { "epoch": 4.12650739476678, "grad_norm": 2.617357156220698, "learning_rate": 9.1837461201131e-08, "loss": 0.0505, "step": 18136 }, { "epoch": 4.1267349260523325, "grad_norm": 0.7151963274833665, "learning_rate": 9.179084198945371e-08, "loss": 0.0059, "step": 18137 }, { "epoch": 4.126962457337884, "grad_norm": 1.2826033506739507, "learning_rate": 9.174423367559491e-08, "loss": 0.0162, "step": 18138 }, { "epoch": 4.127189988623436, "grad_norm": 0.7957071112943023, "learning_rate": 9.169763626050737e-08, "loss": 0.0134, "step": 18139 }, { "epoch": 4.127417519908987, "grad_norm": 0.4246055263790382, "learning_rate": 9.165104974514338e-08, "loss": 0.0021, "step": 18140 }, { "epoch": 4.1276450511945395, "grad_norm": 1.398485173956156, "learning_rate": 9.160447413045539e-08, "loss": 0.0655, "step": 18141 }, { "epoch": 4.127872582480091, "grad_norm": 1.5867716559602285, "learning_rate": 9.155790941739489e-08, "loss": 0.1201, "step": 18142 }, { "epoch": 4.128100113765643, "grad_norm": 1.2514230084882894, "learning_rate": 9.151135560691395e-08, "loss": 0.0093, "step": 18143 }, { "epoch": 4.128327645051194, "grad_norm": 0.7927952874639022, "learning_rate": 9.146481269996378e-08, "loss": 0.0253, "step": 18144 }, { "epoch": 4.1285551763367465, "grad_norm": 1.2836489575503023, "learning_rate": 9.141828069749566e-08, "loss": 0.0073, "step": 18145 }, { "epoch": 4.128782707622298, "grad_norm": 0.5259190520705496, "learning_rate": 9.137175960046062e-08, "loss": 0.0163, "step": 18146 }, { "epoch": 4.12901023890785, "grad_norm": 0.9416722589561942, "learning_rate": 9.132524940980957e-08, "loss": 0.0036, "step": 18147 }, { "epoch": 4.129237770193401, "grad_norm": 0.4530018491778273, "learning_rate": 9.127875012649313e-08, "loss": 0.0015, "step": 18148 }, { "epoch": 4.1294653014789535, "grad_norm": 1.652517346390681, "learning_rate": 9.12322617514616e-08, "loss": 0.0158, "step": 18149 }, { "epoch": 4.129692832764505, "grad_norm": 1.2611743224597975, "learning_rate": 9.118578428566498e-08, "loss": 0.0181, "step": 18150 }, { "epoch": 4.129920364050057, "grad_norm": 1.4075369496101766, "learning_rate": 9.11393177300534e-08, "loss": 0.0188, "step": 18151 }, { "epoch": 4.130147895335608, "grad_norm": 0.9889518909078743, "learning_rate": 9.109286208557625e-08, "loss": 0.0073, "step": 18152 }, { "epoch": 4.1303754266211605, "grad_norm": 0.9683521817238748, "learning_rate": 9.104641735318325e-08, "loss": 0.0132, "step": 18153 }, { "epoch": 4.130602957906712, "grad_norm": 1.2702058226174757, "learning_rate": 9.099998353382364e-08, "loss": 0.0079, "step": 18154 }, { "epoch": 4.130830489192264, "grad_norm": 7.403780045473981, "learning_rate": 9.095356062844646e-08, "loss": 0.0651, "step": 18155 }, { "epoch": 4.131058020477815, "grad_norm": 1.2967601826706274, "learning_rate": 9.090714863800046e-08, "loss": 0.0273, "step": 18156 }, { "epoch": 4.1312855517633675, "grad_norm": 1.0225803967986091, "learning_rate": 9.086074756343405e-08, "loss": 0.0048, "step": 18157 }, { "epoch": 4.131513083048919, "grad_norm": 1.1629982133173344, "learning_rate": 9.081435740569591e-08, "loss": 0.02, "step": 18158 }, { "epoch": 4.131740614334471, "grad_norm": 1.4179584201143567, "learning_rate": 9.076797816573384e-08, "loss": 0.011, "step": 18159 }, { "epoch": 4.131968145620022, "grad_norm": 1.8681524078908986, "learning_rate": 9.072160984449581e-08, "loss": 0.01, "step": 18160 }, { "epoch": 4.1321956769055745, "grad_norm": 0.8604662803879098, "learning_rate": 9.067525244292969e-08, "loss": 0.0474, "step": 18161 }, { "epoch": 4.132423208191126, "grad_norm": 1.753678991019194, "learning_rate": 9.062890596198296e-08, "loss": 0.0689, "step": 18162 }, { "epoch": 4.132650739476678, "grad_norm": 1.5405185895655913, "learning_rate": 9.058257040260269e-08, "loss": 0.0271, "step": 18163 }, { "epoch": 4.13287827076223, "grad_norm": 0.9062543653973791, "learning_rate": 9.053624576573585e-08, "loss": 0.0232, "step": 18164 }, { "epoch": 4.1331058020477816, "grad_norm": 0.9397555089959025, "learning_rate": 9.048993205232947e-08, "loss": 0.0176, "step": 18165 }, { "epoch": 4.133333333333334, "grad_norm": 2.12365994706768, "learning_rate": 9.044362926332976e-08, "loss": 0.0232, "step": 18166 }, { "epoch": 4.133560864618885, "grad_norm": 1.1649391188609732, "learning_rate": 9.039733739968338e-08, "loss": 0.0634, "step": 18167 }, { "epoch": 4.133788395904437, "grad_norm": 0.15594558465730507, "learning_rate": 9.035105646233638e-08, "loss": 0.0005, "step": 18168 }, { "epoch": 4.134015927189989, "grad_norm": 1.3664754297754864, "learning_rate": 9.030478645223454e-08, "loss": 0.0894, "step": 18169 }, { "epoch": 4.134243458475541, "grad_norm": 0.5944519967488505, "learning_rate": 9.025852737032374e-08, "loss": 0.0057, "step": 18170 }, { "epoch": 4.134470989761092, "grad_norm": 1.760022720460159, "learning_rate": 9.021227921754922e-08, "loss": 0.0723, "step": 18171 }, { "epoch": 4.134698521046644, "grad_norm": 1.209325976837224, "learning_rate": 9.016604199485635e-08, "loss": 0.0118, "step": 18172 }, { "epoch": 4.134926052332196, "grad_norm": 1.5629610238482377, "learning_rate": 9.011981570319003e-08, "loss": 0.0071, "step": 18173 }, { "epoch": 4.135153583617748, "grad_norm": 1.2601378675785688, "learning_rate": 9.007360034349502e-08, "loss": 0.0213, "step": 18174 }, { "epoch": 4.135381114903299, "grad_norm": 4.05447722637739, "learning_rate": 9.00273959167161e-08, "loss": 0.0358, "step": 18175 }, { "epoch": 4.135608646188851, "grad_norm": 1.1895742707706352, "learning_rate": 8.998120242379734e-08, "loss": 0.1055, "step": 18176 }, { "epoch": 4.135836177474403, "grad_norm": 1.0291220991016166, "learning_rate": 8.993501986568308e-08, "loss": 0.0127, "step": 18177 }, { "epoch": 4.136063708759955, "grad_norm": 0.9945269033677457, "learning_rate": 8.988884824331696e-08, "loss": 0.0311, "step": 18178 }, { "epoch": 4.136291240045506, "grad_norm": 1.2488061989114376, "learning_rate": 8.984268755764289e-08, "loss": 0.0107, "step": 18179 }, { "epoch": 4.136518771331058, "grad_norm": 1.114163592570371, "learning_rate": 8.979653780960392e-08, "loss": 0.0092, "step": 18180 }, { "epoch": 4.13674630261661, "grad_norm": 1.4177909317066333, "learning_rate": 8.975039900014381e-08, "loss": 0.047, "step": 18181 }, { "epoch": 4.136973833902162, "grad_norm": 1.2740495656750626, "learning_rate": 8.970427113020527e-08, "loss": 0.0215, "step": 18182 }, { "epoch": 4.137201365187713, "grad_norm": 0.21284544055237373, "learning_rate": 8.965815420073098e-08, "loss": 0.0006, "step": 18183 }, { "epoch": 4.137428896473265, "grad_norm": 2.704243827313853, "learning_rate": 8.961204821266375e-08, "loss": 0.0077, "step": 18184 }, { "epoch": 4.137656427758817, "grad_norm": 2.5837106529922025, "learning_rate": 8.95659531669455e-08, "loss": 0.0894, "step": 18185 }, { "epoch": 4.137883959044369, "grad_norm": 1.5106838406890262, "learning_rate": 8.951986906451868e-08, "loss": 0.0537, "step": 18186 }, { "epoch": 4.13811149032992, "grad_norm": 0.74032160875533, "learning_rate": 8.947379590632514e-08, "loss": 0.0591, "step": 18187 }, { "epoch": 4.138339021615472, "grad_norm": 0.5463449038282626, "learning_rate": 8.942773369330623e-08, "loss": 0.0051, "step": 18188 }, { "epoch": 4.138566552901024, "grad_norm": 1.8577465441723628, "learning_rate": 8.938168242640383e-08, "loss": 0.0269, "step": 18189 }, { "epoch": 4.138794084186576, "grad_norm": 1.4077976906010128, "learning_rate": 8.933564210655873e-08, "loss": 0.0804, "step": 18190 }, { "epoch": 4.139021615472127, "grad_norm": 0.9694905405743681, "learning_rate": 8.928961273471225e-08, "loss": 0.0517, "step": 18191 }, { "epoch": 4.139249146757679, "grad_norm": 1.188185860478206, "learning_rate": 8.92435943118048e-08, "loss": 0.0216, "step": 18192 }, { "epoch": 4.139476678043231, "grad_norm": 1.2212999946324041, "learning_rate": 8.919758683877705e-08, "loss": 0.0115, "step": 18193 }, { "epoch": 4.139704209328783, "grad_norm": 1.2795552947873132, "learning_rate": 8.915159031656955e-08, "loss": 0.0731, "step": 18194 }, { "epoch": 4.139931740614334, "grad_norm": 2.0514275009413194, "learning_rate": 8.910560474612202e-08, "loss": 0.0307, "step": 18195 }, { "epoch": 4.140159271899886, "grad_norm": 1.8813570612117556, "learning_rate": 8.905963012837463e-08, "loss": 0.0121, "step": 18196 }, { "epoch": 4.140386803185438, "grad_norm": 1.363654816121894, "learning_rate": 8.901366646426678e-08, "loss": 0.0569, "step": 18197 }, { "epoch": 4.14061433447099, "grad_norm": 2.3132767733637816, "learning_rate": 8.89677137547381e-08, "loss": 0.1076, "step": 18198 }, { "epoch": 4.140841865756541, "grad_norm": 1.3192167164785595, "learning_rate": 8.892177200072744e-08, "loss": 0.0625, "step": 18199 }, { "epoch": 4.141069397042093, "grad_norm": 2.230136923821272, "learning_rate": 8.887584120317406e-08, "loss": 0.153, "step": 18200 }, { "epoch": 4.141296928327645, "grad_norm": 0.7272627117657243, "learning_rate": 8.882992136301671e-08, "loss": 0.0108, "step": 18201 }, { "epoch": 4.141524459613197, "grad_norm": 0.5579979798666085, "learning_rate": 8.878401248119374e-08, "loss": 0.0033, "step": 18202 }, { "epoch": 4.141751990898749, "grad_norm": 0.903879674575911, "learning_rate": 8.873811455864356e-08, "loss": 0.0578, "step": 18203 }, { "epoch": 4.1419795221843, "grad_norm": 1.8713502217736653, "learning_rate": 8.869222759630418e-08, "loss": 0.1361, "step": 18204 }, { "epoch": 4.1422070534698525, "grad_norm": 1.119113699333717, "learning_rate": 8.864635159511352e-08, "loss": 0.0106, "step": 18205 }, { "epoch": 4.142434584755404, "grad_norm": 1.4440080803507132, "learning_rate": 8.860048655600903e-08, "loss": 0.0602, "step": 18206 }, { "epoch": 4.142662116040956, "grad_norm": 0.3319437777575485, "learning_rate": 8.855463247992822e-08, "loss": 0.0017, "step": 18207 }, { "epoch": 4.142889647326507, "grad_norm": 1.4214738375035836, "learning_rate": 8.85087893678084e-08, "loss": 0.0785, "step": 18208 }, { "epoch": 4.1431171786120595, "grad_norm": 0.9041911080265744, "learning_rate": 8.846295722058626e-08, "loss": 0.0121, "step": 18209 }, { "epoch": 4.143344709897611, "grad_norm": 0.6804737549792497, "learning_rate": 8.841713603919881e-08, "loss": 0.0054, "step": 18210 }, { "epoch": 4.143572241183163, "grad_norm": 0.8645034955699819, "learning_rate": 8.837132582458225e-08, "loss": 0.0047, "step": 18211 }, { "epoch": 4.143799772468714, "grad_norm": 1.209597593157905, "learning_rate": 8.832552657767314e-08, "loss": 0.0037, "step": 18212 }, { "epoch": 4.1440273037542665, "grad_norm": 1.4960619228092082, "learning_rate": 8.827973829940724e-08, "loss": 0.0275, "step": 18213 }, { "epoch": 4.144254835039818, "grad_norm": 0.8456692365829984, "learning_rate": 8.823396099072054e-08, "loss": 0.0087, "step": 18214 }, { "epoch": 4.14448236632537, "grad_norm": 0.6837286403440459, "learning_rate": 8.818819465254882e-08, "loss": 0.0039, "step": 18215 }, { "epoch": 4.144709897610921, "grad_norm": 1.94278162554357, "learning_rate": 8.814243928582711e-08, "loss": 0.0733, "step": 18216 }, { "epoch": 4.1449374288964735, "grad_norm": 2.501960789103098, "learning_rate": 8.809669489149082e-08, "loss": 0.0199, "step": 18217 }, { "epoch": 4.145164960182025, "grad_norm": 2.014831406768581, "learning_rate": 8.80509614704747e-08, "loss": 0.0586, "step": 18218 }, { "epoch": 4.145392491467577, "grad_norm": 0.6649040240883671, "learning_rate": 8.80052390237137e-08, "loss": 0.004, "step": 18219 }, { "epoch": 4.145620022753128, "grad_norm": 1.2524465501633053, "learning_rate": 8.7959527552142e-08, "loss": 0.1045, "step": 18220 }, { "epoch": 4.1458475540386805, "grad_norm": 1.432676885162629, "learning_rate": 8.791382705669399e-08, "loss": 0.0588, "step": 18221 }, { "epoch": 4.146075085324232, "grad_norm": 1.8445960868399718, "learning_rate": 8.786813753830393e-08, "loss": 0.1039, "step": 18222 }, { "epoch": 4.146302616609784, "grad_norm": 1.6751751081317927, "learning_rate": 8.782245899790522e-08, "loss": 0.0565, "step": 18223 }, { "epoch": 4.146530147895335, "grad_norm": 1.0735371972908812, "learning_rate": 8.777679143643185e-08, "loss": 0.0332, "step": 18224 }, { "epoch": 4.146757679180888, "grad_norm": 1.1318620586024952, "learning_rate": 8.773113485481696e-08, "loss": 0.0425, "step": 18225 }, { "epoch": 4.146985210466439, "grad_norm": 1.6445941756377873, "learning_rate": 8.768548925399356e-08, "loss": 0.0162, "step": 18226 }, { "epoch": 4.147212741751991, "grad_norm": 1.1566663632960494, "learning_rate": 8.763985463489472e-08, "loss": 0.0152, "step": 18227 }, { "epoch": 4.147440273037542, "grad_norm": 1.398112322267478, "learning_rate": 8.759423099845308e-08, "loss": 0.0165, "step": 18228 }, { "epoch": 4.147667804323095, "grad_norm": 1.5749329443708762, "learning_rate": 8.75486183456014e-08, "loss": 0.0515, "step": 18229 }, { "epoch": 4.147895335608646, "grad_norm": 0.5863352302158221, "learning_rate": 8.75030166772714e-08, "loss": 0.0028, "step": 18230 }, { "epoch": 4.148122866894198, "grad_norm": 1.3252889804966164, "learning_rate": 8.745742599439562e-08, "loss": 0.015, "step": 18231 }, { "epoch": 4.148350398179749, "grad_norm": 0.8299421048692042, "learning_rate": 8.741184629790552e-08, "loss": 0.0054, "step": 18232 }, { "epoch": 4.148577929465302, "grad_norm": 0.9985847021374388, "learning_rate": 8.736627758873261e-08, "loss": 0.0323, "step": 18233 }, { "epoch": 4.148805460750853, "grad_norm": 1.7210973774539466, "learning_rate": 8.732071986780833e-08, "loss": 0.0898, "step": 18234 }, { "epoch": 4.149032992036405, "grad_norm": 1.090063905370368, "learning_rate": 8.727517313606382e-08, "loss": 0.016, "step": 18235 }, { "epoch": 4.149260523321956, "grad_norm": 1.8637753686826635, "learning_rate": 8.722963739443014e-08, "loss": 0.0543, "step": 18236 }, { "epoch": 4.149488054607509, "grad_norm": 1.1023175211380694, "learning_rate": 8.718411264383782e-08, "loss": 0.007, "step": 18237 }, { "epoch": 4.14971558589306, "grad_norm": 1.2284282518947267, "learning_rate": 8.713859888521705e-08, "loss": 0.0345, "step": 18238 }, { "epoch": 4.149943117178612, "grad_norm": 1.8101228102104967, "learning_rate": 8.709309611949851e-08, "loss": 0.0197, "step": 18239 }, { "epoch": 4.150170648464163, "grad_norm": 1.6105570845649047, "learning_rate": 8.704760434761165e-08, "loss": 0.0302, "step": 18240 }, { "epoch": 4.150398179749716, "grad_norm": 1.6123578756550498, "learning_rate": 8.700212357048682e-08, "loss": 0.0689, "step": 18241 }, { "epoch": 4.150625711035268, "grad_norm": 1.866673340758924, "learning_rate": 8.695665378905309e-08, "loss": 0.0634, "step": 18242 }, { "epoch": 4.150853242320819, "grad_norm": 0.8141537519871483, "learning_rate": 8.691119500424019e-08, "loss": 0.0081, "step": 18243 }, { "epoch": 4.151080773606371, "grad_norm": 1.9860337586693582, "learning_rate": 8.686574721697698e-08, "loss": 0.0679, "step": 18244 }, { "epoch": 4.151308304891923, "grad_norm": 0.851317187203046, "learning_rate": 8.682031042819214e-08, "loss": 0.0061, "step": 18245 }, { "epoch": 4.151535836177475, "grad_norm": 1.8245695349208082, "learning_rate": 8.677488463881459e-08, "loss": 0.0737, "step": 18246 }, { "epoch": 4.151763367463026, "grad_norm": 1.2451210250127622, "learning_rate": 8.672946984977268e-08, "loss": 0.0526, "step": 18247 }, { "epoch": 4.151990898748578, "grad_norm": 1.5377370489244073, "learning_rate": 8.668406606199476e-08, "loss": 0.0103, "step": 18248 }, { "epoch": 4.15221843003413, "grad_norm": 0.698076892424102, "learning_rate": 8.663867327640852e-08, "loss": 0.0057, "step": 18249 }, { "epoch": 4.152445961319682, "grad_norm": 0.8801938916702616, "learning_rate": 8.659329149394199e-08, "loss": 0.0148, "step": 18250 }, { "epoch": 4.152673492605233, "grad_norm": 0.885361544090206, "learning_rate": 8.654792071552246e-08, "loss": 0.0189, "step": 18251 }, { "epoch": 4.152901023890785, "grad_norm": 1.361413983349167, "learning_rate": 8.650256094207721e-08, "loss": 0.0123, "step": 18252 }, { "epoch": 4.153128555176337, "grad_norm": 2.3261597182825344, "learning_rate": 8.645721217453343e-08, "loss": 0.006, "step": 18253 }, { "epoch": 4.153356086461889, "grad_norm": 0.298090035991673, "learning_rate": 8.641187441381788e-08, "loss": 0.0012, "step": 18254 }, { "epoch": 4.15358361774744, "grad_norm": 1.1423158283878079, "learning_rate": 8.636654766085744e-08, "loss": 0.0479, "step": 18255 }, { "epoch": 4.153811149032992, "grad_norm": 1.0911898700906093, "learning_rate": 8.632123191657828e-08, "loss": 0.0895, "step": 18256 }, { "epoch": 4.154038680318544, "grad_norm": 0.9175530784595918, "learning_rate": 8.627592718190642e-08, "loss": 0.0094, "step": 18257 }, { "epoch": 4.154266211604096, "grad_norm": 0.9126283823240062, "learning_rate": 8.623063345776818e-08, "loss": 0.0083, "step": 18258 }, { "epoch": 4.154493742889647, "grad_norm": 0.9677846986755797, "learning_rate": 8.618535074508889e-08, "loss": 0.0328, "step": 18259 }, { "epoch": 4.154721274175199, "grad_norm": 2.0215137448071454, "learning_rate": 8.614007904479429e-08, "loss": 0.0824, "step": 18260 }, { "epoch": 4.154948805460751, "grad_norm": 1.7077066881038894, "learning_rate": 8.609481835780954e-08, "loss": 0.0509, "step": 18261 }, { "epoch": 4.155176336746303, "grad_norm": 0.3784513377214527, "learning_rate": 8.604956868505993e-08, "loss": 0.0017, "step": 18262 }, { "epoch": 4.155403868031854, "grad_norm": 0.8707174453156198, "learning_rate": 8.60043300274701e-08, "loss": 0.0077, "step": 18263 }, { "epoch": 4.155631399317406, "grad_norm": 0.6318788591957287, "learning_rate": 8.595910238596447e-08, "loss": 0.0088, "step": 18264 }, { "epoch": 4.155858930602958, "grad_norm": 1.0314577765774413, "learning_rate": 8.591388576146775e-08, "loss": 0.063, "step": 18265 }, { "epoch": 4.15608646188851, "grad_norm": 1.2842175086265966, "learning_rate": 8.586868015490371e-08, "loss": 0.1054, "step": 18266 }, { "epoch": 4.156313993174061, "grad_norm": 1.1116369962011274, "learning_rate": 8.582348556719654e-08, "loss": 0.0754, "step": 18267 }, { "epoch": 4.156541524459613, "grad_norm": 1.515939274909, "learning_rate": 8.577830199926987e-08, "loss": 0.0389, "step": 18268 }, { "epoch": 4.156769055745165, "grad_norm": 0.9798522078673725, "learning_rate": 8.573312945204728e-08, "loss": 0.0158, "step": 18269 }, { "epoch": 4.156996587030717, "grad_norm": 2.2115751086308206, "learning_rate": 8.56879679264519e-08, "loss": 0.0194, "step": 18270 }, { "epoch": 4.157224118316268, "grad_norm": 1.2030970412196706, "learning_rate": 8.564281742340668e-08, "loss": 0.0142, "step": 18271 }, { "epoch": 4.15745164960182, "grad_norm": 0.9027793173574894, "learning_rate": 8.559767794383451e-08, "loss": 0.0096, "step": 18272 }, { "epoch": 4.157679180887372, "grad_norm": 4.325653953639335, "learning_rate": 8.55525494886579e-08, "loss": 0.0197, "step": 18273 }, { "epoch": 4.157906712172924, "grad_norm": 1.1671676433234766, "learning_rate": 8.550743205879918e-08, "loss": 0.0283, "step": 18274 }, { "epoch": 4.158134243458475, "grad_norm": 1.1265924979911761, "learning_rate": 8.546232565518065e-08, "loss": 0.0102, "step": 18275 }, { "epoch": 4.158361774744027, "grad_norm": 1.7249555251072741, "learning_rate": 8.541723027872394e-08, "loss": 0.0307, "step": 18276 }, { "epoch": 4.158589306029579, "grad_norm": 1.9844068064250573, "learning_rate": 8.537214593035103e-08, "loss": 0.1106, "step": 18277 }, { "epoch": 4.158816837315131, "grad_norm": 1.149205675408911, "learning_rate": 8.532707261098297e-08, "loss": 0.0157, "step": 18278 }, { "epoch": 4.159044368600682, "grad_norm": 1.5200308613449462, "learning_rate": 8.528201032154134e-08, "loss": 0.0451, "step": 18279 }, { "epoch": 4.159271899886234, "grad_norm": 2.082866007388764, "learning_rate": 8.523695906294684e-08, "loss": 0.016, "step": 18280 }, { "epoch": 4.1594994311717866, "grad_norm": 1.6742201101136218, "learning_rate": 8.519191883612034e-08, "loss": 0.0935, "step": 18281 }, { "epoch": 4.159726962457338, "grad_norm": 1.0711820646332229, "learning_rate": 8.514688964198256e-08, "loss": 0.0549, "step": 18282 }, { "epoch": 4.15995449374289, "grad_norm": 0.8778104509590616, "learning_rate": 8.510187148145353e-08, "loss": 0.0054, "step": 18283 }, { "epoch": 4.160182025028441, "grad_norm": 0.5802680837090033, "learning_rate": 8.505686435545355e-08, "loss": 0.0025, "step": 18284 }, { "epoch": 4.160409556313994, "grad_norm": 1.3673226675707089, "learning_rate": 8.501186826490239e-08, "loss": 0.0398, "step": 18285 }, { "epoch": 4.160637087599545, "grad_norm": 1.5522300172046104, "learning_rate": 8.49668832107197e-08, "loss": 0.0635, "step": 18286 }, { "epoch": 4.160864618885097, "grad_norm": 3.098260739353039, "learning_rate": 8.492190919382484e-08, "loss": 0.1068, "step": 18287 }, { "epoch": 4.161092150170648, "grad_norm": 0.9145518393535631, "learning_rate": 8.487694621513707e-08, "loss": 0.0182, "step": 18288 }, { "epoch": 4.161319681456201, "grad_norm": 1.180338324858442, "learning_rate": 8.483199427557543e-08, "loss": 0.0503, "step": 18289 }, { "epoch": 4.161547212741752, "grad_norm": 1.024060867539862, "learning_rate": 8.478705337605843e-08, "loss": 0.0114, "step": 18290 }, { "epoch": 4.161774744027304, "grad_norm": 1.0668953339501883, "learning_rate": 8.474212351750479e-08, "loss": 0.0301, "step": 18291 }, { "epoch": 4.162002275312855, "grad_norm": 0.7118960518782076, "learning_rate": 8.469720470083264e-08, "loss": 0.0079, "step": 18292 }, { "epoch": 4.162229806598408, "grad_norm": 0.7167192155018148, "learning_rate": 8.465229692696024e-08, "loss": 0.0027, "step": 18293 }, { "epoch": 4.162457337883959, "grad_norm": 1.3433486144140192, "learning_rate": 8.46074001968051e-08, "loss": 0.0186, "step": 18294 }, { "epoch": 4.162684869169511, "grad_norm": 1.3216118407611235, "learning_rate": 8.456251451128509e-08, "loss": 0.0252, "step": 18295 }, { "epoch": 4.162912400455062, "grad_norm": 1.4279528427974764, "learning_rate": 8.45176398713176e-08, "loss": 0.0786, "step": 18296 }, { "epoch": 4.163139931740615, "grad_norm": 0.6979844841418066, "learning_rate": 8.447277627781962e-08, "loss": 0.0049, "step": 18297 }, { "epoch": 4.163367463026166, "grad_norm": 1.0270650995223247, "learning_rate": 8.442792373170825e-08, "loss": 0.0077, "step": 18298 }, { "epoch": 4.163594994311718, "grad_norm": 1.0825221967800207, "learning_rate": 8.43830822339e-08, "loss": 0.0498, "step": 18299 }, { "epoch": 4.163822525597269, "grad_norm": 1.6950159566966703, "learning_rate": 8.433825178531154e-08, "loss": 0.1141, "step": 18300 }, { "epoch": 4.164050056882822, "grad_norm": 0.4119146325930328, "learning_rate": 8.429343238685897e-08, "loss": 0.0011, "step": 18301 }, { "epoch": 4.164277588168373, "grad_norm": 1.7464221706193663, "learning_rate": 8.424862403945834e-08, "loss": 0.0936, "step": 18302 }, { "epoch": 4.164505119453925, "grad_norm": 1.5065510934911945, "learning_rate": 8.420382674402566e-08, "loss": 0.1002, "step": 18303 }, { "epoch": 4.164732650739476, "grad_norm": 1.226102367399084, "learning_rate": 8.415904050147617e-08, "loss": 0.0108, "step": 18304 }, { "epoch": 4.164960182025029, "grad_norm": 1.2958475462715537, "learning_rate": 8.411426531272561e-08, "loss": 0.0752, "step": 18305 }, { "epoch": 4.16518771331058, "grad_norm": 1.2583862547594198, "learning_rate": 8.406950117868864e-08, "loss": 0.0192, "step": 18306 }, { "epoch": 4.165415244596132, "grad_norm": 1.492976022175099, "learning_rate": 8.402474810028045e-08, "loss": 0.0918, "step": 18307 }, { "epoch": 4.165642775881683, "grad_norm": 2.5715756222473156, "learning_rate": 8.398000607841579e-08, "loss": 0.0176, "step": 18308 }, { "epoch": 4.165870307167236, "grad_norm": 2.8824022226553145, "learning_rate": 8.393527511400886e-08, "loss": 0.0278, "step": 18309 }, { "epoch": 4.166097838452787, "grad_norm": 1.8237657013620137, "learning_rate": 8.389055520797406e-08, "loss": 0.0298, "step": 18310 }, { "epoch": 4.166325369738339, "grad_norm": 0.709563476132176, "learning_rate": 8.384584636122524e-08, "loss": 0.0065, "step": 18311 }, { "epoch": 4.16655290102389, "grad_norm": 1.654970784786381, "learning_rate": 8.38011485746764e-08, "loss": 0.0243, "step": 18312 }, { "epoch": 4.166780432309443, "grad_norm": 1.1642608329882198, "learning_rate": 8.37564618492407e-08, "loss": 0.0708, "step": 18313 }, { "epoch": 4.167007963594994, "grad_norm": 0.4874574956408873, "learning_rate": 8.371178618583178e-08, "loss": 0.002, "step": 18314 }, { "epoch": 4.167235494880546, "grad_norm": 1.6617626262290468, "learning_rate": 8.366712158536273e-08, "loss": 0.0215, "step": 18315 }, { "epoch": 4.167463026166097, "grad_norm": 1.4022390585308195, "learning_rate": 8.362246804874616e-08, "loss": 0.0306, "step": 18316 }, { "epoch": 4.16769055745165, "grad_norm": 1.5117025846292862, "learning_rate": 8.357782557689493e-08, "loss": 0.0226, "step": 18317 }, { "epoch": 4.167918088737201, "grad_norm": 1.7919589540023104, "learning_rate": 8.353319417072133e-08, "loss": 0.0994, "step": 18318 }, { "epoch": 4.168145620022753, "grad_norm": 1.7258926641296386, "learning_rate": 8.348857383113771e-08, "loss": 0.0161, "step": 18319 }, { "epoch": 4.168373151308305, "grad_norm": 0.6833368607896485, "learning_rate": 8.344396455905575e-08, "loss": 0.0047, "step": 18320 }, { "epoch": 4.168600682593857, "grad_norm": 0.6865090198942064, "learning_rate": 8.33993663553874e-08, "loss": 0.0045, "step": 18321 }, { "epoch": 4.168828213879409, "grad_norm": 0.6731148912603154, "learning_rate": 8.335477922104415e-08, "loss": 0.0072, "step": 18322 }, { "epoch": 4.16905574516496, "grad_norm": 1.0335812179986192, "learning_rate": 8.331020315693716e-08, "loss": 0.023, "step": 18323 }, { "epoch": 4.169283276450512, "grad_norm": 1.7646595361192778, "learning_rate": 8.326563816397771e-08, "loss": 0.0197, "step": 18324 }, { "epoch": 4.169510807736064, "grad_norm": 0.43304788022255086, "learning_rate": 8.322108424307633e-08, "loss": 0.0014, "step": 18325 }, { "epoch": 4.169738339021616, "grad_norm": 0.6223141360951513, "learning_rate": 8.317654139514388e-08, "loss": 0.0049, "step": 18326 }, { "epoch": 4.169965870307167, "grad_norm": 1.8585819942622597, "learning_rate": 8.313200962109045e-08, "loss": 0.0385, "step": 18327 }, { "epoch": 4.170193401592719, "grad_norm": 1.1172589816504999, "learning_rate": 8.308748892182645e-08, "loss": 0.0145, "step": 18328 }, { "epoch": 4.170420932878271, "grad_norm": 1.6021346570729844, "learning_rate": 8.304297929826177e-08, "loss": 0.0051, "step": 18329 }, { "epoch": 4.170648464163823, "grad_norm": 1.7749123292383675, "learning_rate": 8.299848075130595e-08, "loss": 0.0502, "step": 18330 }, { "epoch": 4.170875995449374, "grad_norm": 1.107847370341531, "learning_rate": 8.295399328186869e-08, "loss": 0.0728, "step": 18331 }, { "epoch": 4.171103526734926, "grad_norm": 0.6760441537985346, "learning_rate": 8.290951689085912e-08, "loss": 0.0141, "step": 18332 }, { "epoch": 4.171331058020478, "grad_norm": 1.6437559942659594, "learning_rate": 8.286505157918602e-08, "loss": 0.0472, "step": 18333 }, { "epoch": 4.17155858930603, "grad_norm": 0.6142787007918024, "learning_rate": 8.282059734775846e-08, "loss": 0.0009, "step": 18334 }, { "epoch": 4.171786120591581, "grad_norm": 0.3684456868680149, "learning_rate": 8.277615419748489e-08, "loss": 0.0007, "step": 18335 }, { "epoch": 4.172013651877133, "grad_norm": 1.9047587334006146, "learning_rate": 8.273172212927386e-08, "loss": 0.037, "step": 18336 }, { "epoch": 4.172241183162685, "grad_norm": 1.0617898021529972, "learning_rate": 8.268730114403316e-08, "loss": 0.0714, "step": 18337 }, { "epoch": 4.172468714448237, "grad_norm": 1.1796345889225772, "learning_rate": 8.264289124267098e-08, "loss": 0.005, "step": 18338 }, { "epoch": 4.172696245733788, "grad_norm": 1.790228573259607, "learning_rate": 8.259849242609481e-08, "loss": 0.0173, "step": 18339 }, { "epoch": 4.17292377701934, "grad_norm": 1.5671165844303248, "learning_rate": 8.255410469521199e-08, "loss": 0.0672, "step": 18340 }, { "epoch": 4.173151308304892, "grad_norm": 0.5823458813967665, "learning_rate": 8.250972805092974e-08, "loss": 0.0026, "step": 18341 }, { "epoch": 4.173378839590444, "grad_norm": 1.1435874581453744, "learning_rate": 8.246536249415523e-08, "loss": 0.0075, "step": 18342 }, { "epoch": 4.173606370875995, "grad_norm": 19.113279966220954, "learning_rate": 8.24210080257952e-08, "loss": 0.0112, "step": 18343 }, { "epoch": 4.173833902161547, "grad_norm": 1.3944321180973338, "learning_rate": 8.237666464675601e-08, "loss": 0.0454, "step": 18344 }, { "epoch": 4.174061433447099, "grad_norm": 1.4889767374345217, "learning_rate": 8.233233235794399e-08, "loss": 0.0397, "step": 18345 }, { "epoch": 4.174288964732651, "grad_norm": 1.7115797029008126, "learning_rate": 8.228801116026537e-08, "loss": 0.0139, "step": 18346 }, { "epoch": 4.174516496018202, "grad_norm": 1.8272172449734192, "learning_rate": 8.224370105462572e-08, "loss": 0.029, "step": 18347 }, { "epoch": 4.174744027303754, "grad_norm": 1.0782343444574027, "learning_rate": 8.219940204193075e-08, "loss": 0.028, "step": 18348 }, { "epoch": 4.174971558589306, "grad_norm": 0.8173906610236752, "learning_rate": 8.2155114123086e-08, "loss": 0.0081, "step": 18349 }, { "epoch": 4.175199089874858, "grad_norm": 0.449725997882845, "learning_rate": 8.211083729899663e-08, "loss": 0.0016, "step": 18350 }, { "epoch": 4.175426621160409, "grad_norm": 2.335080396849895, "learning_rate": 8.206657157056749e-08, "loss": 0.0248, "step": 18351 }, { "epoch": 4.175654152445961, "grad_norm": 1.5723902810434913, "learning_rate": 8.202231693870321e-08, "loss": 0.0603, "step": 18352 }, { "epoch": 4.175881683731513, "grad_norm": 19.27823114465281, "learning_rate": 8.197807340430838e-08, "loss": 0.0602, "step": 18353 }, { "epoch": 4.176109215017065, "grad_norm": 1.3456698924651065, "learning_rate": 8.193384096828717e-08, "loss": 0.0202, "step": 18354 }, { "epoch": 4.176336746302616, "grad_norm": 1.193226353170926, "learning_rate": 8.188961963154371e-08, "loss": 0.0387, "step": 18355 }, { "epoch": 4.176564277588168, "grad_norm": 1.4403441718960546, "learning_rate": 8.184540939498171e-08, "loss": 0.1014, "step": 18356 }, { "epoch": 4.17679180887372, "grad_norm": 1.1993265738564098, "learning_rate": 8.180121025950498e-08, "loss": 0.061, "step": 18357 }, { "epoch": 4.177019340159272, "grad_norm": 1.6127778727844173, "learning_rate": 8.17570222260166e-08, "loss": 0.0447, "step": 18358 }, { "epoch": 4.177246871444824, "grad_norm": 1.590050637150792, "learning_rate": 8.171284529541974e-08, "loss": 0.0923, "step": 18359 }, { "epoch": 4.177474402730375, "grad_norm": 1.5064310253621707, "learning_rate": 8.166867946861743e-08, "loss": 0.0593, "step": 18360 }, { "epoch": 4.177701934015928, "grad_norm": 1.5659681814433841, "learning_rate": 8.162452474651216e-08, "loss": 0.0342, "step": 18361 }, { "epoch": 4.177929465301479, "grad_norm": 0.8003501751831141, "learning_rate": 8.158038113000636e-08, "loss": 0.0091, "step": 18362 }, { "epoch": 4.178156996587031, "grad_norm": 0.8316485009885133, "learning_rate": 8.153624862000254e-08, "loss": 0.0093, "step": 18363 }, { "epoch": 4.178384527872582, "grad_norm": 0.483901804865641, "learning_rate": 8.149212721740241e-08, "loss": 0.0018, "step": 18364 }, { "epoch": 4.178612059158135, "grad_norm": 2.28100325326597, "learning_rate": 8.144801692310786e-08, "loss": 0.0156, "step": 18365 }, { "epoch": 4.178839590443686, "grad_norm": 0.8485826675091946, "learning_rate": 8.140391773802027e-08, "loss": 0.0097, "step": 18366 }, { "epoch": 4.179067121729238, "grad_norm": 1.2252451039082717, "learning_rate": 8.135982966304107e-08, "loss": 0.0588, "step": 18367 }, { "epoch": 4.179294653014789, "grad_norm": 1.4116626836263346, "learning_rate": 8.131575269907134e-08, "loss": 0.0615, "step": 18368 }, { "epoch": 4.179522184300342, "grad_norm": 1.5590565854162841, "learning_rate": 8.127168684701204e-08, "loss": 0.1153, "step": 18369 }, { "epoch": 4.179749715585893, "grad_norm": 0.7164502996902116, "learning_rate": 8.12276321077636e-08, "loss": 0.0034, "step": 18370 }, { "epoch": 4.179977246871445, "grad_norm": 0.6231632420993681, "learning_rate": 8.118358848222637e-08, "loss": 0.0033, "step": 18371 }, { "epoch": 4.180204778156996, "grad_norm": 1.4954172150875369, "learning_rate": 8.113955597130082e-08, "loss": 0.0544, "step": 18372 }, { "epoch": 4.180432309442549, "grad_norm": 1.2780933679317215, "learning_rate": 8.10955345758866e-08, "loss": 0.0066, "step": 18373 }, { "epoch": 4.1806598407281, "grad_norm": 1.6492919702228945, "learning_rate": 8.105152429688346e-08, "loss": 0.0164, "step": 18374 }, { "epoch": 4.180887372013652, "grad_norm": 1.1672961505377495, "learning_rate": 8.100752513519105e-08, "loss": 0.0199, "step": 18375 }, { "epoch": 4.181114903299203, "grad_norm": 2.142709077201025, "learning_rate": 8.096353709170863e-08, "loss": 0.0585, "step": 18376 }, { "epoch": 4.181342434584756, "grad_norm": 0.7624787475582085, "learning_rate": 8.091956016733515e-08, "loss": 0.0048, "step": 18377 }, { "epoch": 4.181569965870307, "grad_norm": 3.1554174137157487, "learning_rate": 8.087559436296931e-08, "loss": 0.0501, "step": 18378 }, { "epoch": 4.181797497155859, "grad_norm": 1.1547291492221234, "learning_rate": 8.083163967950995e-08, "loss": 0.028, "step": 18379 }, { "epoch": 4.18202502844141, "grad_norm": 1.7409427539603015, "learning_rate": 8.078769611785519e-08, "loss": 0.0674, "step": 18380 }, { "epoch": 4.182252559726963, "grad_norm": 1.15197711171382, "learning_rate": 8.074376367890317e-08, "loss": 0.0053, "step": 18381 }, { "epoch": 4.182480091012514, "grad_norm": 1.5719020715464005, "learning_rate": 8.069984236355202e-08, "loss": 0.0072, "step": 18382 }, { "epoch": 4.182707622298066, "grad_norm": 1.118490421849315, "learning_rate": 8.065593217269907e-08, "loss": 0.0326, "step": 18383 }, { "epoch": 4.1829351535836174, "grad_norm": 6.052845515555627, "learning_rate": 8.061203310724213e-08, "loss": 0.0195, "step": 18384 }, { "epoch": 4.18316268486917, "grad_norm": 2.7716101945008673, "learning_rate": 8.056814516807808e-08, "loss": 0.0147, "step": 18385 }, { "epoch": 4.183390216154721, "grad_norm": 1.8809478754055506, "learning_rate": 8.052426835610424e-08, "loss": 0.0712, "step": 18386 }, { "epoch": 4.183617747440273, "grad_norm": 1.4839274993433276, "learning_rate": 8.048040267221703e-08, "loss": 0.0308, "step": 18387 }, { "epoch": 4.1838452787258245, "grad_norm": 4.6057482138219425, "learning_rate": 8.043654811731312e-08, "loss": 0.0304, "step": 18388 }, { "epoch": 4.184072810011377, "grad_norm": 2.0222273551178778, "learning_rate": 8.039270469228908e-08, "loss": 0.0516, "step": 18389 }, { "epoch": 4.184300341296928, "grad_norm": 1.2988792881606612, "learning_rate": 8.03488723980405e-08, "loss": 0.0376, "step": 18390 }, { "epoch": 4.18452787258248, "grad_norm": 1.7144688295140722, "learning_rate": 8.030505123546369e-08, "loss": 0.0347, "step": 18391 }, { "epoch": 4.1847554038680315, "grad_norm": 1.469028132953236, "learning_rate": 8.02612412054539e-08, "loss": 0.0146, "step": 18392 }, { "epoch": 4.184982935153584, "grad_norm": 1.4614545707765048, "learning_rate": 8.021744230890687e-08, "loss": 0.0319, "step": 18393 }, { "epoch": 4.185210466439135, "grad_norm": 1.4484414898797635, "learning_rate": 8.017365454671744e-08, "loss": 0.0162, "step": 18394 }, { "epoch": 4.185437997724687, "grad_norm": 5.1702400582753265, "learning_rate": 8.012987791978074e-08, "loss": 0.0614, "step": 18395 }, { "epoch": 4.1856655290102385, "grad_norm": 1.5321558794529864, "learning_rate": 8.008611242899151e-08, "loss": 0.0187, "step": 18396 }, { "epoch": 4.185893060295791, "grad_norm": 0.7880752014597083, "learning_rate": 8.004235807524414e-08, "loss": 0.006, "step": 18397 }, { "epoch": 4.186120591581343, "grad_norm": 1.0266286080621279, "learning_rate": 7.999861485943297e-08, "loss": 0.0448, "step": 18398 }, { "epoch": 4.186348122866894, "grad_norm": 0.6145541291222575, "learning_rate": 7.995488278245194e-08, "loss": 0.0022, "step": 18399 }, { "epoch": 4.186575654152446, "grad_norm": 1.0413344304322105, "learning_rate": 7.991116184519497e-08, "loss": 0.006, "step": 18400 }, { "epoch": 4.186803185437998, "grad_norm": 0.9473082144503917, "learning_rate": 7.986745204855546e-08, "loss": 0.0457, "step": 18401 }, { "epoch": 4.18703071672355, "grad_norm": 1.9673763514330518, "learning_rate": 7.982375339342686e-08, "loss": 0.0109, "step": 18402 }, { "epoch": 4.187258248009101, "grad_norm": 0.8357340345247041, "learning_rate": 7.978006588070248e-08, "loss": 0.0056, "step": 18403 }, { "epoch": 4.187485779294653, "grad_norm": 1.8031059026250293, "learning_rate": 7.973638951127489e-08, "loss": 0.0565, "step": 18404 }, { "epoch": 4.187713310580205, "grad_norm": 1.0948491733815016, "learning_rate": 7.969272428603694e-08, "loss": 0.0473, "step": 18405 }, { "epoch": 4.187940841865757, "grad_norm": 2.159230585419183, "learning_rate": 7.964907020588094e-08, "loss": 0.0816, "step": 18406 }, { "epoch": 4.188168373151308, "grad_norm": 1.1806132565623235, "learning_rate": 7.960542727169937e-08, "loss": 0.021, "step": 18407 }, { "epoch": 4.18839590443686, "grad_norm": 1.3374681020479195, "learning_rate": 7.956179548438379e-08, "loss": 0.0084, "step": 18408 }, { "epoch": 4.188623435722412, "grad_norm": 1.325070176863771, "learning_rate": 7.951817484482629e-08, "loss": 0.1095, "step": 18409 }, { "epoch": 4.188850967007964, "grad_norm": 2.1288091455582037, "learning_rate": 7.947456535391834e-08, "loss": 0.0219, "step": 18410 }, { "epoch": 4.189078498293515, "grad_norm": 3.3145324632231565, "learning_rate": 7.943096701255114e-08, "loss": 0.0106, "step": 18411 }, { "epoch": 4.189306029579067, "grad_norm": 1.3250722695867048, "learning_rate": 7.93873798216159e-08, "loss": 0.0507, "step": 18412 }, { "epoch": 4.189533560864619, "grad_norm": 1.1635173247708968, "learning_rate": 7.934380378200324e-08, "loss": 0.0142, "step": 18413 }, { "epoch": 4.189761092150171, "grad_norm": 0.6334619412795234, "learning_rate": 7.930023889460403e-08, "loss": 0.0138, "step": 18414 }, { "epoch": 4.189988623435722, "grad_norm": 1.1465746199650548, "learning_rate": 7.925668516030841e-08, "loss": 0.083, "step": 18415 }, { "epoch": 4.190216154721274, "grad_norm": 1.2564494110764133, "learning_rate": 7.921314258000669e-08, "loss": 0.08, "step": 18416 }, { "epoch": 4.190443686006826, "grad_norm": 1.5294116258618657, "learning_rate": 7.916961115458887e-08, "loss": 0.0335, "step": 18417 }, { "epoch": 4.190671217292378, "grad_norm": 1.442219115356272, "learning_rate": 7.912609088494442e-08, "loss": 0.0239, "step": 18418 }, { "epoch": 4.190898748577929, "grad_norm": 1.1127263486959722, "learning_rate": 7.908258177196309e-08, "loss": 0.0164, "step": 18419 }, { "epoch": 4.191126279863481, "grad_norm": 1.0377476673378592, "learning_rate": 7.903908381653405e-08, "loss": 0.0437, "step": 18420 }, { "epoch": 4.191353811149033, "grad_norm": 1.1796519470185487, "learning_rate": 7.899559701954606e-08, "loss": 0.0206, "step": 18421 }, { "epoch": 4.191581342434585, "grad_norm": 1.4553286522361222, "learning_rate": 7.895212138188813e-08, "loss": 0.0246, "step": 18422 }, { "epoch": 4.191808873720136, "grad_norm": 0.7292437721943876, "learning_rate": 7.890865690444879e-08, "loss": 0.0042, "step": 18423 }, { "epoch": 4.192036405005688, "grad_norm": 0.7789474744379062, "learning_rate": 7.886520358811649e-08, "loss": 0.0132, "step": 18424 }, { "epoch": 4.19226393629124, "grad_norm": 1.3360824355044387, "learning_rate": 7.882176143377912e-08, "loss": 0.0059, "step": 18425 }, { "epoch": 4.192491467576792, "grad_norm": 1.6934554066263545, "learning_rate": 7.87783304423248e-08, "loss": 0.0659, "step": 18426 }, { "epoch": 4.192718998862343, "grad_norm": 1.2549510314452543, "learning_rate": 7.873491061464108e-08, "loss": 0.0343, "step": 18427 }, { "epoch": 4.192946530147895, "grad_norm": 1.5413470808636711, "learning_rate": 7.869150195161504e-08, "loss": 0.0217, "step": 18428 }, { "epoch": 4.193174061433447, "grad_norm": 1.305307285057614, "learning_rate": 7.86481044541345e-08, "loss": 0.0079, "step": 18429 }, { "epoch": 4.193401592718999, "grad_norm": 2.4192822489801835, "learning_rate": 7.86047181230859e-08, "loss": 0.0313, "step": 18430 }, { "epoch": 4.19362912400455, "grad_norm": 1.2669432613809781, "learning_rate": 7.856134295935638e-08, "loss": 0.0532, "step": 18431 }, { "epoch": 4.193856655290102, "grad_norm": 1.1916604808160665, "learning_rate": 7.851797896383212e-08, "loss": 0.0516, "step": 18432 }, { "epoch": 4.194084186575654, "grad_norm": 1.2968854630965096, "learning_rate": 7.847462613739962e-08, "loss": 0.1072, "step": 18433 }, { "epoch": 4.194311717861206, "grad_norm": 1.7130385652645375, "learning_rate": 7.843128448094477e-08, "loss": 0.0203, "step": 18434 }, { "epoch": 4.194539249146757, "grad_norm": 1.2052392254742195, "learning_rate": 7.838795399535346e-08, "loss": 0.0826, "step": 18435 }, { "epoch": 4.194766780432309, "grad_norm": 2.5922481990803417, "learning_rate": 7.834463468151143e-08, "loss": 0.0857, "step": 18436 }, { "epoch": 4.194994311717862, "grad_norm": 0.754849682223629, "learning_rate": 7.830132654030376e-08, "loss": 0.0095, "step": 18437 }, { "epoch": 4.195221843003413, "grad_norm": 0.7937801721106361, "learning_rate": 7.82580295726159e-08, "loss": 0.0119, "step": 18438 }, { "epoch": 4.195449374288965, "grad_norm": 1.8495367603091333, "learning_rate": 7.821474377933257e-08, "loss": 0.0237, "step": 18439 }, { "epoch": 4.1956769055745164, "grad_norm": 2.030500958340592, "learning_rate": 7.81714691613384e-08, "loss": 0.0189, "step": 18440 }, { "epoch": 4.195904436860069, "grad_norm": 1.102202211571131, "learning_rate": 7.81282057195179e-08, "loss": 0.0217, "step": 18441 }, { "epoch": 4.19613196814562, "grad_norm": 1.4731565145689094, "learning_rate": 7.808495345475537e-08, "loss": 0.1066, "step": 18442 }, { "epoch": 4.196359499431172, "grad_norm": 2.5525181731996898, "learning_rate": 7.80417123679348e-08, "loss": 0.1132, "step": 18443 }, { "epoch": 4.1965870307167235, "grad_norm": 1.2168474730359466, "learning_rate": 7.799848245993988e-08, "loss": 0.0503, "step": 18444 }, { "epoch": 4.196814562002276, "grad_norm": 1.164584752101177, "learning_rate": 7.795526373165426e-08, "loss": 0.0373, "step": 18445 }, { "epoch": 4.197042093287827, "grad_norm": 0.9860551560957871, "learning_rate": 7.79120561839613e-08, "loss": 0.0334, "step": 18446 }, { "epoch": 4.197269624573379, "grad_norm": 1.427836754079944, "learning_rate": 7.786885981774371e-08, "loss": 0.0249, "step": 18447 }, { "epoch": 4.1974971558589305, "grad_norm": 1.0801770677011355, "learning_rate": 7.782567463388464e-08, "loss": 0.0363, "step": 18448 }, { "epoch": 4.197724687144483, "grad_norm": 1.0393282057542643, "learning_rate": 7.778250063326671e-08, "loss": 0.0194, "step": 18449 }, { "epoch": 4.197952218430034, "grad_norm": 2.0578475127543645, "learning_rate": 7.773933781677241e-08, "loss": 0.0713, "step": 18450 }, { "epoch": 4.198179749715586, "grad_norm": 1.4295947603218462, "learning_rate": 7.769618618528374e-08, "loss": 0.0278, "step": 18451 }, { "epoch": 4.1984072810011375, "grad_norm": 1.038367985799069, "learning_rate": 7.765304573968256e-08, "loss": 0.0329, "step": 18452 }, { "epoch": 4.19863481228669, "grad_norm": 1.574083518687605, "learning_rate": 7.760991648085088e-08, "loss": 0.0559, "step": 18453 }, { "epoch": 4.198862343572241, "grad_norm": 1.8556435575845311, "learning_rate": 7.756679840966987e-08, "loss": 0.0898, "step": 18454 }, { "epoch": 4.199089874857793, "grad_norm": 1.0880382228066, "learning_rate": 7.75236915270209e-08, "loss": 0.076, "step": 18455 }, { "epoch": 4.1993174061433445, "grad_norm": 1.4969425470424866, "learning_rate": 7.748059583378508e-08, "loss": 0.0488, "step": 18456 }, { "epoch": 4.199544937428897, "grad_norm": 1.4231811836010042, "learning_rate": 7.743751133084322e-08, "loss": 0.075, "step": 18457 }, { "epoch": 4.199772468714448, "grad_norm": 0.6268653369795347, "learning_rate": 7.739443801907582e-08, "loss": 0.008, "step": 18458 }, { "epoch": 4.2, "grad_norm": 2.379570196931411, "learning_rate": 7.735137589936305e-08, "loss": 0.065, "step": 18459 }, { "epoch": 4.2002275312855515, "grad_norm": 1.2517143281020346, "learning_rate": 7.73083249725854e-08, "loss": 0.0135, "step": 18460 }, { "epoch": 4.200455062571104, "grad_norm": 3.1123511104568853, "learning_rate": 7.726528523962231e-08, "loss": 0.0191, "step": 18461 }, { "epoch": 4.200682593856655, "grad_norm": 1.2686928133206608, "learning_rate": 7.722225670135371e-08, "loss": 0.0848, "step": 18462 }, { "epoch": 4.200910125142207, "grad_norm": 0.3236109765050441, "learning_rate": 7.717923935865901e-08, "loss": 0.0016, "step": 18463 }, { "epoch": 4.2011376564277585, "grad_norm": 0.7549985413762813, "learning_rate": 7.713623321241745e-08, "loss": 0.0315, "step": 18464 }, { "epoch": 4.201365187713311, "grad_norm": 1.4416932486074197, "learning_rate": 7.709323826350798e-08, "loss": 0.0497, "step": 18465 }, { "epoch": 4.201592718998862, "grad_norm": 1.2209178901236637, "learning_rate": 7.705025451280906e-08, "loss": 0.0119, "step": 18466 }, { "epoch": 4.201820250284414, "grad_norm": 1.5739719464423088, "learning_rate": 7.700728196119958e-08, "loss": 0.0318, "step": 18467 }, { "epoch": 4.2020477815699655, "grad_norm": 1.2789998754774352, "learning_rate": 7.696432060955758e-08, "loss": 0.0201, "step": 18468 }, { "epoch": 4.202275312855518, "grad_norm": 1.3111614094774673, "learning_rate": 7.692137045876111e-08, "loss": 0.0282, "step": 18469 }, { "epoch": 4.202502844141069, "grad_norm": 1.360344228074998, "learning_rate": 7.687843150968828e-08, "loss": 0.02, "step": 18470 }, { "epoch": 4.202730375426621, "grad_norm": 0.926373584358048, "learning_rate": 7.683550376321624e-08, "loss": 0.0082, "step": 18471 }, { "epoch": 4.2029579067121725, "grad_norm": 1.1307401375696815, "learning_rate": 7.679258722022277e-08, "loss": 0.0059, "step": 18472 }, { "epoch": 4.203185437997725, "grad_norm": 0.3566436957310059, "learning_rate": 7.674968188158473e-08, "loss": 0.0019, "step": 18473 }, { "epoch": 4.203412969283276, "grad_norm": 1.3272319066678757, "learning_rate": 7.67067877481792e-08, "loss": 0.0109, "step": 18474 }, { "epoch": 4.203640500568828, "grad_norm": 1.3670984948882239, "learning_rate": 7.666390482088268e-08, "loss": 0.0068, "step": 18475 }, { "epoch": 4.20386803185438, "grad_norm": 1.6421025799370914, "learning_rate": 7.662103310057165e-08, "loss": 0.0484, "step": 18476 }, { "epoch": 4.204095563139932, "grad_norm": 0.9338025134042187, "learning_rate": 7.657817258812261e-08, "loss": 0.041, "step": 18477 }, { "epoch": 4.204323094425484, "grad_norm": 1.128486981192691, "learning_rate": 7.653532328441112e-08, "loss": 0.0058, "step": 18478 }, { "epoch": 4.204550625711035, "grad_norm": 0.8157896530287017, "learning_rate": 7.649248519031337e-08, "loss": 0.0307, "step": 18479 }, { "epoch": 4.204778156996587, "grad_norm": 2.0812511560796016, "learning_rate": 7.644965830670447e-08, "loss": 0.092, "step": 18480 }, { "epoch": 4.205005688282139, "grad_norm": 1.5435327642762768, "learning_rate": 7.64068426344601e-08, "loss": 0.054, "step": 18481 }, { "epoch": 4.205233219567691, "grad_norm": 1.7815610239261346, "learning_rate": 7.636403817445508e-08, "loss": 0.0211, "step": 18482 }, { "epoch": 4.205460750853242, "grad_norm": 2.035460302480811, "learning_rate": 7.632124492756431e-08, "loss": 0.1722, "step": 18483 }, { "epoch": 4.205688282138794, "grad_norm": 1.4073780175023305, "learning_rate": 7.627846289466251e-08, "loss": 0.0091, "step": 18484 }, { "epoch": 4.205915813424346, "grad_norm": 1.7124577019104685, "learning_rate": 7.623569207662391e-08, "loss": 0.1131, "step": 18485 }, { "epoch": 4.206143344709898, "grad_norm": 4.622924777912155, "learning_rate": 7.61929324743229e-08, "loss": 0.0045, "step": 18486 }, { "epoch": 4.206370875995449, "grad_norm": 1.4988819565701157, "learning_rate": 7.615018408863306e-08, "loss": 0.0219, "step": 18487 }, { "epoch": 4.206598407281001, "grad_norm": 0.43086268445755593, "learning_rate": 7.610744692042836e-08, "loss": 0.0014, "step": 18488 }, { "epoch": 4.206825938566553, "grad_norm": 0.7400936417159779, "learning_rate": 7.606472097058233e-08, "loss": 0.0078, "step": 18489 }, { "epoch": 4.207053469852105, "grad_norm": 2.2093077056463333, "learning_rate": 7.602200623996789e-08, "loss": 0.1189, "step": 18490 }, { "epoch": 4.207281001137656, "grad_norm": 1.427585232232956, "learning_rate": 7.597930272945836e-08, "loss": 0.0116, "step": 18491 }, { "epoch": 4.207508532423208, "grad_norm": 1.9209356154606434, "learning_rate": 7.593661043992631e-08, "loss": 0.041, "step": 18492 }, { "epoch": 4.20773606370876, "grad_norm": 1.2479127716943306, "learning_rate": 7.589392937224452e-08, "loss": 0.094, "step": 18493 }, { "epoch": 4.207963594994312, "grad_norm": 1.409348934469912, "learning_rate": 7.585125952728499e-08, "loss": 0.052, "step": 18494 }, { "epoch": 4.208191126279863, "grad_norm": 1.1344119918155777, "learning_rate": 7.580860090592002e-08, "loss": 0.071, "step": 18495 }, { "epoch": 4.208418657565415, "grad_norm": 1.0857372200113933, "learning_rate": 7.576595350902161e-08, "loss": 0.0121, "step": 18496 }, { "epoch": 4.208646188850967, "grad_norm": 1.3634174675136423, "learning_rate": 7.572331733746108e-08, "loss": 0.0394, "step": 18497 }, { "epoch": 4.208873720136519, "grad_norm": 1.3224479260935365, "learning_rate": 7.568069239211009e-08, "loss": 0.0052, "step": 18498 }, { "epoch": 4.20910125142207, "grad_norm": 1.2945452084011155, "learning_rate": 7.563807867383963e-08, "loss": 0.0053, "step": 18499 }, { "epoch": 4.2093287827076225, "grad_norm": 4.633161947484316, "learning_rate": 7.559547618352085e-08, "loss": 0.0253, "step": 18500 }, { "epoch": 4.209556313993174, "grad_norm": 0.850156809024888, "learning_rate": 7.555288492202426e-08, "loss": 0.0039, "step": 18501 }, { "epoch": 4.209783845278726, "grad_norm": 1.706518862648319, "learning_rate": 7.551030489022039e-08, "loss": 0.0561, "step": 18502 }, { "epoch": 4.210011376564277, "grad_norm": 1.0452933408027523, "learning_rate": 7.546773608897967e-08, "loss": 0.0165, "step": 18503 }, { "epoch": 4.2102389078498295, "grad_norm": 1.8467779131169582, "learning_rate": 7.542517851917189e-08, "loss": 0.0097, "step": 18504 }, { "epoch": 4.210466439135381, "grad_norm": 0.8777233479210499, "learning_rate": 7.538263218166711e-08, "loss": 0.0054, "step": 18505 }, { "epoch": 4.210693970420933, "grad_norm": 1.8018172808310768, "learning_rate": 7.534009707733464e-08, "loss": 0.0368, "step": 18506 }, { "epoch": 4.210921501706484, "grad_norm": 1.9435503820161169, "learning_rate": 7.529757320704399e-08, "loss": 0.0169, "step": 18507 }, { "epoch": 4.2111490329920365, "grad_norm": 0.2730499226363583, "learning_rate": 7.525506057166412e-08, "loss": 0.0013, "step": 18508 }, { "epoch": 4.211376564277588, "grad_norm": 1.1302931562262963, "learning_rate": 7.5212559172064e-08, "loss": 0.1006, "step": 18509 }, { "epoch": 4.21160409556314, "grad_norm": 1.3933463970104893, "learning_rate": 7.517006900911242e-08, "loss": 0.0894, "step": 18510 }, { "epoch": 4.211831626848691, "grad_norm": 1.6966063049005473, "learning_rate": 7.512759008367753e-08, "loss": 0.0284, "step": 18511 }, { "epoch": 4.2120591581342435, "grad_norm": 1.201682953632722, "learning_rate": 7.508512239662778e-08, "loss": 0.0596, "step": 18512 }, { "epoch": 4.212286689419795, "grad_norm": 1.2472164114849245, "learning_rate": 7.504266594883083e-08, "loss": 0.0238, "step": 18513 }, { "epoch": 4.212514220705347, "grad_norm": 0.6937306111155852, "learning_rate": 7.500022074115481e-08, "loss": 0.0113, "step": 18514 }, { "epoch": 4.212741751990899, "grad_norm": 0.6623502407810139, "learning_rate": 7.495778677446677e-08, "loss": 0.0026, "step": 18515 }, { "epoch": 4.2129692832764505, "grad_norm": 1.0184745606413075, "learning_rate": 7.491536404963432e-08, "loss": 0.0227, "step": 18516 }, { "epoch": 4.213196814562003, "grad_norm": 0.6458747873643547, "learning_rate": 7.487295256752445e-08, "loss": 0.0042, "step": 18517 }, { "epoch": 4.213424345847554, "grad_norm": 1.1889521766429574, "learning_rate": 7.483055232900383e-08, "loss": 0.0069, "step": 18518 }, { "epoch": 4.213651877133106, "grad_norm": 1.874300710763185, "learning_rate": 7.478816333493926e-08, "loss": 0.1054, "step": 18519 }, { "epoch": 4.2138794084186575, "grad_norm": 0.5048153948627846, "learning_rate": 7.474578558619681e-08, "loss": 0.0029, "step": 18520 }, { "epoch": 4.21410693970421, "grad_norm": 1.4987085917466227, "learning_rate": 7.470341908364293e-08, "loss": 0.0853, "step": 18521 }, { "epoch": 4.214334470989761, "grad_norm": 1.337359651646019, "learning_rate": 7.466106382814316e-08, "loss": 0.0317, "step": 18522 }, { "epoch": 4.214562002275313, "grad_norm": 1.7123354121574084, "learning_rate": 7.461871982056334e-08, "loss": 0.0374, "step": 18523 }, { "epoch": 4.2147895335608645, "grad_norm": 1.1022624602195281, "learning_rate": 7.457638706176912e-08, "loss": 0.0491, "step": 18524 }, { "epoch": 4.215017064846417, "grad_norm": 1.1977994157326646, "learning_rate": 7.453406555262533e-08, "loss": 0.0459, "step": 18525 }, { "epoch": 4.215244596131968, "grad_norm": 1.11584894344055, "learning_rate": 7.449175529399721e-08, "loss": 0.007, "step": 18526 }, { "epoch": 4.21547212741752, "grad_norm": 1.1717213577168575, "learning_rate": 7.44494562867494e-08, "loss": 0.0082, "step": 18527 }, { "epoch": 4.2156996587030715, "grad_norm": 1.6742013140602652, "learning_rate": 7.440716853174636e-08, "loss": 0.0214, "step": 18528 }, { "epoch": 4.215927189988624, "grad_norm": 1.4717789568792456, "learning_rate": 7.436489202985238e-08, "loss": 0.0157, "step": 18529 }, { "epoch": 4.216154721274175, "grad_norm": 1.4007603885988509, "learning_rate": 7.432262678193157e-08, "loss": 0.098, "step": 18530 }, { "epoch": 4.216382252559727, "grad_norm": 1.384087518010359, "learning_rate": 7.428037278884782e-08, "loss": 0.0055, "step": 18531 }, { "epoch": 4.2166097838452785, "grad_norm": 0.6915684587567564, "learning_rate": 7.423813005146461e-08, "loss": 0.0034, "step": 18532 }, { "epoch": 4.216837315130831, "grad_norm": 0.21073150001880378, "learning_rate": 7.419589857064549e-08, "loss": 0.0006, "step": 18533 }, { "epoch": 4.217064846416382, "grad_norm": 1.7929811384542669, "learning_rate": 7.415367834725337e-08, "loss": 0.0871, "step": 18534 }, { "epoch": 4.217292377701934, "grad_norm": 0.924447755272314, "learning_rate": 7.411146938215117e-08, "loss": 0.0083, "step": 18535 }, { "epoch": 4.2175199089874855, "grad_norm": 1.0063197902037777, "learning_rate": 7.406927167620163e-08, "loss": 0.02, "step": 18536 }, { "epoch": 4.217747440273038, "grad_norm": 1.1742413216599112, "learning_rate": 7.402708523026713e-08, "loss": 0.039, "step": 18537 }, { "epoch": 4.217974971558589, "grad_norm": 0.8541857196621122, "learning_rate": 7.398491004521016e-08, "loss": 0.0068, "step": 18538 }, { "epoch": 4.218202502844141, "grad_norm": 2.422234177529992, "learning_rate": 7.394274612189236e-08, "loss": 0.0107, "step": 18539 }, { "epoch": 4.2184300341296925, "grad_norm": 1.1447227737128858, "learning_rate": 7.39005934611757e-08, "loss": 0.0118, "step": 18540 }, { "epoch": 4.218657565415245, "grad_norm": 1.1277975953660049, "learning_rate": 7.385845206392163e-08, "loss": 0.0408, "step": 18541 }, { "epoch": 4.218885096700796, "grad_norm": 0.9331176559818032, "learning_rate": 7.381632193099133e-08, "loss": 0.0054, "step": 18542 }, { "epoch": 4.219112627986348, "grad_norm": 1.3957129961849013, "learning_rate": 7.377420306324594e-08, "loss": 0.0814, "step": 18543 }, { "epoch": 4.2193401592718995, "grad_norm": 3.0605537286073594, "learning_rate": 7.373209546154628e-08, "loss": 0.0755, "step": 18544 }, { "epoch": 4.219567690557452, "grad_norm": 0.7031825346888259, "learning_rate": 7.368999912675319e-08, "loss": 0.0031, "step": 18545 }, { "epoch": 4.219795221843003, "grad_norm": 0.8903890719797846, "learning_rate": 7.364791405972683e-08, "loss": 0.0055, "step": 18546 }, { "epoch": 4.220022753128555, "grad_norm": 0.6177914247434627, "learning_rate": 7.360584026132718e-08, "loss": 0.0031, "step": 18547 }, { "epoch": 4.2202502844141065, "grad_norm": 1.4235319251756675, "learning_rate": 7.356377773241448e-08, "loss": 0.0127, "step": 18548 }, { "epoch": 4.220477815699659, "grad_norm": 0.9635418341536544, "learning_rate": 7.352172647384797e-08, "loss": 0.0149, "step": 18549 }, { "epoch": 4.22070534698521, "grad_norm": 1.5346083924602802, "learning_rate": 7.347968648648764e-08, "loss": 0.0247, "step": 18550 }, { "epoch": 4.220932878270762, "grad_norm": 0.6899284063502144, "learning_rate": 7.34376577711924e-08, "loss": 0.0051, "step": 18551 }, { "epoch": 4.2211604095563136, "grad_norm": 1.708974879921746, "learning_rate": 7.339564032882135e-08, "loss": 0.0329, "step": 18552 }, { "epoch": 4.221387940841866, "grad_norm": 1.148939902266209, "learning_rate": 7.335363416023325e-08, "loss": 0.0921, "step": 18553 }, { "epoch": 4.221615472127418, "grad_norm": 0.8240125439981407, "learning_rate": 7.331163926628644e-08, "loss": 0.0056, "step": 18554 }, { "epoch": 4.221843003412969, "grad_norm": 1.3155310636497062, "learning_rate": 7.326965564783933e-08, "loss": 0.0877, "step": 18555 }, { "epoch": 4.2220705346985214, "grad_norm": 1.0413655666875934, "learning_rate": 7.322768330574999e-08, "loss": 0.0056, "step": 18556 }, { "epoch": 4.222298065984073, "grad_norm": 2.08371669002521, "learning_rate": 7.318572224087648e-08, "loss": 0.0153, "step": 18557 }, { "epoch": 4.222525597269625, "grad_norm": 1.6830969843072614, "learning_rate": 7.314377245407615e-08, "loss": 0.0553, "step": 18558 }, { "epoch": 4.222753128555176, "grad_norm": 1.3999433243395785, "learning_rate": 7.310183394620634e-08, "loss": 0.0294, "step": 18559 }, { "epoch": 4.2229806598407285, "grad_norm": 0.8852686521009023, "learning_rate": 7.305990671812438e-08, "loss": 0.0141, "step": 18560 }, { "epoch": 4.22320819112628, "grad_norm": 0.7447111196039714, "learning_rate": 7.301799077068697e-08, "loss": 0.0038, "step": 18561 }, { "epoch": 4.223435722411832, "grad_norm": 0.7468440135706335, "learning_rate": 7.297608610475092e-08, "loss": 0.0139, "step": 18562 }, { "epoch": 4.223663253697383, "grad_norm": 1.3274634666210035, "learning_rate": 7.29341927211727e-08, "loss": 0.0083, "step": 18563 }, { "epoch": 4.2238907849829355, "grad_norm": 1.2225896839987564, "learning_rate": 7.289231062080869e-08, "loss": 0.0155, "step": 18564 }, { "epoch": 4.224118316268487, "grad_norm": 1.1989938427217253, "learning_rate": 7.285043980451468e-08, "loss": 0.0098, "step": 18565 }, { "epoch": 4.224345847554039, "grad_norm": 0.9315354105734843, "learning_rate": 7.280858027314631e-08, "loss": 0.0125, "step": 18566 }, { "epoch": 4.22457337883959, "grad_norm": 0.5803024102262643, "learning_rate": 7.276673202755942e-08, "loss": 0.004, "step": 18567 }, { "epoch": 4.2248009101251425, "grad_norm": 1.2419753233447048, "learning_rate": 7.272489506860901e-08, "loss": 0.0182, "step": 18568 }, { "epoch": 4.225028441410694, "grad_norm": 1.7831953551783866, "learning_rate": 7.268306939715038e-08, "loss": 0.0865, "step": 18569 }, { "epoch": 4.225255972696246, "grad_norm": 0.7612454045727743, "learning_rate": 7.264125501403818e-08, "loss": 0.0497, "step": 18570 }, { "epoch": 4.225483503981797, "grad_norm": 1.631355473101773, "learning_rate": 7.259945192012736e-08, "loss": 0.0302, "step": 18571 }, { "epoch": 4.2257110352673495, "grad_norm": 1.536685543185215, "learning_rate": 7.255766011627197e-08, "loss": 0.0283, "step": 18572 }, { "epoch": 4.225938566552901, "grad_norm": 1.6060164400298647, "learning_rate": 7.251587960332624e-08, "loss": 0.1057, "step": 18573 }, { "epoch": 4.226166097838453, "grad_norm": 1.1858853973148635, "learning_rate": 7.247411038214413e-08, "loss": 0.0665, "step": 18574 }, { "epoch": 4.226393629124004, "grad_norm": 1.0485793168363347, "learning_rate": 7.243235245357922e-08, "loss": 0.0712, "step": 18575 }, { "epoch": 4.2266211604095565, "grad_norm": 0.30015401474410364, "learning_rate": 7.239060581848506e-08, "loss": 0.0009, "step": 18576 }, { "epoch": 4.226848691695108, "grad_norm": 0.6291017132480974, "learning_rate": 7.234887047771498e-08, "loss": 0.0033, "step": 18577 }, { "epoch": 4.22707622298066, "grad_norm": 1.3625286510133416, "learning_rate": 7.230714643212171e-08, "loss": 0.0672, "step": 18578 }, { "epoch": 4.227303754266211, "grad_norm": 2.182907342210854, "learning_rate": 7.226543368255825e-08, "loss": 0.0348, "step": 18579 }, { "epoch": 4.2275312855517635, "grad_norm": 1.1942803780883713, "learning_rate": 7.222373222987695e-08, "loss": 0.0035, "step": 18580 }, { "epoch": 4.227758816837315, "grad_norm": 1.4443231637498157, "learning_rate": 7.218204207493036e-08, "loss": 0.0637, "step": 18581 }, { "epoch": 4.227986348122867, "grad_norm": 1.9735979941466066, "learning_rate": 7.214036321857021e-08, "loss": 0.018, "step": 18582 }, { "epoch": 4.228213879408418, "grad_norm": 0.9307021055647327, "learning_rate": 7.20986956616486e-08, "loss": 0.0159, "step": 18583 }, { "epoch": 4.2284414106939705, "grad_norm": 4.3201808368136305, "learning_rate": 7.205703940501713e-08, "loss": 0.0346, "step": 18584 }, { "epoch": 4.228668941979522, "grad_norm": 0.6299687184610033, "learning_rate": 7.201539444952694e-08, "loss": 0.0027, "step": 18585 }, { "epoch": 4.228896473265074, "grad_norm": 1.695346408874705, "learning_rate": 7.197376079602956e-08, "loss": 0.0853, "step": 18586 }, { "epoch": 4.229124004550625, "grad_norm": 1.294020532662031, "learning_rate": 7.193213844537548e-08, "loss": 0.0669, "step": 18587 }, { "epoch": 4.2293515358361775, "grad_norm": 0.522332977692884, "learning_rate": 7.189052739841579e-08, "loss": 0.0036, "step": 18588 }, { "epoch": 4.229579067121729, "grad_norm": 3.2630105920660566, "learning_rate": 7.184892765600054e-08, "loss": 0.0335, "step": 18589 }, { "epoch": 4.229806598407281, "grad_norm": 1.2626171395019092, "learning_rate": 7.180733921898025e-08, "loss": 0.012, "step": 18590 }, { "epoch": 4.230034129692832, "grad_norm": 0.5461750164285969, "learning_rate": 7.176576208820493e-08, "loss": 0.0026, "step": 18591 }, { "epoch": 4.2302616609783845, "grad_norm": 2.1608227841025465, "learning_rate": 7.172419626452403e-08, "loss": 0.0179, "step": 18592 }, { "epoch": 4.230489192263937, "grad_norm": 1.0377730795946678, "learning_rate": 7.168264174878752e-08, "loss": 0.0237, "step": 18593 }, { "epoch": 4.230716723549488, "grad_norm": 1.2743484260371585, "learning_rate": 7.164109854184427e-08, "loss": 0.0076, "step": 18594 }, { "epoch": 4.23094425483504, "grad_norm": 1.2416948383804622, "learning_rate": 7.159956664454363e-08, "loss": 0.0777, "step": 18595 }, { "epoch": 4.2311717861205915, "grad_norm": 0.752283837574757, "learning_rate": 7.155804605773426e-08, "loss": 0.0021, "step": 18596 }, { "epoch": 4.231399317406144, "grad_norm": 1.6037623832287324, "learning_rate": 7.151653678226484e-08, "loss": 0.0354, "step": 18597 }, { "epoch": 4.231626848691695, "grad_norm": 0.8671174693948369, "learning_rate": 7.147503881898388e-08, "loss": 0.0048, "step": 18598 }, { "epoch": 4.231854379977247, "grad_norm": 0.6474755331287151, "learning_rate": 7.143355216873927e-08, "loss": 0.0023, "step": 18599 }, { "epoch": 4.2320819112627985, "grad_norm": 1.5526117060959412, "learning_rate": 7.13920768323792e-08, "loss": 0.0957, "step": 18600 }, { "epoch": 4.232309442548351, "grad_norm": 1.5027434905034602, "learning_rate": 7.135061281075107e-08, "loss": 0.0229, "step": 18601 }, { "epoch": 4.232536973833902, "grad_norm": 1.1807019655591533, "learning_rate": 7.130916010470252e-08, "loss": 0.0282, "step": 18602 }, { "epoch": 4.232764505119454, "grad_norm": 1.7207749312478824, "learning_rate": 7.126771871508058e-08, "loss": 0.0719, "step": 18603 }, { "epoch": 4.2329920364050055, "grad_norm": 1.6058183693657377, "learning_rate": 7.122628864273236e-08, "loss": 0.0286, "step": 18604 }, { "epoch": 4.233219567690558, "grad_norm": 1.275939462960031, "learning_rate": 7.118486988850482e-08, "loss": 0.1365, "step": 18605 }, { "epoch": 4.233447098976109, "grad_norm": 4.02480554826888, "learning_rate": 7.114346245324407e-08, "loss": 0.0036, "step": 18606 }, { "epoch": 4.233674630261661, "grad_norm": 1.6012255121339325, "learning_rate": 7.110206633779676e-08, "loss": 0.0849, "step": 18607 }, { "epoch": 4.2339021615472126, "grad_norm": 0.8399092394866189, "learning_rate": 7.106068154300864e-08, "loss": 0.0274, "step": 18608 }, { "epoch": 4.234129692832765, "grad_norm": 1.609211987189209, "learning_rate": 7.101930806972572e-08, "loss": 0.0301, "step": 18609 }, { "epoch": 4.234357224118316, "grad_norm": 1.2791824213679155, "learning_rate": 7.09779459187937e-08, "loss": 0.0099, "step": 18610 }, { "epoch": 4.234584755403868, "grad_norm": 1.1161350592396468, "learning_rate": 7.093659509105768e-08, "loss": 0.0256, "step": 18611 }, { "epoch": 4.23481228668942, "grad_norm": 0.4507882536788598, "learning_rate": 7.089525558736307e-08, "loss": 0.0025, "step": 18612 }, { "epoch": 4.235039817974972, "grad_norm": 1.1057449822337322, "learning_rate": 7.08539274085545e-08, "loss": 0.0275, "step": 18613 }, { "epoch": 4.235267349260523, "grad_norm": 1.9856989084763987, "learning_rate": 7.081261055547697e-08, "loss": 0.0186, "step": 18614 }, { "epoch": 4.235494880546075, "grad_norm": 1.6647961282245558, "learning_rate": 7.077130502897455e-08, "loss": 0.0485, "step": 18615 }, { "epoch": 4.235722411831627, "grad_norm": 2.504049575661321, "learning_rate": 7.073001082989167e-08, "loss": 0.0933, "step": 18616 }, { "epoch": 4.235949943117179, "grad_norm": 1.4826856127027654, "learning_rate": 7.06887279590724e-08, "loss": 0.0953, "step": 18617 }, { "epoch": 4.23617747440273, "grad_norm": 1.2957614836824487, "learning_rate": 7.064745641736017e-08, "loss": 0.0777, "step": 18618 }, { "epoch": 4.236405005688282, "grad_norm": 0.8770286532547925, "learning_rate": 7.060619620559886e-08, "loss": 0.0088, "step": 18619 }, { "epoch": 4.236632536973834, "grad_norm": 1.4110307634139467, "learning_rate": 7.056494732463143e-08, "loss": 0.0786, "step": 18620 }, { "epoch": 4.236860068259386, "grad_norm": 1.53655712320265, "learning_rate": 7.052370977530123e-08, "loss": 0.0383, "step": 18621 }, { "epoch": 4.237087599544937, "grad_norm": 1.7320936963829257, "learning_rate": 7.048248355845077e-08, "loss": 0.1333, "step": 18622 }, { "epoch": 4.237315130830489, "grad_norm": 1.041138782060803, "learning_rate": 7.044126867492285e-08, "loss": 0.0427, "step": 18623 }, { "epoch": 4.237542662116041, "grad_norm": 1.0276907990663628, "learning_rate": 7.040006512555986e-08, "loss": 0.0068, "step": 18624 }, { "epoch": 4.237770193401593, "grad_norm": 0.6237176873562663, "learning_rate": 7.035887291120369e-08, "loss": 0.0049, "step": 18625 }, { "epoch": 4.237997724687144, "grad_norm": 1.5932409204745202, "learning_rate": 7.031769203269652e-08, "loss": 0.0245, "step": 18626 }, { "epoch": 4.238225255972696, "grad_norm": 1.838719836584669, "learning_rate": 7.027652249087975e-08, "loss": 0.0132, "step": 18627 }, { "epoch": 4.238452787258248, "grad_norm": 0.7207080340614257, "learning_rate": 7.023536428659502e-08, "loss": 0.0052, "step": 18628 }, { "epoch": 4.2386803185438, "grad_norm": 1.3831059950616542, "learning_rate": 7.019421742068335e-08, "loss": 0.0242, "step": 18629 }, { "epoch": 4.238907849829351, "grad_norm": 2.2833390403674527, "learning_rate": 7.015308189398586e-08, "loss": 0.038, "step": 18630 }, { "epoch": 4.239135381114903, "grad_norm": 1.177930048214178, "learning_rate": 7.011195770734321e-08, "loss": 0.0799, "step": 18631 }, { "epoch": 4.2393629124004555, "grad_norm": 1.4768627397416119, "learning_rate": 7.007084486159589e-08, "loss": 0.0297, "step": 18632 }, { "epoch": 4.239590443686007, "grad_norm": 1.066910154075339, "learning_rate": 7.002974335758431e-08, "loss": 0.0143, "step": 18633 }, { "epoch": 4.239817974971559, "grad_norm": 0.3048048334255295, "learning_rate": 6.998865319614842e-08, "loss": 0.0013, "step": 18634 }, { "epoch": 4.24004550625711, "grad_norm": 1.7344235332243856, "learning_rate": 6.994757437812784e-08, "loss": 0.0346, "step": 18635 }, { "epoch": 4.2402730375426625, "grad_norm": 1.4642527536609007, "learning_rate": 6.990650690436231e-08, "loss": 0.0239, "step": 18636 }, { "epoch": 4.240500568828214, "grad_norm": 1.2425490587634291, "learning_rate": 6.986545077569115e-08, "loss": 0.0632, "step": 18637 }, { "epoch": 4.240728100113766, "grad_norm": 8.451745312422252, "learning_rate": 6.982440599295363e-08, "loss": 0.0281, "step": 18638 }, { "epoch": 4.240955631399317, "grad_norm": 3.5560945029148154, "learning_rate": 6.978337255698839e-08, "loss": 0.0279, "step": 18639 }, { "epoch": 4.2411831626848695, "grad_norm": 0.5434726655661437, "learning_rate": 6.974235046863432e-08, "loss": 0.0039, "step": 18640 }, { "epoch": 4.241410693970421, "grad_norm": 1.3869080053757215, "learning_rate": 6.970133972872966e-08, "loss": 0.0323, "step": 18641 }, { "epoch": 4.241638225255973, "grad_norm": 1.1554048476461223, "learning_rate": 6.966034033811256e-08, "loss": 0.066, "step": 18642 }, { "epoch": 4.241865756541524, "grad_norm": 1.241014890456311, "learning_rate": 6.961935229762102e-08, "loss": 0.0145, "step": 18643 }, { "epoch": 4.2420932878270765, "grad_norm": 1.8614920967464983, "learning_rate": 6.957837560809276e-08, "loss": 0.0055, "step": 18644 }, { "epoch": 4.242320819112628, "grad_norm": 0.7697711074409945, "learning_rate": 6.953741027036547e-08, "loss": 0.0185, "step": 18645 }, { "epoch": 4.24254835039818, "grad_norm": 0.6116500765750258, "learning_rate": 6.949645628527609e-08, "loss": 0.0051, "step": 18646 }, { "epoch": 4.242775881683731, "grad_norm": 1.1311237312246196, "learning_rate": 6.945551365366191e-08, "loss": 0.0089, "step": 18647 }, { "epoch": 4.2430034129692835, "grad_norm": 1.5196210640555305, "learning_rate": 6.941458237635958e-08, "loss": 0.0376, "step": 18648 }, { "epoch": 4.243230944254835, "grad_norm": 0.7397303462599334, "learning_rate": 6.93736624542056e-08, "loss": 0.0116, "step": 18649 }, { "epoch": 4.243458475540387, "grad_norm": 1.0216193474955866, "learning_rate": 6.933275388803631e-08, "loss": 0.0334, "step": 18650 }, { "epoch": 4.243686006825938, "grad_norm": 0.4323201841819569, "learning_rate": 6.929185667868793e-08, "loss": 0.0015, "step": 18651 }, { "epoch": 4.2439135381114905, "grad_norm": 1.2719537918054125, "learning_rate": 6.925097082699637e-08, "loss": 0.0175, "step": 18652 }, { "epoch": 4.244141069397042, "grad_norm": 1.191552453183591, "learning_rate": 6.921009633379716e-08, "loss": 0.0362, "step": 18653 }, { "epoch": 4.244368600682594, "grad_norm": 1.7181443169370376, "learning_rate": 6.91692331999256e-08, "loss": 0.0977, "step": 18654 }, { "epoch": 4.244596131968145, "grad_norm": 0.9893464117697726, "learning_rate": 6.912838142621704e-08, "loss": 0.0172, "step": 18655 }, { "epoch": 4.2448236632536975, "grad_norm": 3.5576351055504745, "learning_rate": 6.908754101350626e-08, "loss": 0.0162, "step": 18656 }, { "epoch": 4.245051194539249, "grad_norm": 1.2272203778948108, "learning_rate": 6.904671196262799e-08, "loss": 0.082, "step": 18657 }, { "epoch": 4.245278725824801, "grad_norm": 5.303226874190367, "learning_rate": 6.900589427441679e-08, "loss": 0.0095, "step": 18658 }, { "epoch": 4.245506257110352, "grad_norm": 1.606748825609581, "learning_rate": 6.896508794970697e-08, "loss": 0.0851, "step": 18659 }, { "epoch": 4.2457337883959045, "grad_norm": 0.926015290039646, "learning_rate": 6.892429298933238e-08, "loss": 0.0165, "step": 18660 }, { "epoch": 4.245961319681456, "grad_norm": 0.921990408821965, "learning_rate": 6.888350939412675e-08, "loss": 0.0252, "step": 18661 }, { "epoch": 4.246188850967008, "grad_norm": 1.3059283124461645, "learning_rate": 6.884273716492383e-08, "loss": 0.0853, "step": 18662 }, { "epoch": 4.246416382252559, "grad_norm": 1.873034899922903, "learning_rate": 6.880197630255665e-08, "loss": 0.0502, "step": 18663 }, { "epoch": 4.2466439135381115, "grad_norm": 1.1142009174057952, "learning_rate": 6.876122680785851e-08, "loss": 0.0787, "step": 18664 }, { "epoch": 4.246871444823663, "grad_norm": 0.7246766253910961, "learning_rate": 6.872048868166225e-08, "loss": 0.0078, "step": 18665 }, { "epoch": 4.247098976109215, "grad_norm": 1.9549313650713016, "learning_rate": 6.867976192480038e-08, "loss": 0.0029, "step": 18666 }, { "epoch": 4.247326507394766, "grad_norm": 1.2248109620458003, "learning_rate": 6.863904653810539e-08, "loss": 0.0116, "step": 18667 }, { "epoch": 4.2475540386803186, "grad_norm": 1.571717683292836, "learning_rate": 6.859834252240918e-08, "loss": 0.0791, "step": 18668 }, { "epoch": 4.24778156996587, "grad_norm": 1.1256929522136085, "learning_rate": 6.85576498785441e-08, "loss": 0.0142, "step": 18669 }, { "epoch": 4.248009101251422, "grad_norm": 1.290171435266086, "learning_rate": 6.851696860734121e-08, "loss": 0.0083, "step": 18670 }, { "epoch": 4.248236632536974, "grad_norm": 1.1280321503841082, "learning_rate": 6.847629870963268e-08, "loss": 0.0297, "step": 18671 }, { "epoch": 4.248464163822526, "grad_norm": 1.0368535571441335, "learning_rate": 6.843564018624932e-08, "loss": 0.0206, "step": 18672 }, { "epoch": 4.248691695108078, "grad_norm": 1.0808666010829637, "learning_rate": 6.839499303802205e-08, "loss": 0.0269, "step": 18673 }, { "epoch": 4.248919226393629, "grad_norm": 1.622586392830888, "learning_rate": 6.835435726578188e-08, "loss": 0.1051, "step": 18674 }, { "epoch": 4.249146757679181, "grad_norm": 2.6060488758951164, "learning_rate": 6.831373287035912e-08, "loss": 0.1087, "step": 18675 }, { "epoch": 4.249374288964733, "grad_norm": 1.7527358608916275, "learning_rate": 6.827311985258406e-08, "loss": 0.0293, "step": 18676 }, { "epoch": 4.249601820250285, "grad_norm": 1.2178382530890217, "learning_rate": 6.823251821328689e-08, "loss": 0.0636, "step": 18677 }, { "epoch": 4.249829351535836, "grad_norm": 1.4627192231059174, "learning_rate": 6.819192795329748e-08, "loss": 0.0653, "step": 18678 }, { "epoch": 4.250056882821388, "grad_norm": 1.2806484039276693, "learning_rate": 6.815134907344533e-08, "loss": 0.0153, "step": 18679 }, { "epoch": 4.25028441410694, "grad_norm": 1.0593922766867845, "learning_rate": 6.811078157455963e-08, "loss": 0.0383, "step": 18680 }, { "epoch": 4.250511945392492, "grad_norm": 2.9049151631415397, "learning_rate": 6.807022545746985e-08, "loss": 0.0176, "step": 18681 }, { "epoch": 4.250739476678043, "grad_norm": 0.9896818090734263, "learning_rate": 6.80296807230045e-08, "loss": 0.0658, "step": 18682 }, { "epoch": 4.250967007963595, "grad_norm": 0.8023507963985468, "learning_rate": 6.79891473719925e-08, "loss": 0.0064, "step": 18683 }, { "epoch": 4.251194539249147, "grad_norm": 1.4845059087365096, "learning_rate": 6.794862540526228e-08, "loss": 0.0159, "step": 18684 }, { "epoch": 4.251422070534699, "grad_norm": 1.1245640523373865, "learning_rate": 6.790811482364185e-08, "loss": 0.0366, "step": 18685 }, { "epoch": 4.25164960182025, "grad_norm": 1.3479612955261535, "learning_rate": 6.786761562795945e-08, "loss": 0.0357, "step": 18686 }, { "epoch": 4.251877133105802, "grad_norm": 0.6530507041493341, "learning_rate": 6.782712781904247e-08, "loss": 0.0072, "step": 18687 }, { "epoch": 4.252104664391354, "grad_norm": 0.3932761025035928, "learning_rate": 6.778665139771871e-08, "loss": 0.0011, "step": 18688 }, { "epoch": 4.252332195676906, "grad_norm": 1.7863170357270404, "learning_rate": 6.774618636481517e-08, "loss": 0.0892, "step": 18689 }, { "epoch": 4.252559726962457, "grad_norm": 0.9422659469385196, "learning_rate": 6.770573272115909e-08, "loss": 0.0462, "step": 18690 }, { "epoch": 4.252787258248009, "grad_norm": 1.2028502044189215, "learning_rate": 6.76652904675773e-08, "loss": 0.073, "step": 18691 }, { "epoch": 4.253014789533561, "grad_norm": 1.1842504590886531, "learning_rate": 6.76248596048961e-08, "loss": 0.0377, "step": 18692 }, { "epoch": 4.253242320819113, "grad_norm": 1.6885688317265939, "learning_rate": 6.758444013394213e-08, "loss": 0.0529, "step": 18693 }, { "epoch": 4.253469852104664, "grad_norm": 0.9072747691057093, "learning_rate": 6.754403205554122e-08, "loss": 0.0061, "step": 18694 }, { "epoch": 4.253697383390216, "grad_norm": 0.7454483336042647, "learning_rate": 6.750363537051947e-08, "loss": 0.004, "step": 18695 }, { "epoch": 4.253924914675768, "grad_norm": 1.2949457801533357, "learning_rate": 6.746325007970226e-08, "loss": 0.0215, "step": 18696 }, { "epoch": 4.25415244596132, "grad_norm": 1.329225263207367, "learning_rate": 6.742287618391519e-08, "loss": 0.0116, "step": 18697 }, { "epoch": 4.254379977246871, "grad_norm": 0.7845576882508639, "learning_rate": 6.738251368398355e-08, "loss": 0.026, "step": 18698 }, { "epoch": 4.254607508532423, "grad_norm": 1.2898887495284854, "learning_rate": 6.734216258073189e-08, "loss": 0.0728, "step": 18699 }, { "epoch": 4.254835039817975, "grad_norm": 0.9970425325915344, "learning_rate": 6.730182287498527e-08, "loss": 0.033, "step": 18700 }, { "epoch": 4.255062571103527, "grad_norm": 1.5099847263685278, "learning_rate": 6.726149456756786e-08, "loss": 0.0331, "step": 18701 }, { "epoch": 4.255290102389078, "grad_norm": 1.103473533984226, "learning_rate": 6.722117765930424e-08, "loss": 0.0565, "step": 18702 }, { "epoch": 4.25551763367463, "grad_norm": 0.970195143196978, "learning_rate": 6.71808721510181e-08, "loss": 0.047, "step": 18703 }, { "epoch": 4.255745164960182, "grad_norm": 1.2428766032356757, "learning_rate": 6.71405780435333e-08, "loss": 0.0466, "step": 18704 }, { "epoch": 4.255972696245734, "grad_norm": 1.208870478489433, "learning_rate": 6.710029533767349e-08, "loss": 0.0212, "step": 18705 }, { "epoch": 4.256200227531286, "grad_norm": 0.84681711741412, "learning_rate": 6.706002403426185e-08, "loss": 0.0063, "step": 18706 }, { "epoch": 4.256427758816837, "grad_norm": 1.362476182247585, "learning_rate": 6.701976413412157e-08, "loss": 0.0914, "step": 18707 }, { "epoch": 4.256655290102389, "grad_norm": 1.580023835001725, "learning_rate": 6.697951563807537e-08, "loss": 0.0702, "step": 18708 }, { "epoch": 4.256882821387941, "grad_norm": 1.0175311379185794, "learning_rate": 6.693927854694596e-08, "loss": 0.0413, "step": 18709 }, { "epoch": 4.257110352673493, "grad_norm": 1.131279221575934, "learning_rate": 6.689905286155554e-08, "loss": 0.0532, "step": 18710 }, { "epoch": 4.257337883959044, "grad_norm": 1.0542221222416435, "learning_rate": 6.685883858272645e-08, "loss": 0.0234, "step": 18711 }, { "epoch": 4.2575654152445965, "grad_norm": 0.553023166993489, "learning_rate": 6.681863571128051e-08, "loss": 0.0134, "step": 18712 }, { "epoch": 4.257792946530148, "grad_norm": 1.2124546873374005, "learning_rate": 6.677844424803938e-08, "loss": 0.0178, "step": 18713 }, { "epoch": 4.2580204778157, "grad_norm": 1.334417285092154, "learning_rate": 6.673826419382454e-08, "loss": 0.0416, "step": 18714 }, { "epoch": 4.258248009101251, "grad_norm": 0.9782983567974667, "learning_rate": 6.669809554945714e-08, "loss": 0.02, "step": 18715 }, { "epoch": 4.2584755403868035, "grad_norm": 1.0444197616187558, "learning_rate": 6.665793831575825e-08, "loss": 0.0553, "step": 18716 }, { "epoch": 4.258703071672355, "grad_norm": 1.1506145927411804, "learning_rate": 6.661779249354842e-08, "loss": 0.0228, "step": 18717 }, { "epoch": 4.258930602957907, "grad_norm": 1.5403025720669747, "learning_rate": 6.65776580836483e-08, "loss": 0.0216, "step": 18718 }, { "epoch": 4.259158134243458, "grad_norm": 0.4777993857469384, "learning_rate": 6.653753508687827e-08, "loss": 0.0016, "step": 18719 }, { "epoch": 4.2593856655290105, "grad_norm": 0.69813643458329, "learning_rate": 6.649742350405817e-08, "loss": 0.006, "step": 18720 }, { "epoch": 4.259613196814562, "grad_norm": 1.4420974274692342, "learning_rate": 6.645732333600794e-08, "loss": 0.0083, "step": 18721 }, { "epoch": 4.259840728100114, "grad_norm": 1.3891834135399743, "learning_rate": 6.641723458354712e-08, "loss": 0.0249, "step": 18722 }, { "epoch": 4.260068259385665, "grad_norm": 1.788259391200228, "learning_rate": 6.637715724749493e-08, "loss": 0.0151, "step": 18723 }, { "epoch": 4.2602957906712176, "grad_norm": 1.7335681223314783, "learning_rate": 6.633709132867052e-08, "loss": 0.0847, "step": 18724 }, { "epoch": 4.260523321956769, "grad_norm": 1.1564331513491743, "learning_rate": 6.629703682789285e-08, "loss": 0.0578, "step": 18725 }, { "epoch": 4.260750853242321, "grad_norm": 0.5016906741003977, "learning_rate": 6.625699374598067e-08, "loss": 0.0024, "step": 18726 }, { "epoch": 4.260978384527872, "grad_norm": 1.3900609630967466, "learning_rate": 6.62169620837521e-08, "loss": 0.0085, "step": 18727 }, { "epoch": 4.261205915813425, "grad_norm": 1.222999228864869, "learning_rate": 6.617694184202557e-08, "loss": 0.0494, "step": 18728 }, { "epoch": 4.261433447098976, "grad_norm": 1.189593049027546, "learning_rate": 6.613693302161897e-08, "loss": 0.02, "step": 18729 }, { "epoch": 4.261660978384528, "grad_norm": 0.5828320937916365, "learning_rate": 6.609693562334977e-08, "loss": 0.005, "step": 18730 }, { "epoch": 4.261888509670079, "grad_norm": 1.7218656133332082, "learning_rate": 6.605694964803567e-08, "loss": 0.1092, "step": 18731 }, { "epoch": 4.262116040955632, "grad_norm": 2.4795747339373237, "learning_rate": 6.601697509649382e-08, "loss": 0.0125, "step": 18732 }, { "epoch": 4.262343572241183, "grad_norm": 2.3701635733360784, "learning_rate": 6.597701196954145e-08, "loss": 0.0166, "step": 18733 }, { "epoch": 4.262571103526735, "grad_norm": 1.1755309464968653, "learning_rate": 6.593706026799499e-08, "loss": 0.029, "step": 18734 }, { "epoch": 4.262798634812286, "grad_norm": 1.0415058459514155, "learning_rate": 6.589711999267126e-08, "loss": 0.0404, "step": 18735 }, { "epoch": 4.263026166097839, "grad_norm": 1.4079224646403607, "learning_rate": 6.585719114438637e-08, "loss": 0.0712, "step": 18736 }, { "epoch": 4.26325369738339, "grad_norm": 0.9851654694473401, "learning_rate": 6.581727372395645e-08, "loss": 0.0339, "step": 18737 }, { "epoch": 4.263481228668942, "grad_norm": 2.9225058615318864, "learning_rate": 6.577736773219746e-08, "loss": 0.0376, "step": 18738 }, { "epoch": 4.263708759954493, "grad_norm": 1.5603889607234478, "learning_rate": 6.57374731699248e-08, "loss": 0.0489, "step": 18739 }, { "epoch": 4.263936291240046, "grad_norm": 1.370901760952965, "learning_rate": 6.569759003795403e-08, "loss": 0.0801, "step": 18740 }, { "epoch": 4.264163822525597, "grad_norm": 0.8434189686045317, "learning_rate": 6.565771833710031e-08, "loss": 0.0146, "step": 18741 }, { "epoch": 4.264391353811149, "grad_norm": 1.1901376435837503, "learning_rate": 6.561785806817822e-08, "loss": 0.0542, "step": 18742 }, { "epoch": 4.2646188850967, "grad_norm": 1.329833502413111, "learning_rate": 6.55780092320027e-08, "loss": 0.0436, "step": 18743 }, { "epoch": 4.264846416382253, "grad_norm": 1.2094762623850994, "learning_rate": 6.553817182938807e-08, "loss": 0.0296, "step": 18744 }, { "epoch": 4.265073947667805, "grad_norm": 1.012491447312437, "learning_rate": 6.549834586114877e-08, "loss": 0.0043, "step": 18745 }, { "epoch": 4.265301478953356, "grad_norm": 1.455876458114111, "learning_rate": 6.545853132809851e-08, "loss": 0.0239, "step": 18746 }, { "epoch": 4.265529010238907, "grad_norm": 1.4328754822960905, "learning_rate": 6.541872823105123e-08, "loss": 0.0297, "step": 18747 }, { "epoch": 4.26575654152446, "grad_norm": 0.9928239822510527, "learning_rate": 6.53789365708203e-08, "loss": 0.0523, "step": 18748 }, { "epoch": 4.265984072810012, "grad_norm": 1.1028988402416307, "learning_rate": 6.533915634821884e-08, "loss": 0.0243, "step": 18749 }, { "epoch": 4.266211604095563, "grad_norm": 1.4520365215114435, "learning_rate": 6.529938756406013e-08, "loss": 0.0246, "step": 18750 }, { "epoch": 4.266439135381115, "grad_norm": 0.24954280225977854, "learning_rate": 6.525963021915688e-08, "loss": 0.0008, "step": 18751 }, { "epoch": 4.266666666666667, "grad_norm": 2.1362224598696984, "learning_rate": 6.521988431432175e-08, "loss": 0.0521, "step": 18752 }, { "epoch": 4.266894197952219, "grad_norm": 1.2250894538904198, "learning_rate": 6.518014985036694e-08, "loss": 0.0124, "step": 18753 }, { "epoch": 4.26712172923777, "grad_norm": 0.32133788068680236, "learning_rate": 6.514042682810466e-08, "loss": 0.0011, "step": 18754 }, { "epoch": 4.267349260523322, "grad_norm": 1.8634148943877795, "learning_rate": 6.510071524834676e-08, "loss": 0.0051, "step": 18755 }, { "epoch": 4.267576791808874, "grad_norm": 1.7103695183780983, "learning_rate": 6.506101511190473e-08, "loss": 0.0502, "step": 18756 }, { "epoch": 4.267804323094426, "grad_norm": 1.4554088266501903, "learning_rate": 6.502132641959001e-08, "loss": 0.0752, "step": 18757 }, { "epoch": 4.268031854379977, "grad_norm": 1.4308876426014463, "learning_rate": 6.49816491722139e-08, "loss": 0.0396, "step": 18758 }, { "epoch": 4.268259385665529, "grad_norm": 0.9393948585108111, "learning_rate": 6.494198337058735e-08, "loss": 0.0063, "step": 18759 }, { "epoch": 4.268486916951081, "grad_norm": 3.486436109550495, "learning_rate": 6.490232901552096e-08, "loss": 0.0398, "step": 18760 }, { "epoch": 4.268714448236633, "grad_norm": 1.1350470810849707, "learning_rate": 6.486268610782505e-08, "loss": 0.0033, "step": 18761 }, { "epoch": 4.268941979522184, "grad_norm": 0.7825468566087406, "learning_rate": 6.482305464831015e-08, "loss": 0.0403, "step": 18762 }, { "epoch": 4.269169510807736, "grad_norm": 1.324904686573975, "learning_rate": 6.478343463778597e-08, "loss": 0.0065, "step": 18763 }, { "epoch": 4.269397042093288, "grad_norm": 0.9556504734914019, "learning_rate": 6.474382607706233e-08, "loss": 0.0339, "step": 18764 }, { "epoch": 4.26962457337884, "grad_norm": 1.3323170206955466, "learning_rate": 6.470422896694889e-08, "loss": 0.0118, "step": 18765 }, { "epoch": 4.269852104664391, "grad_norm": 1.2055534215525445, "learning_rate": 6.466464330825498e-08, "loss": 0.033, "step": 18766 }, { "epoch": 4.270079635949943, "grad_norm": 0.6099813456953211, "learning_rate": 6.46250691017896e-08, "loss": 0.0018, "step": 18767 }, { "epoch": 4.270307167235495, "grad_norm": 1.5508222518473749, "learning_rate": 6.458550634836136e-08, "loss": 0.0444, "step": 18768 }, { "epoch": 4.270534698521047, "grad_norm": 1.5124270362530061, "learning_rate": 6.454595504877912e-08, "loss": 0.0704, "step": 18769 }, { "epoch": 4.270762229806598, "grad_norm": 1.0674774261568922, "learning_rate": 6.450641520385103e-08, "loss": 0.0139, "step": 18770 }, { "epoch": 4.27098976109215, "grad_norm": 1.237507081235202, "learning_rate": 6.446688681438537e-08, "loss": 0.0579, "step": 18771 }, { "epoch": 4.271217292377702, "grad_norm": 0.6707192691675119, "learning_rate": 6.442736988119008e-08, "loss": 0.0033, "step": 18772 }, { "epoch": 4.271444823663254, "grad_norm": 1.2423054226579828, "learning_rate": 6.438786440507251e-08, "loss": 0.0145, "step": 18773 }, { "epoch": 4.271672354948805, "grad_norm": 0.9317794781465251, "learning_rate": 6.434837038684048e-08, "loss": 0.0066, "step": 18774 }, { "epoch": 4.271899886234357, "grad_norm": 1.711641865755873, "learning_rate": 6.430888782730085e-08, "loss": 0.0502, "step": 18775 }, { "epoch": 4.272127417519909, "grad_norm": 0.3530502966592724, "learning_rate": 6.426941672726075e-08, "loss": 0.0015, "step": 18776 }, { "epoch": 4.272354948805461, "grad_norm": 0.77879383568118, "learning_rate": 6.422995708752683e-08, "loss": 0.0052, "step": 18777 }, { "epoch": 4.272582480091012, "grad_norm": 1.4935584055383342, "learning_rate": 6.419050890890553e-08, "loss": 0.0544, "step": 18778 }, { "epoch": 4.272810011376564, "grad_norm": 1.435599817282777, "learning_rate": 6.415107219220326e-08, "loss": 0.0165, "step": 18779 }, { "epoch": 4.273037542662116, "grad_norm": 0.6434135142859272, "learning_rate": 6.41116469382258e-08, "loss": 0.0396, "step": 18780 }, { "epoch": 4.273265073947668, "grad_norm": 1.0304635147787746, "learning_rate": 6.407223314777916e-08, "loss": 0.1016, "step": 18781 }, { "epoch": 4.273492605233219, "grad_norm": 1.999395825801807, "learning_rate": 6.403283082166874e-08, "loss": 0.0992, "step": 18782 }, { "epoch": 4.273720136518771, "grad_norm": 0.7194011692232808, "learning_rate": 6.399343996069993e-08, "loss": 0.0064, "step": 18783 }, { "epoch": 4.273947667804324, "grad_norm": 0.7731847311616478, "learning_rate": 6.395406056567772e-08, "loss": 0.0064, "step": 18784 }, { "epoch": 4.274175199089875, "grad_norm": 1.4642122506675015, "learning_rate": 6.3914692637407e-08, "loss": 0.0389, "step": 18785 }, { "epoch": 4.274402730375426, "grad_norm": 1.3180476800253622, "learning_rate": 6.387533617669243e-08, "loss": 0.0703, "step": 18786 }, { "epoch": 4.274630261660978, "grad_norm": 1.1054181795633795, "learning_rate": 6.38359911843383e-08, "loss": 0.0238, "step": 18787 }, { "epoch": 4.274857792946531, "grad_norm": 2.0416664381273892, "learning_rate": 6.379665766114887e-08, "loss": 0.1068, "step": 18788 }, { "epoch": 4.275085324232082, "grad_norm": 1.3316635551028262, "learning_rate": 6.37573356079279e-08, "loss": 0.0447, "step": 18789 }, { "epoch": 4.275312855517634, "grad_norm": 1.3155004625108595, "learning_rate": 6.371802502547916e-08, "loss": 0.0567, "step": 18790 }, { "epoch": 4.275540386803185, "grad_norm": 1.6193151938778578, "learning_rate": 6.367872591460593e-08, "loss": 0.0865, "step": 18791 }, { "epoch": 4.275767918088738, "grad_norm": 1.8578513097789549, "learning_rate": 6.363943827611157e-08, "loss": 0.0099, "step": 18792 }, { "epoch": 4.275995449374289, "grad_norm": 1.6477134884628395, "learning_rate": 6.360016211079914e-08, "loss": 0.0181, "step": 18793 }, { "epoch": 4.276222980659841, "grad_norm": 1.692165243614041, "learning_rate": 6.35608974194711e-08, "loss": 0.0691, "step": 18794 }, { "epoch": 4.276450511945392, "grad_norm": 2.5464113918864255, "learning_rate": 6.352164420293023e-08, "loss": 0.0861, "step": 18795 }, { "epoch": 4.276678043230945, "grad_norm": 0.48384478003423687, "learning_rate": 6.348240246197852e-08, "loss": 0.0031, "step": 18796 }, { "epoch": 4.276905574516496, "grad_norm": 9.441491720782716, "learning_rate": 6.344317219741811e-08, "loss": 0.0078, "step": 18797 }, { "epoch": 4.277133105802048, "grad_norm": 1.7204396085090872, "learning_rate": 6.340395341005098e-08, "loss": 0.0098, "step": 18798 }, { "epoch": 4.277360637087599, "grad_norm": 0.705114190391557, "learning_rate": 6.336474610067839e-08, "loss": 0.0209, "step": 18799 }, { "epoch": 4.277588168373152, "grad_norm": 0.3244370039743689, "learning_rate": 6.33255502701019e-08, "loss": 0.0014, "step": 18800 }, { "epoch": 4.277815699658703, "grad_norm": 1.5303117490822673, "learning_rate": 6.32863659191224e-08, "loss": 0.025, "step": 18801 }, { "epoch": 4.278043230944255, "grad_norm": 1.2987117651385545, "learning_rate": 6.324719304854094e-08, "loss": 0.0911, "step": 18802 }, { "epoch": 4.278270762229806, "grad_norm": 0.47821012185645795, "learning_rate": 6.320803165915796e-08, "loss": 0.0015, "step": 18803 }, { "epoch": 4.278498293515359, "grad_norm": 0.5810349814228581, "learning_rate": 6.316888175177393e-08, "loss": 0.0034, "step": 18804 }, { "epoch": 4.27872582480091, "grad_norm": 0.880424043370585, "learning_rate": 6.312974332718922e-08, "loss": 0.0037, "step": 18805 }, { "epoch": 4.278953356086462, "grad_norm": 1.0777810070360696, "learning_rate": 6.309061638620328e-08, "loss": 0.0299, "step": 18806 }, { "epoch": 4.279180887372013, "grad_norm": 1.226195139232534, "learning_rate": 6.305150092961628e-08, "loss": 0.0523, "step": 18807 }, { "epoch": 4.279408418657566, "grad_norm": 2.1984334224053987, "learning_rate": 6.301239695822727e-08, "loss": 0.0369, "step": 18808 }, { "epoch": 4.279635949943117, "grad_norm": 1.5912104208774043, "learning_rate": 6.297330447283576e-08, "loss": 0.0914, "step": 18809 }, { "epoch": 4.279863481228669, "grad_norm": 1.5698559173783095, "learning_rate": 6.293422347424049e-08, "loss": 0.0532, "step": 18810 }, { "epoch": 4.28009101251422, "grad_norm": 1.5211002559904447, "learning_rate": 6.289515396324032e-08, "loss": 0.0095, "step": 18811 }, { "epoch": 4.280318543799773, "grad_norm": 2.196544335549263, "learning_rate": 6.285609594063386e-08, "loss": 0.1769, "step": 18812 }, { "epoch": 4.280546075085324, "grad_norm": 1.2785295089178643, "learning_rate": 6.281704940721917e-08, "loss": 0.0241, "step": 18813 }, { "epoch": 4.280773606370876, "grad_norm": 1.0152273241270968, "learning_rate": 6.277801436379453e-08, "loss": 0.0141, "step": 18814 }, { "epoch": 4.281001137656427, "grad_norm": 0.8376610613134143, "learning_rate": 6.273899081115746e-08, "loss": 0.0116, "step": 18815 }, { "epoch": 4.28122866894198, "grad_norm": 1.5587429741419339, "learning_rate": 6.269997875010584e-08, "loss": 0.0878, "step": 18816 }, { "epoch": 4.281456200227531, "grad_norm": 1.4166260140985345, "learning_rate": 6.266097818143677e-08, "loss": 0.0449, "step": 18817 }, { "epoch": 4.281683731513083, "grad_norm": 0.7395941159007958, "learning_rate": 6.262198910594736e-08, "loss": 0.009, "step": 18818 }, { "epoch": 4.281911262798634, "grad_norm": 0.8281166736055924, "learning_rate": 6.258301152443469e-08, "loss": 0.038, "step": 18819 }, { "epoch": 4.282138794084187, "grad_norm": 0.6746603337944526, "learning_rate": 6.254404543769514e-08, "loss": 0.0269, "step": 18820 }, { "epoch": 4.282366325369738, "grad_norm": 0.6216933585209963, "learning_rate": 6.25050908465253e-08, "loss": 0.0036, "step": 18821 }, { "epoch": 4.28259385665529, "grad_norm": 0.7038172454117021, "learning_rate": 6.246614775172112e-08, "loss": 0.005, "step": 18822 }, { "epoch": 4.282821387940842, "grad_norm": 1.1965753434808915, "learning_rate": 6.242721615407885e-08, "loss": 0.0545, "step": 18823 }, { "epoch": 4.283048919226394, "grad_norm": 1.1759898153185149, "learning_rate": 6.238829605439375e-08, "loss": 0.0075, "step": 18824 }, { "epoch": 4.283276450511945, "grad_norm": 1.0179461748892986, "learning_rate": 6.234938745346156e-08, "loss": 0.0098, "step": 18825 }, { "epoch": 4.283503981797497, "grad_norm": 1.4875872169367914, "learning_rate": 6.231049035207757e-08, "loss": 0.0588, "step": 18826 }, { "epoch": 4.283731513083049, "grad_norm": 0.8714601094716543, "learning_rate": 6.227160475103646e-08, "loss": 0.0071, "step": 18827 }, { "epoch": 4.283959044368601, "grad_norm": 0.7017055335468965, "learning_rate": 6.223273065113334e-08, "loss": 0.0047, "step": 18828 }, { "epoch": 4.284186575654153, "grad_norm": 1.764002340352332, "learning_rate": 6.219386805316252e-08, "loss": 0.0269, "step": 18829 }, { "epoch": 4.284414106939704, "grad_norm": 1.2641362675515204, "learning_rate": 6.215501695791826e-08, "loss": 0.0715, "step": 18830 }, { "epoch": 4.284641638225256, "grad_norm": 0.4874238607732477, "learning_rate": 6.211617736619456e-08, "loss": 0.0024, "step": 18831 }, { "epoch": 4.284869169510808, "grad_norm": 0.48996243062544653, "learning_rate": 6.207734927878537e-08, "loss": 0.008, "step": 18832 }, { "epoch": 4.28509670079636, "grad_norm": 1.3983103960691414, "learning_rate": 6.203853269648432e-08, "loss": 0.0226, "step": 18833 }, { "epoch": 4.285324232081911, "grad_norm": 1.2440536421412105, "learning_rate": 6.199972762008447e-08, "loss": 0.025, "step": 18834 }, { "epoch": 4.285551763367463, "grad_norm": 1.2877829999325274, "learning_rate": 6.196093405037934e-08, "loss": 0.0417, "step": 18835 }, { "epoch": 4.285779294653015, "grad_norm": 1.165133916953912, "learning_rate": 6.192215198816146e-08, "loss": 0.0162, "step": 18836 }, { "epoch": 4.286006825938567, "grad_norm": 1.4611397743850305, "learning_rate": 6.188338143422345e-08, "loss": 0.0194, "step": 18837 }, { "epoch": 4.286234357224118, "grad_norm": 0.7535122579427529, "learning_rate": 6.18446223893579e-08, "loss": 0.0035, "step": 18838 }, { "epoch": 4.28646188850967, "grad_norm": 1.1248386396446173, "learning_rate": 6.180587485435687e-08, "loss": 0.0155, "step": 18839 }, { "epoch": 4.286689419795222, "grad_norm": 1.6874018638011865, "learning_rate": 6.176713883001243e-08, "loss": 0.0818, "step": 18840 }, { "epoch": 4.286916951080774, "grad_norm": 1.5307339485511253, "learning_rate": 6.172841431711603e-08, "loss": 0.0451, "step": 18841 }, { "epoch": 4.287144482366325, "grad_norm": 0.6939706478149629, "learning_rate": 6.168970131645934e-08, "loss": 0.0174, "step": 18842 }, { "epoch": 4.287372013651877, "grad_norm": 1.3320827677643479, "learning_rate": 6.165099982883351e-08, "loss": 0.0132, "step": 18843 }, { "epoch": 4.287599544937429, "grad_norm": 0.7002479004676467, "learning_rate": 6.161230985502947e-08, "loss": 0.0077, "step": 18844 }, { "epoch": 4.287827076222981, "grad_norm": 0.954592037809867, "learning_rate": 6.157363139583796e-08, "loss": 0.009, "step": 18845 }, { "epoch": 4.288054607508532, "grad_norm": 5.367606278028645, "learning_rate": 6.153496445204958e-08, "loss": 0.0386, "step": 18846 }, { "epoch": 4.288282138794084, "grad_norm": 0.4375988533757127, "learning_rate": 6.14963090244547e-08, "loss": 0.003, "step": 18847 }, { "epoch": 4.288509670079636, "grad_norm": 0.699275912596504, "learning_rate": 6.145766511384328e-08, "loss": 0.0134, "step": 18848 }, { "epoch": 4.288737201365188, "grad_norm": 1.2008149440021827, "learning_rate": 6.141903272100493e-08, "loss": 0.0251, "step": 18849 }, { "epoch": 4.288964732650739, "grad_norm": 2.3957523881643334, "learning_rate": 6.138041184672962e-08, "loss": 0.0085, "step": 18850 }, { "epoch": 4.289192263936291, "grad_norm": 0.8354094283679021, "learning_rate": 6.134180249180625e-08, "loss": 0.0078, "step": 18851 }, { "epoch": 4.289419795221843, "grad_norm": 1.9036261637760856, "learning_rate": 6.130320465702416e-08, "loss": 0.0966, "step": 18852 }, { "epoch": 4.289647326507395, "grad_norm": 2.488493682259613, "learning_rate": 6.126461834317227e-08, "loss": 0.0084, "step": 18853 }, { "epoch": 4.289874857792946, "grad_norm": 0.8808553512378954, "learning_rate": 6.122604355103922e-08, "loss": 0.0023, "step": 18854 }, { "epoch": 4.290102389078498, "grad_norm": 1.0144017939524674, "learning_rate": 6.118748028141337e-08, "loss": 0.0239, "step": 18855 }, { "epoch": 4.29032992036405, "grad_norm": 0.8737583277164658, "learning_rate": 6.114892853508268e-08, "loss": 0.0102, "step": 18856 }, { "epoch": 4.290557451649602, "grad_norm": 1.0415021592558393, "learning_rate": 6.111038831283527e-08, "loss": 0.0065, "step": 18857 }, { "epoch": 4.290784982935153, "grad_norm": 0.7274611833156118, "learning_rate": 6.10718596154588e-08, "loss": 0.0061, "step": 18858 }, { "epoch": 4.291012514220705, "grad_norm": 2.2256564673494488, "learning_rate": 6.103334244374087e-08, "loss": 0.0201, "step": 18859 }, { "epoch": 4.291240045506257, "grad_norm": 1.1102420428796163, "learning_rate": 6.099483679846842e-08, "loss": 0.0184, "step": 18860 }, { "epoch": 4.291467576791809, "grad_norm": 1.4283623442370643, "learning_rate": 6.095634268042876e-08, "loss": 0.0477, "step": 18861 }, { "epoch": 4.291695108077361, "grad_norm": 0.6760144376676576, "learning_rate": 6.09178600904084e-08, "loss": 0.0041, "step": 18862 }, { "epoch": 4.291922639362912, "grad_norm": 0.8197904130230388, "learning_rate": 6.087938902919379e-08, "loss": 0.003, "step": 18863 }, { "epoch": 4.292150170648464, "grad_norm": 1.1248084289010516, "learning_rate": 6.084092949757134e-08, "loss": 0.0078, "step": 18864 }, { "epoch": 4.292377701934016, "grad_norm": 0.8009041089097038, "learning_rate": 6.080248149632712e-08, "loss": 0.0042, "step": 18865 }, { "epoch": 4.292605233219568, "grad_norm": 1.5337720674784787, "learning_rate": 6.0764045026247e-08, "loss": 0.0293, "step": 18866 }, { "epoch": 4.292832764505119, "grad_norm": 1.361288263118857, "learning_rate": 6.072562008811644e-08, "loss": 0.0309, "step": 18867 }, { "epoch": 4.293060295790672, "grad_norm": 0.9183836057470293, "learning_rate": 6.068720668272062e-08, "loss": 0.0141, "step": 18868 }, { "epoch": 4.293287827076223, "grad_norm": 0.8149499637978674, "learning_rate": 6.0648804810845e-08, "loss": 0.0083, "step": 18869 }, { "epoch": 4.293515358361775, "grad_norm": 1.3697233556757278, "learning_rate": 6.061041447327408e-08, "loss": 0.0185, "step": 18870 }, { "epoch": 4.293742889647326, "grad_norm": 0.5530631039610178, "learning_rate": 6.05720356707927e-08, "loss": 0.0042, "step": 18871 }, { "epoch": 4.293970420932879, "grad_norm": 1.6290600472542442, "learning_rate": 6.053366840418517e-08, "loss": 0.0295, "step": 18872 }, { "epoch": 4.29419795221843, "grad_norm": 1.3751390147998694, "learning_rate": 6.049531267423581e-08, "loss": 0.0112, "step": 18873 }, { "epoch": 4.294425483503982, "grad_norm": 1.519372612763243, "learning_rate": 6.045696848172842e-08, "loss": 0.0699, "step": 18874 }, { "epoch": 4.294653014789533, "grad_norm": 3.0831078629750723, "learning_rate": 6.04186358274466e-08, "loss": 0.0209, "step": 18875 }, { "epoch": 4.294880546075086, "grad_norm": 1.0329672386919666, "learning_rate": 6.038031471217398e-08, "loss": 0.0547, "step": 18876 }, { "epoch": 4.295108077360637, "grad_norm": 1.1688448332148937, "learning_rate": 6.034200513669355e-08, "loss": 0.0322, "step": 18877 }, { "epoch": 4.295335608646189, "grad_norm": 1.9211352797650687, "learning_rate": 6.03037071017884e-08, "loss": 0.0538, "step": 18878 }, { "epoch": 4.29556313993174, "grad_norm": 1.6176907883319527, "learning_rate": 6.026542060824141e-08, "loss": 0.0098, "step": 18879 }, { "epoch": 4.295790671217293, "grad_norm": 2.0627965615441974, "learning_rate": 6.022714565683485e-08, "loss": 0.0135, "step": 18880 }, { "epoch": 4.296018202502844, "grad_norm": 0.9453234574050196, "learning_rate": 6.018888224835123e-08, "loss": 0.0084, "step": 18881 }, { "epoch": 4.296245733788396, "grad_norm": 1.5268682295896192, "learning_rate": 6.015063038357228e-08, "loss": 0.0146, "step": 18882 }, { "epoch": 4.2964732650739474, "grad_norm": 1.1928671633013916, "learning_rate": 6.011239006328017e-08, "loss": 0.0239, "step": 18883 }, { "epoch": 4.2967007963595, "grad_norm": 0.9556015995822074, "learning_rate": 6.007416128825613e-08, "loss": 0.0348, "step": 18884 }, { "epoch": 4.296928327645051, "grad_norm": 1.7658122797268068, "learning_rate": 6.00359440592816e-08, "loss": 0.0234, "step": 18885 }, { "epoch": 4.297155858930603, "grad_norm": 1.5124350797010886, "learning_rate": 5.99977383771378e-08, "loss": 0.0116, "step": 18886 }, { "epoch": 4.2973833902161545, "grad_norm": 1.2173730762212853, "learning_rate": 5.995954424260535e-08, "loss": 0.0221, "step": 18887 }, { "epoch": 4.297610921501707, "grad_norm": 1.090026447405423, "learning_rate": 5.992136165646513e-08, "loss": 0.0028, "step": 18888 }, { "epoch": 4.297838452787258, "grad_norm": 1.8040461797483511, "learning_rate": 5.988319061949722e-08, "loss": 0.0292, "step": 18889 }, { "epoch": 4.29806598407281, "grad_norm": 2.729322156475375, "learning_rate": 5.984503113248212e-08, "loss": 0.0102, "step": 18890 }, { "epoch": 4.2982935153583615, "grad_norm": 0.9648635172237177, "learning_rate": 5.98068831961994e-08, "loss": 0.0153, "step": 18891 }, { "epoch": 4.298521046643914, "grad_norm": 1.4762448171796014, "learning_rate": 5.976874681142893e-08, "loss": 0.0629, "step": 18892 }, { "epoch": 4.298748577929465, "grad_norm": 1.4214447544791287, "learning_rate": 5.973062197895015e-08, "loss": 0.0205, "step": 18893 }, { "epoch": 4.298976109215017, "grad_norm": 1.623479339563452, "learning_rate": 5.969250869954215e-08, "loss": 0.0575, "step": 18894 }, { "epoch": 4.2992036405005685, "grad_norm": 0.6394353332335686, "learning_rate": 5.965440697398406e-08, "loss": 0.0027, "step": 18895 }, { "epoch": 4.299431171786121, "grad_norm": 1.2891762820603638, "learning_rate": 5.961631680305439e-08, "loss": 0.013, "step": 18896 }, { "epoch": 4.299658703071672, "grad_norm": 1.0742072165459318, "learning_rate": 5.9578238187531846e-08, "loss": 0.0078, "step": 18897 }, { "epoch": 4.299886234357224, "grad_norm": 1.5871682342041231, "learning_rate": 5.9540171128194473e-08, "loss": 0.0756, "step": 18898 }, { "epoch": 4.3001137656427755, "grad_norm": 0.8359584223803631, "learning_rate": 5.950211562582048e-08, "loss": 0.0067, "step": 18899 }, { "epoch": 4.300341296928328, "grad_norm": 1.5893747613602498, "learning_rate": 5.946407168118763e-08, "loss": 0.0502, "step": 18900 }, { "epoch": 4.30056882821388, "grad_norm": 2.2327590558870574, "learning_rate": 5.942603929507337e-08, "loss": 0.0307, "step": 18901 }, { "epoch": 4.300796359499431, "grad_norm": 1.4647532462125206, "learning_rate": 5.9388018468255134e-08, "loss": 0.1067, "step": 18902 }, { "epoch": 4.3010238907849825, "grad_norm": 1.2701682246584023, "learning_rate": 5.93500092015098e-08, "loss": 0.0332, "step": 18903 }, { "epoch": 4.301251422070535, "grad_norm": 1.0465564034868136, "learning_rate": 5.931201149561451e-08, "loss": 0.0508, "step": 18904 }, { "epoch": 4.301478953356087, "grad_norm": 0.4049890145957634, "learning_rate": 5.927402535134554e-08, "loss": 0.002, "step": 18905 }, { "epoch": 4.301706484641638, "grad_norm": 1.6407698535863096, "learning_rate": 5.923605076947947e-08, "loss": 0.0632, "step": 18906 }, { "epoch": 4.30193401592719, "grad_norm": 1.0395237330835245, "learning_rate": 5.919808775079243e-08, "loss": 0.0083, "step": 18907 }, { "epoch": 4.302161547212742, "grad_norm": 1.2141223681546593, "learning_rate": 5.916013629606018e-08, "loss": 0.0304, "step": 18908 }, { "epoch": 4.302389078498294, "grad_norm": 1.1690965661030766, "learning_rate": 5.91221964060585e-08, "loss": 0.0575, "step": 18909 }, { "epoch": 4.302616609783845, "grad_norm": 1.0982507393222534, "learning_rate": 5.908426808156273e-08, "loss": 0.0176, "step": 18910 }, { "epoch": 4.302844141069397, "grad_norm": 0.7997612134961742, "learning_rate": 5.9046351323348224e-08, "loss": 0.0156, "step": 18911 }, { "epoch": 4.303071672354949, "grad_norm": 1.8159046445768257, "learning_rate": 5.900844613218965e-08, "loss": 0.0202, "step": 18912 }, { "epoch": 4.303299203640501, "grad_norm": 0.8874213257511165, "learning_rate": 5.8970552508861854e-08, "loss": 0.0153, "step": 18913 }, { "epoch": 4.303526734926052, "grad_norm": 1.0059954432892246, "learning_rate": 5.8932670454139444e-08, "loss": 0.0114, "step": 18914 }, { "epoch": 4.303754266211604, "grad_norm": 1.813647976284903, "learning_rate": 5.8894799968796446e-08, "loss": 0.0266, "step": 18915 }, { "epoch": 4.303981797497156, "grad_norm": 4.195400102670392, "learning_rate": 5.885694105360711e-08, "loss": 0.0166, "step": 18916 }, { "epoch": 4.304209328782708, "grad_norm": 0.9730977185322643, "learning_rate": 5.8819093709345034e-08, "loss": 0.0093, "step": 18917 }, { "epoch": 4.304436860068259, "grad_norm": 0.775951346075585, "learning_rate": 5.878125793678351e-08, "loss": 0.0051, "step": 18918 }, { "epoch": 4.304664391353811, "grad_norm": 0.8461837890272056, "learning_rate": 5.874343373669641e-08, "loss": 0.0424, "step": 18919 }, { "epoch": 4.304891922639363, "grad_norm": 1.4021156958619516, "learning_rate": 5.870562110985627e-08, "loss": 0.0815, "step": 18920 }, { "epoch": 4.305119453924915, "grad_norm": 0.7634896789903864, "learning_rate": 5.866782005703626e-08, "loss": 0.0051, "step": 18921 }, { "epoch": 4.305346985210466, "grad_norm": 1.2163239854391064, "learning_rate": 5.8630030579008706e-08, "loss": 0.0451, "step": 18922 }, { "epoch": 4.305574516496018, "grad_norm": 1.0308082856039373, "learning_rate": 5.8592252676546166e-08, "loss": 0.0332, "step": 18923 }, { "epoch": 4.30580204778157, "grad_norm": 1.1635730907746609, "learning_rate": 5.85544863504206e-08, "loss": 0.0071, "step": 18924 }, { "epoch": 4.306029579067122, "grad_norm": 0.5887707327671656, "learning_rate": 5.851673160140389e-08, "loss": 0.0034, "step": 18925 }, { "epoch": 4.306257110352673, "grad_norm": 2.194906811627298, "learning_rate": 5.847898843026785e-08, "loss": 0.0149, "step": 18926 }, { "epoch": 4.306484641638225, "grad_norm": 0.8469077330651564, "learning_rate": 5.8441256837783595e-08, "loss": 0.004, "step": 18927 }, { "epoch": 4.306712172923777, "grad_norm": 1.403083972901271, "learning_rate": 5.840353682472259e-08, "loss": 0.0156, "step": 18928 }, { "epoch": 4.306939704209329, "grad_norm": 1.1580460201146865, "learning_rate": 5.8365828391855546e-08, "loss": 0.0064, "step": 18929 }, { "epoch": 4.30716723549488, "grad_norm": 1.444260201023688, "learning_rate": 5.83281315399533e-08, "loss": 0.0848, "step": 18930 }, { "epoch": 4.307394766780432, "grad_norm": 1.7762138119521864, "learning_rate": 5.829044626978614e-08, "loss": 0.0542, "step": 18931 }, { "epoch": 4.307622298065984, "grad_norm": 1.838928273413533, "learning_rate": 5.8252772582124345e-08, "loss": 0.0442, "step": 18932 }, { "epoch": 4.307849829351536, "grad_norm": 1.2874418951041364, "learning_rate": 5.8215110477738074e-08, "loss": 0.0891, "step": 18933 }, { "epoch": 4.308077360637087, "grad_norm": 0.5986944841675058, "learning_rate": 5.817745995739685e-08, "loss": 0.002, "step": 18934 }, { "epoch": 4.308304891922639, "grad_norm": 1.3257855366146845, "learning_rate": 5.813982102187033e-08, "loss": 0.0282, "step": 18935 }, { "epoch": 4.308532423208191, "grad_norm": 1.1784775129409444, "learning_rate": 5.8102193671927684e-08, "loss": 0.0689, "step": 18936 }, { "epoch": 4.308759954493743, "grad_norm": 1.0206596881412988, "learning_rate": 5.806457790833789e-08, "loss": 0.0125, "step": 18937 }, { "epoch": 4.308987485779294, "grad_norm": 1.0271884268787745, "learning_rate": 5.802697373186984e-08, "loss": 0.0179, "step": 18938 }, { "epoch": 4.309215017064846, "grad_norm": 1.1556717016945048, "learning_rate": 5.798938114329203e-08, "loss": 0.0281, "step": 18939 }, { "epoch": 4.309442548350399, "grad_norm": 0.4913493681613726, "learning_rate": 5.7951800143372996e-08, "loss": 0.0033, "step": 18940 }, { "epoch": 4.30967007963595, "grad_norm": 0.812358913518361, "learning_rate": 5.7914230732880605e-08, "loss": 0.0607, "step": 18941 }, { "epoch": 4.309897610921501, "grad_norm": 1.215759446197948, "learning_rate": 5.787667291258278e-08, "loss": 0.064, "step": 18942 }, { "epoch": 4.3101251422070535, "grad_norm": 0.6983391180596717, "learning_rate": 5.7839126683247176e-08, "loss": 0.0062, "step": 18943 }, { "epoch": 4.310352673492606, "grad_norm": 1.2994169424660686, "learning_rate": 5.780159204564102e-08, "loss": 0.0913, "step": 18944 }, { "epoch": 4.310580204778157, "grad_norm": 0.9797432310649211, "learning_rate": 5.776406900053155e-08, "loss": 0.0037, "step": 18945 }, { "epoch": 4.310807736063709, "grad_norm": 1.0726517280274896, "learning_rate": 5.7726557548685655e-08, "loss": 0.0106, "step": 18946 }, { "epoch": 4.3110352673492605, "grad_norm": 0.9112728606293246, "learning_rate": 5.768905769087008e-08, "loss": 0.0269, "step": 18947 }, { "epoch": 4.311262798634813, "grad_norm": 1.0174866569925367, "learning_rate": 5.765156942785115e-08, "loss": 0.0119, "step": 18948 }, { "epoch": 4.311490329920364, "grad_norm": 1.2952084692064108, "learning_rate": 5.7614092760395134e-08, "loss": 0.0189, "step": 18949 }, { "epoch": 4.311717861205916, "grad_norm": 1.028629812281197, "learning_rate": 5.757662768926794e-08, "loss": 0.0139, "step": 18950 }, { "epoch": 4.3119453924914675, "grad_norm": 1.689969837257224, "learning_rate": 5.753917421523522e-08, "loss": 0.0192, "step": 18951 }, { "epoch": 4.31217292377702, "grad_norm": 0.6602834364078594, "learning_rate": 5.750173233906253e-08, "loss": 0.0143, "step": 18952 }, { "epoch": 4.312400455062571, "grad_norm": 0.9205343006593308, "learning_rate": 5.74643020615151e-08, "loss": 0.0192, "step": 18953 }, { "epoch": 4.312627986348123, "grad_norm": 0.4931957403430844, "learning_rate": 5.7426883383358e-08, "loss": 0.004, "step": 18954 }, { "epoch": 4.3128555176336745, "grad_norm": 0.6501780084736289, "learning_rate": 5.7389476305356044e-08, "loss": 0.0036, "step": 18955 }, { "epoch": 4.313083048919227, "grad_norm": 0.6121945228273237, "learning_rate": 5.735208082827348e-08, "loss": 0.0031, "step": 18956 }, { "epoch": 4.313310580204778, "grad_norm": 1.6013052403525376, "learning_rate": 5.7314696952874905e-08, "loss": 0.0238, "step": 18957 }, { "epoch": 4.31353811149033, "grad_norm": 1.4097627744103847, "learning_rate": 5.7277324679924154e-08, "loss": 0.1028, "step": 18958 }, { "epoch": 4.3137656427758815, "grad_norm": 0.7828529212567742, "learning_rate": 5.7239964010185125e-08, "loss": 0.0281, "step": 18959 }, { "epoch": 4.313993174061434, "grad_norm": 0.5517383284380053, "learning_rate": 5.720261494442145e-08, "loss": 0.0022, "step": 18960 }, { "epoch": 4.314220705346985, "grad_norm": 2.660990127419749, "learning_rate": 5.71652774833966e-08, "loss": 0.1202, "step": 18961 }, { "epoch": 4.314448236632537, "grad_norm": 1.5047238075449716, "learning_rate": 5.7127951627873464e-08, "loss": 0.0238, "step": 18962 }, { "epoch": 4.3146757679180885, "grad_norm": 1.5967013455072383, "learning_rate": 5.709063737861495e-08, "loss": 0.0342, "step": 18963 }, { "epoch": 4.314903299203641, "grad_norm": 1.0592642982724405, "learning_rate": 5.705333473638379e-08, "loss": 0.0134, "step": 18964 }, { "epoch": 4.315130830489192, "grad_norm": 1.3839969258911033, "learning_rate": 5.701604370194222e-08, "loss": 0.027, "step": 18965 }, { "epoch": 4.315358361774744, "grad_norm": 1.1630218956377125, "learning_rate": 5.6978764276052545e-08, "loss": 0.0542, "step": 18966 }, { "epoch": 4.3155858930602955, "grad_norm": 0.8708180340434997, "learning_rate": 5.6941496459476595e-08, "loss": 0.005, "step": 18967 }, { "epoch": 4.315813424345848, "grad_norm": 2.009280777507092, "learning_rate": 5.690424025297625e-08, "loss": 0.108, "step": 18968 }, { "epoch": 4.316040955631399, "grad_norm": 0.8954876552419021, "learning_rate": 5.686699565731285e-08, "loss": 0.0144, "step": 18969 }, { "epoch": 4.316268486916951, "grad_norm": 1.2528275376932858, "learning_rate": 5.6829762673247386e-08, "loss": 0.0044, "step": 18970 }, { "epoch": 4.3164960182025025, "grad_norm": 1.1835519912214358, "learning_rate": 5.679254130154119e-08, "loss": 0.0118, "step": 18971 }, { "epoch": 4.316723549488055, "grad_norm": 1.2995712942734532, "learning_rate": 5.6755331542954694e-08, "loss": 0.0064, "step": 18972 }, { "epoch": 4.316951080773606, "grad_norm": 0.9110648771318587, "learning_rate": 5.671813339824847e-08, "loss": 0.0383, "step": 18973 }, { "epoch": 4.317178612059158, "grad_norm": 1.8059569237839923, "learning_rate": 5.668094686818303e-08, "loss": 0.0951, "step": 18974 }, { "epoch": 4.3174061433447095, "grad_norm": 1.6127905368466373, "learning_rate": 5.6643771953518095e-08, "loss": 0.045, "step": 18975 }, { "epoch": 4.317633674630262, "grad_norm": 1.3040862440686953, "learning_rate": 5.6606608655013635e-08, "loss": 0.0577, "step": 18976 }, { "epoch": 4.317861205915813, "grad_norm": 1.4088679679938496, "learning_rate": 5.6569456973429025e-08, "loss": 0.0302, "step": 18977 }, { "epoch": 4.318088737201365, "grad_norm": 1.2768215352354122, "learning_rate": 5.653231690952374e-08, "loss": 0.0156, "step": 18978 }, { "epoch": 4.318316268486917, "grad_norm": 1.360523163160829, "learning_rate": 5.6495188464056745e-08, "loss": 0.0061, "step": 18979 }, { "epoch": 4.318543799772469, "grad_norm": 2.4516100890664454, "learning_rate": 5.645807163778702e-08, "loss": 0.0788, "step": 18980 }, { "epoch": 4.31877133105802, "grad_norm": 0.8562372637462483, "learning_rate": 5.6420966431473126e-08, "loss": 0.0486, "step": 18981 }, { "epoch": 4.318998862343572, "grad_norm": 0.3170420337154788, "learning_rate": 5.638387284587321e-08, "loss": 0.001, "step": 18982 }, { "epoch": 4.319226393629124, "grad_norm": 1.757344548590853, "learning_rate": 5.634679088174576e-08, "loss": 0.0762, "step": 18983 }, { "epoch": 4.319453924914676, "grad_norm": 1.1368904041038326, "learning_rate": 5.630972053984829e-08, "loss": 0.044, "step": 18984 }, { "epoch": 4.319681456200228, "grad_norm": 1.940347016042446, "learning_rate": 5.62726618209386e-08, "loss": 0.0636, "step": 18985 }, { "epoch": 4.319908987485779, "grad_norm": 1.3872128661849892, "learning_rate": 5.623561472577428e-08, "loss": 0.0885, "step": 18986 }, { "epoch": 4.320136518771331, "grad_norm": 0.9289738907532908, "learning_rate": 5.6198579255112295e-08, "loss": 0.0453, "step": 18987 }, { "epoch": 4.320364050056883, "grad_norm": 1.2666102567734645, "learning_rate": 5.6161555409709675e-08, "loss": 0.0224, "step": 18988 }, { "epoch": 4.320591581342435, "grad_norm": 1.057770401461675, "learning_rate": 5.612454319032297e-08, "loss": 0.0032, "step": 18989 }, { "epoch": 4.320819112627986, "grad_norm": 0.8434498470989691, "learning_rate": 5.608754259770893e-08, "loss": 0.0088, "step": 18990 }, { "epoch": 4.321046643913538, "grad_norm": 0.7989369428556451, "learning_rate": 5.605055363262342e-08, "loss": 0.0149, "step": 18991 }, { "epoch": 4.32127417519909, "grad_norm": 0.9417033235318821, "learning_rate": 5.601357629582263e-08, "loss": 0.011, "step": 18992 }, { "epoch": 4.321501706484642, "grad_norm": 1.0983684369431095, "learning_rate": 5.597661058806242e-08, "loss": 0.0171, "step": 18993 }, { "epoch": 4.321729237770193, "grad_norm": 0.2299257371165031, "learning_rate": 5.593965651009808e-08, "loss": 0.0007, "step": 18994 }, { "epoch": 4.321956769055745, "grad_norm": 1.1103970453315009, "learning_rate": 5.590271406268506e-08, "loss": 0.0057, "step": 18995 }, { "epoch": 4.322184300341297, "grad_norm": 1.6206000344345457, "learning_rate": 5.5865783246578166e-08, "loss": 0.0868, "step": 18996 }, { "epoch": 4.322411831626849, "grad_norm": 1.8938621875420485, "learning_rate": 5.582886406253256e-08, "loss": 0.0188, "step": 18997 }, { "epoch": 4.3226393629124, "grad_norm": 0.8758938114123294, "learning_rate": 5.579195651130235e-08, "loss": 0.0493, "step": 18998 }, { "epoch": 4.3228668941979524, "grad_norm": 0.9132288530528175, "learning_rate": 5.5755060593642166e-08, "loss": 0.0168, "step": 18999 }, { "epoch": 4.323094425483504, "grad_norm": 2.0767718241509927, "learning_rate": 5.57181763103061e-08, "loss": 0.0179, "step": 19000 }, { "epoch": 4.323321956769056, "grad_norm": 0.6160837854644832, "learning_rate": 5.568130366204787e-08, "loss": 0.0054, "step": 19001 }, { "epoch": 4.323549488054607, "grad_norm": 0.7834533457616714, "learning_rate": 5.5644442649621166e-08, "loss": 0.0207, "step": 19002 }, { "epoch": 4.3237770193401595, "grad_norm": 1.8729765506995428, "learning_rate": 5.560759327377929e-08, "loss": 0.1038, "step": 19003 }, { "epoch": 4.324004550625711, "grad_norm": 2.18179562325012, "learning_rate": 5.557075553527545e-08, "loss": 0.1125, "step": 19004 }, { "epoch": 4.324232081911263, "grad_norm": 1.2287473463553775, "learning_rate": 5.5533929434862454e-08, "loss": 0.015, "step": 19005 }, { "epoch": 4.324459613196814, "grad_norm": 1.3528348085936672, "learning_rate": 5.549711497329302e-08, "loss": 0.0193, "step": 19006 }, { "epoch": 4.3246871444823665, "grad_norm": 1.294800141513129, "learning_rate": 5.546031215131961e-08, "loss": 0.0087, "step": 19007 }, { "epoch": 4.324914675767918, "grad_norm": 1.1269547865274603, "learning_rate": 5.542352096969426e-08, "loss": 0.0151, "step": 19008 }, { "epoch": 4.32514220705347, "grad_norm": 1.4445469022783413, "learning_rate": 5.538674142916915e-08, "loss": 0.0579, "step": 19009 }, { "epoch": 4.325369738339021, "grad_norm": 1.6143735659930747, "learning_rate": 5.534997353049576e-08, "loss": 0.0886, "step": 19010 }, { "epoch": 4.3255972696245735, "grad_norm": 1.5052010933279574, "learning_rate": 5.5313217274425705e-08, "loss": 0.0654, "step": 19011 }, { "epoch": 4.325824800910125, "grad_norm": 2.4954295306715375, "learning_rate": 5.527647266171006e-08, "loss": 0.0147, "step": 19012 }, { "epoch": 4.326052332195677, "grad_norm": 1.887786669119032, "learning_rate": 5.523973969309995e-08, "loss": 0.0229, "step": 19013 }, { "epoch": 4.326279863481228, "grad_norm": 0.9786189157880558, "learning_rate": 5.5203018369346176e-08, "loss": 0.023, "step": 19014 }, { "epoch": 4.3265073947667805, "grad_norm": 1.2208778590655172, "learning_rate": 5.516630869119903e-08, "loss": 0.0715, "step": 19015 }, { "epoch": 4.326734926052332, "grad_norm": 1.2788062027252913, "learning_rate": 5.5129610659409094e-08, "loss": 0.0125, "step": 19016 }, { "epoch": 4.326962457337884, "grad_norm": 1.238395103320116, "learning_rate": 5.509292427472612e-08, "loss": 0.0149, "step": 19017 }, { "epoch": 4.327189988623436, "grad_norm": 0.9540767100779344, "learning_rate": 5.505624953790013e-08, "loss": 0.0524, "step": 19018 }, { "epoch": 4.3274175199089875, "grad_norm": 0.9664843181936972, "learning_rate": 5.5019586449680527e-08, "loss": 0.0104, "step": 19019 }, { "epoch": 4.327645051194539, "grad_norm": 0.8679285785172115, "learning_rate": 5.498293501081671e-08, "loss": 0.0065, "step": 19020 }, { "epoch": 4.327872582480091, "grad_norm": 0.8558875624583026, "learning_rate": 5.49462952220578e-08, "loss": 0.0089, "step": 19021 }, { "epoch": 4.328100113765643, "grad_norm": 1.148799394825152, "learning_rate": 5.4909667084152574e-08, "loss": 0.0282, "step": 19022 }, { "epoch": 4.3283276450511945, "grad_norm": 1.1597742342078359, "learning_rate": 5.487305059784981e-08, "loss": 0.0597, "step": 19023 }, { "epoch": 4.328555176336747, "grad_norm": 0.8372114634199249, "learning_rate": 5.483644576389766e-08, "loss": 0.0077, "step": 19024 }, { "epoch": 4.328782707622298, "grad_norm": 0.7649080420798577, "learning_rate": 5.4799852583044336e-08, "loss": 0.0045, "step": 19025 }, { "epoch": 4.32901023890785, "grad_norm": 1.4577885950176661, "learning_rate": 5.4763271056037724e-08, "loss": 0.011, "step": 19026 }, { "epoch": 4.3292377701934015, "grad_norm": 1.0371673503091328, "learning_rate": 5.4726701183625485e-08, "loss": 0.0094, "step": 19027 }, { "epoch": 4.329465301478954, "grad_norm": 11.711910626777032, "learning_rate": 5.469014296655521e-08, "loss": 0.0188, "step": 19028 }, { "epoch": 4.329692832764505, "grad_norm": 1.2636071259886268, "learning_rate": 5.465359640557381e-08, "loss": 0.0552, "step": 19029 }, { "epoch": 4.329920364050057, "grad_norm": 2.0535445312214655, "learning_rate": 5.4617061501428465e-08, "loss": 0.1007, "step": 19030 }, { "epoch": 4.3301478953356085, "grad_norm": 1.4734055864451898, "learning_rate": 5.45805382548658e-08, "loss": 0.0461, "step": 19031 }, { "epoch": 4.330375426621161, "grad_norm": 0.897439280312506, "learning_rate": 5.4544026666632085e-08, "loss": 0.041, "step": 19032 }, { "epoch": 4.330602957906712, "grad_norm": 0.8945908361569795, "learning_rate": 5.450752673747381e-08, "loss": 0.042, "step": 19033 }, { "epoch": 4.330830489192264, "grad_norm": 1.851191763123613, "learning_rate": 5.4471038468136837e-08, "loss": 0.0578, "step": 19034 }, { "epoch": 4.3310580204778155, "grad_norm": 0.9636506190516869, "learning_rate": 5.443456185936703e-08, "loss": 0.0396, "step": 19035 }, { "epoch": 4.331285551763368, "grad_norm": 2.246853633121119, "learning_rate": 5.4398096911909834e-08, "loss": 0.1081, "step": 19036 }, { "epoch": 4.331513083048919, "grad_norm": 0.52545054386492, "learning_rate": 5.4361643626510556e-08, "loss": 0.0036, "step": 19037 }, { "epoch": 4.331740614334471, "grad_norm": 1.47681822239454, "learning_rate": 5.432520200391422e-08, "loss": 0.0214, "step": 19038 }, { "epoch": 4.3319681456200225, "grad_norm": 1.6720749493409524, "learning_rate": 5.4288772044865447e-08, "loss": 0.0239, "step": 19039 }, { "epoch": 4.332195676905575, "grad_norm": 0.7883931760439807, "learning_rate": 5.425235375010912e-08, "loss": 0.0063, "step": 19040 }, { "epoch": 4.332423208191126, "grad_norm": 1.447893480963752, "learning_rate": 5.42159471203893e-08, "loss": 0.0212, "step": 19041 }, { "epoch": 4.332650739476678, "grad_norm": 1.8108744625814708, "learning_rate": 5.417955215645032e-08, "loss": 0.0761, "step": 19042 }, { "epoch": 4.3328782707622295, "grad_norm": 1.4678310413902398, "learning_rate": 5.414316885903589e-08, "loss": 0.0323, "step": 19043 }, { "epoch": 4.333105802047782, "grad_norm": 0.6966830083191335, "learning_rate": 5.410679722888952e-08, "loss": 0.0022, "step": 19044 }, { "epoch": 4.333333333333333, "grad_norm": 1.3529112745844287, "learning_rate": 5.4070437266754634e-08, "loss": 0.0159, "step": 19045 }, { "epoch": 4.333560864618885, "grad_norm": 1.2567027236120338, "learning_rate": 5.403408897337439e-08, "loss": 0.0096, "step": 19046 }, { "epoch": 4.3337883959044365, "grad_norm": 2.3315482344064864, "learning_rate": 5.399775234949181e-08, "loss": 0.0838, "step": 19047 }, { "epoch": 4.334015927189989, "grad_norm": 0.6829197989494473, "learning_rate": 5.396142739584935e-08, "loss": 0.0279, "step": 19048 }, { "epoch": 4.33424345847554, "grad_norm": 1.0716342864646426, "learning_rate": 5.392511411318961e-08, "loss": 0.0409, "step": 19049 }, { "epoch": 4.334470989761092, "grad_norm": 1.1441075105798137, "learning_rate": 5.388881250225464e-08, "loss": 0.0147, "step": 19050 }, { "epoch": 4.3346985210466435, "grad_norm": 1.0049016231599448, "learning_rate": 5.385252256378634e-08, "loss": 0.0146, "step": 19051 }, { "epoch": 4.334926052332196, "grad_norm": 1.4936156387996966, "learning_rate": 5.3816244298526415e-08, "loss": 0.0418, "step": 19052 }, { "epoch": 4.335153583617747, "grad_norm": 1.824201635705093, "learning_rate": 5.377997770721642e-08, "loss": 0.0639, "step": 19053 }, { "epoch": 4.335381114903299, "grad_norm": 1.0586709072596436, "learning_rate": 5.374372279059764e-08, "loss": 0.0349, "step": 19054 }, { "epoch": 4.335608646188851, "grad_norm": 0.8367748522344121, "learning_rate": 5.370747954941087e-08, "loss": 0.0083, "step": 19055 }, { "epoch": 4.335836177474403, "grad_norm": 0.6515192783896887, "learning_rate": 5.3671247984396976e-08, "loss": 0.0023, "step": 19056 }, { "epoch": 4.336063708759955, "grad_norm": 0.44611189137981166, "learning_rate": 5.363502809629655e-08, "loss": 0.0024, "step": 19057 }, { "epoch": 4.336291240045506, "grad_norm": 0.8553284460014942, "learning_rate": 5.359881988584954e-08, "loss": 0.0102, "step": 19058 }, { "epoch": 4.336518771331058, "grad_norm": 1.726422355545088, "learning_rate": 5.356262335379621e-08, "loss": 0.0055, "step": 19059 }, { "epoch": 4.33674630261661, "grad_norm": 1.379073644340662, "learning_rate": 5.35264385008763e-08, "loss": 0.0049, "step": 19060 }, { "epoch": 4.336973833902162, "grad_norm": 1.6387041329423642, "learning_rate": 5.349026532782957e-08, "loss": 0.0655, "step": 19061 }, { "epoch": 4.337201365187713, "grad_norm": 1.9643654236979433, "learning_rate": 5.345410383539508e-08, "loss": 0.0449, "step": 19062 }, { "epoch": 4.3374288964732655, "grad_norm": 1.721269519372968, "learning_rate": 5.341795402431189e-08, "loss": 0.0317, "step": 19063 }, { "epoch": 4.337656427758817, "grad_norm": 1.8862055768916541, "learning_rate": 5.3381815895319005e-08, "loss": 0.0196, "step": 19064 }, { "epoch": 4.337883959044369, "grad_norm": 1.3664324360986992, "learning_rate": 5.3345689449154775e-08, "loss": 0.0425, "step": 19065 }, { "epoch": 4.33811149032992, "grad_norm": 1.8991378356168593, "learning_rate": 5.3309574686557786e-08, "loss": 0.0076, "step": 19066 }, { "epoch": 4.3383390216154725, "grad_norm": 0.4023611087312578, "learning_rate": 5.327347160826612e-08, "loss": 0.002, "step": 19067 }, { "epoch": 4.338566552901024, "grad_norm": 1.6621030518409636, "learning_rate": 5.323738021501768e-08, "loss": 0.0263, "step": 19068 }, { "epoch": 4.338794084186576, "grad_norm": 1.0777258713919475, "learning_rate": 5.320130050755004e-08, "loss": 0.0743, "step": 19069 }, { "epoch": 4.339021615472127, "grad_norm": 0.8265517189417033, "learning_rate": 5.316523248660055e-08, "loss": 0.0068, "step": 19070 }, { "epoch": 4.3392491467576795, "grad_norm": 0.1485682674020104, "learning_rate": 5.312917615290653e-08, "loss": 0.0006, "step": 19071 }, { "epoch": 4.339476678043231, "grad_norm": 2.135144728790849, "learning_rate": 5.309313150720474e-08, "loss": 0.0494, "step": 19072 }, { "epoch": 4.339704209328783, "grad_norm": 1.1769398759102023, "learning_rate": 5.30570985502319e-08, "loss": 0.0843, "step": 19073 }, { "epoch": 4.339931740614334, "grad_norm": 1.1927599101281685, "learning_rate": 5.30210772827245e-08, "loss": 0.0057, "step": 19074 }, { "epoch": 4.3401592718998865, "grad_norm": 1.0524193941189663, "learning_rate": 5.298506770541889e-08, "loss": 0.0519, "step": 19075 }, { "epoch": 4.340386803185438, "grad_norm": 2.2028381204339818, "learning_rate": 5.2949069819050875e-08, "loss": 0.045, "step": 19076 }, { "epoch": 4.34061433447099, "grad_norm": 1.7066543953110849, "learning_rate": 5.2913083624356114e-08, "loss": 0.0345, "step": 19077 }, { "epoch": 4.340841865756541, "grad_norm": 1.044157384620863, "learning_rate": 5.2877109122070334e-08, "loss": 0.024, "step": 19078 }, { "epoch": 4.3410693970420935, "grad_norm": 0.9319375485996981, "learning_rate": 5.284114631292851e-08, "loss": 0.0095, "step": 19079 }, { "epoch": 4.341296928327645, "grad_norm": 0.9802706624622969, "learning_rate": 5.280519519766582e-08, "loss": 0.0203, "step": 19080 }, { "epoch": 4.341524459613197, "grad_norm": 1.726110769309654, "learning_rate": 5.2769255777017084e-08, "loss": 0.0158, "step": 19081 }, { "epoch": 4.341751990898748, "grad_norm": 1.3487336605454745, "learning_rate": 5.273332805171665e-08, "loss": 0.0793, "step": 19082 }, { "epoch": 4.3419795221843005, "grad_norm": 0.9411539502992254, "learning_rate": 5.269741202249906e-08, "loss": 0.0169, "step": 19083 }, { "epoch": 4.342207053469852, "grad_norm": 1.289051220191313, "learning_rate": 5.266150769009819e-08, "loss": 0.0097, "step": 19084 }, { "epoch": 4.342434584755404, "grad_norm": 0.866055171991228, "learning_rate": 5.262561505524795e-08, "loss": 0.0053, "step": 19085 }, { "epoch": 4.342662116040955, "grad_norm": 1.5699441278219768, "learning_rate": 5.258973411868186e-08, "loss": 0.0984, "step": 19086 }, { "epoch": 4.3428896473265075, "grad_norm": 1.2032552248323518, "learning_rate": 5.2553864881133215e-08, "loss": 0.0076, "step": 19087 }, { "epoch": 4.343117178612059, "grad_norm": 0.10036949576654468, "learning_rate": 5.251800734333533e-08, "loss": 0.0004, "step": 19088 }, { "epoch": 4.343344709897611, "grad_norm": 0.9034952555159654, "learning_rate": 5.24821615060208e-08, "loss": 0.0577, "step": 19089 }, { "epoch": 4.343572241183162, "grad_norm": 1.2295604415203683, "learning_rate": 5.244632736992244e-08, "loss": 0.0537, "step": 19090 }, { "epoch": 4.3437997724687145, "grad_norm": 1.2643130104638927, "learning_rate": 5.241050493577253e-08, "loss": 0.0076, "step": 19091 }, { "epoch": 4.344027303754266, "grad_norm": 2.3631836706029135, "learning_rate": 5.2374694204303314e-08, "loss": 0.0606, "step": 19092 }, { "epoch": 4.344254835039818, "grad_norm": 1.0610687299006716, "learning_rate": 5.2338895176246506e-08, "loss": 0.0311, "step": 19093 }, { "epoch": 4.344482366325369, "grad_norm": 1.2993767039615343, "learning_rate": 5.2303107852333944e-08, "loss": 0.0535, "step": 19094 }, { "epoch": 4.3447098976109215, "grad_norm": 0.7481315330676194, "learning_rate": 5.2267332233297134e-08, "loss": 0.0082, "step": 19095 }, { "epoch": 4.344937428896474, "grad_norm": 1.2014812640523223, "learning_rate": 5.223156831986702e-08, "loss": 0.0148, "step": 19096 }, { "epoch": 4.345164960182025, "grad_norm": 1.4223454500119308, "learning_rate": 5.219581611277474e-08, "loss": 0.0092, "step": 19097 }, { "epoch": 4.345392491467576, "grad_norm": 3.4237341822826637, "learning_rate": 5.216007561275084e-08, "loss": 0.0272, "step": 19098 }, { "epoch": 4.3456200227531285, "grad_norm": 1.9329642554995112, "learning_rate": 5.212434682052604e-08, "loss": 0.0139, "step": 19099 }, { "epoch": 4.345847554038681, "grad_norm": 1.8873901304173462, "learning_rate": 5.208862973683025e-08, "loss": 0.1151, "step": 19100 }, { "epoch": 4.346075085324232, "grad_norm": 1.1236923490602286, "learning_rate": 5.2052924362393714e-08, "loss": 0.0478, "step": 19101 }, { "epoch": 4.346302616609784, "grad_norm": 1.0868221314052866, "learning_rate": 5.201723069794613e-08, "loss": 0.0155, "step": 19102 }, { "epoch": 4.3465301478953355, "grad_norm": 1.4825307136517483, "learning_rate": 5.1981548744216915e-08, "loss": 0.0656, "step": 19103 }, { "epoch": 4.346757679180888, "grad_norm": 2.631858367091484, "learning_rate": 5.194587850193555e-08, "loss": 0.109, "step": 19104 }, { "epoch": 4.346985210466439, "grad_norm": 1.6039748368945397, "learning_rate": 5.1910219971830765e-08, "loss": 0.0132, "step": 19105 }, { "epoch": 4.347212741751991, "grad_norm": 1.0779740640901907, "learning_rate": 5.1874573154631555e-08, "loss": 0.0128, "step": 19106 }, { "epoch": 4.3474402730375425, "grad_norm": 1.5713283142387449, "learning_rate": 5.18389380510665e-08, "loss": 0.0696, "step": 19107 }, { "epoch": 4.347667804323095, "grad_norm": 1.5131063619331815, "learning_rate": 5.180331466186378e-08, "loss": 0.0431, "step": 19108 }, { "epoch": 4.347895335608646, "grad_norm": 2.2002601992841933, "learning_rate": 5.176770298775162e-08, "loss": 0.0583, "step": 19109 }, { "epoch": 4.348122866894198, "grad_norm": 1.0792649951700262, "learning_rate": 5.1732103029457774e-08, "loss": 0.0174, "step": 19110 }, { "epoch": 4.3483503981797496, "grad_norm": 1.9189022462345973, "learning_rate": 5.169651478770986e-08, "loss": 0.0261, "step": 19111 }, { "epoch": 4.348577929465302, "grad_norm": 1.4789843471857393, "learning_rate": 5.166093826323514e-08, "loss": 0.0151, "step": 19112 }, { "epoch": 4.348805460750853, "grad_norm": 1.1317415728932223, "learning_rate": 5.162537345676087e-08, "loss": 0.0211, "step": 19113 }, { "epoch": 4.349032992036405, "grad_norm": 1.1660657417496045, "learning_rate": 5.1589820369013917e-08, "loss": 0.0635, "step": 19114 }, { "epoch": 4.349260523321957, "grad_norm": 2.6508174297510654, "learning_rate": 5.155427900072084e-08, "loss": 0.1672, "step": 19115 }, { "epoch": 4.349488054607509, "grad_norm": 1.0609950510390307, "learning_rate": 5.1518749352608145e-08, "loss": 0.0249, "step": 19116 }, { "epoch": 4.34971558589306, "grad_norm": 1.8265237544858328, "learning_rate": 5.1483231425401846e-08, "loss": 0.022, "step": 19117 }, { "epoch": 4.349943117178612, "grad_norm": 2.308643197891239, "learning_rate": 5.1447725219828105e-08, "loss": 0.1478, "step": 19118 }, { "epoch": 4.350170648464164, "grad_norm": 1.8895889587868586, "learning_rate": 5.141223073661231e-08, "loss": 0.0031, "step": 19119 }, { "epoch": 4.350398179749716, "grad_norm": 1.2807431582181612, "learning_rate": 5.137674797647998e-08, "loss": 0.015, "step": 19120 }, { "epoch": 4.350625711035267, "grad_norm": 1.5079460530323119, "learning_rate": 5.134127694015653e-08, "loss": 0.0275, "step": 19121 }, { "epoch": 4.350853242320819, "grad_norm": 1.2785262202050423, "learning_rate": 5.1305817628366714e-08, "loss": 0.0368, "step": 19122 }, { "epoch": 4.351080773606371, "grad_norm": 0.9638621308006269, "learning_rate": 5.127037004183537e-08, "loss": 0.0037, "step": 19123 }, { "epoch": 4.351308304891923, "grad_norm": 0.8012685239151753, "learning_rate": 5.123493418128685e-08, "loss": 0.0137, "step": 19124 }, { "epoch": 4.351535836177474, "grad_norm": 1.0045903615617024, "learning_rate": 5.11995100474455e-08, "loss": 0.0068, "step": 19125 }, { "epoch": 4.351763367463026, "grad_norm": 1.6191408685114126, "learning_rate": 5.1164097641035264e-08, "loss": 0.0847, "step": 19126 }, { "epoch": 4.351990898748578, "grad_norm": 1.5736136059870998, "learning_rate": 5.112869696277993e-08, "loss": 0.0136, "step": 19127 }, { "epoch": 4.35221843003413, "grad_norm": 1.8066566728677678, "learning_rate": 5.1093308013403163e-08, "loss": 0.0429, "step": 19128 }, { "epoch": 4.352445961319681, "grad_norm": 1.107400406664724, "learning_rate": 5.105793079362799e-08, "loss": 0.0372, "step": 19129 }, { "epoch": 4.352673492605233, "grad_norm": 1.3017724625542628, "learning_rate": 5.1022565304177654e-08, "loss": 0.0377, "step": 19130 }, { "epoch": 4.352901023890785, "grad_norm": 0.738651585384989, "learning_rate": 5.0987211545774976e-08, "loss": 0.0103, "step": 19131 }, { "epoch": 4.353128555176337, "grad_norm": 0.6284564872138323, "learning_rate": 5.09518695191423e-08, "loss": 0.0058, "step": 19132 }, { "epoch": 4.353356086461888, "grad_norm": 1.3723530704043339, "learning_rate": 5.0916539225002034e-08, "loss": 0.0388, "step": 19133 }, { "epoch": 4.35358361774744, "grad_norm": 0.8367218815058838, "learning_rate": 5.088122066407637e-08, "loss": 0.0048, "step": 19134 }, { "epoch": 4.3538111490329925, "grad_norm": 1.5396662377749295, "learning_rate": 5.0845913837087245e-08, "loss": 0.0556, "step": 19135 }, { "epoch": 4.354038680318544, "grad_norm": 1.032719978203001, "learning_rate": 5.0810618744755944e-08, "loss": 0.0198, "step": 19136 }, { "epoch": 4.354266211604095, "grad_norm": 1.723441250774732, "learning_rate": 5.077533538780419e-08, "loss": 0.0502, "step": 19137 }, { "epoch": 4.354493742889647, "grad_norm": 1.6172221243523799, "learning_rate": 5.0740063766952924e-08, "loss": 0.0839, "step": 19138 }, { "epoch": 4.3547212741751995, "grad_norm": 0.8222014643363779, "learning_rate": 5.07048038829229e-08, "loss": 0.0415, "step": 19139 }, { "epoch": 4.354948805460751, "grad_norm": 1.7467598695296906, "learning_rate": 5.066955573643499e-08, "loss": 0.0464, "step": 19140 }, { "epoch": 4.355176336746303, "grad_norm": 1.1357532681428881, "learning_rate": 5.063431932820946e-08, "loss": 0.0091, "step": 19141 }, { "epoch": 4.355403868031854, "grad_norm": 0.7197392072163572, "learning_rate": 5.059909465896663e-08, "loss": 0.0139, "step": 19142 }, { "epoch": 4.3556313993174065, "grad_norm": 1.32032259154441, "learning_rate": 5.056388172942628e-08, "loss": 0.0758, "step": 19143 }, { "epoch": 4.355858930602958, "grad_norm": 1.2076808187709924, "learning_rate": 5.052868054030824e-08, "loss": 0.0142, "step": 19144 }, { "epoch": 4.35608646188851, "grad_norm": 1.3526125838311043, "learning_rate": 5.049349109233194e-08, "loss": 0.0408, "step": 19145 }, { "epoch": 4.356313993174061, "grad_norm": 1.077233795026279, "learning_rate": 5.045831338621632e-08, "loss": 0.0079, "step": 19146 }, { "epoch": 4.3565415244596135, "grad_norm": 2.0730244866281202, "learning_rate": 5.042314742268059e-08, "loss": 0.058, "step": 19147 }, { "epoch": 4.356769055745165, "grad_norm": 0.763886636155624, "learning_rate": 5.038799320244349e-08, "loss": 0.036, "step": 19148 }, { "epoch": 4.356996587030717, "grad_norm": 1.0650895861834428, "learning_rate": 5.0352850726223536e-08, "loss": 0.012, "step": 19149 }, { "epoch": 4.357224118316268, "grad_norm": 1.310489278342875, "learning_rate": 5.031771999473883e-08, "loss": 0.0337, "step": 19150 }, { "epoch": 4.3574516496018205, "grad_norm": 0.847281687985937, "learning_rate": 5.028260100870742e-08, "loss": 0.004, "step": 19151 }, { "epoch": 4.357679180887372, "grad_norm": 1.177385502245942, "learning_rate": 5.02474937688472e-08, "loss": 0.0198, "step": 19152 }, { "epoch": 4.357906712172924, "grad_norm": 0.7003784961526621, "learning_rate": 5.021239827587544e-08, "loss": 0.0016, "step": 19153 }, { "epoch": 4.358134243458475, "grad_norm": 1.8749926399827, "learning_rate": 5.0177314530509626e-08, "loss": 0.0254, "step": 19154 }, { "epoch": 4.3583617747440275, "grad_norm": 1.1230954888594535, "learning_rate": 5.014224253346675e-08, "loss": 0.0388, "step": 19155 }, { "epoch": 4.358589306029579, "grad_norm": 1.3602627624230796, "learning_rate": 5.010718228546374e-08, "loss": 0.0769, "step": 19156 }, { "epoch": 4.358816837315131, "grad_norm": 1.3070923551138862, "learning_rate": 5.0072133787217035e-08, "loss": 0.0615, "step": 19157 }, { "epoch": 4.359044368600682, "grad_norm": 1.2661087277892389, "learning_rate": 5.0037097039442944e-08, "loss": 0.0015, "step": 19158 }, { "epoch": 4.3592718998862345, "grad_norm": 1.0294081350121411, "learning_rate": 5.000207204285762e-08, "loss": 0.0164, "step": 19159 }, { "epoch": 4.359499431171786, "grad_norm": 1.8676195835409275, "learning_rate": 4.996705879817675e-08, "loss": 0.1255, "step": 19160 }, { "epoch": 4.359726962457338, "grad_norm": 1.2890300020663725, "learning_rate": 4.9932057306116207e-08, "loss": 0.011, "step": 19161 }, { "epoch": 4.359954493742889, "grad_norm": 0.9203961824926488, "learning_rate": 4.98970675673912e-08, "loss": 0.0073, "step": 19162 }, { "epoch": 4.3601820250284415, "grad_norm": 1.4254248809657046, "learning_rate": 4.9862089582716904e-08, "loss": 0.029, "step": 19163 }, { "epoch": 4.360409556313993, "grad_norm": 0.9297398404306264, "learning_rate": 4.9827123352808244e-08, "loss": 0.0112, "step": 19164 }, { "epoch": 4.360637087599545, "grad_norm": 2.7995828164117826, "learning_rate": 4.979216887837972e-08, "loss": 0.0101, "step": 19165 }, { "epoch": 4.360864618885096, "grad_norm": 0.8250450408479644, "learning_rate": 4.975722616014575e-08, "loss": 0.0402, "step": 19166 }, { "epoch": 4.3610921501706486, "grad_norm": 1.6505901492657424, "learning_rate": 4.972229519882063e-08, "loss": 0.0495, "step": 19167 }, { "epoch": 4.3613196814562, "grad_norm": 0.7536101132922517, "learning_rate": 4.9687375995118305e-08, "loss": 0.0029, "step": 19168 }, { "epoch": 4.361547212741752, "grad_norm": 0.9965309048615548, "learning_rate": 4.9652468549752365e-08, "loss": 0.0108, "step": 19169 }, { "epoch": 4.361774744027303, "grad_norm": 1.6213332537650174, "learning_rate": 4.961757286343613e-08, "loss": 0.0631, "step": 19170 }, { "epoch": 4.362002275312856, "grad_norm": 1.3090197131421397, "learning_rate": 4.958268893688313e-08, "loss": 0.0201, "step": 19171 }, { "epoch": 4.362229806598407, "grad_norm": 4.41779558636226, "learning_rate": 4.9547816770805986e-08, "loss": 0.1039, "step": 19172 }, { "epoch": 4.362457337883959, "grad_norm": 1.1535588903863674, "learning_rate": 4.9512956365917605e-08, "loss": 0.0627, "step": 19173 }, { "epoch": 4.362684869169511, "grad_norm": 1.308219249439807, "learning_rate": 4.947810772293039e-08, "loss": 0.0055, "step": 19174 }, { "epoch": 4.362912400455063, "grad_norm": 1.4027743929272363, "learning_rate": 4.9443270842556777e-08, "loss": 0.0126, "step": 19175 }, { "epoch": 4.363139931740614, "grad_norm": 3.3065260763276982, "learning_rate": 4.940844572550861e-08, "loss": 0.0156, "step": 19176 }, { "epoch": 4.363367463026166, "grad_norm": 0.9077946117526235, "learning_rate": 4.937363237249762e-08, "loss": 0.0161, "step": 19177 }, { "epoch": 4.363594994311718, "grad_norm": 0.5325517526657781, "learning_rate": 4.933883078423539e-08, "loss": 0.0069, "step": 19178 }, { "epoch": 4.36382252559727, "grad_norm": 0.529564010346098, "learning_rate": 4.930404096143315e-08, "loss": 0.0025, "step": 19179 }, { "epoch": 4.364050056882822, "grad_norm": 1.6859127517082784, "learning_rate": 4.926926290480194e-08, "loss": 0.1837, "step": 19180 }, { "epoch": 4.364277588168373, "grad_norm": 1.622394829207675, "learning_rate": 4.923449661505257e-08, "loss": 0.0889, "step": 19181 }, { "epoch": 4.364505119453925, "grad_norm": 1.1035493400888874, "learning_rate": 4.91997420928958e-08, "loss": 0.0701, "step": 19182 }, { "epoch": 4.364732650739477, "grad_norm": 0.8922137699962964, "learning_rate": 4.9164999339041746e-08, "loss": 0.017, "step": 19183 }, { "epoch": 4.364960182025029, "grad_norm": 1.664118518082427, "learning_rate": 4.91302683542004e-08, "loss": 0.0848, "step": 19184 }, { "epoch": 4.36518771331058, "grad_norm": 1.1641642898756113, "learning_rate": 4.909554913908182e-08, "loss": 0.0119, "step": 19185 }, { "epoch": 4.365415244596132, "grad_norm": 1.0415420608924242, "learning_rate": 4.906084169439537e-08, "loss": 0.0232, "step": 19186 }, { "epoch": 4.365642775881684, "grad_norm": 2.1144706104731545, "learning_rate": 4.9026146020850555e-08, "loss": 0.0118, "step": 19187 }, { "epoch": 4.365870307167236, "grad_norm": 1.8096416839849123, "learning_rate": 4.899146211915659e-08, "loss": 0.0495, "step": 19188 }, { "epoch": 4.366097838452787, "grad_norm": 0.7391038215923633, "learning_rate": 4.895678999002208e-08, "loss": 0.0069, "step": 19189 }, { "epoch": 4.366325369738339, "grad_norm": 1.9517967733299455, "learning_rate": 4.8922129634155976e-08, "loss": 0.0401, "step": 19190 }, { "epoch": 4.366552901023891, "grad_norm": 1.2361532696102555, "learning_rate": 4.888748105226632e-08, "loss": 0.0812, "step": 19191 }, { "epoch": 4.366780432309443, "grad_norm": 1.8470695044988348, "learning_rate": 4.885284424506164e-08, "loss": 0.0952, "step": 19192 }, { "epoch": 4.367007963594994, "grad_norm": 1.4647378921083798, "learning_rate": 4.8818219213249486e-08, "loss": 0.0589, "step": 19193 }, { "epoch": 4.367235494880546, "grad_norm": 1.4277912122625964, "learning_rate": 4.878360595753771e-08, "loss": 0.0232, "step": 19194 }, { "epoch": 4.367463026166098, "grad_norm": 1.5150144470717073, "learning_rate": 4.874900447863387e-08, "loss": 0.1047, "step": 19195 }, { "epoch": 4.36769055745165, "grad_norm": 1.5481054611204743, "learning_rate": 4.8714414777244965e-08, "loss": 0.0403, "step": 19196 }, { "epoch": 4.367918088737201, "grad_norm": 0.4720782328434735, "learning_rate": 4.8679836854077995e-08, "loss": 0.003, "step": 19197 }, { "epoch": 4.368145620022753, "grad_norm": 0.4805892345435755, "learning_rate": 4.864527070983969e-08, "loss": 0.0045, "step": 19198 }, { "epoch": 4.368373151308305, "grad_norm": 1.1900012429301923, "learning_rate": 4.861071634523658e-08, "loss": 0.0852, "step": 19199 }, { "epoch": 4.368600682593857, "grad_norm": 0.9595467453052465, "learning_rate": 4.8576173760974674e-08, "loss": 0.0457, "step": 19200 }, { "epoch": 4.368828213879408, "grad_norm": 1.5877380813508963, "learning_rate": 4.8541642957760096e-08, "loss": 0.0163, "step": 19201 }, { "epoch": 4.36905574516496, "grad_norm": 0.3755554967918347, "learning_rate": 4.850712393629879e-08, "loss": 0.0017, "step": 19202 }, { "epoch": 4.369283276450512, "grad_norm": 1.309701701917394, "learning_rate": 4.84726166972959e-08, "loss": 0.0891, "step": 19203 }, { "epoch": 4.369510807736064, "grad_norm": 0.49247492775942353, "learning_rate": 4.843812124145697e-08, "loss": 0.0018, "step": 19204 }, { "epoch": 4.369738339021615, "grad_norm": 0.8393998688670455, "learning_rate": 4.840363756948685e-08, "loss": 0.0088, "step": 19205 }, { "epoch": 4.369965870307167, "grad_norm": 0.7361449006373659, "learning_rate": 4.8369165682090464e-08, "loss": 0.0285, "step": 19206 }, { "epoch": 4.370193401592719, "grad_norm": 0.7716377052157045, "learning_rate": 4.833470557997218e-08, "loss": 0.0163, "step": 19207 }, { "epoch": 4.370420932878271, "grad_norm": 1.7804637391862177, "learning_rate": 4.830025726383643e-08, "loss": 0.0588, "step": 19208 }, { "epoch": 4.370648464163822, "grad_norm": 1.632333852075996, "learning_rate": 4.826582073438738e-08, "loss": 0.0125, "step": 19209 }, { "epoch": 4.370875995449374, "grad_norm": 0.9950351647058585, "learning_rate": 4.823139599232855e-08, "loss": 0.0434, "step": 19210 }, { "epoch": 4.371103526734926, "grad_norm": 1.177744419693403, "learning_rate": 4.819698303836377e-08, "loss": 0.0171, "step": 19211 }, { "epoch": 4.371331058020478, "grad_norm": 2.6586280888612537, "learning_rate": 4.816258187319622e-08, "loss": 0.0054, "step": 19212 }, { "epoch": 4.37155858930603, "grad_norm": 1.342600950648665, "learning_rate": 4.812819249752923e-08, "loss": 0.0145, "step": 19213 }, { "epoch": 4.371786120591581, "grad_norm": 1.33901188605688, "learning_rate": 4.8093814912065285e-08, "loss": 0.0147, "step": 19214 }, { "epoch": 4.372013651877133, "grad_norm": 1.4742967486320144, "learning_rate": 4.80594491175073e-08, "loss": 0.0725, "step": 19215 }, { "epoch": 4.372241183162685, "grad_norm": 1.0478496073324246, "learning_rate": 4.8025095114557635e-08, "loss": 0.0131, "step": 19216 }, { "epoch": 4.372468714448237, "grad_norm": 1.3014437305185473, "learning_rate": 4.799075290391822e-08, "loss": 0.0138, "step": 19217 }, { "epoch": 4.372696245733788, "grad_norm": 1.0989222510416965, "learning_rate": 4.7956422486291136e-08, "loss": 0.0107, "step": 19218 }, { "epoch": 4.3729237770193405, "grad_norm": 0.8712936958606646, "learning_rate": 4.792210386237804e-08, "loss": 0.0097, "step": 19219 }, { "epoch": 4.373151308304892, "grad_norm": 2.373141065357928, "learning_rate": 4.788779703288019e-08, "loss": 0.0346, "step": 19220 }, { "epoch": 4.373378839590444, "grad_norm": 0.5378477993426037, "learning_rate": 4.785350199849874e-08, "loss": 0.0387, "step": 19221 }, { "epoch": 4.373606370875995, "grad_norm": 0.9174550544353205, "learning_rate": 4.781921875993481e-08, "loss": 0.0184, "step": 19222 }, { "epoch": 4.3738339021615475, "grad_norm": 0.6782273824548453, "learning_rate": 4.778494731788902e-08, "loss": 0.0128, "step": 19223 }, { "epoch": 4.374061433447099, "grad_norm": 1.1782669425640995, "learning_rate": 4.7750687673061775e-08, "loss": 0.0538, "step": 19224 }, { "epoch": 4.374288964732651, "grad_norm": 1.6183569272041838, "learning_rate": 4.771643982615329e-08, "loss": 0.0273, "step": 19225 }, { "epoch": 4.374516496018202, "grad_norm": 0.5010602200339557, "learning_rate": 4.768220377786348e-08, "loss": 0.0017, "step": 19226 }, { "epoch": 4.374744027303755, "grad_norm": 0.8841465479200293, "learning_rate": 4.764797952889214e-08, "loss": 0.0054, "step": 19227 }, { "epoch": 4.374971558589306, "grad_norm": 1.2410527003770646, "learning_rate": 4.761376707993884e-08, "loss": 0.0107, "step": 19228 }, { "epoch": 4.375199089874858, "grad_norm": 0.7322154367456911, "learning_rate": 4.7579566431702545e-08, "loss": 0.0384, "step": 19229 }, { "epoch": 4.375426621160409, "grad_norm": 0.9780853646272667, "learning_rate": 4.754537758488255e-08, "loss": 0.0145, "step": 19230 }, { "epoch": 4.375654152445962, "grad_norm": 1.2958927008534606, "learning_rate": 4.7511200540177394e-08, "loss": 0.0589, "step": 19231 }, { "epoch": 4.375881683731513, "grad_norm": 0.41019726393558886, "learning_rate": 4.747703529828576e-08, "loss": 0.002, "step": 19232 }, { "epoch": 4.376109215017065, "grad_norm": 0.7714147615322444, "learning_rate": 4.74428818599057e-08, "loss": 0.0041, "step": 19233 }, { "epoch": 4.376336746302616, "grad_norm": 1.8312449068596726, "learning_rate": 4.7408740225735466e-08, "loss": 0.084, "step": 19234 }, { "epoch": 4.376564277588169, "grad_norm": 1.259952207176146, "learning_rate": 4.737461039647284e-08, "loss": 0.0604, "step": 19235 }, { "epoch": 4.37679180887372, "grad_norm": 3.309169857682549, "learning_rate": 4.734049237281518e-08, "loss": 0.01, "step": 19236 }, { "epoch": 4.377019340159272, "grad_norm": 0.762501680255245, "learning_rate": 4.730638615546006e-08, "loss": 0.0282, "step": 19237 }, { "epoch": 4.377246871444823, "grad_norm": 11.803749547683276, "learning_rate": 4.727229174510441e-08, "loss": 0.0685, "step": 19238 }, { "epoch": 4.377474402730376, "grad_norm": 1.2637231048711206, "learning_rate": 4.7238209142444895e-08, "loss": 0.0195, "step": 19239 }, { "epoch": 4.377701934015927, "grad_norm": 0.9209609622243069, "learning_rate": 4.720413834817833e-08, "loss": 0.0122, "step": 19240 }, { "epoch": 4.377929465301479, "grad_norm": 0.915247676978089, "learning_rate": 4.717007936300096e-08, "loss": 0.0155, "step": 19241 }, { "epoch": 4.37815699658703, "grad_norm": 1.1349728310134508, "learning_rate": 4.713603218760909e-08, "loss": 0.0212, "step": 19242 }, { "epoch": 4.378384527872583, "grad_norm": 0.8142466423251253, "learning_rate": 4.7101996822698226e-08, "loss": 0.0041, "step": 19243 }, { "epoch": 4.378612059158134, "grad_norm": 0.9852335868033034, "learning_rate": 4.7067973268964324e-08, "loss": 0.0161, "step": 19244 }, { "epoch": 4.378839590443686, "grad_norm": 1.4406095135904964, "learning_rate": 4.70339615271026e-08, "loss": 0.0196, "step": 19245 }, { "epoch": 4.379067121729237, "grad_norm": 1.1953819304335556, "learning_rate": 4.699996159780812e-08, "loss": 0.077, "step": 19246 }, { "epoch": 4.37929465301479, "grad_norm": 1.1720147253969293, "learning_rate": 4.696597348177588e-08, "loss": 0.0268, "step": 19247 }, { "epoch": 4.379522184300341, "grad_norm": 0.8130156272076907, "learning_rate": 4.693199717970047e-08, "loss": 0.0163, "step": 19248 }, { "epoch": 4.379749715585893, "grad_norm": 1.297055848536044, "learning_rate": 4.689803269227654e-08, "loss": 0.0111, "step": 19249 }, { "epoch": 4.379977246871444, "grad_norm": 0.9336245476917692, "learning_rate": 4.6864080020197905e-08, "loss": 0.0065, "step": 19250 }, { "epoch": 4.380204778156997, "grad_norm": 1.034652255195954, "learning_rate": 4.683013916415881e-08, "loss": 0.0626, "step": 19251 }, { "epoch": 4.380432309442549, "grad_norm": 2.045293152503336, "learning_rate": 4.679621012485279e-08, "loss": 0.162, "step": 19252 }, { "epoch": 4.3806598407281, "grad_norm": 0.9677413466882349, "learning_rate": 4.6762292902973186e-08, "loss": 0.0054, "step": 19253 }, { "epoch": 4.380887372013651, "grad_norm": 0.7262579915563171, "learning_rate": 4.672838749921332e-08, "loss": 0.0072, "step": 19254 }, { "epoch": 4.381114903299204, "grad_norm": 1.1514587281612094, "learning_rate": 4.66944939142662e-08, "loss": 0.0629, "step": 19255 }, { "epoch": 4.381342434584756, "grad_norm": 1.1449056916685274, "learning_rate": 4.666061214882459e-08, "loss": 0.0326, "step": 19256 }, { "epoch": 4.381569965870307, "grad_norm": 1.0369957061141009, "learning_rate": 4.662674220358085e-08, "loss": 0.0428, "step": 19257 }, { "epoch": 4.381797497155859, "grad_norm": 0.9667269256424514, "learning_rate": 4.6592884079227215e-08, "loss": 0.004, "step": 19258 }, { "epoch": 4.382025028441411, "grad_norm": 0.9693678348953056, "learning_rate": 4.6559037776455835e-08, "loss": 0.0069, "step": 19259 }, { "epoch": 4.382252559726963, "grad_norm": 0.8851398260003424, "learning_rate": 4.6525203295958166e-08, "loss": 0.0048, "step": 19260 }, { "epoch": 4.382480091012514, "grad_norm": 1.2993303052848604, "learning_rate": 4.649138063842602e-08, "loss": 0.0935, "step": 19261 }, { "epoch": 4.382707622298066, "grad_norm": 1.5570552344946678, "learning_rate": 4.6457569804550516e-08, "loss": 0.103, "step": 19262 }, { "epoch": 4.382935153583618, "grad_norm": 1.817614543135993, "learning_rate": 4.6423770795022833e-08, "loss": 0.0433, "step": 19263 }, { "epoch": 4.38316268486917, "grad_norm": 1.0389163419258607, "learning_rate": 4.6389983610533664e-08, "loss": 0.0615, "step": 19264 }, { "epoch": 4.383390216154721, "grad_norm": 1.5950096209444105, "learning_rate": 4.635620825177344e-08, "loss": 0.0688, "step": 19265 }, { "epoch": 4.383617747440273, "grad_norm": 0.3476164993798144, "learning_rate": 4.63224447194327e-08, "loss": 0.0015, "step": 19266 }, { "epoch": 4.383845278725825, "grad_norm": 1.365729646315585, "learning_rate": 4.628869301420133e-08, "loss": 0.0713, "step": 19267 }, { "epoch": 4.384072810011377, "grad_norm": 1.518685537900212, "learning_rate": 4.625495313676919e-08, "loss": 0.0326, "step": 19268 }, { "epoch": 4.384300341296928, "grad_norm": 0.9673739337562152, "learning_rate": 4.622122508782585e-08, "loss": 0.041, "step": 19269 }, { "epoch": 4.38452787258248, "grad_norm": 1.5774373732206084, "learning_rate": 4.618750886806085e-08, "loss": 0.0762, "step": 19270 }, { "epoch": 4.384755403868032, "grad_norm": 0.7118830511969062, "learning_rate": 4.615380447816308e-08, "loss": 0.0076, "step": 19271 }, { "epoch": 4.384982935153584, "grad_norm": 1.0416291216552018, "learning_rate": 4.612011191882135e-08, "loss": 0.0141, "step": 19272 }, { "epoch": 4.385210466439135, "grad_norm": 1.903901935130696, "learning_rate": 4.6086431190724425e-08, "loss": 0.0276, "step": 19273 }, { "epoch": 4.385437997724687, "grad_norm": 0.9531842671091117, "learning_rate": 4.605276229456057e-08, "loss": 0.0184, "step": 19274 }, { "epoch": 4.385665529010239, "grad_norm": 1.082850935374998, "learning_rate": 4.601910523101792e-08, "loss": 0.0136, "step": 19275 }, { "epoch": 4.385893060295791, "grad_norm": 2.1377156274252016, "learning_rate": 4.598546000078454e-08, "loss": 0.0138, "step": 19276 }, { "epoch": 4.386120591581342, "grad_norm": 1.6356218725156368, "learning_rate": 4.595182660454785e-08, "loss": 0.0213, "step": 19277 }, { "epoch": 4.386348122866894, "grad_norm": 0.881624945332149, "learning_rate": 4.5918205042995436e-08, "loss": 0.0059, "step": 19278 }, { "epoch": 4.386575654152446, "grad_norm": 1.0108335602434213, "learning_rate": 4.588459531681425e-08, "loss": 0.0275, "step": 19279 }, { "epoch": 4.386803185437998, "grad_norm": 0.40863137089603546, "learning_rate": 4.585099742669151e-08, "loss": 0.0015, "step": 19280 }, { "epoch": 4.387030716723549, "grad_norm": 1.1565779729970447, "learning_rate": 4.5817411373313414e-08, "loss": 0.0807, "step": 19281 }, { "epoch": 4.387258248009101, "grad_norm": 1.3881005396430162, "learning_rate": 4.578383715736698e-08, "loss": 0.0187, "step": 19282 }, { "epoch": 4.387485779294653, "grad_norm": 1.0826234595774524, "learning_rate": 4.575027477953811e-08, "loss": 0.0361, "step": 19283 }, { "epoch": 4.387713310580205, "grad_norm": 1.772949758694346, "learning_rate": 4.571672424051265e-08, "loss": 0.047, "step": 19284 }, { "epoch": 4.387940841865756, "grad_norm": 1.8051820579902524, "learning_rate": 4.5683185540976586e-08, "loss": 0.034, "step": 19285 }, { "epoch": 4.388168373151308, "grad_norm": 1.4946619438220263, "learning_rate": 4.564965868161513e-08, "loss": 0.0555, "step": 19286 }, { "epoch": 4.38839590443686, "grad_norm": 0.5651992243438628, "learning_rate": 4.561614366311362e-08, "loss": 0.0041, "step": 19287 }, { "epoch": 4.388623435722412, "grad_norm": 1.6132091260711432, "learning_rate": 4.558264048615702e-08, "loss": 0.0834, "step": 19288 }, { "epoch": 4.388850967007963, "grad_norm": 1.9515563492727195, "learning_rate": 4.554914915143017e-08, "loss": 0.0932, "step": 19289 }, { "epoch": 4.389078498293515, "grad_norm": 1.6135265565523291, "learning_rate": 4.5515669659617556e-08, "loss": 0.0278, "step": 19290 }, { "epoch": 4.389306029579068, "grad_norm": 1.369354943672775, "learning_rate": 4.5482202011403255e-08, "loss": 0.0515, "step": 19291 }, { "epoch": 4.389533560864619, "grad_norm": 1.218324375118756, "learning_rate": 4.5448746207471526e-08, "loss": 0.0483, "step": 19292 }, { "epoch": 4.38976109215017, "grad_norm": 1.9916687260705133, "learning_rate": 4.541530224850592e-08, "loss": 0.0904, "step": 19293 }, { "epoch": 4.389988623435722, "grad_norm": 0.7340165028293879, "learning_rate": 4.5381870135190046e-08, "loss": 0.0046, "step": 19294 }, { "epoch": 4.390216154721275, "grad_norm": 1.5287635494169063, "learning_rate": 4.5348449868207326e-08, "loss": 0.0611, "step": 19295 }, { "epoch": 4.390443686006826, "grad_norm": 1.1490756473985932, "learning_rate": 4.5315041448240686e-08, "loss": 0.0408, "step": 19296 }, { "epoch": 4.390671217292378, "grad_norm": 1.3566551178393398, "learning_rate": 4.528164487597297e-08, "loss": 0.0616, "step": 19297 }, { "epoch": 4.390898748577929, "grad_norm": 0.6923881847695811, "learning_rate": 4.524826015208664e-08, "loss": 0.0173, "step": 19298 }, { "epoch": 4.391126279863482, "grad_norm": 1.233217271029797, "learning_rate": 4.521488727726425e-08, "loss": 0.015, "step": 19299 }, { "epoch": 4.391353811149033, "grad_norm": 1.3641491853348344, "learning_rate": 4.518152625218764e-08, "loss": 0.0295, "step": 19300 }, { "epoch": 4.391581342434585, "grad_norm": 0.9548624005835218, "learning_rate": 4.514817707753867e-08, "loss": 0.0064, "step": 19301 }, { "epoch": 4.391808873720136, "grad_norm": 0.676031346890503, "learning_rate": 4.511483975399918e-08, "loss": 0.0069, "step": 19302 }, { "epoch": 4.392036405005689, "grad_norm": 1.2736032356510953, "learning_rate": 4.5081514282250274e-08, "loss": 0.0184, "step": 19303 }, { "epoch": 4.39226393629124, "grad_norm": 2.714590209087073, "learning_rate": 4.5048200662973216e-08, "loss": 0.0647, "step": 19304 }, { "epoch": 4.392491467576792, "grad_norm": 1.3637309557654624, "learning_rate": 4.501489889684865e-08, "loss": 0.0322, "step": 19305 }, { "epoch": 4.392718998862343, "grad_norm": 1.2230078761383003, "learning_rate": 4.4981608984557554e-08, "loss": 0.0611, "step": 19306 }, { "epoch": 4.392946530147896, "grad_norm": 3.0092291796845445, "learning_rate": 4.494833092677994e-08, "loss": 0.0262, "step": 19307 }, { "epoch": 4.393174061433447, "grad_norm": 0.895380747782697, "learning_rate": 4.491506472419617e-08, "loss": 0.0454, "step": 19308 }, { "epoch": 4.393401592718999, "grad_norm": 1.1092431467618824, "learning_rate": 4.488181037748618e-08, "loss": 0.0131, "step": 19309 }, { "epoch": 4.39362912400455, "grad_norm": 0.9952817453205635, "learning_rate": 4.48485678873295e-08, "loss": 0.0141, "step": 19310 }, { "epoch": 4.393856655290103, "grad_norm": 0.3051036521808148, "learning_rate": 4.481533725440565e-08, "loss": 0.0006, "step": 19311 }, { "epoch": 4.394084186575654, "grad_norm": 0.5547379225314785, "learning_rate": 4.478211847939361e-08, "loss": 0.0026, "step": 19312 }, { "epoch": 4.394311717861206, "grad_norm": 0.8157454663662329, "learning_rate": 4.474891156297262e-08, "loss": 0.0128, "step": 19313 }, { "epoch": 4.394539249146757, "grad_norm": 0.9169981677729564, "learning_rate": 4.471571650582103e-08, "loss": 0.0131, "step": 19314 }, { "epoch": 4.39476678043231, "grad_norm": 1.0400899554991492, "learning_rate": 4.468253330861754e-08, "loss": 0.0134, "step": 19315 }, { "epoch": 4.394994311717861, "grad_norm": 1.1566271422064633, "learning_rate": 4.464936197204034e-08, "loss": 0.013, "step": 19316 }, { "epoch": 4.395221843003413, "grad_norm": 0.9434019421923784, "learning_rate": 4.461620249676717e-08, "loss": 0.0139, "step": 19317 }, { "epoch": 4.395449374288964, "grad_norm": 1.371341710100479, "learning_rate": 4.458305488347602e-08, "loss": 0.0537, "step": 19318 }, { "epoch": 4.395676905574517, "grad_norm": 1.4812643584193177, "learning_rate": 4.454991913284419e-08, "loss": 0.1265, "step": 19319 }, { "epoch": 4.395904436860068, "grad_norm": 0.8403769682028263, "learning_rate": 4.4516795245549e-08, "loss": 0.0393, "step": 19320 }, { "epoch": 4.39613196814562, "grad_norm": 0.3690484902902373, "learning_rate": 4.4483683222267326e-08, "loss": 0.002, "step": 19321 }, { "epoch": 4.396359499431171, "grad_norm": 0.6578956745155299, "learning_rate": 4.445058306367607e-08, "loss": 0.0036, "step": 19322 }, { "epoch": 4.396587030716724, "grad_norm": 1.213389175815404, "learning_rate": 4.441749477045169e-08, "loss": 0.027, "step": 19323 }, { "epoch": 4.396814562002275, "grad_norm": 2.3851547827844897, "learning_rate": 4.4384418343270395e-08, "loss": 0.0244, "step": 19324 }, { "epoch": 4.397042093287827, "grad_norm": 1.4632670515724702, "learning_rate": 4.43513537828083e-08, "loss": 0.0478, "step": 19325 }, { "epoch": 4.397269624573378, "grad_norm": 2.2134347960156804, "learning_rate": 4.431830108974112e-08, "loss": 0.0683, "step": 19326 }, { "epoch": 4.397497155858931, "grad_norm": 1.1105901997722167, "learning_rate": 4.428526026474429e-08, "loss": 0.0318, "step": 19327 }, { "epoch": 4.397724687144482, "grad_norm": 1.6937636976685109, "learning_rate": 4.425223130849324e-08, "loss": 0.0075, "step": 19328 }, { "epoch": 4.397952218430034, "grad_norm": 1.2080424389476891, "learning_rate": 4.421921422166298e-08, "loss": 0.0053, "step": 19329 }, { "epoch": 4.398179749715586, "grad_norm": 1.2592104225309342, "learning_rate": 4.4186209004928454e-08, "loss": 0.0237, "step": 19330 }, { "epoch": 4.398407281001138, "grad_norm": 0.928853634130011, "learning_rate": 4.4153215658963996e-08, "loss": 0.0178, "step": 19331 }, { "epoch": 4.398634812286689, "grad_norm": 0.7240158723834068, "learning_rate": 4.4120234184444127e-08, "loss": 0.0202, "step": 19332 }, { "epoch": 4.398862343572241, "grad_norm": 1.8527407800180162, "learning_rate": 4.408726458204282e-08, "loss": 0.0241, "step": 19333 }, { "epoch": 4.399089874857793, "grad_norm": 1.412971769507491, "learning_rate": 4.405430685243385e-08, "loss": 0.0691, "step": 19334 }, { "epoch": 4.399317406143345, "grad_norm": 2.8143382671233885, "learning_rate": 4.402136099629091e-08, "loss": 0.0855, "step": 19335 }, { "epoch": 4.399544937428897, "grad_norm": 1.5107545900374404, "learning_rate": 4.398842701428735e-08, "loss": 0.0086, "step": 19336 }, { "epoch": 4.399772468714448, "grad_norm": 0.7433386899379727, "learning_rate": 4.395550490709638e-08, "loss": 0.0177, "step": 19337 }, { "epoch": 4.4, "grad_norm": 0.7010277492634241, "learning_rate": 4.3922594675390586e-08, "loss": 0.0051, "step": 19338 }, { "epoch": 4.400227531285552, "grad_norm": 0.9928376322471215, "learning_rate": 4.388969631984291e-08, "loss": 0.009, "step": 19339 }, { "epoch": 4.400455062571104, "grad_norm": 0.8380166942525564, "learning_rate": 4.385680984112558e-08, "loss": 0.0023, "step": 19340 }, { "epoch": 4.400682593856655, "grad_norm": 0.4289956984696148, "learning_rate": 4.3823935239910646e-08, "loss": 0.0017, "step": 19341 }, { "epoch": 4.400910125142207, "grad_norm": 1.6286989536950074, "learning_rate": 4.379107251687012e-08, "loss": 0.0295, "step": 19342 }, { "epoch": 4.401137656427759, "grad_norm": 0.888670629913471, "learning_rate": 4.375822167267563e-08, "loss": 0.0035, "step": 19343 }, { "epoch": 4.401365187713311, "grad_norm": 1.1259235228795954, "learning_rate": 4.3725382707998715e-08, "loss": 0.0054, "step": 19344 }, { "epoch": 4.401592718998862, "grad_norm": 1.3877577773885459, "learning_rate": 4.369255562351037e-08, "loss": 0.0344, "step": 19345 }, { "epoch": 4.401820250284414, "grad_norm": 0.9407501037274485, "learning_rate": 4.365974041988151e-08, "loss": 0.0182, "step": 19346 }, { "epoch": 4.402047781569966, "grad_norm": 1.8016428918760847, "learning_rate": 4.362693709778286e-08, "loss": 0.0221, "step": 19347 }, { "epoch": 4.402275312855518, "grad_norm": 2.2041346823057433, "learning_rate": 4.3594145657884915e-08, "loss": 0.0197, "step": 19348 }, { "epoch": 4.402502844141069, "grad_norm": 0.9919238036854214, "learning_rate": 4.3561366100857913e-08, "loss": 0.0085, "step": 19349 }, { "epoch": 4.402730375426621, "grad_norm": 2.904169950530335, "learning_rate": 4.352859842737159e-08, "loss": 0.0196, "step": 19350 }, { "epoch": 4.402957906712173, "grad_norm": 1.1176856347419117, "learning_rate": 4.349584263809596e-08, "loss": 0.0705, "step": 19351 }, { "epoch": 4.403185437997725, "grad_norm": 2.3572096330217707, "learning_rate": 4.346309873370036e-08, "loss": 0.0232, "step": 19352 }, { "epoch": 4.403412969283276, "grad_norm": 1.024573713329205, "learning_rate": 4.343036671485383e-08, "loss": 0.0109, "step": 19353 }, { "epoch": 4.403640500568828, "grad_norm": 0.9120808967554924, "learning_rate": 4.339764658222549e-08, "loss": 0.0164, "step": 19354 }, { "epoch": 4.40386803185438, "grad_norm": 0.6808468226496678, "learning_rate": 4.336493833648418e-08, "loss": 0.0068, "step": 19355 }, { "epoch": 4.404095563139932, "grad_norm": 0.4872417440640561, "learning_rate": 4.3332241978298326e-08, "loss": 0.0022, "step": 19356 }, { "epoch": 4.404323094425483, "grad_norm": 2.074215159996007, "learning_rate": 4.329955750833614e-08, "loss": 0.0159, "step": 19357 }, { "epoch": 4.404550625711035, "grad_norm": 0.5083485032715277, "learning_rate": 4.3266884927265704e-08, "loss": 0.0058, "step": 19358 }, { "epoch": 4.404778156996587, "grad_norm": 0.545930479125303, "learning_rate": 4.323422423575481e-08, "loss": 0.0018, "step": 19359 }, { "epoch": 4.405005688282139, "grad_norm": 1.4311985129272138, "learning_rate": 4.320157543447077e-08, "loss": 0.0257, "step": 19360 }, { "epoch": 4.40523321956769, "grad_norm": 2.1788389780797486, "learning_rate": 4.316893852408105e-08, "loss": 0.0417, "step": 19361 }, { "epoch": 4.405460750853242, "grad_norm": 1.4820239261368715, "learning_rate": 4.313631350525267e-08, "loss": 0.0412, "step": 19362 }, { "epoch": 4.405688282138794, "grad_norm": 1.760142553382347, "learning_rate": 4.310370037865247e-08, "loss": 0.0264, "step": 19363 }, { "epoch": 4.405915813424346, "grad_norm": 1.414034193316917, "learning_rate": 4.307109914494692e-08, "loss": 0.093, "step": 19364 }, { "epoch": 4.406143344709897, "grad_norm": 1.1990303946641623, "learning_rate": 4.30385098048023e-08, "loss": 0.017, "step": 19365 }, { "epoch": 4.406370875995449, "grad_norm": 1.5413795439127187, "learning_rate": 4.300593235888481e-08, "loss": 0.0911, "step": 19366 }, { "epoch": 4.406598407281001, "grad_norm": 1.2893079182834821, "learning_rate": 4.29733668078601e-08, "loss": 0.0504, "step": 19367 }, { "epoch": 4.406825938566553, "grad_norm": 1.5626110729672436, "learning_rate": 4.294081315239382e-08, "loss": 0.016, "step": 19368 }, { "epoch": 4.407053469852105, "grad_norm": 1.2520158082697654, "learning_rate": 4.2908271393151335e-08, "loss": 0.0977, "step": 19369 }, { "epoch": 4.407281001137656, "grad_norm": 0.8646635191069745, "learning_rate": 4.2875741530797875e-08, "loss": 0.0055, "step": 19370 }, { "epoch": 4.407508532423208, "grad_norm": 0.7922658505857233, "learning_rate": 4.284322356599806e-08, "loss": 0.0505, "step": 19371 }, { "epoch": 4.40773606370876, "grad_norm": 1.3795794303684357, "learning_rate": 4.281071749941655e-08, "loss": 0.0584, "step": 19372 }, { "epoch": 4.407963594994312, "grad_norm": 2.3659271392553016, "learning_rate": 4.2778223331717825e-08, "loss": 0.0492, "step": 19373 }, { "epoch": 4.408191126279863, "grad_norm": 0.8781111068004119, "learning_rate": 4.274574106356587e-08, "loss": 0.0179, "step": 19374 }, { "epoch": 4.408418657565416, "grad_norm": 2.715563291958519, "learning_rate": 4.271327069562452e-08, "loss": 0.075, "step": 19375 }, { "epoch": 4.408646188850967, "grad_norm": 1.8598494773067875, "learning_rate": 4.2680812228557616e-08, "loss": 0.0777, "step": 19376 }, { "epoch": 4.408873720136519, "grad_norm": 1.311792369195493, "learning_rate": 4.264836566302846e-08, "loss": 0.0128, "step": 19377 }, { "epoch": 4.40910125142207, "grad_norm": 1.3172214886130749, "learning_rate": 4.2615930999700267e-08, "loss": 0.0253, "step": 19378 }, { "epoch": 4.409328782707623, "grad_norm": 0.5735488066836576, "learning_rate": 4.2583508239235696e-08, "loss": 0.0026, "step": 19379 }, { "epoch": 4.409556313993174, "grad_norm": 0.9460036590400057, "learning_rate": 4.2551097382297696e-08, "loss": 0.0073, "step": 19380 }, { "epoch": 4.409783845278726, "grad_norm": 1.5968432331771045, "learning_rate": 4.2518698429548435e-08, "loss": 0.031, "step": 19381 }, { "epoch": 4.410011376564277, "grad_norm": 1.2156552025132186, "learning_rate": 4.248631138165024e-08, "loss": 0.005, "step": 19382 }, { "epoch": 4.41023890784983, "grad_norm": 0.5884909692588874, "learning_rate": 4.245393623926508e-08, "loss": 0.0021, "step": 19383 }, { "epoch": 4.410466439135381, "grad_norm": 0.7710805003629164, "learning_rate": 4.242157300305451e-08, "loss": 0.0251, "step": 19384 }, { "epoch": 4.410693970420933, "grad_norm": 1.6473319698337543, "learning_rate": 4.238922167368015e-08, "loss": 0.1063, "step": 19385 }, { "epoch": 4.4109215017064844, "grad_norm": 2.8705035907708996, "learning_rate": 4.2356882251803e-08, "loss": 0.0369, "step": 19386 }, { "epoch": 4.411149032992037, "grad_norm": 2.2246890027605937, "learning_rate": 4.2324554738084195e-08, "loss": 0.0083, "step": 19387 }, { "epoch": 4.411376564277588, "grad_norm": 1.2084297596391096, "learning_rate": 4.229223913318426e-08, "loss": 0.0555, "step": 19388 }, { "epoch": 4.41160409556314, "grad_norm": 0.7988443185444264, "learning_rate": 4.2259935437763824e-08, "loss": 0.0321, "step": 19389 }, { "epoch": 4.4118316268486915, "grad_norm": 1.730183955210563, "learning_rate": 4.222764365248315e-08, "loss": 0.0308, "step": 19390 }, { "epoch": 4.412059158134244, "grad_norm": 1.3368691975328617, "learning_rate": 4.219536377800204e-08, "loss": 0.062, "step": 19391 }, { "epoch": 4.412286689419795, "grad_norm": 1.8545209491499044, "learning_rate": 4.216309581498046e-08, "loss": 0.0364, "step": 19392 }, { "epoch": 4.412514220705347, "grad_norm": 1.5558136454905638, "learning_rate": 4.213083976407767e-08, "loss": 0.0616, "step": 19393 }, { "epoch": 4.4127417519908985, "grad_norm": 2.2984232875839163, "learning_rate": 4.2098595625953145e-08, "loss": 0.0261, "step": 19394 }, { "epoch": 4.412969283276451, "grad_norm": 2.245006505383307, "learning_rate": 4.206636340126566e-08, "loss": 0.0641, "step": 19395 }, { "epoch": 4.413196814562002, "grad_norm": 1.4384916510866121, "learning_rate": 4.2034143090674136e-08, "loss": 0.0151, "step": 19396 }, { "epoch": 4.413424345847554, "grad_norm": 1.5937189656156647, "learning_rate": 4.200193469483714e-08, "loss": 0.014, "step": 19397 }, { "epoch": 4.4136518771331055, "grad_norm": 1.3666737068971915, "learning_rate": 4.196973821441283e-08, "loss": 0.0168, "step": 19398 }, { "epoch": 4.413879408418658, "grad_norm": 1.3339216956970816, "learning_rate": 4.193755365005943e-08, "loss": 0.0106, "step": 19399 }, { "epoch": 4.414106939704209, "grad_norm": 0.644487571810274, "learning_rate": 4.190538100243446e-08, "loss": 0.0026, "step": 19400 }, { "epoch": 4.414334470989761, "grad_norm": 2.3935459854919428, "learning_rate": 4.187322027219574e-08, "loss": 0.0631, "step": 19401 }, { "epoch": 4.4145620022753125, "grad_norm": 1.1710164408903503, "learning_rate": 4.184107146000031e-08, "loss": 0.041, "step": 19402 }, { "epoch": 4.414789533560865, "grad_norm": 1.2152332978849703, "learning_rate": 4.180893456650542e-08, "loss": 0.0141, "step": 19403 }, { "epoch": 4.415017064846416, "grad_norm": 1.1619904462781792, "learning_rate": 4.1776809592367905e-08, "loss": 0.0043, "step": 19404 }, { "epoch": 4.415244596131968, "grad_norm": 1.3246799539449394, "learning_rate": 4.1744696538244185e-08, "loss": 0.0125, "step": 19405 }, { "epoch": 4.4154721274175195, "grad_norm": 1.2829913153654415, "learning_rate": 4.171259540479082e-08, "loss": 0.0766, "step": 19406 }, { "epoch": 4.415699658703072, "grad_norm": 1.0838241543314515, "learning_rate": 4.1680506192663675e-08, "loss": 0.0155, "step": 19407 }, { "epoch": 4.415927189988624, "grad_norm": 1.0198785085959277, "learning_rate": 4.164842890251861e-08, "loss": 0.0747, "step": 19408 }, { "epoch": 4.416154721274175, "grad_norm": 0.7121659782363262, "learning_rate": 4.161636353501149e-08, "loss": 0.0371, "step": 19409 }, { "epoch": 4.4163822525597265, "grad_norm": 1.0585873574639393, "learning_rate": 4.158431009079734e-08, "loss": 0.0436, "step": 19410 }, { "epoch": 4.416609783845279, "grad_norm": 1.0034276289520527, "learning_rate": 4.155226857053149e-08, "loss": 0.0142, "step": 19411 }, { "epoch": 4.416837315130831, "grad_norm": 0.5980500785688249, "learning_rate": 4.1520238974868675e-08, "loss": 0.0024, "step": 19412 }, { "epoch": 4.417064846416382, "grad_norm": 0.9520926534934255, "learning_rate": 4.148822130446366e-08, "loss": 0.0134, "step": 19413 }, { "epoch": 4.417292377701934, "grad_norm": 1.2757388283441382, "learning_rate": 4.145621555997063e-08, "loss": 0.0889, "step": 19414 }, { "epoch": 4.417519908987486, "grad_norm": 1.2149389522686438, "learning_rate": 4.142422174204387e-08, "loss": 0.0066, "step": 19415 }, { "epoch": 4.417747440273038, "grad_norm": 0.7253582845502327, "learning_rate": 4.139223985133736e-08, "loss": 0.0155, "step": 19416 }, { "epoch": 4.417974971558589, "grad_norm": 1.3165503594191048, "learning_rate": 4.1360269888504545e-08, "loss": 0.0814, "step": 19417 }, { "epoch": 4.418202502844141, "grad_norm": 1.2568522026005569, "learning_rate": 4.1328311854198986e-08, "loss": 0.0054, "step": 19418 }, { "epoch": 4.418430034129693, "grad_norm": 0.7167881707291333, "learning_rate": 4.129636574907371e-08, "loss": 0.009, "step": 19419 }, { "epoch": 4.418657565415245, "grad_norm": 0.6874516581233828, "learning_rate": 4.126443157378188e-08, "loss": 0.0194, "step": 19420 }, { "epoch": 4.418885096700796, "grad_norm": 1.2927385965582032, "learning_rate": 4.1232509328975806e-08, "loss": 0.0472, "step": 19421 }, { "epoch": 4.419112627986348, "grad_norm": 0.8991450148546919, "learning_rate": 4.120059901530818e-08, "loss": 0.0346, "step": 19422 }, { "epoch": 4.4193401592719, "grad_norm": 0.7064540602105278, "learning_rate": 4.116870063343124e-08, "loss": 0.0054, "step": 19423 }, { "epoch": 4.419567690557452, "grad_norm": 1.6985773873620769, "learning_rate": 4.1136814183996705e-08, "loss": 0.0813, "step": 19424 }, { "epoch": 4.419795221843003, "grad_norm": 1.2161798362933176, "learning_rate": 4.110493966765647e-08, "loss": 0.0689, "step": 19425 }, { "epoch": 4.420022753128555, "grad_norm": 0.9167631644023445, "learning_rate": 4.107307708506182e-08, "loss": 0.0479, "step": 19426 }, { "epoch": 4.420250284414107, "grad_norm": 1.4953004498637574, "learning_rate": 4.104122643686419e-08, "loss": 0.0013, "step": 19427 }, { "epoch": 4.420477815699659, "grad_norm": 1.1094274445096322, "learning_rate": 4.1009387723714295e-08, "loss": 0.0576, "step": 19428 }, { "epoch": 4.42070534698521, "grad_norm": 1.1552172955453628, "learning_rate": 4.097756094626301e-08, "loss": 0.0223, "step": 19429 }, { "epoch": 4.420932878270762, "grad_norm": 1.4744542875743012, "learning_rate": 4.094574610516086e-08, "loss": 0.1021, "step": 19430 }, { "epoch": 4.421160409556314, "grad_norm": 1.2572126619993882, "learning_rate": 4.0913943201057944e-08, "loss": 0.0594, "step": 19431 }, { "epoch": 4.421387940841866, "grad_norm": 1.5103209034753362, "learning_rate": 4.088215223460444e-08, "loss": 0.0566, "step": 19432 }, { "epoch": 4.421615472127417, "grad_norm": 1.4134739334579864, "learning_rate": 4.085037320644997e-08, "loss": 0.0693, "step": 19433 }, { "epoch": 4.421843003412969, "grad_norm": 1.0889636531466493, "learning_rate": 4.0818606117243935e-08, "loss": 0.0624, "step": 19434 }, { "epoch": 4.422070534698521, "grad_norm": 0.8304727245533582, "learning_rate": 4.078685096763568e-08, "loss": 0.052, "step": 19435 }, { "epoch": 4.422298065984073, "grad_norm": 8.810037326681655, "learning_rate": 4.075510775827428e-08, "loss": 0.0435, "step": 19436 }, { "epoch": 4.422525597269624, "grad_norm": 2.230648863835962, "learning_rate": 4.072337648980865e-08, "loss": 0.0257, "step": 19437 }, { "epoch": 4.422753128555176, "grad_norm": 2.6697059659880824, "learning_rate": 4.069165716288695e-08, "loss": 0.0758, "step": 19438 }, { "epoch": 4.422980659840728, "grad_norm": 2.7077992253157634, "learning_rate": 4.0659949778157835e-08, "loss": 0.0629, "step": 19439 }, { "epoch": 4.42320819112628, "grad_norm": 1.550635997250476, "learning_rate": 4.062825433626919e-08, "loss": 0.0798, "step": 19440 }, { "epoch": 4.423435722411831, "grad_norm": 0.3554535078720894, "learning_rate": 4.059657083786868e-08, "loss": 0.0022, "step": 19441 }, { "epoch": 4.4236632536973834, "grad_norm": 1.824119345973847, "learning_rate": 4.0564899283603926e-08, "loss": 0.096, "step": 19442 }, { "epoch": 4.423890784982935, "grad_norm": 0.4304808274165839, "learning_rate": 4.0533239674122384e-08, "loss": 0.0044, "step": 19443 }, { "epoch": 4.424118316268487, "grad_norm": 1.2075692339080073, "learning_rate": 4.0501592010071046e-08, "loss": 0.0081, "step": 19444 }, { "epoch": 4.424345847554038, "grad_norm": 1.0877320123393452, "learning_rate": 4.0469956292096616e-08, "loss": 0.0249, "step": 19445 }, { "epoch": 4.4245733788395905, "grad_norm": 1.8260020323498811, "learning_rate": 4.0438332520845934e-08, "loss": 0.0455, "step": 19446 }, { "epoch": 4.424800910125143, "grad_norm": 2.5635210028650044, "learning_rate": 4.040672069696508e-08, "loss": 0.1083, "step": 19447 }, { "epoch": 4.425028441410694, "grad_norm": 1.791028012034139, "learning_rate": 4.0375120821100135e-08, "loss": 0.0854, "step": 19448 }, { "epoch": 4.425255972696245, "grad_norm": 1.5178730027067324, "learning_rate": 4.034353289389705e-08, "loss": 0.0058, "step": 19449 }, { "epoch": 4.4254835039817975, "grad_norm": 1.8113621594634595, "learning_rate": 4.03119569160014e-08, "loss": 0.0057, "step": 19450 }, { "epoch": 4.42571103526735, "grad_norm": 1.8805017922924878, "learning_rate": 4.028039288805866e-08, "loss": 0.0572, "step": 19451 }, { "epoch": 4.425938566552901, "grad_norm": 0.6968703072316782, "learning_rate": 4.024884081071378e-08, "loss": 0.0018, "step": 19452 }, { "epoch": 4.426166097838453, "grad_norm": 0.611523188562787, "learning_rate": 4.021730068461162e-08, "loss": 0.0029, "step": 19453 }, { "epoch": 4.4263936291240045, "grad_norm": 0.531092805821144, "learning_rate": 4.018577251039699e-08, "loss": 0.0035, "step": 19454 }, { "epoch": 4.426621160409557, "grad_norm": 1.0941861553900196, "learning_rate": 4.015425628871396e-08, "loss": 0.0399, "step": 19455 }, { "epoch": 4.426848691695108, "grad_norm": 1.0565526599951318, "learning_rate": 4.012275202020688e-08, "loss": 0.006, "step": 19456 }, { "epoch": 4.42707622298066, "grad_norm": 1.5159404579256497, "learning_rate": 4.009125970551954e-08, "loss": 0.0034, "step": 19457 }, { "epoch": 4.4273037542662115, "grad_norm": 0.32925087838558664, "learning_rate": 4.00597793452958e-08, "loss": 0.0006, "step": 19458 }, { "epoch": 4.427531285551764, "grad_norm": 2.2456244504308103, "learning_rate": 4.0028310940178896e-08, "loss": 0.0052, "step": 19459 }, { "epoch": 4.427758816837315, "grad_norm": 1.7805356581721712, "learning_rate": 3.999685449081185e-08, "loss": 0.04, "step": 19460 }, { "epoch": 4.427986348122867, "grad_norm": 0.6820818917219136, "learning_rate": 3.9965409997837835e-08, "loss": 0.0042, "step": 19461 }, { "epoch": 4.4282138794084185, "grad_norm": 0.8658066372862908, "learning_rate": 3.993397746189932e-08, "loss": 0.0078, "step": 19462 }, { "epoch": 4.428441410693971, "grad_norm": 0.7964810416624007, "learning_rate": 3.990255688363878e-08, "loss": 0.0111, "step": 19463 }, { "epoch": 4.428668941979522, "grad_norm": 1.5129526017123183, "learning_rate": 3.987114826369848e-08, "loss": 0.0226, "step": 19464 }, { "epoch": 4.428896473265074, "grad_norm": 1.1839298207446294, "learning_rate": 3.983975160272033e-08, "loss": 0.0173, "step": 19465 }, { "epoch": 4.4291240045506255, "grad_norm": 1.6983282981596277, "learning_rate": 3.980836690134604e-08, "loss": 0.105, "step": 19466 }, { "epoch": 4.429351535836178, "grad_norm": 1.1114332798849096, "learning_rate": 3.977699416021684e-08, "loss": 0.0182, "step": 19467 }, { "epoch": 4.429579067121729, "grad_norm": 0.955538850716465, "learning_rate": 3.9745633379974225e-08, "loss": 0.0525, "step": 19468 }, { "epoch": 4.429806598407281, "grad_norm": 1.3702832471031667, "learning_rate": 3.9714284561258866e-08, "loss": 0.0074, "step": 19469 }, { "epoch": 4.4300341296928325, "grad_norm": 1.5446794614871666, "learning_rate": 3.9682947704711777e-08, "loss": 0.0369, "step": 19470 }, { "epoch": 4.430261660978385, "grad_norm": 4.065300530317766, "learning_rate": 3.9651622810973274e-08, "loss": 0.0144, "step": 19471 }, { "epoch": 4.430489192263936, "grad_norm": 0.9884670694237583, "learning_rate": 3.962030988068355e-08, "loss": 0.0584, "step": 19472 }, { "epoch": 4.430716723549488, "grad_norm": 0.8890792415852375, "learning_rate": 3.958900891448264e-08, "loss": 0.0263, "step": 19473 }, { "epoch": 4.4309442548350395, "grad_norm": 0.6176599855481212, "learning_rate": 3.955771991301018e-08, "loss": 0.0057, "step": 19474 }, { "epoch": 4.431171786120592, "grad_norm": 1.4286739282231848, "learning_rate": 3.952644287690578e-08, "loss": 0.0234, "step": 19475 }, { "epoch": 4.431399317406143, "grad_norm": 1.7997966422608591, "learning_rate": 3.94951778068086e-08, "loss": 0.0537, "step": 19476 }, { "epoch": 4.431626848691695, "grad_norm": 1.7184736491310235, "learning_rate": 3.9463924703357774e-08, "loss": 0.0107, "step": 19477 }, { "epoch": 4.4318543799772465, "grad_norm": 1.2001132998468447, "learning_rate": 3.943268356719203e-08, "loss": 0.0137, "step": 19478 }, { "epoch": 4.432081911262799, "grad_norm": 1.4734634835915146, "learning_rate": 3.940145439894967e-08, "loss": 0.1199, "step": 19479 }, { "epoch": 4.43230944254835, "grad_norm": 1.330663447104139, "learning_rate": 3.937023719926922e-08, "loss": 0.0705, "step": 19480 }, { "epoch": 4.432536973833902, "grad_norm": 0.7120472704572802, "learning_rate": 3.933903196878849e-08, "loss": 0.0044, "step": 19481 }, { "epoch": 4.4327645051194535, "grad_norm": 0.5300503129607229, "learning_rate": 3.9307838708145324e-08, "loss": 0.0023, "step": 19482 }, { "epoch": 4.432992036405006, "grad_norm": 1.3587747819057987, "learning_rate": 3.9276657417977316e-08, "loss": 0.0734, "step": 19483 }, { "epoch": 4.433219567690557, "grad_norm": 1.201920746635615, "learning_rate": 3.924548809892188e-08, "loss": 0.0057, "step": 19484 }, { "epoch": 4.433447098976109, "grad_norm": 1.1207080542998162, "learning_rate": 3.921433075161587e-08, "loss": 0.075, "step": 19485 }, { "epoch": 4.433674630261661, "grad_norm": 1.6556852279299872, "learning_rate": 3.9183185376696e-08, "loss": 0.0271, "step": 19486 }, { "epoch": 4.433902161547213, "grad_norm": 1.4160985424614405, "learning_rate": 3.915205197479906e-08, "loss": 0.1014, "step": 19487 }, { "epoch": 4.434129692832764, "grad_norm": 1.6869005626837779, "learning_rate": 3.9120930546561185e-08, "loss": 0.0354, "step": 19488 }, { "epoch": 4.434357224118316, "grad_norm": 0.6032680489920477, "learning_rate": 3.908982109261848e-08, "loss": 0.0052, "step": 19489 }, { "epoch": 4.434584755403868, "grad_norm": 0.7095594348493831, "learning_rate": 3.905872361360683e-08, "loss": 0.0086, "step": 19490 }, { "epoch": 4.43481228668942, "grad_norm": 1.0213901303614032, "learning_rate": 3.9027638110161744e-08, "loss": 0.0128, "step": 19491 }, { "epoch": 4.435039817974972, "grad_norm": 1.3002787131046976, "learning_rate": 3.8996564582918646e-08, "loss": 0.0628, "step": 19492 }, { "epoch": 4.435267349260523, "grad_norm": 1.2143054690176973, "learning_rate": 3.8965503032512496e-08, "loss": 0.08, "step": 19493 }, { "epoch": 4.435494880546075, "grad_norm": 1.8240844188521852, "learning_rate": 3.8934453459578205e-08, "loss": 0.0627, "step": 19494 }, { "epoch": 4.435722411831627, "grad_norm": 0.8099827124208325, "learning_rate": 3.890341586475034e-08, "loss": 0.0861, "step": 19495 }, { "epoch": 4.435949943117179, "grad_norm": 0.9951767971543015, "learning_rate": 3.8872390248663255e-08, "loss": 0.0357, "step": 19496 }, { "epoch": 4.43617747440273, "grad_norm": 0.4486901205796422, "learning_rate": 3.884137661195116e-08, "loss": 0.002, "step": 19497 }, { "epoch": 4.436405005688282, "grad_norm": 0.9710757274640088, "learning_rate": 3.8810374955247714e-08, "loss": 0.0077, "step": 19498 }, { "epoch": 4.436632536973834, "grad_norm": 0.8015929709105323, "learning_rate": 3.877938527918672e-08, "loss": 0.0103, "step": 19499 }, { "epoch": 4.436860068259386, "grad_norm": 1.2759179965026488, "learning_rate": 3.874840758440142e-08, "loss": 0.0149, "step": 19500 }, { "epoch": 4.437087599544937, "grad_norm": 1.5114445478644913, "learning_rate": 3.8717441871525123e-08, "loss": 0.0299, "step": 19501 }, { "epoch": 4.4373151308304895, "grad_norm": 2.210989532556335, "learning_rate": 3.8686488141190456e-08, "loss": 0.0922, "step": 19502 }, { "epoch": 4.437542662116041, "grad_norm": 1.5156378652088378, "learning_rate": 3.865554639403017e-08, "loss": 0.0164, "step": 19503 }, { "epoch": 4.437770193401593, "grad_norm": 1.2426267625925085, "learning_rate": 3.862461663067682e-08, "loss": 0.0174, "step": 19504 }, { "epoch": 4.437997724687144, "grad_norm": 0.6232603841583245, "learning_rate": 3.859369885176233e-08, "loss": 0.006, "step": 19505 }, { "epoch": 4.4382252559726965, "grad_norm": 0.7974491839869757, "learning_rate": 3.8562793057918686e-08, "loss": 0.0038, "step": 19506 }, { "epoch": 4.438452787258248, "grad_norm": 0.46946116908047164, "learning_rate": 3.8531899249777544e-08, "loss": 0.0053, "step": 19507 }, { "epoch": 4.4386803185438, "grad_norm": 2.1663274624106026, "learning_rate": 3.850101742797034e-08, "loss": 0.0239, "step": 19508 }, { "epoch": 4.438907849829351, "grad_norm": 0.5118517407262786, "learning_rate": 3.847014759312817e-08, "loss": 0.0038, "step": 19509 }, { "epoch": 4.4391353811149035, "grad_norm": 0.8024062480426459, "learning_rate": 3.843928974588199e-08, "loss": 0.008, "step": 19510 }, { "epoch": 4.439362912400455, "grad_norm": 1.7685616824616124, "learning_rate": 3.84084438868626e-08, "loss": 0.0278, "step": 19511 }, { "epoch": 4.439590443686007, "grad_norm": 0.527540420560813, "learning_rate": 3.8377610016700204e-08, "loss": 0.0016, "step": 19512 }, { "epoch": 4.439817974971558, "grad_norm": 3.3873817619666293, "learning_rate": 3.8346788136025204e-08, "loss": 0.0085, "step": 19513 }, { "epoch": 4.4400455062571105, "grad_norm": 1.0244801876259686, "learning_rate": 3.831597824546736e-08, "loss": 0.0424, "step": 19514 }, { "epoch": 4.440273037542662, "grad_norm": 1.1966908127433653, "learning_rate": 3.828518034565659e-08, "loss": 0.0075, "step": 19515 }, { "epoch": 4.440500568828214, "grad_norm": 1.246507850175692, "learning_rate": 3.825439443722205e-08, "loss": 0.0252, "step": 19516 }, { "epoch": 4.440728100113765, "grad_norm": 1.5628563118759833, "learning_rate": 3.822362052079316e-08, "loss": 0.0212, "step": 19517 }, { "epoch": 4.4409556313993175, "grad_norm": 1.2625476142967142, "learning_rate": 3.819285859699894e-08, "loss": 0.0201, "step": 19518 }, { "epoch": 4.441183162684869, "grad_norm": 1.05092460931517, "learning_rate": 3.81621086664679e-08, "loss": 0.017, "step": 19519 }, { "epoch": 4.441410693970421, "grad_norm": 2.1029042381998475, "learning_rate": 3.813137072982871e-08, "loss": 0.0367, "step": 19520 }, { "epoch": 4.441638225255972, "grad_norm": 0.45349901884912386, "learning_rate": 3.810064478770942e-08, "loss": 0.0023, "step": 19521 }, { "epoch": 4.4418657565415245, "grad_norm": 0.6299751856124611, "learning_rate": 3.806993084073819e-08, "loss": 0.0032, "step": 19522 }, { "epoch": 4.442093287827076, "grad_norm": 1.0640873737733705, "learning_rate": 3.803922888954252e-08, "loss": 0.0093, "step": 19523 }, { "epoch": 4.442320819112628, "grad_norm": 1.1336218760462344, "learning_rate": 3.8008538934750096e-08, "loss": 0.0071, "step": 19524 }, { "epoch": 4.44254835039818, "grad_norm": 1.3344618603018767, "learning_rate": 3.797786097698819e-08, "loss": 0.0134, "step": 19525 }, { "epoch": 4.4427758816837315, "grad_norm": 1.5130107089674714, "learning_rate": 3.79471950168836e-08, "loss": 0.0172, "step": 19526 }, { "epoch": 4.443003412969283, "grad_norm": 0.6869199719073823, "learning_rate": 3.7916541055063386e-08, "loss": 0.0127, "step": 19527 }, { "epoch": 4.443230944254835, "grad_norm": 1.0208579123859995, "learning_rate": 3.788589909215379e-08, "loss": 0.0307, "step": 19528 }, { "epoch": 4.443458475540387, "grad_norm": 1.8880013188579687, "learning_rate": 3.785526912878104e-08, "loss": 0.0478, "step": 19529 }, { "epoch": 4.4436860068259385, "grad_norm": 1.6521549344190154, "learning_rate": 3.782465116557145e-08, "loss": 0.0895, "step": 19530 }, { "epoch": 4.443913538111491, "grad_norm": 0.5410491457972578, "learning_rate": 3.779404520315048e-08, "loss": 0.0044, "step": 19531 }, { "epoch": 4.444141069397042, "grad_norm": 1.4958105392543424, "learning_rate": 3.776345124214396e-08, "loss": 0.0687, "step": 19532 }, { "epoch": 4.444368600682594, "grad_norm": 0.9536267895593419, "learning_rate": 3.7732869283176944e-08, "loss": 0.0235, "step": 19533 }, { "epoch": 4.4445961319681455, "grad_norm": 1.4421319581241965, "learning_rate": 3.770229932687462e-08, "loss": 0.0171, "step": 19534 }, { "epoch": 4.444823663253698, "grad_norm": 1.400834529111438, "learning_rate": 3.767174137386164e-08, "loss": 0.0451, "step": 19535 }, { "epoch": 4.445051194539249, "grad_norm": 1.0155128487139025, "learning_rate": 3.7641195424762617e-08, "loss": 0.0098, "step": 19536 }, { "epoch": 4.445278725824801, "grad_norm": 1.3214016515619267, "learning_rate": 3.761066148020194e-08, "loss": 0.0215, "step": 19537 }, { "epoch": 4.4455062571103525, "grad_norm": 1.427004229815253, "learning_rate": 3.7580139540803476e-08, "loss": 0.0315, "step": 19538 }, { "epoch": 4.445733788395905, "grad_norm": 2.9555261343252486, "learning_rate": 3.75496296071913e-08, "loss": 0.0205, "step": 19539 }, { "epoch": 4.445961319681456, "grad_norm": 1.8851146981636155, "learning_rate": 3.751913167998882e-08, "loss": 0.0512, "step": 19540 }, { "epoch": 4.446188850967008, "grad_norm": 0.9618671001790249, "learning_rate": 3.748864575981927e-08, "loss": 0.0093, "step": 19541 }, { "epoch": 4.4464163822525595, "grad_norm": 1.84340989641431, "learning_rate": 3.7458171847305767e-08, "loss": 0.0648, "step": 19542 }, { "epoch": 4.446643913538112, "grad_norm": 1.6548704504889027, "learning_rate": 3.7427709943071296e-08, "loss": 0.0546, "step": 19543 }, { "epoch": 4.446871444823663, "grad_norm": 1.784329226166112, "learning_rate": 3.7397260047738404e-08, "loss": 0.0258, "step": 19544 }, { "epoch": 4.447098976109215, "grad_norm": 2.745485672376895, "learning_rate": 3.736682216192923e-08, "loss": 0.0159, "step": 19545 }, { "epoch": 4.4473265073947665, "grad_norm": 1.2159632024191696, "learning_rate": 3.733639628626613e-08, "loss": 0.0221, "step": 19546 }, { "epoch": 4.447554038680319, "grad_norm": 1.0611377245186024, "learning_rate": 3.730598242137083e-08, "loss": 0.0364, "step": 19547 }, { "epoch": 4.44778156996587, "grad_norm": 1.1863354319869068, "learning_rate": 3.727558056786491e-08, "loss": 0.0163, "step": 19548 }, { "epoch": 4.448009101251422, "grad_norm": 1.353464027678762, "learning_rate": 3.7245190726369684e-08, "loss": 0.1029, "step": 19549 }, { "epoch": 4.4482366325369735, "grad_norm": 2.002259354235372, "learning_rate": 3.721481289750639e-08, "loss": 0.0635, "step": 19550 }, { "epoch": 4.448464163822526, "grad_norm": 1.175918068421768, "learning_rate": 3.718444708189585e-08, "loss": 0.0356, "step": 19551 }, { "epoch": 4.448691695108077, "grad_norm": 1.090186664170797, "learning_rate": 3.715409328015868e-08, "loss": 0.0102, "step": 19552 }, { "epoch": 4.448919226393629, "grad_norm": 2.021785937369502, "learning_rate": 3.712375149291528e-08, "loss": 0.0132, "step": 19553 }, { "epoch": 4.4491467576791806, "grad_norm": 1.1951477498447178, "learning_rate": 3.709342172078578e-08, "loss": 0.0068, "step": 19554 }, { "epoch": 4.449374288964733, "grad_norm": 1.2154893974845409, "learning_rate": 3.706310396438997e-08, "loss": 0.0158, "step": 19555 }, { "epoch": 4.449601820250284, "grad_norm": 1.555580942923653, "learning_rate": 3.703279822434756e-08, "loss": 0.0078, "step": 19556 }, { "epoch": 4.449829351535836, "grad_norm": 0.9851999453530059, "learning_rate": 3.7002504501277914e-08, "loss": 0.0209, "step": 19557 }, { "epoch": 4.450056882821388, "grad_norm": 1.6185965059907588, "learning_rate": 3.697222279580033e-08, "loss": 0.054, "step": 19558 }, { "epoch": 4.45028441410694, "grad_norm": 0.5734507484565419, "learning_rate": 3.6941953108533543e-08, "loss": 0.0019, "step": 19559 }, { "epoch": 4.450511945392491, "grad_norm": 1.0228083984928795, "learning_rate": 3.6911695440096236e-08, "loss": 0.004, "step": 19560 }, { "epoch": 4.450739476678043, "grad_norm": 0.49309934711395625, "learning_rate": 3.688144979110686e-08, "loss": 0.0024, "step": 19561 }, { "epoch": 4.450967007963595, "grad_norm": 0.44178357280603797, "learning_rate": 3.685121616218347e-08, "loss": 0.0017, "step": 19562 }, { "epoch": 4.451194539249147, "grad_norm": 1.8591829258401769, "learning_rate": 3.682099455394411e-08, "loss": 0.0226, "step": 19563 }, { "epoch": 4.451422070534699, "grad_norm": 0.3817360309435755, "learning_rate": 3.6790784967006415e-08, "loss": 0.002, "step": 19564 }, { "epoch": 4.45164960182025, "grad_norm": 1.7148962249592083, "learning_rate": 3.6760587401987945e-08, "loss": 0.0414, "step": 19565 }, { "epoch": 4.451877133105802, "grad_norm": 0.8033863060690347, "learning_rate": 3.673040185950577e-08, "loss": 0.0075, "step": 19566 }, { "epoch": 4.452104664391354, "grad_norm": 1.3408729100666437, "learning_rate": 3.6700228340176694e-08, "loss": 0.02, "step": 19567 }, { "epoch": 4.452332195676906, "grad_norm": 1.8655407392268033, "learning_rate": 3.667006684461759e-08, "loss": 0.0152, "step": 19568 }, { "epoch": 4.452559726962457, "grad_norm": 0.8644608458356053, "learning_rate": 3.6639917373444755e-08, "loss": 0.0336, "step": 19569 }, { "epoch": 4.4527872582480095, "grad_norm": 0.4827775840329491, "learning_rate": 3.6609779927274516e-08, "loss": 0.0022, "step": 19570 }, { "epoch": 4.453014789533561, "grad_norm": 1.2505567251146712, "learning_rate": 3.6579654506722766e-08, "loss": 0.05, "step": 19571 }, { "epoch": 4.453242320819113, "grad_norm": 1.1303836467812014, "learning_rate": 3.654954111240533e-08, "loss": 0.0072, "step": 19572 }, { "epoch": 4.453469852104664, "grad_norm": 2.138026030073725, "learning_rate": 3.651943974493761e-08, "loss": 0.0269, "step": 19573 }, { "epoch": 4.4536973833902165, "grad_norm": 1.5762913195704924, "learning_rate": 3.648935040493469e-08, "loss": 0.0372, "step": 19574 }, { "epoch": 4.453924914675768, "grad_norm": 1.086065008777411, "learning_rate": 3.645927309301168e-08, "loss": 0.0152, "step": 19575 }, { "epoch": 4.45415244596132, "grad_norm": 0.4704184547395173, "learning_rate": 3.642920780978325e-08, "loss": 0.0037, "step": 19576 }, { "epoch": 4.454379977246871, "grad_norm": 0.5821254405306349, "learning_rate": 3.639915455586382e-08, "loss": 0.0025, "step": 19577 }, { "epoch": 4.4546075085324235, "grad_norm": 0.2877203638903508, "learning_rate": 3.636911333186784e-08, "loss": 0.0009, "step": 19578 }, { "epoch": 4.454835039817975, "grad_norm": 1.06587905932048, "learning_rate": 3.633908413840905e-08, "loss": 0.0194, "step": 19579 }, { "epoch": 4.455062571103527, "grad_norm": 0.778014459036541, "learning_rate": 3.63090669761014e-08, "loss": 0.012, "step": 19580 }, { "epoch": 4.455290102389078, "grad_norm": 1.451426716334531, "learning_rate": 3.627906184555822e-08, "loss": 0.0133, "step": 19581 }, { "epoch": 4.4555176336746305, "grad_norm": 1.7654170506759967, "learning_rate": 3.624906874739285e-08, "loss": 0.0643, "step": 19582 }, { "epoch": 4.455745164960182, "grad_norm": 0.3696786490262, "learning_rate": 3.6219087682218196e-08, "loss": 0.0016, "step": 19583 }, { "epoch": 4.455972696245734, "grad_norm": 1.460801542859516, "learning_rate": 3.61891186506471e-08, "loss": 0.0434, "step": 19584 }, { "epoch": 4.456200227531285, "grad_norm": 0.36782503283012263, "learning_rate": 3.6159161653292195e-08, "loss": 0.0015, "step": 19585 }, { "epoch": 4.4564277588168375, "grad_norm": 0.7477303740110252, "learning_rate": 3.6129216690765495e-08, "loss": 0.0133, "step": 19586 }, { "epoch": 4.456655290102389, "grad_norm": 1.1632420727672677, "learning_rate": 3.609928376367921e-08, "loss": 0.0389, "step": 19587 }, { "epoch": 4.456882821387941, "grad_norm": 1.592758416928798, "learning_rate": 3.6069362872644954e-08, "loss": 0.0596, "step": 19588 }, { "epoch": 4.457110352673492, "grad_norm": 1.7247822145923055, "learning_rate": 3.603945401827444e-08, "loss": 0.0101, "step": 19589 }, { "epoch": 4.4573378839590445, "grad_norm": 1.500733512888897, "learning_rate": 3.600955720117871e-08, "loss": 0.0215, "step": 19590 }, { "epoch": 4.457565415244596, "grad_norm": 2.926696302838067, "learning_rate": 3.597967242196909e-08, "loss": 0.0615, "step": 19591 }, { "epoch": 4.457792946530148, "grad_norm": 0.8802124651670354, "learning_rate": 3.59497996812562e-08, "loss": 0.0035, "step": 19592 }, { "epoch": 4.458020477815699, "grad_norm": 1.3275887059374223, "learning_rate": 3.5919938979650586e-08, "loss": 0.0214, "step": 19593 }, { "epoch": 4.4582480091012515, "grad_norm": 1.0979906316604318, "learning_rate": 3.58900903177626e-08, "loss": 0.0104, "step": 19594 }, { "epoch": 4.458475540386803, "grad_norm": 1.692222462079095, "learning_rate": 3.586025369620223e-08, "loss": 0.0383, "step": 19595 }, { "epoch": 4.458703071672355, "grad_norm": 1.8994618588750947, "learning_rate": 3.5830429115579285e-08, "loss": 0.0144, "step": 19596 }, { "epoch": 4.458930602957906, "grad_norm": 2.8561797651492875, "learning_rate": 3.580061657650347e-08, "loss": 0.0085, "step": 19597 }, { "epoch": 4.4591581342434585, "grad_norm": 1.6618308781713047, "learning_rate": 3.577081607958381e-08, "loss": 0.0695, "step": 19598 }, { "epoch": 4.45938566552901, "grad_norm": 0.706283691020957, "learning_rate": 3.574102762542976e-08, "loss": 0.0091, "step": 19599 }, { "epoch": 4.459613196814562, "grad_norm": 0.7233135267550113, "learning_rate": 3.571125121464979e-08, "loss": 0.0042, "step": 19600 }, { "epoch": 4.459840728100113, "grad_norm": 1.1413144666878063, "learning_rate": 3.5681486847852633e-08, "loss": 0.0444, "step": 19601 }, { "epoch": 4.4600682593856655, "grad_norm": 1.1879573636806224, "learning_rate": 3.565173452564657e-08, "loss": 0.0852, "step": 19602 }, { "epoch": 4.460295790671218, "grad_norm": 0.9876188634259649, "learning_rate": 3.562199424863973e-08, "loss": 0.0081, "step": 19603 }, { "epoch": 4.460523321956769, "grad_norm": 1.484105176900881, "learning_rate": 3.559226601744001e-08, "loss": 0.1082, "step": 19604 }, { "epoch": 4.460750853242321, "grad_norm": 2.0014457496404106, "learning_rate": 3.556254983265485e-08, "loss": 0.0137, "step": 19605 }, { "epoch": 4.4609783845278725, "grad_norm": 1.4805089402795542, "learning_rate": 3.553284569489168e-08, "loss": 0.0629, "step": 19606 }, { "epoch": 4.461205915813425, "grad_norm": 1.4471701961335326, "learning_rate": 3.550315360475759e-08, "loss": 0.0259, "step": 19607 }, { "epoch": 4.461433447098976, "grad_norm": 1.0285961789060787, "learning_rate": 3.5473473562859446e-08, "loss": 0.0058, "step": 19608 }, { "epoch": 4.461660978384528, "grad_norm": 1.0355756191695413, "learning_rate": 3.5443805569803785e-08, "loss": 0.015, "step": 19609 }, { "epoch": 4.4618885096700796, "grad_norm": 1.330698827383092, "learning_rate": 3.541414962619706e-08, "loss": 0.0453, "step": 19610 }, { "epoch": 4.462116040955632, "grad_norm": 0.7751624795783644, "learning_rate": 3.5384505732645317e-08, "loss": 0.0038, "step": 19611 }, { "epoch": 4.462343572241183, "grad_norm": 0.8158568830764673, "learning_rate": 3.535487388975446e-08, "loss": 0.0067, "step": 19612 }, { "epoch": 4.462571103526735, "grad_norm": 1.8334479380708746, "learning_rate": 3.532525409813012e-08, "loss": 0.0056, "step": 19613 }, { "epoch": 4.462798634812287, "grad_norm": 0.5779531096642149, "learning_rate": 3.5295646358377635e-08, "loss": 0.0038, "step": 19614 }, { "epoch": 4.463026166097839, "grad_norm": 1.1488327501727416, "learning_rate": 3.5266050671102155e-08, "loss": 0.0089, "step": 19615 }, { "epoch": 4.46325369738339, "grad_norm": 1.229793381685128, "learning_rate": 3.5236467036908473e-08, "loss": 0.021, "step": 19616 }, { "epoch": 4.463481228668942, "grad_norm": 0.976756919602182, "learning_rate": 3.520689545640138e-08, "loss": 0.0063, "step": 19617 }, { "epoch": 4.463708759954494, "grad_norm": 1.4055086427556696, "learning_rate": 3.517733593018519e-08, "loss": 0.0066, "step": 19618 }, { "epoch": 4.463936291240046, "grad_norm": 0.8007262798269964, "learning_rate": 3.514778845886399e-08, "loss": 0.0138, "step": 19619 }, { "epoch": 4.464163822525597, "grad_norm": 1.7073773995328094, "learning_rate": 3.511825304304182e-08, "loss": 0.0044, "step": 19620 }, { "epoch": 4.464391353811149, "grad_norm": 0.8011127476988342, "learning_rate": 3.5088729683322165e-08, "loss": 0.0159, "step": 19621 }, { "epoch": 4.464618885096701, "grad_norm": 0.9179549003940465, "learning_rate": 3.5059218380308555e-08, "loss": 0.0695, "step": 19622 }, { "epoch": 4.464846416382253, "grad_norm": 1.1201567090528528, "learning_rate": 3.5029719134604054e-08, "loss": 0.0181, "step": 19623 }, { "epoch": 4.465073947667804, "grad_norm": 0.5162707199821303, "learning_rate": 3.500023194681158e-08, "loss": 0.0013, "step": 19624 }, { "epoch": 4.465301478953356, "grad_norm": 1.2745657016381517, "learning_rate": 3.4970756817533924e-08, "loss": 0.0099, "step": 19625 }, { "epoch": 4.465529010238908, "grad_norm": 1.218948721263299, "learning_rate": 3.4941293747373306e-08, "loss": 0.0104, "step": 19626 }, { "epoch": 4.46575654152446, "grad_norm": 0.6385838480597711, "learning_rate": 3.4911842736932155e-08, "loss": 0.0015, "step": 19627 }, { "epoch": 4.465984072810011, "grad_norm": 1.1154683148218005, "learning_rate": 3.488240378681208e-08, "loss": 0.0155, "step": 19628 }, { "epoch": 4.466211604095563, "grad_norm": 1.969574508023017, "learning_rate": 3.4852976897615035e-08, "loss": 0.0093, "step": 19629 }, { "epoch": 4.466439135381115, "grad_norm": 0.868321450127459, "learning_rate": 3.4823562069942264e-08, "loss": 0.0064, "step": 19630 }, { "epoch": 4.466666666666667, "grad_norm": 1.1053648819354553, "learning_rate": 3.479415930439503e-08, "loss": 0.0401, "step": 19631 }, { "epoch": 4.466894197952218, "grad_norm": 1.3802770197803738, "learning_rate": 3.476476860157438e-08, "loss": 0.0766, "step": 19632 }, { "epoch": 4.46712172923777, "grad_norm": 1.6721163267811456, "learning_rate": 3.47353899620808e-08, "loss": 0.0403, "step": 19633 }, { "epoch": 4.467349260523322, "grad_norm": 1.5262923924145204, "learning_rate": 3.4706023386514856e-08, "loss": 0.0192, "step": 19634 }, { "epoch": 4.467576791808874, "grad_norm": 0.9185399655422162, "learning_rate": 3.467666887547676e-08, "loss": 0.0266, "step": 19635 }, { "epoch": 4.467804323094425, "grad_norm": 1.4211847959786612, "learning_rate": 3.464732642956638e-08, "loss": 0.0269, "step": 19636 }, { "epoch": 4.468031854379977, "grad_norm": 1.3959529589054964, "learning_rate": 3.4617996049383375e-08, "loss": 0.0103, "step": 19637 }, { "epoch": 4.468259385665529, "grad_norm": 1.3803995941944756, "learning_rate": 3.458867773552733e-08, "loss": 0.0572, "step": 19638 }, { "epoch": 4.468486916951081, "grad_norm": 1.9150017892716373, "learning_rate": 3.4559371488597576e-08, "loss": 0.051, "step": 19639 }, { "epoch": 4.468714448236632, "grad_norm": 1.6707905909972136, "learning_rate": 3.4530077309192774e-08, "loss": 0.0414, "step": 19640 }, { "epoch": 4.468941979522184, "grad_norm": 0.7998024785885969, "learning_rate": 3.450079519791191e-08, "loss": 0.0091, "step": 19641 }, { "epoch": 4.4691695108077365, "grad_norm": 0.8478080349674112, "learning_rate": 3.447152515535339e-08, "loss": 0.0328, "step": 19642 }, { "epoch": 4.469397042093288, "grad_norm": 2.205947330894427, "learning_rate": 3.44422671821152e-08, "loss": 0.0366, "step": 19643 }, { "epoch": 4.46962457337884, "grad_norm": 1.61859021358071, "learning_rate": 3.4413021278795616e-08, "loss": 0.0076, "step": 19644 }, { "epoch": 4.469852104664391, "grad_norm": 1.1245065476202458, "learning_rate": 3.4383787445992215e-08, "loss": 0.0129, "step": 19645 }, { "epoch": 4.4700796359499435, "grad_norm": 1.2768170119726572, "learning_rate": 3.435456568430265e-08, "loss": 0.0235, "step": 19646 }, { "epoch": 4.470307167235495, "grad_norm": 1.0428850955941813, "learning_rate": 3.4325355994324005e-08, "loss": 0.0135, "step": 19647 }, { "epoch": 4.470534698521047, "grad_norm": 1.570826837277845, "learning_rate": 3.429615837665323e-08, "loss": 0.0925, "step": 19648 }, { "epoch": 4.470762229806598, "grad_norm": 1.4468551606335904, "learning_rate": 3.426697283188729e-08, "loss": 0.0233, "step": 19649 }, { "epoch": 4.4709897610921505, "grad_norm": 1.0724763135351243, "learning_rate": 3.4237799360622296e-08, "loss": 0.0146, "step": 19650 }, { "epoch": 4.471217292377702, "grad_norm": 0.8171728417464849, "learning_rate": 3.4208637963455e-08, "loss": 0.0093, "step": 19651 }, { "epoch": 4.471444823663254, "grad_norm": 1.6330114788692423, "learning_rate": 3.417948864098103e-08, "loss": 0.0335, "step": 19652 }, { "epoch": 4.471672354948805, "grad_norm": 0.6768988603554968, "learning_rate": 3.415035139379644e-08, "loss": 0.002, "step": 19653 }, { "epoch": 4.4718998862343575, "grad_norm": 2.077236098233952, "learning_rate": 3.4121226222496514e-08, "loss": 0.0343, "step": 19654 }, { "epoch": 4.472127417519909, "grad_norm": 2.0295807614482766, "learning_rate": 3.409211312767648e-08, "loss": 0.0091, "step": 19655 }, { "epoch": 4.472354948805461, "grad_norm": 2.0264011404582725, "learning_rate": 3.406301210993147e-08, "loss": 0.0748, "step": 19656 }, { "epoch": 4.472582480091012, "grad_norm": 1.722448324216399, "learning_rate": 3.4033923169856236e-08, "loss": 0.04, "step": 19657 }, { "epoch": 4.4728100113765645, "grad_norm": 2.1102117547825916, "learning_rate": 3.400484630804543e-08, "loss": 0.0058, "step": 19658 }, { "epoch": 4.473037542662116, "grad_norm": 1.5515660250412397, "learning_rate": 3.3975781525093094e-08, "loss": 0.0692, "step": 19659 }, { "epoch": 4.473265073947668, "grad_norm": 1.1642623002247336, "learning_rate": 3.394672882159347e-08, "loss": 0.0154, "step": 19660 }, { "epoch": 4.473492605233219, "grad_norm": 1.2099084129754414, "learning_rate": 3.3917688198140256e-08, "loss": 0.026, "step": 19661 }, { "epoch": 4.4737201365187715, "grad_norm": 0.596824859002717, "learning_rate": 3.3888659655326866e-08, "loss": 0.0057, "step": 19662 }, { "epoch": 4.473947667804323, "grad_norm": 1.0217202467818642, "learning_rate": 3.385964319374671e-08, "loss": 0.0767, "step": 19663 }, { "epoch": 4.474175199089875, "grad_norm": 1.5646055024606162, "learning_rate": 3.3830638813992856e-08, "loss": 0.1104, "step": 19664 }, { "epoch": 4.474402730375426, "grad_norm": 1.3848854448316557, "learning_rate": 3.380164651665817e-08, "loss": 0.0541, "step": 19665 }, { "epoch": 4.4746302616609785, "grad_norm": 1.5861179060327253, "learning_rate": 3.377266630233502e-08, "loss": 0.0112, "step": 19666 }, { "epoch": 4.47485779294653, "grad_norm": 1.6059557521777226, "learning_rate": 3.374369817161577e-08, "loss": 0.0601, "step": 19667 }, { "epoch": 4.475085324232082, "grad_norm": 1.1736123259678422, "learning_rate": 3.371474212509253e-08, "loss": 0.0173, "step": 19668 }, { "epoch": 4.475312855517633, "grad_norm": 1.1689804810192819, "learning_rate": 3.3685798163357044e-08, "loss": 0.0081, "step": 19669 }, { "epoch": 4.4755403868031856, "grad_norm": 0.4475322208789882, "learning_rate": 3.365686628700085e-08, "loss": 0.001, "step": 19670 }, { "epoch": 4.475767918088737, "grad_norm": 1.1548610948887361, "learning_rate": 3.3627946496615355e-08, "loss": 0.0361, "step": 19671 }, { "epoch": 4.475995449374289, "grad_norm": 1.042263455119008, "learning_rate": 3.359903879279168e-08, "loss": 0.0054, "step": 19672 }, { "epoch": 4.47622298065984, "grad_norm": 1.5802317537024915, "learning_rate": 3.3570143176120535e-08, "loss": 0.0167, "step": 19673 }, { "epoch": 4.476450511945393, "grad_norm": 1.489315949259119, "learning_rate": 3.354125964719242e-08, "loss": 0.0896, "step": 19674 }, { "epoch": 4.476678043230944, "grad_norm": 1.2455835727064954, "learning_rate": 3.351238820659783e-08, "loss": 0.0699, "step": 19675 }, { "epoch": 4.476905574516496, "grad_norm": 2.6787272495427894, "learning_rate": 3.3483528854926645e-08, "loss": 0.043, "step": 19676 }, { "epoch": 4.477133105802047, "grad_norm": 1.1723155631534707, "learning_rate": 3.345468159276888e-08, "loss": 0.0459, "step": 19677 }, { "epoch": 4.4773606370876, "grad_norm": 1.2324559574785499, "learning_rate": 3.342584642071406e-08, "loss": 0.0413, "step": 19678 }, { "epoch": 4.477588168373151, "grad_norm": 0.4003923318948185, "learning_rate": 3.3397023339351576e-08, "loss": 0.0025, "step": 19679 }, { "epoch": 4.477815699658703, "grad_norm": 0.7949313972307952, "learning_rate": 3.336821234927047e-08, "loss": 0.019, "step": 19680 }, { "epoch": 4.478043230944255, "grad_norm": 1.753244272579028, "learning_rate": 3.333941345105951e-08, "loss": 0.0759, "step": 19681 }, { "epoch": 4.478270762229807, "grad_norm": 0.8406431826315818, "learning_rate": 3.331062664530746e-08, "loss": 0.0525, "step": 19682 }, { "epoch": 4.478498293515359, "grad_norm": 0.6818009021179015, "learning_rate": 3.3281851932602454e-08, "loss": 0.0632, "step": 19683 }, { "epoch": 4.47872582480091, "grad_norm": 1.388593137564474, "learning_rate": 3.325308931353272e-08, "loss": 0.0385, "step": 19684 }, { "epoch": 4.478953356086462, "grad_norm": 0.6877260568189645, "learning_rate": 3.3224338788686236e-08, "loss": 0.0072, "step": 19685 }, { "epoch": 4.479180887372014, "grad_norm": 1.7633059155389013, "learning_rate": 3.3195600358650326e-08, "loss": 0.0164, "step": 19686 }, { "epoch": 4.479408418657566, "grad_norm": 0.9790650883793266, "learning_rate": 3.316687402401264e-08, "loss": 0.04, "step": 19687 }, { "epoch": 4.479635949943117, "grad_norm": 1.5367017117804558, "learning_rate": 3.313815978536007e-08, "loss": 0.0177, "step": 19688 }, { "epoch": 4.479863481228669, "grad_norm": 1.2106619451853413, "learning_rate": 3.310945764327965e-08, "loss": 0.0227, "step": 19689 }, { "epoch": 4.480091012514221, "grad_norm": 1.0449107768161412, "learning_rate": 3.308076759835778e-08, "loss": 0.0105, "step": 19690 }, { "epoch": 4.480318543799773, "grad_norm": 1.137148031354879, "learning_rate": 3.305208965118108e-08, "loss": 0.0587, "step": 19691 }, { "epoch": 4.480546075085324, "grad_norm": 1.083842869611863, "learning_rate": 3.30234238023356e-08, "loss": 0.0189, "step": 19692 }, { "epoch": 4.480773606370876, "grad_norm": 1.5310706032879113, "learning_rate": 3.299477005240706e-08, "loss": 0.0147, "step": 19693 }, { "epoch": 4.481001137656428, "grad_norm": 2.6738663442790367, "learning_rate": 3.296612840198131e-08, "loss": 0.0348, "step": 19694 }, { "epoch": 4.48122866894198, "grad_norm": 1.3880144682191855, "learning_rate": 3.293749885164357e-08, "loss": 0.0155, "step": 19695 }, { "epoch": 4.481456200227531, "grad_norm": 1.1388780475900258, "learning_rate": 3.290888140197915e-08, "loss": 0.0381, "step": 19696 }, { "epoch": 4.481683731513083, "grad_norm": 1.1862789711536237, "learning_rate": 3.2880276053572706e-08, "loss": 0.0502, "step": 19697 }, { "epoch": 4.481911262798635, "grad_norm": 0.909150303769645, "learning_rate": 3.285168280700905e-08, "loss": 0.0099, "step": 19698 }, { "epoch": 4.482138794084187, "grad_norm": 1.9426208337349118, "learning_rate": 3.2823101662872584e-08, "loss": 0.0785, "step": 19699 }, { "epoch": 4.482366325369738, "grad_norm": 0.7278721378316696, "learning_rate": 3.279453262174728e-08, "loss": 0.0052, "step": 19700 }, { "epoch": 4.48259385665529, "grad_norm": 7.86078143490897, "learning_rate": 3.2765975684217325e-08, "loss": 0.0088, "step": 19701 }, { "epoch": 4.482821387940842, "grad_norm": 1.6235997500151713, "learning_rate": 3.273743085086607e-08, "loss": 0.0225, "step": 19702 }, { "epoch": 4.483048919226394, "grad_norm": 1.5271437663810368, "learning_rate": 3.270889812227715e-08, "loss": 0.0104, "step": 19703 }, { "epoch": 4.483276450511945, "grad_norm": 0.8267565656121563, "learning_rate": 3.2680377499033494e-08, "loss": 0.0058, "step": 19704 }, { "epoch": 4.483503981797497, "grad_norm": 1.4085950628830388, "learning_rate": 3.2651868981718185e-08, "loss": 0.0093, "step": 19705 }, { "epoch": 4.483731513083049, "grad_norm": 1.0463607499491554, "learning_rate": 3.262337257091387e-08, "loss": 0.0052, "step": 19706 }, { "epoch": 4.483959044368601, "grad_norm": 0.8573308822410148, "learning_rate": 3.259488826720295e-08, "loss": 0.0569, "step": 19707 }, { "epoch": 4.484186575654152, "grad_norm": 0.971914763316993, "learning_rate": 3.256641607116758e-08, "loss": 0.0345, "step": 19708 }, { "epoch": 4.484414106939704, "grad_norm": 1.237904941278387, "learning_rate": 3.253795598338959e-08, "loss": 0.0656, "step": 19709 }, { "epoch": 4.484641638225256, "grad_norm": 1.414771051365615, "learning_rate": 3.250950800445081e-08, "loss": 0.0484, "step": 19710 }, { "epoch": 4.484869169510808, "grad_norm": 1.667156736378681, "learning_rate": 3.248107213493258e-08, "loss": 0.0771, "step": 19711 }, { "epoch": 4.485096700796359, "grad_norm": 0.6304429486016258, "learning_rate": 3.245264837541603e-08, "loss": 0.0032, "step": 19712 }, { "epoch": 4.485324232081911, "grad_norm": 0.3077910199386142, "learning_rate": 3.2424236726482234e-08, "loss": 0.0013, "step": 19713 }, { "epoch": 4.485551763367463, "grad_norm": 5.267579005304407, "learning_rate": 3.2395837188711687e-08, "loss": 0.012, "step": 19714 }, { "epoch": 4.485779294653015, "grad_norm": 0.898093708673856, "learning_rate": 3.236744976268504e-08, "loss": 0.026, "step": 19715 }, { "epoch": 4.486006825938566, "grad_norm": 0.9742188671830938, "learning_rate": 3.233907444898231e-08, "loss": 0.034, "step": 19716 }, { "epoch": 4.486234357224118, "grad_norm": 1.3253101693684044, "learning_rate": 3.2310711248183454e-08, "loss": 0.0656, "step": 19717 }, { "epoch": 4.48646188850967, "grad_norm": 1.0067250568755466, "learning_rate": 3.228236016086829e-08, "loss": 0.0274, "step": 19718 }, { "epoch": 4.486689419795222, "grad_norm": 0.6204813502204979, "learning_rate": 3.225402118761607e-08, "loss": 0.0074, "step": 19719 }, { "epoch": 4.486916951080774, "grad_norm": 1.3339068789087207, "learning_rate": 3.22256943290062e-08, "loss": 0.0653, "step": 19720 }, { "epoch": 4.487144482366325, "grad_norm": 1.4529062126989167, "learning_rate": 3.219737958561744e-08, "loss": 0.1424, "step": 19721 }, { "epoch": 4.4873720136518775, "grad_norm": 1.012313447432336, "learning_rate": 3.216907695802865e-08, "loss": 0.0167, "step": 19722 }, { "epoch": 4.487599544937429, "grad_norm": 1.81157264536969, "learning_rate": 3.214078644681817e-08, "loss": 0.0348, "step": 19723 }, { "epoch": 4.487827076222981, "grad_norm": 0.3212537382244756, "learning_rate": 3.2112508052564165e-08, "loss": 0.0013, "step": 19724 }, { "epoch": 4.488054607508532, "grad_norm": 1.5641722929053035, "learning_rate": 3.2084241775844836e-08, "loss": 0.0204, "step": 19725 }, { "epoch": 4.4882821387940846, "grad_norm": 1.2534125273243502, "learning_rate": 3.205598761723758e-08, "loss": 0.0468, "step": 19726 }, { "epoch": 4.488509670079636, "grad_norm": 0.8824043363086504, "learning_rate": 3.202774557732012e-08, "loss": 0.0112, "step": 19727 }, { "epoch": 4.488737201365188, "grad_norm": 1.9931218555169838, "learning_rate": 3.199951565666951e-08, "loss": 0.0296, "step": 19728 }, { "epoch": 4.488964732650739, "grad_norm": 0.5195581083008672, "learning_rate": 3.197129785586277e-08, "loss": 0.0024, "step": 19729 }, { "epoch": 4.489192263936292, "grad_norm": 1.4620143871350288, "learning_rate": 3.19430921754766e-08, "loss": 0.0317, "step": 19730 }, { "epoch": 4.489419795221843, "grad_norm": 0.6124104944091957, "learning_rate": 3.191489861608749e-08, "loss": 0.0077, "step": 19731 }, { "epoch": 4.489647326507395, "grad_norm": 1.621962628366253, "learning_rate": 3.188671717827177e-08, "loss": 0.0236, "step": 19732 }, { "epoch": 4.489874857792946, "grad_norm": 1.250227184822639, "learning_rate": 3.185854786260517e-08, "loss": 0.0066, "step": 19733 }, { "epoch": 4.490102389078499, "grad_norm": 1.4648685652562172, "learning_rate": 3.1830390669663685e-08, "loss": 0.006, "step": 19734 }, { "epoch": 4.49032992036405, "grad_norm": 1.3726240188879524, "learning_rate": 3.1802245600022626e-08, "loss": 0.0806, "step": 19735 }, { "epoch": 4.490557451649602, "grad_norm": 0.7522444900558809, "learning_rate": 3.1774112654257354e-08, "loss": 0.0268, "step": 19736 }, { "epoch": 4.490784982935153, "grad_norm": 1.0823634179171198, "learning_rate": 3.174599183294262e-08, "loss": 0.0558, "step": 19737 }, { "epoch": 4.491012514220706, "grad_norm": 0.8717723420678787, "learning_rate": 3.1717883136653386e-08, "loss": 0.0161, "step": 19738 }, { "epoch": 4.491240045506257, "grad_norm": 1.7792682676354579, "learning_rate": 3.168978656596412e-08, "loss": 0.0221, "step": 19739 }, { "epoch": 4.491467576791809, "grad_norm": 0.9665713827162524, "learning_rate": 3.166170212144902e-08, "loss": 0.072, "step": 19740 }, { "epoch": 4.49169510807736, "grad_norm": 1.2137279366970013, "learning_rate": 3.163362980368207e-08, "loss": 0.0281, "step": 19741 }, { "epoch": 4.491922639362913, "grad_norm": 0.806469973631247, "learning_rate": 3.16055696132371e-08, "loss": 0.0088, "step": 19742 }, { "epoch": 4.492150170648464, "grad_norm": 0.9086117257858454, "learning_rate": 3.1577521550687356e-08, "loss": 0.0115, "step": 19743 }, { "epoch": 4.492377701934016, "grad_norm": 1.8296095413177447, "learning_rate": 3.154948561660633e-08, "loss": 0.033, "step": 19744 }, { "epoch": 4.492605233219567, "grad_norm": 1.5292837619355106, "learning_rate": 3.1521461811566956e-08, "loss": 0.0443, "step": 19745 }, { "epoch": 4.49283276450512, "grad_norm": 1.3856924708576965, "learning_rate": 3.149345013614212e-08, "loss": 0.0812, "step": 19746 }, { "epoch": 4.493060295790671, "grad_norm": 1.4857901792438781, "learning_rate": 3.146545059090407e-08, "loss": 0.0062, "step": 19747 }, { "epoch": 4.493287827076223, "grad_norm": 0.9963519373672874, "learning_rate": 3.1437463176425334e-08, "loss": 0.0125, "step": 19748 }, { "epoch": 4.493515358361774, "grad_norm": 1.7863865598940682, "learning_rate": 3.140948789327774e-08, "loss": 0.0027, "step": 19749 }, { "epoch": 4.493742889647327, "grad_norm": 1.6659126333308956, "learning_rate": 3.138152474203307e-08, "loss": 0.0715, "step": 19750 }, { "epoch": 4.493970420932878, "grad_norm": 1.9039411518793017, "learning_rate": 3.135357372326286e-08, "loss": 0.0947, "step": 19751 }, { "epoch": 4.49419795221843, "grad_norm": 0.4962554842178617, "learning_rate": 3.132563483753834e-08, "loss": 0.0025, "step": 19752 }, { "epoch": 4.494425483503981, "grad_norm": 1.171357015826484, "learning_rate": 3.12977080854307e-08, "loss": 0.0273, "step": 19753 }, { "epoch": 4.494653014789534, "grad_norm": 6.165627108325656, "learning_rate": 3.126979346751061e-08, "loss": 0.0352, "step": 19754 }, { "epoch": 4.494880546075085, "grad_norm": 1.3626657466827314, "learning_rate": 3.12418909843485e-08, "loss": 0.0304, "step": 19755 }, { "epoch": 4.495108077360637, "grad_norm": 1.90851238379578, "learning_rate": 3.1214000636514764e-08, "loss": 0.0331, "step": 19756 }, { "epoch": 4.495335608646188, "grad_norm": 1.14258804050962, "learning_rate": 3.118612242457936e-08, "loss": 0.0456, "step": 19757 }, { "epoch": 4.495563139931741, "grad_norm": 1.0118611315899808, "learning_rate": 3.115825634911203e-08, "loss": 0.0385, "step": 19758 }, { "epoch": 4.495790671217293, "grad_norm": 0.36418247054145664, "learning_rate": 3.113040241068239e-08, "loss": 0.0029, "step": 19759 }, { "epoch": 4.496018202502844, "grad_norm": 1.5328927900792475, "learning_rate": 3.1102560609859794e-08, "loss": 0.0489, "step": 19760 }, { "epoch": 4.496245733788396, "grad_norm": 1.6316860226667758, "learning_rate": 3.107473094721321e-08, "loss": 0.0148, "step": 19761 }, { "epoch": 4.496473265073948, "grad_norm": 1.338597129743016, "learning_rate": 3.104691342331129e-08, "loss": 0.0187, "step": 19762 }, { "epoch": 4.4967007963595, "grad_norm": 1.2425016997879683, "learning_rate": 3.101910803872281e-08, "loss": 0.0093, "step": 19763 }, { "epoch": 4.496928327645051, "grad_norm": 8.589749103613576, "learning_rate": 3.0991314794015795e-08, "loss": 0.0337, "step": 19764 }, { "epoch": 4.497155858930603, "grad_norm": 1.337871614555805, "learning_rate": 3.096353368975846e-08, "loss": 0.0136, "step": 19765 }, { "epoch": 4.497383390216155, "grad_norm": 1.1043981935344749, "learning_rate": 3.093576472651856e-08, "loss": 0.0709, "step": 19766 }, { "epoch": 4.497610921501707, "grad_norm": 1.5375899604600138, "learning_rate": 3.090800790486376e-08, "loss": 0.0897, "step": 19767 }, { "epoch": 4.497838452787258, "grad_norm": 0.5776519197558232, "learning_rate": 3.088026322536124e-08, "loss": 0.0125, "step": 19768 }, { "epoch": 4.49806598407281, "grad_norm": 1.431936285022838, "learning_rate": 3.085253068857798e-08, "loss": 0.0527, "step": 19769 }, { "epoch": 4.498293515358362, "grad_norm": 1.593282092463016, "learning_rate": 3.082481029508096e-08, "loss": 0.0623, "step": 19770 }, { "epoch": 4.498521046643914, "grad_norm": 1.1489816534972197, "learning_rate": 3.079710204543638e-08, "loss": 0.0067, "step": 19771 }, { "epoch": 4.498748577929465, "grad_norm": 2.138114478851855, "learning_rate": 3.076940594021111e-08, "loss": 0.1118, "step": 19772 }, { "epoch": 4.498976109215017, "grad_norm": 1.6530494477033026, "learning_rate": 3.074172197997077e-08, "loss": 0.0164, "step": 19773 }, { "epoch": 4.499203640500569, "grad_norm": 1.7084188447481665, "learning_rate": 3.071405016528132e-08, "loss": 0.0389, "step": 19774 }, { "epoch": 4.499431171786121, "grad_norm": 0.8363682853163532, "learning_rate": 3.068639049670835e-08, "loss": 0.0089, "step": 19775 }, { "epoch": 4.499658703071672, "grad_norm": 0.9655374816117681, "learning_rate": 3.0658742974817026e-08, "loss": 0.0095, "step": 19776 }, { "epoch": 4.499886234357224, "grad_norm": 2.226416562372457, "learning_rate": 3.0631107600172516e-08, "loss": 0.0335, "step": 19777 }, { "epoch": 4.500113765642776, "grad_norm": 1.415265770466215, "learning_rate": 3.060348437333959e-08, "loss": 0.0059, "step": 19778 }, { "epoch": 4.500341296928328, "grad_norm": 0.3376500590482606, "learning_rate": 3.0575873294882984e-08, "loss": 0.0016, "step": 19779 }, { "epoch": 4.500568828213879, "grad_norm": 1.6682700845214067, "learning_rate": 3.054827436536692e-08, "loss": 0.0851, "step": 19780 }, { "epoch": 4.500796359499431, "grad_norm": 1.2105499334571739, "learning_rate": 3.0520687585355315e-08, "loss": 0.0103, "step": 19781 }, { "epoch": 4.501023890784983, "grad_norm": 1.1228735012933586, "learning_rate": 3.0493112955412225e-08, "loss": 0.0451, "step": 19782 }, { "epoch": 4.501251422070535, "grad_norm": 0.9813447046414854, "learning_rate": 3.046555047610103e-08, "loss": 0.0102, "step": 19783 }, { "epoch": 4.501478953356086, "grad_norm": 0.9921898827193167, "learning_rate": 3.043800014798509e-08, "loss": 0.0352, "step": 19784 }, { "epoch": 4.501706484641638, "grad_norm": 2.3106740082233395, "learning_rate": 3.041046197162764e-08, "loss": 0.0594, "step": 19785 }, { "epoch": 4.50193401592719, "grad_norm": 2.0666308723546893, "learning_rate": 3.0382935947591426e-08, "loss": 0.0439, "step": 19786 }, { "epoch": 4.502161547212742, "grad_norm": 1.090601247949363, "learning_rate": 3.035542207643898e-08, "loss": 0.0193, "step": 19787 }, { "epoch": 4.502389078498293, "grad_norm": 1.2383060897304279, "learning_rate": 3.032792035873262e-08, "loss": 0.0132, "step": 19788 }, { "epoch": 4.502616609783845, "grad_norm": 1.24261574205921, "learning_rate": 3.0300430795034625e-08, "loss": 0.0269, "step": 19789 }, { "epoch": 4.502844141069397, "grad_norm": 1.8308856930472428, "learning_rate": 3.0272953385906554e-08, "loss": 0.0815, "step": 19790 }, { "epoch": 4.503071672354949, "grad_norm": 1.5382934141828153, "learning_rate": 3.0245488131910096e-08, "loss": 0.0472, "step": 19791 }, { "epoch": 4.5032992036405, "grad_norm": 1.2093172890059876, "learning_rate": 3.0218035033606695e-08, "loss": 0.0099, "step": 19792 }, { "epoch": 4.503526734926052, "grad_norm": 0.6260217939813566, "learning_rate": 3.019059409155735e-08, "loss": 0.0042, "step": 19793 }, { "epoch": 4.503754266211605, "grad_norm": 1.0420373134196563, "learning_rate": 3.0163165306322934e-08, "loss": 0.0346, "step": 19794 }, { "epoch": 4.503981797497156, "grad_norm": 1.863993235563068, "learning_rate": 3.013574867846397e-08, "loss": 0.0124, "step": 19795 }, { "epoch": 4.504209328782707, "grad_norm": 1.0379465145843365, "learning_rate": 3.010834420854092e-08, "loss": 0.0154, "step": 19796 }, { "epoch": 4.504436860068259, "grad_norm": 1.102719161974799, "learning_rate": 3.0080951897113814e-08, "loss": 0.0186, "step": 19797 }, { "epoch": 4.504664391353812, "grad_norm": 1.8858571209254857, "learning_rate": 3.005357174474241e-08, "loss": 0.0189, "step": 19798 }, { "epoch": 4.504891922639363, "grad_norm": 2.7999469712047933, "learning_rate": 3.002620375198655e-08, "loss": 0.0114, "step": 19799 }, { "epoch": 4.505119453924914, "grad_norm": 0.8300155427458916, "learning_rate": 2.999884791940535e-08, "loss": 0.0033, "step": 19800 }, { "epoch": 4.505346985210466, "grad_norm": 0.8815884214155459, "learning_rate": 2.997150424755811e-08, "loss": 0.0099, "step": 19801 }, { "epoch": 4.505574516496019, "grad_norm": 1.3033847805038443, "learning_rate": 2.994417273700347e-08, "loss": 0.0566, "step": 19802 }, { "epoch": 4.50580204778157, "grad_norm": 2.1097302881509603, "learning_rate": 2.9916853388300204e-08, "loss": 0.0508, "step": 19803 }, { "epoch": 4.506029579067122, "grad_norm": 1.6075500527039135, "learning_rate": 2.988954620200657e-08, "loss": 0.0057, "step": 19804 }, { "epoch": 4.506257110352673, "grad_norm": 1.5370241727389318, "learning_rate": 2.986225117868072e-08, "loss": 0.0218, "step": 19805 }, { "epoch": 4.506484641638226, "grad_norm": 0.7283472371609492, "learning_rate": 2.983496831888054e-08, "loss": 0.009, "step": 19806 }, { "epoch": 4.506712172923777, "grad_norm": 1.1358263250668221, "learning_rate": 2.980769762316357e-08, "loss": 0.0415, "step": 19807 }, { "epoch": 4.506939704209329, "grad_norm": 1.4108429394634376, "learning_rate": 2.978043909208729e-08, "loss": 0.0203, "step": 19808 }, { "epoch": 4.50716723549488, "grad_norm": 1.3688478893452087, "learning_rate": 2.9753192726208677e-08, "loss": 0.016, "step": 19809 }, { "epoch": 4.507394766780433, "grad_norm": 0.7876096788216009, "learning_rate": 2.9725958526084786e-08, "loss": 0.0644, "step": 19810 }, { "epoch": 4.507622298065984, "grad_norm": 1.0286214924112003, "learning_rate": 2.969873649227198e-08, "loss": 0.0108, "step": 19811 }, { "epoch": 4.507849829351536, "grad_norm": 1.1068748308249516, "learning_rate": 2.9671526625326756e-08, "loss": 0.0175, "step": 19812 }, { "epoch": 4.508077360637087, "grad_norm": 1.6352766624761335, "learning_rate": 2.9644328925805267e-08, "loss": 0.0924, "step": 19813 }, { "epoch": 4.50830489192264, "grad_norm": 2.489708389367059, "learning_rate": 2.9617143394263316e-08, "loss": 0.116, "step": 19814 }, { "epoch": 4.508532423208191, "grad_norm": 0.871178889060707, "learning_rate": 2.9589970031256647e-08, "loss": 0.0108, "step": 19815 }, { "epoch": 4.508759954493743, "grad_norm": 1.3070349547293882, "learning_rate": 2.95628088373405e-08, "loss": 0.016, "step": 19816 }, { "epoch": 4.508987485779294, "grad_norm": 0.5733833648954842, "learning_rate": 2.9535659813070064e-08, "loss": 0.0037, "step": 19817 }, { "epoch": 4.509215017064847, "grad_norm": 1.2540577129092991, "learning_rate": 2.9508522959000167e-08, "loss": 0.013, "step": 19818 }, { "epoch": 4.509442548350398, "grad_norm": 1.2077998615250947, "learning_rate": 2.948139827568544e-08, "loss": 0.0221, "step": 19819 }, { "epoch": 4.50967007963595, "grad_norm": 1.4012935904681358, "learning_rate": 2.9454285763680437e-08, "loss": 0.0051, "step": 19820 }, { "epoch": 4.509897610921501, "grad_norm": 1.4023341122841628, "learning_rate": 2.942718542353902e-08, "loss": 0.0167, "step": 19821 }, { "epoch": 4.510125142207054, "grad_norm": 0.6769143281696314, "learning_rate": 2.940009725581526e-08, "loss": 0.0047, "step": 19822 }, { "epoch": 4.510352673492605, "grad_norm": 2.335596670373203, "learning_rate": 2.9373021261062603e-08, "loss": 0.0226, "step": 19823 }, { "epoch": 4.510580204778157, "grad_norm": 1.1988969827585099, "learning_rate": 2.9345957439834705e-08, "loss": 0.026, "step": 19824 }, { "epoch": 4.510807736063708, "grad_norm": 0.7567605763104925, "learning_rate": 2.9318905792684453e-08, "loss": 0.0052, "step": 19825 }, { "epoch": 4.511035267349261, "grad_norm": 2.460747930619719, "learning_rate": 2.9291866320164814e-08, "loss": 0.0084, "step": 19826 }, { "epoch": 4.511262798634812, "grad_norm": 1.5349696624578002, "learning_rate": 2.9264839022828533e-08, "loss": 0.165, "step": 19827 }, { "epoch": 4.511490329920364, "grad_norm": 1.8931076393547797, "learning_rate": 2.9237823901227813e-08, "loss": 0.0612, "step": 19828 }, { "epoch": 4.5117178612059154, "grad_norm": 1.828739564561602, "learning_rate": 2.9210820955914918e-08, "loss": 0.0194, "step": 19829 }, { "epoch": 4.511945392491468, "grad_norm": 1.3841533534751862, "learning_rate": 2.9183830187441768e-08, "loss": 0.0556, "step": 19830 }, { "epoch": 4.512172923777019, "grad_norm": 1.004466399486816, "learning_rate": 2.9156851596359863e-08, "loss": 0.0477, "step": 19831 }, { "epoch": 4.512400455062571, "grad_norm": 1.411038233346219, "learning_rate": 2.9129885183220645e-08, "loss": 0.0621, "step": 19832 }, { "epoch": 4.512627986348123, "grad_norm": 0.9660975103165347, "learning_rate": 2.910293094857533e-08, "loss": 0.0103, "step": 19833 }, { "epoch": 4.512855517633675, "grad_norm": 1.316999773958067, "learning_rate": 2.9075988892974805e-08, "loss": 0.0323, "step": 19834 }, { "epoch": 4.513083048919226, "grad_norm": 0.870531882064315, "learning_rate": 2.9049059016969666e-08, "loss": 0.0055, "step": 19835 }, { "epoch": 4.513310580204778, "grad_norm": 1.0587576651772048, "learning_rate": 2.9022141321110378e-08, "loss": 0.0116, "step": 19836 }, { "epoch": 4.51353811149033, "grad_norm": 0.6655331868869155, "learning_rate": 2.8995235805946987e-08, "loss": 0.009, "step": 19837 }, { "epoch": 4.513765642775882, "grad_norm": 1.047724574956268, "learning_rate": 2.8968342472029328e-08, "loss": 0.0133, "step": 19838 }, { "epoch": 4.513993174061433, "grad_norm": 2.3733064701685564, "learning_rate": 2.8941461319907313e-08, "loss": 0.114, "step": 19839 }, { "epoch": 4.514220705346985, "grad_norm": 1.2620246349536672, "learning_rate": 2.891459235013015e-08, "loss": 0.022, "step": 19840 }, { "epoch": 4.514448236632537, "grad_norm": 0.8044724819549808, "learning_rate": 2.888773556324713e-08, "loss": 0.0028, "step": 19841 }, { "epoch": 4.514675767918089, "grad_norm": 0.8882940885653987, "learning_rate": 2.886089095980697e-08, "loss": 0.0072, "step": 19842 }, { "epoch": 4.514903299203641, "grad_norm": 1.2909364197902704, "learning_rate": 2.883405854035848e-08, "loss": 0.0107, "step": 19843 }, { "epoch": 4.515130830489192, "grad_norm": 0.8189657005736138, "learning_rate": 2.8807238305449958e-08, "loss": 0.0038, "step": 19844 }, { "epoch": 4.515358361774744, "grad_norm": 1.08574585776306, "learning_rate": 2.8780430255629585e-08, "loss": 0.0067, "step": 19845 }, { "epoch": 4.515585893060296, "grad_norm": 1.1589084215357055, "learning_rate": 2.8753634391445394e-08, "loss": 0.0168, "step": 19846 }, { "epoch": 4.515813424345848, "grad_norm": 1.0978259267566446, "learning_rate": 2.8726850713444858e-08, "loss": 0.0437, "step": 19847 }, { "epoch": 4.516040955631399, "grad_norm": 0.9837943226132851, "learning_rate": 2.8700079222175532e-08, "loss": 0.0168, "step": 19848 }, { "epoch": 4.516268486916951, "grad_norm": 0.31469311868803707, "learning_rate": 2.8673319918184546e-08, "loss": 0.0022, "step": 19849 }, { "epoch": 4.516496018202503, "grad_norm": 1.1885915856578286, "learning_rate": 2.8646572802018616e-08, "loss": 0.0933, "step": 19850 }, { "epoch": 4.516723549488055, "grad_norm": 0.3702894537081405, "learning_rate": 2.8619837874224662e-08, "loss": 0.0023, "step": 19851 }, { "epoch": 4.516951080773606, "grad_norm": 1.2295169028720994, "learning_rate": 2.8593115135348986e-08, "loss": 0.0283, "step": 19852 }, { "epoch": 4.517178612059158, "grad_norm": 0.9836847883290181, "learning_rate": 2.856640458593782e-08, "loss": 0.0715, "step": 19853 }, { "epoch": 4.51740614334471, "grad_norm": 4.905041196258016, "learning_rate": 2.853970622653697e-08, "loss": 0.0035, "step": 19854 }, { "epoch": 4.517633674630262, "grad_norm": 0.7801417281390229, "learning_rate": 2.8513020057692256e-08, "loss": 0.0045, "step": 19855 }, { "epoch": 4.517861205915813, "grad_norm": 1.2964155945229823, "learning_rate": 2.8486346079948934e-08, "loss": 0.041, "step": 19856 }, { "epoch": 4.518088737201365, "grad_norm": 1.0700423071308953, "learning_rate": 2.8459684293852194e-08, "loss": 0.0431, "step": 19857 }, { "epoch": 4.518316268486917, "grad_norm": 0.21271448443649676, "learning_rate": 2.8433034699947014e-08, "loss": 0.0008, "step": 19858 }, { "epoch": 4.518543799772469, "grad_norm": 1.088950707285623, "learning_rate": 2.840639729877803e-08, "loss": 0.0462, "step": 19859 }, { "epoch": 4.51877133105802, "grad_norm": 0.815099271981607, "learning_rate": 2.837977209088974e-08, "loss": 0.0121, "step": 19860 }, { "epoch": 4.518998862343572, "grad_norm": 1.277830329173923, "learning_rate": 2.8353159076826286e-08, "loss": 0.0213, "step": 19861 }, { "epoch": 4.519226393629124, "grad_norm": 1.074511852221831, "learning_rate": 2.8326558257131473e-08, "loss": 0.0097, "step": 19862 }, { "epoch": 4.519453924914676, "grad_norm": 1.0339681364555935, "learning_rate": 2.82999696323491e-08, "loss": 0.0307, "step": 19863 }, { "epoch": 4.519681456200227, "grad_norm": 1.7245577808946473, "learning_rate": 2.8273393203022482e-08, "loss": 0.0066, "step": 19864 }, { "epoch": 4.519908987485779, "grad_norm": 1.5474807961068449, "learning_rate": 2.8246828969694802e-08, "loss": 0.0211, "step": 19865 }, { "epoch": 4.520136518771331, "grad_norm": 0.7807080313152213, "learning_rate": 2.8220276932909087e-08, "loss": 0.0478, "step": 19866 }, { "epoch": 4.520364050056883, "grad_norm": 0.8957547866397526, "learning_rate": 2.8193737093208038e-08, "loss": 0.0393, "step": 19867 }, { "epoch": 4.520591581342434, "grad_norm": 0.6388234656185288, "learning_rate": 2.8167209451133993e-08, "loss": 0.0022, "step": 19868 }, { "epoch": 4.520819112627986, "grad_norm": 0.4818013845836947, "learning_rate": 2.8140694007229087e-08, "loss": 0.002, "step": 19869 }, { "epoch": 4.521046643913538, "grad_norm": 1.1598559572844007, "learning_rate": 2.8114190762035322e-08, "loss": 0.0041, "step": 19870 }, { "epoch": 4.52127417519909, "grad_norm": 1.3283635665254703, "learning_rate": 2.8087699716094346e-08, "loss": 0.0084, "step": 19871 }, { "epoch": 4.521501706484642, "grad_norm": 0.7506251967515486, "learning_rate": 2.806122086994753e-08, "loss": 0.0029, "step": 19872 }, { "epoch": 4.521729237770193, "grad_norm": 1.1316848821185617, "learning_rate": 2.8034754224136178e-08, "loss": 0.0069, "step": 19873 }, { "epoch": 4.521956769055745, "grad_norm": 2.321916693638945, "learning_rate": 2.800829977920118e-08, "loss": 0.0345, "step": 19874 }, { "epoch": 4.522184300341297, "grad_norm": 1.8381400805386017, "learning_rate": 2.7981857535683206e-08, "loss": 0.06, "step": 19875 }, { "epoch": 4.522411831626849, "grad_norm": 0.6503524594446664, "learning_rate": 2.7955427494122667e-08, "loss": 0.0025, "step": 19876 }, { "epoch": 4.5226393629124, "grad_norm": 0.8088356275857912, "learning_rate": 2.792900965505975e-08, "loss": 0.0048, "step": 19877 }, { "epoch": 4.522866894197952, "grad_norm": 0.6702839249739754, "learning_rate": 2.790260401903437e-08, "loss": 0.0122, "step": 19878 }, { "epoch": 4.523094425483504, "grad_norm": 0.8629053381612247, "learning_rate": 2.787621058658617e-08, "loss": 0.0095, "step": 19879 }, { "epoch": 4.523321956769056, "grad_norm": 0.7469988970233133, "learning_rate": 2.7849829358254782e-08, "loss": 0.0062, "step": 19880 }, { "epoch": 4.523549488054607, "grad_norm": 0.9693107263275437, "learning_rate": 2.7823460334579154e-08, "loss": 0.0456, "step": 19881 }, { "epoch": 4.52377701934016, "grad_norm": 1.5816757084591107, "learning_rate": 2.7797103516098363e-08, "loss": 0.0292, "step": 19882 }, { "epoch": 4.524004550625711, "grad_norm": 1.0713890571295273, "learning_rate": 2.7770758903351013e-08, "loss": 0.0313, "step": 19883 }, { "epoch": 4.524232081911263, "grad_norm": 3.5896222237965367, "learning_rate": 2.774442649687563e-08, "loss": 0.0311, "step": 19884 }, { "epoch": 4.5244596131968144, "grad_norm": 1.6806495758122313, "learning_rate": 2.771810629721032e-08, "loss": 0.1008, "step": 19885 }, { "epoch": 4.524687144482367, "grad_norm": 3.397040648822602, "learning_rate": 2.7691798304892993e-08, "loss": 0.0307, "step": 19886 }, { "epoch": 4.524914675767918, "grad_norm": 0.510628442288682, "learning_rate": 2.7665502520461477e-08, "loss": 0.0016, "step": 19887 }, { "epoch": 4.52514220705347, "grad_norm": 2.1235465122227177, "learning_rate": 2.7639218944453052e-08, "loss": 0.0889, "step": 19888 }, { "epoch": 4.5253697383390215, "grad_norm": 0.47142999651970513, "learning_rate": 2.761294757740507e-08, "loss": 0.003, "step": 19889 }, { "epoch": 4.525597269624574, "grad_norm": 1.6900635446293635, "learning_rate": 2.7586688419854317e-08, "loss": 0.0584, "step": 19890 }, { "epoch": 4.525824800910125, "grad_norm": 1.6642057389227458, "learning_rate": 2.756044147233759e-08, "loss": 0.0122, "step": 19891 }, { "epoch": 4.526052332195677, "grad_norm": 2.031157889897539, "learning_rate": 2.753420673539113e-08, "loss": 0.0149, "step": 19892 }, { "epoch": 4.5262798634812285, "grad_norm": 5.62835858102694, "learning_rate": 2.7507984209551447e-08, "loss": 0.0375, "step": 19893 }, { "epoch": 4.526507394766781, "grad_norm": 1.168595723423393, "learning_rate": 2.7481773895354296e-08, "loss": 0.0221, "step": 19894 }, { "epoch": 4.526734926052332, "grad_norm": 1.468739850665323, "learning_rate": 2.7455575793335285e-08, "loss": 0.014, "step": 19895 }, { "epoch": 4.526962457337884, "grad_norm": 2.144832321258452, "learning_rate": 2.742938990403003e-08, "loss": 0.0191, "step": 19896 }, { "epoch": 4.5271899886234355, "grad_norm": 2.5691700492837533, "learning_rate": 2.7403216227973594e-08, "loss": 0.0536, "step": 19897 }, { "epoch": 4.527417519908988, "grad_norm": 0.4587596748740695, "learning_rate": 2.737705476570096e-08, "loss": 0.0033, "step": 19898 }, { "epoch": 4.527645051194539, "grad_norm": 0.8287191885777552, "learning_rate": 2.735090551774691e-08, "loss": 0.0042, "step": 19899 }, { "epoch": 4.527872582480091, "grad_norm": 1.9695267285193023, "learning_rate": 2.7324768484645743e-08, "loss": 0.0456, "step": 19900 }, { "epoch": 4.5281001137656425, "grad_norm": 0.3965619131431583, "learning_rate": 2.7298643666931816e-08, "loss": 0.0012, "step": 19901 }, { "epoch": 4.528327645051195, "grad_norm": 1.0839054535374222, "learning_rate": 2.727253106513887e-08, "loss": 0.0404, "step": 19902 }, { "epoch": 4.528555176336746, "grad_norm": 1.5730504246074903, "learning_rate": 2.724643067980079e-08, "loss": 0.0413, "step": 19903 }, { "epoch": 4.528782707622298, "grad_norm": 1.3802820318230857, "learning_rate": 2.7220342511450823e-08, "loss": 0.0292, "step": 19904 }, { "epoch": 4.5290102389078495, "grad_norm": 3.8596622852344296, "learning_rate": 2.7194266560622296e-08, "loss": 0.0037, "step": 19905 }, { "epoch": 4.529237770193402, "grad_norm": 1.289656588584724, "learning_rate": 2.7168202827848254e-08, "loss": 0.0281, "step": 19906 }, { "epoch": 4.529465301478953, "grad_norm": 0.9777417162992477, "learning_rate": 2.7142151313661115e-08, "loss": 0.0268, "step": 19907 }, { "epoch": 4.529692832764505, "grad_norm": 0.7084500896243872, "learning_rate": 2.7116112018593586e-08, "loss": 0.0033, "step": 19908 }, { "epoch": 4.5299203640500565, "grad_norm": 1.2410821474409317, "learning_rate": 2.7090084943177662e-08, "loss": 0.066, "step": 19909 }, { "epoch": 4.530147895335609, "grad_norm": 3.5737858127030004, "learning_rate": 2.7064070087945494e-08, "loss": 0.0152, "step": 19910 }, { "epoch": 4.530375426621161, "grad_norm": 0.5843157531804634, "learning_rate": 2.703806745342853e-08, "loss": 0.0019, "step": 19911 }, { "epoch": 4.530602957906712, "grad_norm": 0.9615302674711624, "learning_rate": 2.701207704015843e-08, "loss": 0.0537, "step": 19912 }, { "epoch": 4.5308304891922635, "grad_norm": 1.6377443411048143, "learning_rate": 2.698609884866636e-08, "loss": 0.0244, "step": 19913 }, { "epoch": 4.531058020477816, "grad_norm": 1.0515716564008868, "learning_rate": 2.6960132879483154e-08, "loss": 0.0128, "step": 19914 }, { "epoch": 4.531285551763368, "grad_norm": 2.9957015483280816, "learning_rate": 2.693417913313963e-08, "loss": 0.1049, "step": 19915 }, { "epoch": 4.531513083048919, "grad_norm": 0.9499164754922984, "learning_rate": 2.690823761016606e-08, "loss": 0.006, "step": 19916 }, { "epoch": 4.5317406143344705, "grad_norm": 1.923784543349846, "learning_rate": 2.6882308311092855e-08, "loss": 0.0632, "step": 19917 }, { "epoch": 4.531968145620023, "grad_norm": 1.771977681234563, "learning_rate": 2.685639123644987e-08, "loss": 0.0165, "step": 19918 }, { "epoch": 4.532195676905575, "grad_norm": 1.1347812135590842, "learning_rate": 2.6830486386766747e-08, "loss": 0.0181, "step": 19919 }, { "epoch": 4.532423208191126, "grad_norm": 0.47972066890238285, "learning_rate": 2.6804593762572996e-08, "loss": 0.0031, "step": 19920 }, { "epoch": 4.532650739476678, "grad_norm": 0.8769455094793517, "learning_rate": 2.6778713364397844e-08, "loss": 0.0689, "step": 19921 }, { "epoch": 4.53287827076223, "grad_norm": 1.513812752239961, "learning_rate": 2.6752845192770175e-08, "loss": 0.043, "step": 19922 }, { "epoch": 4.533105802047782, "grad_norm": 1.4752709506649104, "learning_rate": 2.6726989248218732e-08, "loss": 0.0291, "step": 19923 }, { "epoch": 4.533333333333333, "grad_norm": 1.8040775327112524, "learning_rate": 2.6701145531271984e-08, "loss": 0.0446, "step": 19924 }, { "epoch": 4.533560864618885, "grad_norm": 1.835759647332356, "learning_rate": 2.6675314042457976e-08, "loss": 0.0444, "step": 19925 }, { "epoch": 4.533788395904437, "grad_norm": 0.8641520470868033, "learning_rate": 2.6649494782304762e-08, "loss": 0.007, "step": 19926 }, { "epoch": 4.534015927189989, "grad_norm": 1.866873189081832, "learning_rate": 2.662368775134018e-08, "loss": 0.0543, "step": 19927 }, { "epoch": 4.53424345847554, "grad_norm": 1.251613032063159, "learning_rate": 2.659789295009145e-08, "loss": 0.0823, "step": 19928 }, { "epoch": 4.534470989761092, "grad_norm": 2.879437180951489, "learning_rate": 2.6572110379085926e-08, "loss": 0.0169, "step": 19929 }, { "epoch": 4.534698521046644, "grad_norm": 1.8177256658873309, "learning_rate": 2.6546340038850484e-08, "loss": 0.0979, "step": 19930 }, { "epoch": 4.534926052332196, "grad_norm": 0.42035904330568313, "learning_rate": 2.6520581929911844e-08, "loss": 0.0024, "step": 19931 }, { "epoch": 4.535153583617747, "grad_norm": 1.1855009678284751, "learning_rate": 2.64948360527964e-08, "loss": 0.019, "step": 19932 }, { "epoch": 4.535381114903299, "grad_norm": 1.9447103815860547, "learning_rate": 2.646910240803033e-08, "loss": 0.0112, "step": 19933 }, { "epoch": 4.535608646188851, "grad_norm": 1.394080490208517, "learning_rate": 2.64433809961398e-08, "loss": 0.0573, "step": 19934 }, { "epoch": 4.535836177474403, "grad_norm": 1.419276946708413, "learning_rate": 2.64176718176503e-08, "loss": 0.0781, "step": 19935 }, { "epoch": 4.536063708759954, "grad_norm": 1.0392405679378152, "learning_rate": 2.639197487308731e-08, "loss": 0.0268, "step": 19936 }, { "epoch": 4.536291240045506, "grad_norm": 1.0629757180076702, "learning_rate": 2.6366290162976176e-08, "loss": 0.0357, "step": 19937 }, { "epoch": 4.536518771331058, "grad_norm": 0.7220063423126857, "learning_rate": 2.6340617687841546e-08, "loss": 0.0249, "step": 19938 }, { "epoch": 4.53674630261661, "grad_norm": 1.176612169680956, "learning_rate": 2.6314957448208348e-08, "loss": 0.0417, "step": 19939 }, { "epoch": 4.536973833902161, "grad_norm": 1.3370479448429955, "learning_rate": 2.628930944460102e-08, "loss": 0.0195, "step": 19940 }, { "epoch": 4.537201365187713, "grad_norm": 1.2239507418990991, "learning_rate": 2.6263673677543733e-08, "loss": 0.0523, "step": 19941 }, { "epoch": 4.537428896473265, "grad_norm": 3.999110677060841, "learning_rate": 2.6238050147560368e-08, "loss": 0.0299, "step": 19942 }, { "epoch": 4.537656427758817, "grad_norm": 1.1411905550946908, "learning_rate": 2.6212438855174744e-08, "loss": 0.0208, "step": 19943 }, { "epoch": 4.537883959044368, "grad_norm": 1.9079316689242969, "learning_rate": 2.618683980091026e-08, "loss": 0.159, "step": 19944 }, { "epoch": 4.5381114903299204, "grad_norm": 0.9454696488447094, "learning_rate": 2.6161252985289972e-08, "loss": 0.0428, "step": 19945 }, { "epoch": 4.538339021615472, "grad_norm": 1.9590945455822162, "learning_rate": 2.6135678408837002e-08, "loss": 0.048, "step": 19946 }, { "epoch": 4.538566552901024, "grad_norm": 0.8217823206378265, "learning_rate": 2.611011607207399e-08, "loss": 0.0028, "step": 19947 }, { "epoch": 4.538794084186575, "grad_norm": 1.657494082323985, "learning_rate": 2.6084565975523433e-08, "loss": 0.1004, "step": 19948 }, { "epoch": 4.5390216154721275, "grad_norm": 0.23151192895098321, "learning_rate": 2.605902811970748e-08, "loss": 0.0008, "step": 19949 }, { "epoch": 4.53924914675768, "grad_norm": 1.8255967808285387, "learning_rate": 2.603350250514808e-08, "loss": 0.0064, "step": 19950 }, { "epoch": 4.539476678043231, "grad_norm": 0.7149040532821082, "learning_rate": 2.600798913236696e-08, "loss": 0.0037, "step": 19951 }, { "epoch": 4.539704209328782, "grad_norm": 1.7192155615169395, "learning_rate": 2.598248800188552e-08, "loss": 0.0107, "step": 19952 }, { "epoch": 4.5399317406143345, "grad_norm": 1.2941114094304378, "learning_rate": 2.5956999114224932e-08, "loss": 0.0486, "step": 19953 }, { "epoch": 4.540159271899887, "grad_norm": 0.8640566836900156, "learning_rate": 2.593152246990617e-08, "loss": 0.0085, "step": 19954 }, { "epoch": 4.540386803185438, "grad_norm": 1.048652384584792, "learning_rate": 2.590605806945e-08, "loss": 0.0567, "step": 19955 }, { "epoch": 4.540614334470989, "grad_norm": 1.858466904539194, "learning_rate": 2.5880605913376904e-08, "loss": 0.005, "step": 19956 }, { "epoch": 4.5408418657565415, "grad_norm": 1.1528434924280133, "learning_rate": 2.5855166002206818e-08, "loss": 0.0289, "step": 19957 }, { "epoch": 4.541069397042094, "grad_norm": 1.1816752449453114, "learning_rate": 2.582973833646002e-08, "loss": 0.0249, "step": 19958 }, { "epoch": 4.541296928327645, "grad_norm": 0.8892105535458398, "learning_rate": 2.5804322916655814e-08, "loss": 0.0052, "step": 19959 }, { "epoch": 4.541524459613197, "grad_norm": 0.5998045803885995, "learning_rate": 2.5778919743314063e-08, "loss": 0.0045, "step": 19960 }, { "epoch": 4.5417519908987485, "grad_norm": 0.4790115789889676, "learning_rate": 2.5753528816953658e-08, "loss": 0.0029, "step": 19961 }, { "epoch": 4.541979522184301, "grad_norm": 2.103743239306762, "learning_rate": 2.5728150138093765e-08, "loss": 0.0789, "step": 19962 }, { "epoch": 4.542207053469852, "grad_norm": 1.3774270926607093, "learning_rate": 2.5702783707253e-08, "loss": 0.0662, "step": 19963 }, { "epoch": 4.542434584755404, "grad_norm": 0.6987898461358805, "learning_rate": 2.567742952494963e-08, "loss": 0.0055, "step": 19964 }, { "epoch": 4.5426621160409555, "grad_norm": 2.5131851408840773, "learning_rate": 2.5652087591702053e-08, "loss": 0.0218, "step": 19965 }, { "epoch": 4.542889647326508, "grad_norm": 0.3782661721716693, "learning_rate": 2.5626757908028128e-08, "loss": 0.0012, "step": 19966 }, { "epoch": 4.543117178612059, "grad_norm": 0.779736001198329, "learning_rate": 2.5601440474445627e-08, "loss": 0.0131, "step": 19967 }, { "epoch": 4.543344709897611, "grad_norm": 1.6581332838696656, "learning_rate": 2.557613529147192e-08, "loss": 0.0531, "step": 19968 }, { "epoch": 4.5435722411831625, "grad_norm": 0.7047120698069981, "learning_rate": 2.5550842359624227e-08, "loss": 0.0068, "step": 19969 }, { "epoch": 4.543799772468715, "grad_norm": 0.5978416472203877, "learning_rate": 2.5525561679419496e-08, "loss": 0.0023, "step": 19970 }, { "epoch": 4.544027303754266, "grad_norm": 1.0164557157840965, "learning_rate": 2.5500293251374263e-08, "loss": 0.0129, "step": 19971 }, { "epoch": 4.544254835039818, "grad_norm": 0.4784591797686159, "learning_rate": 2.5475037076005196e-08, "loss": 0.0018, "step": 19972 }, { "epoch": 4.5444823663253695, "grad_norm": 0.9835087600136124, "learning_rate": 2.544979315382834e-08, "loss": 0.0519, "step": 19973 }, { "epoch": 4.544709897610922, "grad_norm": 1.6747301848142577, "learning_rate": 2.5424561485359808e-08, "loss": 0.0418, "step": 19974 }, { "epoch": 4.544937428896473, "grad_norm": 1.5931730817917051, "learning_rate": 2.5399342071115164e-08, "loss": 0.115, "step": 19975 }, { "epoch": 4.545164960182025, "grad_norm": 1.3433605200206902, "learning_rate": 2.5374134911609756e-08, "loss": 0.043, "step": 19976 }, { "epoch": 4.5453924914675765, "grad_norm": 2.158251516717524, "learning_rate": 2.5348940007358935e-08, "loss": 0.0064, "step": 19977 }, { "epoch": 4.545620022753129, "grad_norm": 1.4076314132594374, "learning_rate": 2.53237573588775e-08, "loss": 0.0139, "step": 19978 }, { "epoch": 4.54584755403868, "grad_norm": 1.1226370890635533, "learning_rate": 2.529858696668018e-08, "loss": 0.0413, "step": 19979 }, { "epoch": 4.546075085324232, "grad_norm": 2.488991265095988, "learning_rate": 2.527342883128149e-08, "loss": 0.0095, "step": 19980 }, { "epoch": 4.5463026166097835, "grad_norm": 0.6024047713772737, "learning_rate": 2.5248282953195676e-08, "loss": 0.0033, "step": 19981 }, { "epoch": 4.546530147895336, "grad_norm": 1.229742312784411, "learning_rate": 2.5223149332936486e-08, "loss": 0.0595, "step": 19982 }, { "epoch": 4.546757679180887, "grad_norm": 7.578677719412632, "learning_rate": 2.5198027971017614e-08, "loss": 0.0879, "step": 19983 }, { "epoch": 4.546985210466439, "grad_norm": 1.3720850798916202, "learning_rate": 2.5172918867952673e-08, "loss": 0.0091, "step": 19984 }, { "epoch": 4.5472127417519905, "grad_norm": 1.731503886466175, "learning_rate": 2.5147822024254657e-08, "loss": 0.0788, "step": 19985 }, { "epoch": 4.547440273037543, "grad_norm": 0.6087794845200841, "learning_rate": 2.5122737440436627e-08, "loss": 0.0031, "step": 19986 }, { "epoch": 4.547667804323094, "grad_norm": 1.90633402726209, "learning_rate": 2.509766511701123e-08, "loss": 0.0234, "step": 19987 }, { "epoch": 4.547895335608646, "grad_norm": 1.1930459587115785, "learning_rate": 2.507260505449083e-08, "loss": 0.0099, "step": 19988 }, { "epoch": 4.548122866894198, "grad_norm": 1.562595475121514, "learning_rate": 2.5047557253387727e-08, "loss": 0.0513, "step": 19989 }, { "epoch": 4.54835039817975, "grad_norm": 1.8476084405389366, "learning_rate": 2.502252171421374e-08, "loss": 0.0562, "step": 19990 }, { "epoch": 4.548577929465301, "grad_norm": 0.6413105227326654, "learning_rate": 2.499749843748067e-08, "loss": 0.0049, "step": 19991 }, { "epoch": 4.548805460750853, "grad_norm": 2.5016632731920163, "learning_rate": 2.4972487423699856e-08, "loss": 0.022, "step": 19992 }, { "epoch": 4.549032992036405, "grad_norm": 1.259414513196303, "learning_rate": 2.494748867338241e-08, "loss": 0.0264, "step": 19993 }, { "epoch": 4.549260523321957, "grad_norm": 1.365165035110612, "learning_rate": 2.4922502187039523e-08, "loss": 0.0119, "step": 19994 }, { "epoch": 4.549488054607508, "grad_norm": 0.25677249763954435, "learning_rate": 2.489752796518155e-08, "loss": 0.0006, "step": 19995 }, { "epoch": 4.54971558589306, "grad_norm": 1.2326773765919592, "learning_rate": 2.4872566008319195e-08, "loss": 0.0211, "step": 19996 }, { "epoch": 4.549943117178612, "grad_norm": 1.0176508731039797, "learning_rate": 2.484761631696239e-08, "loss": 0.0421, "step": 19997 }, { "epoch": 4.550170648464164, "grad_norm": 1.224283496495397, "learning_rate": 2.4822678891621294e-08, "loss": 0.0562, "step": 19998 }, { "epoch": 4.550398179749716, "grad_norm": 1.629963372503813, "learning_rate": 2.4797753732805352e-08, "loss": 0.0238, "step": 19999 }, { "epoch": 4.550625711035267, "grad_norm": 1.3504237806351607, "learning_rate": 2.477284084102409e-08, "loss": 0.0152, "step": 20000 }, { "epoch": 4.5508532423208194, "grad_norm": 0.7132967424277925, "learning_rate": 2.4747940216786822e-08, "loss": 0.0048, "step": 20001 }, { "epoch": 4.551080773606371, "grad_norm": 1.1818463245843778, "learning_rate": 2.472305186060224e-08, "loss": 0.0524, "step": 20002 }, { "epoch": 4.551308304891923, "grad_norm": 1.6288701602016162, "learning_rate": 2.4698175772979242e-08, "loss": 0.092, "step": 20003 }, { "epoch": 4.551535836177474, "grad_norm": 1.836066849923269, "learning_rate": 2.467331195442603e-08, "loss": 0.1125, "step": 20004 }, { "epoch": 4.5517633674630265, "grad_norm": 1.0902376452194056, "learning_rate": 2.464846040545095e-08, "loss": 0.0576, "step": 20005 }, { "epoch": 4.551990898748578, "grad_norm": 1.2732411279269487, "learning_rate": 2.4623621126561788e-08, "loss": 0.0783, "step": 20006 }, { "epoch": 4.55221843003413, "grad_norm": 1.2314165322291566, "learning_rate": 2.45987941182662e-08, "loss": 0.0185, "step": 20007 }, { "epoch": 4.552445961319681, "grad_norm": 0.8225108512715575, "learning_rate": 2.4573979381071824e-08, "loss": 0.0089, "step": 20008 }, { "epoch": 4.5526734926052335, "grad_norm": 1.1912840591217382, "learning_rate": 2.4549176915485627e-08, "loss": 0.0042, "step": 20009 }, { "epoch": 4.552901023890785, "grad_norm": 1.9236323195046823, "learning_rate": 2.4524386722014625e-08, "loss": 0.0112, "step": 20010 }, { "epoch": 4.553128555176337, "grad_norm": 0.6474326860715008, "learning_rate": 2.4499608801165294e-08, "loss": 0.0038, "step": 20011 }, { "epoch": 4.553356086461888, "grad_norm": 1.6553472146165442, "learning_rate": 2.447484315344438e-08, "loss": 0.0132, "step": 20012 }, { "epoch": 4.5535836177474405, "grad_norm": 1.3307665964404243, "learning_rate": 2.4450089779357727e-08, "loss": 0.0286, "step": 20013 }, { "epoch": 4.553811149032992, "grad_norm": 1.2331066069269845, "learning_rate": 2.442534867941139e-08, "loss": 0.0525, "step": 20014 }, { "epoch": 4.554038680318544, "grad_norm": 1.2880966111197671, "learning_rate": 2.4400619854111144e-08, "loss": 0.0121, "step": 20015 }, { "epoch": 4.554266211604095, "grad_norm": 1.2366065840831064, "learning_rate": 2.437590330396214e-08, "loss": 0.0139, "step": 20016 }, { "epoch": 4.5544937428896475, "grad_norm": 1.2888631006404638, "learning_rate": 2.4351199029469806e-08, "loss": 0.0131, "step": 20017 }, { "epoch": 4.554721274175199, "grad_norm": 2.2079656693859424, "learning_rate": 2.4326507031138812e-08, "loss": 0.0133, "step": 20018 }, { "epoch": 4.554948805460751, "grad_norm": 0.9909330948284544, "learning_rate": 2.4301827309473962e-08, "loss": 0.0546, "step": 20019 }, { "epoch": 4.555176336746302, "grad_norm": 1.5793975836794776, "learning_rate": 2.4277159864979707e-08, "loss": 0.0105, "step": 20020 }, { "epoch": 4.5554038680318545, "grad_norm": 1.051473565318096, "learning_rate": 2.4252504698160028e-08, "loss": 0.0098, "step": 20021 }, { "epoch": 4.555631399317406, "grad_norm": 2.1212548604706747, "learning_rate": 2.4227861809519025e-08, "loss": 0.019, "step": 20022 }, { "epoch": 4.555858930602958, "grad_norm": 1.0623240206419329, "learning_rate": 2.4203231199560194e-08, "loss": 0.0058, "step": 20023 }, { "epoch": 4.556086461888509, "grad_norm": 1.025771098437974, "learning_rate": 2.417861286878709e-08, "loss": 0.0294, "step": 20024 }, { "epoch": 4.5563139931740615, "grad_norm": 0.991014640070374, "learning_rate": 2.4154006817702775e-08, "loss": 0.0096, "step": 20025 }, { "epoch": 4.556541524459613, "grad_norm": 1.0875490148386318, "learning_rate": 2.4129413046810045e-08, "loss": 0.0412, "step": 20026 }, { "epoch": 4.556769055745165, "grad_norm": 1.4754624403439258, "learning_rate": 2.4104831556611835e-08, "loss": 0.0569, "step": 20027 }, { "epoch": 4.556996587030717, "grad_norm": 1.6760810095630887, "learning_rate": 2.408026234761024e-08, "loss": 0.0413, "step": 20028 }, { "epoch": 4.5572241183162685, "grad_norm": 1.4771380472691302, "learning_rate": 2.4055705420307703e-08, "loss": 0.0229, "step": 20029 }, { "epoch": 4.55745164960182, "grad_norm": 1.2542692348423963, "learning_rate": 2.403116077520584e-08, "loss": 0.0253, "step": 20030 }, { "epoch": 4.557679180887372, "grad_norm": 1.9602375943164068, "learning_rate": 2.4006628412806475e-08, "loss": 0.005, "step": 20031 }, { "epoch": 4.557906712172924, "grad_norm": 0.7854202561805474, "learning_rate": 2.3982108333610935e-08, "loss": 0.0047, "step": 20032 }, { "epoch": 4.5581342434584755, "grad_norm": 0.41573270443194194, "learning_rate": 2.3957600538120352e-08, "loss": 0.0025, "step": 20033 }, { "epoch": 4.558361774744027, "grad_norm": 1.3152203306324328, "learning_rate": 2.393310502683578e-08, "loss": 0.1054, "step": 20034 }, { "epoch": 4.558589306029579, "grad_norm": 1.0454146185107442, "learning_rate": 2.3908621800257658e-08, "loss": 0.0041, "step": 20035 }, { "epoch": 4.558816837315131, "grad_norm": 3.536623584704017, "learning_rate": 2.3884150858886484e-08, "loss": 0.0038, "step": 20036 }, { "epoch": 4.5590443686006825, "grad_norm": 0.8201117829723715, "learning_rate": 2.3859692203222346e-08, "loss": 0.0061, "step": 20037 }, { "epoch": 4.559271899886235, "grad_norm": 2.2144583755214016, "learning_rate": 2.383524583376519e-08, "loss": 0.1556, "step": 20038 }, { "epoch": 4.559499431171786, "grad_norm": 1.1677133703081668, "learning_rate": 2.3810811751014616e-08, "loss": 0.0083, "step": 20039 }, { "epoch": 4.559726962457338, "grad_norm": 1.182583640947009, "learning_rate": 2.3786389955469947e-08, "loss": 0.0264, "step": 20040 }, { "epoch": 4.5599544937428895, "grad_norm": 0.7396716812858648, "learning_rate": 2.3761980447630507e-08, "loss": 0.005, "step": 20041 }, { "epoch": 4.560182025028442, "grad_norm": 1.2991442914099096, "learning_rate": 2.373758322799506e-08, "loss": 0.0049, "step": 20042 }, { "epoch": 4.560409556313993, "grad_norm": 1.6539639956076044, "learning_rate": 2.3713198297062242e-08, "loss": 0.0432, "step": 20043 }, { "epoch": 4.560637087599545, "grad_norm": 0.4190880575712031, "learning_rate": 2.368882565533047e-08, "loss": 0.0019, "step": 20044 }, { "epoch": 4.5608646188850965, "grad_norm": 1.9857004904931224, "learning_rate": 2.3664465303297814e-08, "loss": 0.0759, "step": 20045 }, { "epoch": 4.561092150170649, "grad_norm": 2.3180554110311156, "learning_rate": 2.3640117241462147e-08, "loss": 0.0137, "step": 20046 }, { "epoch": 4.5613196814562, "grad_norm": 1.2070033697291935, "learning_rate": 2.361578147032119e-08, "loss": 0.0044, "step": 20047 }, { "epoch": 4.561547212741752, "grad_norm": 1.3060440877157187, "learning_rate": 2.3591457990372256e-08, "loss": 0.0077, "step": 20048 }, { "epoch": 4.5617747440273035, "grad_norm": 0.9710428075801812, "learning_rate": 2.3567146802112516e-08, "loss": 0.0462, "step": 20049 }, { "epoch": 4.562002275312856, "grad_norm": 2.768593339596923, "learning_rate": 2.3542847906038862e-08, "loss": 0.0075, "step": 20050 }, { "epoch": 4.562229806598407, "grad_norm": 1.2428559511328097, "learning_rate": 2.3518561302647915e-08, "loss": 0.044, "step": 20051 }, { "epoch": 4.562457337883959, "grad_norm": 1.3667236368516797, "learning_rate": 2.3494286992435874e-08, "loss": 0.0754, "step": 20052 }, { "epoch": 4.5626848691695105, "grad_norm": 2.0326313854234708, "learning_rate": 2.347002497589901e-08, "loss": 0.0108, "step": 20053 }, { "epoch": 4.562912400455063, "grad_norm": 1.636913775466193, "learning_rate": 2.344577525353324e-08, "loss": 0.06, "step": 20054 }, { "epoch": 4.563139931740614, "grad_norm": 1.9665699101121812, "learning_rate": 2.3421537825834145e-08, "loss": 0.0177, "step": 20055 }, { "epoch": 4.563367463026166, "grad_norm": 0.8991338780355849, "learning_rate": 2.339731269329709e-08, "loss": 0.0349, "step": 20056 }, { "epoch": 4.563594994311718, "grad_norm": 1.884873866893547, "learning_rate": 2.337309985641717e-08, "loss": 0.0433, "step": 20057 }, { "epoch": 4.56382252559727, "grad_norm": 1.1486485576038767, "learning_rate": 2.334889931568926e-08, "loss": 0.0123, "step": 20058 }, { "epoch": 4.564050056882821, "grad_norm": 1.1465930499059398, "learning_rate": 2.332471107160797e-08, "loss": 0.0498, "step": 20059 }, { "epoch": 4.564277588168373, "grad_norm": 0.9190203325594292, "learning_rate": 2.330053512466762e-08, "loss": 0.0259, "step": 20060 }, { "epoch": 4.564505119453925, "grad_norm": 1.659655137689977, "learning_rate": 2.327637147536241e-08, "loss": 0.0078, "step": 20061 }, { "epoch": 4.564732650739477, "grad_norm": 2.2121441750634245, "learning_rate": 2.3252220124186167e-08, "loss": 0.0712, "step": 20062 }, { "epoch": 4.564960182025028, "grad_norm": 0.32276428099663323, "learning_rate": 2.322808107163253e-08, "loss": 0.0012, "step": 20063 }, { "epoch": 4.56518771331058, "grad_norm": 1.2199703867037188, "learning_rate": 2.320395431819479e-08, "loss": 0.0156, "step": 20064 }, { "epoch": 4.565415244596132, "grad_norm": 0.737038424836874, "learning_rate": 2.3179839864366088e-08, "loss": 0.0023, "step": 20065 }, { "epoch": 4.565642775881684, "grad_norm": 1.2144688300588553, "learning_rate": 2.3155737710639296e-08, "loss": 0.0139, "step": 20066 }, { "epoch": 4.565870307167236, "grad_norm": 1.6404159664006381, "learning_rate": 2.3131647857506935e-08, "loss": 0.0519, "step": 20067 }, { "epoch": 4.566097838452787, "grad_norm": 1.6896775853289774, "learning_rate": 2.310757030546139e-08, "loss": 0.0264, "step": 20068 }, { "epoch": 4.566325369738339, "grad_norm": 0.9534639409796741, "learning_rate": 2.3083505054994902e-08, "loss": 0.0045, "step": 20069 }, { "epoch": 4.566552901023891, "grad_norm": 1.0085663559742621, "learning_rate": 2.305945210659924e-08, "loss": 0.0094, "step": 20070 }, { "epoch": 4.566780432309443, "grad_norm": 1.371863864543547, "learning_rate": 2.303541146076588e-08, "loss": 0.0177, "step": 20071 }, { "epoch": 4.567007963594994, "grad_norm": 9.502743324233276, "learning_rate": 2.301138311798637e-08, "loss": 0.0354, "step": 20072 }, { "epoch": 4.567235494880546, "grad_norm": 1.685132911937134, "learning_rate": 2.2987367078751572e-08, "loss": 0.008, "step": 20073 }, { "epoch": 4.567463026166098, "grad_norm": 1.362374784080879, "learning_rate": 2.296336334355248e-08, "loss": 0.0144, "step": 20074 }, { "epoch": 4.56769055745165, "grad_norm": 1.5014995100040143, "learning_rate": 2.2939371912879747e-08, "loss": 0.0956, "step": 20075 }, { "epoch": 4.567918088737201, "grad_norm": 1.394483453055187, "learning_rate": 2.291539278722353e-08, "loss": 0.0558, "step": 20076 }, { "epoch": 4.5681456200227535, "grad_norm": 1.7787005024128548, "learning_rate": 2.289142596707414e-08, "loss": 0.0644, "step": 20077 }, { "epoch": 4.568373151308305, "grad_norm": 1.0317938728096783, "learning_rate": 2.2867471452921176e-08, "loss": 0.0352, "step": 20078 }, { "epoch": 4.568600682593857, "grad_norm": 0.994996481742895, "learning_rate": 2.284352924525439e-08, "loss": 0.0623, "step": 20079 }, { "epoch": 4.568828213879408, "grad_norm": 1.4971437511565553, "learning_rate": 2.2819599344562974e-08, "loss": 0.0125, "step": 20080 }, { "epoch": 4.5690557451649605, "grad_norm": 1.0293337353005472, "learning_rate": 2.279568175133626e-08, "loss": 0.0053, "step": 20081 }, { "epoch": 4.569283276450512, "grad_norm": 0.891406830228145, "learning_rate": 2.2771776466062877e-08, "loss": 0.0142, "step": 20082 }, { "epoch": 4.569510807736064, "grad_norm": 1.072561113050715, "learning_rate": 2.2747883489231403e-08, "loss": 0.0139, "step": 20083 }, { "epoch": 4.569738339021615, "grad_norm": 1.5674433860007198, "learning_rate": 2.272400282133033e-08, "loss": 0.0281, "step": 20084 }, { "epoch": 4.5699658703071675, "grad_norm": 1.23187247618972, "learning_rate": 2.2700134462847467e-08, "loss": 0.021, "step": 20085 }, { "epoch": 4.570193401592719, "grad_norm": 0.726277796243581, "learning_rate": 2.267627841427089e-08, "loss": 0.0332, "step": 20086 }, { "epoch": 4.570420932878271, "grad_norm": 1.1243879438828182, "learning_rate": 2.2652434676087993e-08, "loss": 0.028, "step": 20087 }, { "epoch": 4.570648464163822, "grad_norm": 1.0113627678839185, "learning_rate": 2.2628603248786295e-08, "loss": 0.0134, "step": 20088 }, { "epoch": 4.5708759954493745, "grad_norm": 1.3155294596559233, "learning_rate": 2.2604784132852774e-08, "loss": 0.0761, "step": 20089 }, { "epoch": 4.571103526734926, "grad_norm": 0.9519448889982745, "learning_rate": 2.2580977328774123e-08, "loss": 0.0556, "step": 20090 }, { "epoch": 4.571331058020478, "grad_norm": 1.406134577721175, "learning_rate": 2.2557182837037105e-08, "loss": 0.0108, "step": 20091 }, { "epoch": 4.571558589306029, "grad_norm": 1.833867498111619, "learning_rate": 2.2533400658127923e-08, "loss": 0.0885, "step": 20092 }, { "epoch": 4.5717861205915815, "grad_norm": 1.3256905487662598, "learning_rate": 2.2509630792532586e-08, "loss": 0.0182, "step": 20093 }, { "epoch": 4.572013651877133, "grad_norm": 1.6619140651280135, "learning_rate": 2.248587324073709e-08, "loss": 0.0397, "step": 20094 }, { "epoch": 4.572241183162685, "grad_norm": 1.2613392968791157, "learning_rate": 2.246212800322681e-08, "loss": 0.0619, "step": 20095 }, { "epoch": 4.572468714448236, "grad_norm": 1.0908160674927967, "learning_rate": 2.2438395080487257e-08, "loss": 0.0275, "step": 20096 }, { "epoch": 4.5726962457337885, "grad_norm": 2.1159138542810094, "learning_rate": 2.241467447300326e-08, "loss": 0.0367, "step": 20097 }, { "epoch": 4.57292377701934, "grad_norm": 1.8616513382056437, "learning_rate": 2.239096618125977e-08, "loss": 0.0176, "step": 20098 }, { "epoch": 4.573151308304892, "grad_norm": 1.4076516839125275, "learning_rate": 2.2367270205741267e-08, "loss": 0.0491, "step": 20099 }, { "epoch": 4.573378839590443, "grad_norm": 1.532085312277107, "learning_rate": 2.2343586546932154e-08, "loss": 0.0245, "step": 20100 }, { "epoch": 4.5736063708759955, "grad_norm": 1.227974687531151, "learning_rate": 2.2319915205316416e-08, "loss": 0.0125, "step": 20101 }, { "epoch": 4.573833902161547, "grad_norm": 2.4387779353591537, "learning_rate": 2.229625618137776e-08, "loss": 0.087, "step": 20102 }, { "epoch": 4.574061433447099, "grad_norm": 0.9773415369956293, "learning_rate": 2.2272609475599975e-08, "loss": 0.0314, "step": 20103 }, { "epoch": 4.57428896473265, "grad_norm": 1.450096175061342, "learning_rate": 2.2248975088466075e-08, "loss": 0.0242, "step": 20104 }, { "epoch": 4.5745164960182025, "grad_norm": 2.2728811045260207, "learning_rate": 2.222535302045935e-08, "loss": 0.1275, "step": 20105 }, { "epoch": 4.574744027303755, "grad_norm": 1.3650644839157087, "learning_rate": 2.2201743272062397e-08, "loss": 0.0384, "step": 20106 }, { "epoch": 4.574971558589306, "grad_norm": 1.0300565396896015, "learning_rate": 2.2178145843757827e-08, "loss": 0.0418, "step": 20107 }, { "epoch": 4.575199089874857, "grad_norm": 1.4884845419247101, "learning_rate": 2.2154560736028023e-08, "loss": 0.0167, "step": 20108 }, { "epoch": 4.5754266211604095, "grad_norm": 1.8852086198771023, "learning_rate": 2.2130987949354892e-08, "loss": 0.0062, "step": 20109 }, { "epoch": 4.575654152445962, "grad_norm": 1.061950519313681, "learning_rate": 2.2107427484220275e-08, "loss": 0.0131, "step": 20110 }, { "epoch": 4.575881683731513, "grad_norm": 0.5867729364536843, "learning_rate": 2.208387934110566e-08, "loss": 0.0029, "step": 20111 }, { "epoch": 4.576109215017064, "grad_norm": 2.465858120577067, "learning_rate": 2.2060343520492466e-08, "loss": 0.0838, "step": 20112 }, { "epoch": 4.5763367463026166, "grad_norm": 0.5697431657776582, "learning_rate": 2.203682002286149e-08, "loss": 0.0108, "step": 20113 }, { "epoch": 4.576564277588169, "grad_norm": 1.6410668853783692, "learning_rate": 2.2013308848693667e-08, "loss": 0.0391, "step": 20114 }, { "epoch": 4.57679180887372, "grad_norm": 1.4838636046088194, "learning_rate": 2.1989809998469586e-08, "loss": 0.0196, "step": 20115 }, { "epoch": 4.577019340159272, "grad_norm": 0.8497955661409928, "learning_rate": 2.1966323472669276e-08, "loss": 0.009, "step": 20116 }, { "epoch": 4.577246871444824, "grad_norm": 1.2164475452745258, "learning_rate": 2.194284927177305e-08, "loss": 0.0533, "step": 20117 }, { "epoch": 4.577474402730376, "grad_norm": 0.7526356147171835, "learning_rate": 2.1919387396260456e-08, "loss": 0.0034, "step": 20118 }, { "epoch": 4.577701934015927, "grad_norm": 0.9885506563956522, "learning_rate": 2.1895937846611177e-08, "loss": 0.0015, "step": 20119 }, { "epoch": 4.577929465301479, "grad_norm": 1.4156117452911179, "learning_rate": 2.1872500623304275e-08, "loss": 0.0352, "step": 20120 }, { "epoch": 4.578156996587031, "grad_norm": 2.275445153292331, "learning_rate": 2.184907572681895e-08, "loss": 0.0477, "step": 20121 }, { "epoch": 4.578384527872583, "grad_norm": 1.0195361433937018, "learning_rate": 2.1825663157633918e-08, "loss": 0.004, "step": 20122 }, { "epoch": 4.578612059158134, "grad_norm": 0.8996069677588255, "learning_rate": 2.180226291622761e-08, "loss": 0.0197, "step": 20123 }, { "epoch": 4.578839590443686, "grad_norm": 0.764027008468774, "learning_rate": 2.1778875003078405e-08, "loss": 0.015, "step": 20124 }, { "epoch": 4.579067121729238, "grad_norm": 0.6688143077271423, "learning_rate": 2.175549941866417e-08, "loss": 0.0085, "step": 20125 }, { "epoch": 4.57929465301479, "grad_norm": 1.1860084153691124, "learning_rate": 2.173213616346287e-08, "loss": 0.032, "step": 20126 }, { "epoch": 4.579522184300341, "grad_norm": 1.13817356231722, "learning_rate": 2.170878523795175e-08, "loss": 0.006, "step": 20127 }, { "epoch": 4.579749715585893, "grad_norm": 2.1855424571843964, "learning_rate": 2.1685446642608214e-08, "loss": 0.0166, "step": 20128 }, { "epoch": 4.579977246871445, "grad_norm": 0.8616927203163444, "learning_rate": 2.166212037790924e-08, "loss": 0.0041, "step": 20129 }, { "epoch": 4.580204778156997, "grad_norm": 1.484351165032366, "learning_rate": 2.1638806444331597e-08, "loss": 0.0319, "step": 20130 }, { "epoch": 4.580432309442548, "grad_norm": 1.3902550246450935, "learning_rate": 2.161550484235171e-08, "loss": 0.0715, "step": 20131 }, { "epoch": 4.5806598407281, "grad_norm": 1.4640526431794807, "learning_rate": 2.1592215572445937e-08, "loss": 0.0238, "step": 20132 }, { "epoch": 4.580887372013652, "grad_norm": 0.7367637921332086, "learning_rate": 2.1568938635090077e-08, "loss": 0.0037, "step": 20133 }, { "epoch": 4.581114903299204, "grad_norm": 1.2304969211519652, "learning_rate": 2.1545674030759993e-08, "loss": 0.0348, "step": 20134 }, { "epoch": 4.581342434584755, "grad_norm": 2.2963428655530986, "learning_rate": 2.1522421759931147e-08, "loss": 0.0309, "step": 20135 }, { "epoch": 4.581569965870307, "grad_norm": 0.9358464634239295, "learning_rate": 2.149918182307885e-08, "loss": 0.0434, "step": 20136 }, { "epoch": 4.581797497155859, "grad_norm": 2.3530829768282366, "learning_rate": 2.147595422067793e-08, "loss": 0.0307, "step": 20137 }, { "epoch": 4.582025028441411, "grad_norm": 0.3986568378814696, "learning_rate": 2.1452738953203352e-08, "loss": 0.0023, "step": 20138 }, { "epoch": 4.582252559726962, "grad_norm": 2.3819871720201378, "learning_rate": 2.142953602112939e-08, "loss": 0.012, "step": 20139 }, { "epoch": 4.582480091012514, "grad_norm": 1.7062278238800768, "learning_rate": 2.1406345424930182e-08, "loss": 0.0121, "step": 20140 }, { "epoch": 4.582707622298066, "grad_norm": 0.8737147094799388, "learning_rate": 2.1383167165080002e-08, "loss": 0.0312, "step": 20141 }, { "epoch": 4.582935153583618, "grad_norm": 1.4518810944962621, "learning_rate": 2.1360001242052285e-08, "loss": 0.0369, "step": 20142 }, { "epoch": 4.583162684869169, "grad_norm": 1.6485570571729506, "learning_rate": 2.1336847656320755e-08, "loss": 0.0424, "step": 20143 }, { "epoch": 4.583390216154721, "grad_norm": 1.329894061123006, "learning_rate": 2.1313706408358432e-08, "loss": 0.0242, "step": 20144 }, { "epoch": 4.5836177474402735, "grad_norm": 1.1700716843619827, "learning_rate": 2.1290577498638416e-08, "loss": 0.0235, "step": 20145 }, { "epoch": 4.583845278725825, "grad_norm": 1.2975904090426476, "learning_rate": 2.1267460927633236e-08, "loss": 0.0293, "step": 20146 }, { "epoch": 4.584072810011376, "grad_norm": 1.4873954283441284, "learning_rate": 2.124435669581551e-08, "loss": 0.0449, "step": 20147 }, { "epoch": 4.584300341296928, "grad_norm": 0.5720312876216407, "learning_rate": 2.1221264803657492e-08, "loss": 0.0033, "step": 20148 }, { "epoch": 4.5845278725824805, "grad_norm": 2.479589101623116, "learning_rate": 2.119818525163096e-08, "loss": 0.0112, "step": 20149 }, { "epoch": 4.584755403868032, "grad_norm": 1.6808008930033553, "learning_rate": 2.117511804020783e-08, "loss": 0.0161, "step": 20150 }, { "epoch": 4.584982935153583, "grad_norm": 2.106835702153732, "learning_rate": 2.115206316985939e-08, "loss": 0.0331, "step": 20151 }, { "epoch": 4.585210466439135, "grad_norm": 1.2708723072606818, "learning_rate": 2.1129020641056792e-08, "loss": 0.0199, "step": 20152 }, { "epoch": 4.5854379977246875, "grad_norm": 1.6917317492151183, "learning_rate": 2.1105990454271112e-08, "loss": 0.0077, "step": 20153 }, { "epoch": 4.585665529010239, "grad_norm": 1.2998399055323873, "learning_rate": 2.1082972609973024e-08, "loss": 0.0116, "step": 20154 }, { "epoch": 4.585893060295791, "grad_norm": 1.0447453386406427, "learning_rate": 2.105996710863298e-08, "loss": 0.0071, "step": 20155 }, { "epoch": 4.586120591581342, "grad_norm": 0.3712390291882569, "learning_rate": 2.103697395072109e-08, "loss": 0.0012, "step": 20156 }, { "epoch": 4.5863481228668945, "grad_norm": 0.9245834011688183, "learning_rate": 2.1013993136707392e-08, "loss": 0.0126, "step": 20157 }, { "epoch": 4.586575654152446, "grad_norm": 1.4728451792866049, "learning_rate": 2.0991024667061584e-08, "loss": 0.0244, "step": 20158 }, { "epoch": 4.586803185437998, "grad_norm": 1.395232973366379, "learning_rate": 2.0968068542252945e-08, "loss": 0.0376, "step": 20159 }, { "epoch": 4.587030716723549, "grad_norm": 1.3277891935513844, "learning_rate": 2.094512476275075e-08, "loss": 0.0473, "step": 20160 }, { "epoch": 4.5872582480091015, "grad_norm": 1.40976034529205, "learning_rate": 2.0922193329023925e-08, "loss": 0.0437, "step": 20161 }, { "epoch": 4.587485779294653, "grad_norm": 0.7065659760648266, "learning_rate": 2.08992742415412e-08, "loss": 0.002, "step": 20162 }, { "epoch": 4.587713310580205, "grad_norm": 1.684689194554055, "learning_rate": 2.0876367500770946e-08, "loss": 0.0686, "step": 20163 }, { "epoch": 4.587940841865756, "grad_norm": 1.2817547893530346, "learning_rate": 2.0853473107181332e-08, "loss": 0.0101, "step": 20164 }, { "epoch": 4.5881683731513085, "grad_norm": 2.1052065682187306, "learning_rate": 2.0830591061240315e-08, "loss": 0.0192, "step": 20165 }, { "epoch": 4.58839590443686, "grad_norm": 1.7202482928236542, "learning_rate": 2.080772136341551e-08, "loss": 0.0883, "step": 20166 }, { "epoch": 4.588623435722412, "grad_norm": 2.9441772382432663, "learning_rate": 2.0784864014174316e-08, "loss": 0.0269, "step": 20167 }, { "epoch": 4.588850967007963, "grad_norm": 1.973242749375292, "learning_rate": 2.0762019013983932e-08, "loss": 0.0949, "step": 20168 }, { "epoch": 4.5890784982935156, "grad_norm": 0.8415115762958845, "learning_rate": 2.0739186363311348e-08, "loss": 0.0158, "step": 20169 }, { "epoch": 4.589306029579067, "grad_norm": 0.8366065415986356, "learning_rate": 2.071636606262313e-08, "loss": 0.0024, "step": 20170 }, { "epoch": 4.589533560864619, "grad_norm": 0.9107917757537783, "learning_rate": 2.0693558112385646e-08, "loss": 0.0067, "step": 20171 }, { "epoch": 4.58976109215017, "grad_norm": 1.2931656197894403, "learning_rate": 2.0670762513065115e-08, "loss": 0.0127, "step": 20172 }, { "epoch": 4.589988623435723, "grad_norm": 0.24189002846850283, "learning_rate": 2.0647979265127418e-08, "loss": 0.0007, "step": 20173 }, { "epoch": 4.590216154721274, "grad_norm": 1.0774634369098057, "learning_rate": 2.0625208369038155e-08, "loss": 0.0131, "step": 20174 }, { "epoch": 4.590443686006826, "grad_norm": 0.6566573558633617, "learning_rate": 2.0602449825262856e-08, "loss": 0.0065, "step": 20175 }, { "epoch": 4.590671217292377, "grad_norm": 1.2680490896789163, "learning_rate": 2.0579703634266563e-08, "loss": 0.0135, "step": 20176 }, { "epoch": 4.59089874857793, "grad_norm": 0.9558990131925489, "learning_rate": 2.0556969796514185e-08, "loss": 0.0128, "step": 20177 }, { "epoch": 4.591126279863481, "grad_norm": 1.0628553783079395, "learning_rate": 2.053424831247035e-08, "loss": 0.0598, "step": 20178 }, { "epoch": 4.591353811149033, "grad_norm": 1.3470861719011753, "learning_rate": 2.0511539182599476e-08, "loss": 0.0111, "step": 20179 }, { "epoch": 4.591581342434584, "grad_norm": 1.3511849581122806, "learning_rate": 2.0488842407365635e-08, "loss": 0.0436, "step": 20180 }, { "epoch": 4.591808873720137, "grad_norm": 1.7035461220168366, "learning_rate": 2.04661579872327e-08, "loss": 0.0602, "step": 20181 }, { "epoch": 4.592036405005688, "grad_norm": 1.2868043426476923, "learning_rate": 2.044348592266446e-08, "loss": 0.0296, "step": 20182 }, { "epoch": 4.59226393629124, "grad_norm": 1.408856535977402, "learning_rate": 2.0420826214124085e-08, "loss": 0.0148, "step": 20183 }, { "epoch": 4.592491467576792, "grad_norm": 0.9167664149707335, "learning_rate": 2.0398178862074888e-08, "loss": 0.0263, "step": 20184 }, { "epoch": 4.592718998862344, "grad_norm": 1.0371298505335702, "learning_rate": 2.0375543866979554e-08, "loss": 0.0507, "step": 20185 }, { "epoch": 4.592946530147895, "grad_norm": 2.48116282713305, "learning_rate": 2.035292122930084e-08, "loss": 0.0218, "step": 20186 }, { "epoch": 4.593174061433447, "grad_norm": 0.6656986634545853, "learning_rate": 2.0330310949500942e-08, "loss": 0.0359, "step": 20187 }, { "epoch": 4.593401592718999, "grad_norm": 2.3354664271672454, "learning_rate": 2.03077130280422e-08, "loss": 0.0554, "step": 20188 }, { "epoch": 4.593629124004551, "grad_norm": 1.8919478880618315, "learning_rate": 2.0285127465386328e-08, "loss": 0.0961, "step": 20189 }, { "epoch": 4.593856655290102, "grad_norm": 1.128533768263543, "learning_rate": 2.026255426199497e-08, "loss": 0.0125, "step": 20190 }, { "epoch": 4.594084186575654, "grad_norm": 2.226758457062006, "learning_rate": 2.0239993418329494e-08, "loss": 0.0442, "step": 20191 }, { "epoch": 4.594311717861206, "grad_norm": 1.0482612749242144, "learning_rate": 2.0217444934850922e-08, "loss": 0.0085, "step": 20192 }, { "epoch": 4.594539249146758, "grad_norm": 1.168065125491336, "learning_rate": 2.0194908812020268e-08, "loss": 0.0559, "step": 20193 }, { "epoch": 4.59476678043231, "grad_norm": 0.6823524798755561, "learning_rate": 2.0172385050297935e-08, "loss": 0.0047, "step": 20194 }, { "epoch": 4.594994311717861, "grad_norm": 1.3575170165602077, "learning_rate": 2.0149873650144385e-08, "loss": 0.0406, "step": 20195 }, { "epoch": 4.595221843003413, "grad_norm": 1.7626492799413973, "learning_rate": 2.01273746120198e-08, "loss": 0.0863, "step": 20196 }, { "epoch": 4.595449374288965, "grad_norm": 0.6786885784471796, "learning_rate": 2.010488793638375e-08, "loss": 0.0039, "step": 20197 }, { "epoch": 4.595676905574517, "grad_norm": 1.0712439683713155, "learning_rate": 2.0082413623696075e-08, "loss": 0.0148, "step": 20198 }, { "epoch": 4.595904436860068, "grad_norm": 1.2161837545549314, "learning_rate": 2.005995167441599e-08, "loss": 0.0562, "step": 20199 }, { "epoch": 4.59613196814562, "grad_norm": 1.2606044680610347, "learning_rate": 2.003750208900257e-08, "loss": 0.0444, "step": 20200 }, { "epoch": 4.596359499431172, "grad_norm": 7.396577690565787, "learning_rate": 2.0015064867914686e-08, "loss": 0.0746, "step": 20201 }, { "epoch": 4.596587030716724, "grad_norm": 6.038700801020936, "learning_rate": 1.999264001161086e-08, "loss": 0.0343, "step": 20202 }, { "epoch": 4.596814562002275, "grad_norm": 1.004354576372937, "learning_rate": 1.9970227520549543e-08, "loss": 0.0142, "step": 20203 }, { "epoch": 4.597042093287827, "grad_norm": 1.0455892231770687, "learning_rate": 1.9947827395188633e-08, "loss": 0.0403, "step": 20204 }, { "epoch": 4.597269624573379, "grad_norm": 1.3001361521056036, "learning_rate": 1.99254396359861e-08, "loss": 0.0471, "step": 20205 }, { "epoch": 4.597497155858931, "grad_norm": 2.193772126115941, "learning_rate": 1.9903064243399355e-08, "loss": 0.0789, "step": 20206 }, { "epoch": 4.597724687144482, "grad_norm": 1.892359164740806, "learning_rate": 1.9880701217885807e-08, "loss": 0.0356, "step": 20207 }, { "epoch": 4.597952218430034, "grad_norm": 1.3160032270349356, "learning_rate": 1.9858350559902596e-08, "loss": 0.0856, "step": 20208 }, { "epoch": 4.598179749715586, "grad_norm": 1.032511694791164, "learning_rate": 1.9836012269906364e-08, "loss": 0.0017, "step": 20209 }, { "epoch": 4.598407281001138, "grad_norm": 0.3844243854618859, "learning_rate": 1.981368634835376e-08, "loss": 0.0028, "step": 20210 }, { "epoch": 4.598634812286689, "grad_norm": 0.7016993458040638, "learning_rate": 1.979137279570102e-08, "loss": 0.0027, "step": 20211 }, { "epoch": 4.598862343572241, "grad_norm": 2.0826870959749635, "learning_rate": 1.9769071612404307e-08, "loss": 0.0376, "step": 20212 }, { "epoch": 4.599089874857793, "grad_norm": 2.000478586603753, "learning_rate": 1.9746782798919227e-08, "loss": 0.0119, "step": 20213 }, { "epoch": 4.599317406143345, "grad_norm": 0.9591411024729438, "learning_rate": 1.9724506355701528e-08, "loss": 0.0132, "step": 20214 }, { "epoch": 4.599544937428896, "grad_norm": 0.5252522904225752, "learning_rate": 1.97022422832064e-08, "loss": 0.0028, "step": 20215 }, { "epoch": 4.599772468714448, "grad_norm": 1.4636468049344398, "learning_rate": 1.9679990581888826e-08, "loss": 0.0745, "step": 20216 }, { "epoch": 4.6, "grad_norm": 0.9758307126848046, "learning_rate": 1.965775125220379e-08, "loss": 0.0035, "step": 20217 }, { "epoch": 4.600227531285552, "grad_norm": 0.7256975573140327, "learning_rate": 1.9635524294605578e-08, "loss": 0.0055, "step": 20218 }, { "epoch": 4.600455062571103, "grad_norm": 1.3179720343587606, "learning_rate": 1.9613309709548625e-08, "loss": 0.0784, "step": 20219 }, { "epoch": 4.600682593856655, "grad_norm": 0.894690287348951, "learning_rate": 1.959110749748687e-08, "loss": 0.0064, "step": 20220 }, { "epoch": 4.600910125142207, "grad_norm": 1.3045154119729205, "learning_rate": 1.9568917658874115e-08, "loss": 0.114, "step": 20221 }, { "epoch": 4.601137656427759, "grad_norm": 0.8755273478587218, "learning_rate": 1.9546740194163886e-08, "loss": 0.031, "step": 20222 }, { "epoch": 4.601365187713311, "grad_norm": 0.7083406736562735, "learning_rate": 1.9524575103809507e-08, "loss": 0.0052, "step": 20223 }, { "epoch": 4.601592718998862, "grad_norm": 1.9016269491308213, "learning_rate": 1.9502422388263942e-08, "loss": 0.0964, "step": 20224 }, { "epoch": 4.601820250284414, "grad_norm": 0.8636209589483659, "learning_rate": 1.9480282047979818e-08, "loss": 0.0184, "step": 20225 }, { "epoch": 4.602047781569966, "grad_norm": 1.074859997155014, "learning_rate": 1.9458154083409965e-08, "loss": 0.0065, "step": 20226 }, { "epoch": 4.602275312855518, "grad_norm": 0.6987767727371703, "learning_rate": 1.9436038495006317e-08, "loss": 0.0046, "step": 20227 }, { "epoch": 4.602502844141069, "grad_norm": 1.424645476766737, "learning_rate": 1.941393528322094e-08, "loss": 0.024, "step": 20228 }, { "epoch": 4.602730375426621, "grad_norm": 1.616782620457105, "learning_rate": 1.9391844448505837e-08, "loss": 0.065, "step": 20229 }, { "epoch": 4.602957906712173, "grad_norm": 1.3502810960845881, "learning_rate": 1.936976599131217e-08, "loss": 0.0377, "step": 20230 }, { "epoch": 4.603185437997725, "grad_norm": 4.174691601320549, "learning_rate": 1.9347699912091386e-08, "loss": 0.0801, "step": 20231 }, { "epoch": 4.603412969283276, "grad_norm": 1.6069781068036202, "learning_rate": 1.9325646211294374e-08, "loss": 0.0461, "step": 20232 }, { "epoch": 4.603640500568829, "grad_norm": 2.337825159655781, "learning_rate": 1.9303604889372023e-08, "loss": 0.0117, "step": 20233 }, { "epoch": 4.60386803185438, "grad_norm": 1.2412643421291878, "learning_rate": 1.92815759467746e-08, "loss": 0.0158, "step": 20234 }, { "epoch": 4.604095563139932, "grad_norm": 0.2575865228047124, "learning_rate": 1.9259559383952435e-08, "loss": 0.0009, "step": 20235 }, { "epoch": 4.604323094425483, "grad_norm": 0.7360249731473482, "learning_rate": 1.923755520135559e-08, "loss": 0.0045, "step": 20236 }, { "epoch": 4.604550625711036, "grad_norm": 0.6777879184268576, "learning_rate": 1.921556339943363e-08, "loss": 0.0027, "step": 20237 }, { "epoch": 4.604778156996587, "grad_norm": 0.6661983314700621, "learning_rate": 1.91935839786362e-08, "loss": 0.0045, "step": 20238 }, { "epoch": 4.605005688282139, "grad_norm": 1.567946454793238, "learning_rate": 1.9171616939412456e-08, "loss": 0.0236, "step": 20239 }, { "epoch": 4.60523321956769, "grad_norm": 1.0760522115108737, "learning_rate": 1.91496622822112e-08, "loss": 0.0276, "step": 20240 }, { "epoch": 4.605460750853243, "grad_norm": 1.4176269158753987, "learning_rate": 1.912772000748131e-08, "loss": 0.0274, "step": 20241 }, { "epoch": 4.605688282138794, "grad_norm": 0.7590736141369094, "learning_rate": 1.9105790115671248e-08, "loss": 0.0028, "step": 20242 }, { "epoch": 4.605915813424346, "grad_norm": 2.81514960594143, "learning_rate": 1.908387260722927e-08, "loss": 0.0471, "step": 20243 }, { "epoch": 4.606143344709897, "grad_norm": 0.8068875572877567, "learning_rate": 1.9061967482603138e-08, "loss": 0.0049, "step": 20244 }, { "epoch": 4.60637087599545, "grad_norm": 1.8771546738327722, "learning_rate": 1.904007474224083e-08, "loss": 0.0918, "step": 20245 }, { "epoch": 4.606598407281001, "grad_norm": 0.8342262089187997, "learning_rate": 1.9018194386589557e-08, "loss": 0.007, "step": 20246 }, { "epoch": 4.606825938566553, "grad_norm": 1.860184623708522, "learning_rate": 1.8996326416096526e-08, "loss": 0.022, "step": 20247 }, { "epoch": 4.607053469852104, "grad_norm": 0.6580906547673978, "learning_rate": 1.897447083120875e-08, "loss": 0.0033, "step": 20248 }, { "epoch": 4.607281001137657, "grad_norm": 1.1484330220728498, "learning_rate": 1.8952627632372876e-08, "loss": 0.0089, "step": 20249 }, { "epoch": 4.607508532423208, "grad_norm": 1.1920646038326963, "learning_rate": 1.893079682003543e-08, "loss": 0.029, "step": 20250 }, { "epoch": 4.60773606370876, "grad_norm": 1.313237336519023, "learning_rate": 1.8908978394642507e-08, "loss": 0.0502, "step": 20251 }, { "epoch": 4.607963594994311, "grad_norm": 1.1390027297584322, "learning_rate": 1.8887172356640143e-08, "loss": 0.0386, "step": 20252 }, { "epoch": 4.608191126279864, "grad_norm": 2.193720426264593, "learning_rate": 1.8865378706473883e-08, "loss": 0.0621, "step": 20253 }, { "epoch": 4.608418657565415, "grad_norm": 0.9104445238825039, "learning_rate": 1.8843597444589205e-08, "loss": 0.0082, "step": 20254 }, { "epoch": 4.608646188850967, "grad_norm": 1.8121453695973722, "learning_rate": 1.8821828571431167e-08, "loss": 0.0029, "step": 20255 }, { "epoch": 4.608873720136518, "grad_norm": 1.2577759594545983, "learning_rate": 1.88000720874449e-08, "loss": 0.0395, "step": 20256 }, { "epoch": 4.609101251422071, "grad_norm": 1.4203991452492686, "learning_rate": 1.8778327993074983e-08, "loss": 0.0484, "step": 20257 }, { "epoch": 4.609328782707622, "grad_norm": 1.693748780955342, "learning_rate": 1.8756596288765778e-08, "loss": 0.1057, "step": 20258 }, { "epoch": 4.609556313993174, "grad_norm": 1.2927949646999712, "learning_rate": 1.8734876974961374e-08, "loss": 0.0696, "step": 20259 }, { "epoch": 4.609783845278725, "grad_norm": 2.1690369155959726, "learning_rate": 1.8713170052105865e-08, "loss": 0.0492, "step": 20260 }, { "epoch": 4.610011376564278, "grad_norm": 0.7237962456592902, "learning_rate": 1.869147552064264e-08, "loss": 0.0321, "step": 20261 }, { "epoch": 4.61023890784983, "grad_norm": 0.20335381542768388, "learning_rate": 1.8669793381015445e-08, "loss": 0.0008, "step": 20262 }, { "epoch": 4.610466439135381, "grad_norm": 1.365842845544675, "learning_rate": 1.8648123633667123e-08, "loss": 0.0572, "step": 20263 }, { "epoch": 4.610693970420932, "grad_norm": 1.2918647057152906, "learning_rate": 1.8626466279040718e-08, "loss": 0.1109, "step": 20264 }, { "epoch": 4.610921501706485, "grad_norm": 0.6886441955310454, "learning_rate": 1.860482131757886e-08, "loss": 0.0056, "step": 20265 }, { "epoch": 4.611149032992037, "grad_norm": 0.9731453212537992, "learning_rate": 1.858318874972384e-08, "loss": 0.0106, "step": 20266 }, { "epoch": 4.611376564277588, "grad_norm": 1.2730256821831325, "learning_rate": 1.85615685759178e-08, "loss": 0.0304, "step": 20267 }, { "epoch": 4.611604095563139, "grad_norm": 1.753337315810507, "learning_rate": 1.8539960796602683e-08, "loss": 0.0332, "step": 20268 }, { "epoch": 4.611831626848692, "grad_norm": 0.9595323614895793, "learning_rate": 1.8518365412220142e-08, "loss": 0.0075, "step": 20269 }, { "epoch": 4.612059158134244, "grad_norm": 0.5274111946144177, "learning_rate": 1.849678242321143e-08, "loss": 0.0019, "step": 20270 }, { "epoch": 4.612286689419795, "grad_norm": 1.2772581215210015, "learning_rate": 1.8475211830017782e-08, "loss": 0.089, "step": 20271 }, { "epoch": 4.612514220705347, "grad_norm": 1.3485228980544077, "learning_rate": 1.8453653633080034e-08, "loss": 0.0367, "step": 20272 }, { "epoch": 4.612741751990899, "grad_norm": 1.1197731887526918, "learning_rate": 1.8432107832838733e-08, "loss": 0.0061, "step": 20273 }, { "epoch": 4.612969283276451, "grad_norm": 1.737600530687206, "learning_rate": 1.841057442973422e-08, "loss": 0.021, "step": 20274 }, { "epoch": 4.613196814562002, "grad_norm": 1.1836482528343595, "learning_rate": 1.838905342420663e-08, "loss": 0.0282, "step": 20275 }, { "epoch": 4.613424345847554, "grad_norm": 0.7385832605248897, "learning_rate": 1.8367544816695962e-08, "loss": 0.0024, "step": 20276 }, { "epoch": 4.613651877133106, "grad_norm": 0.9061217195777433, "learning_rate": 1.834604860764165e-08, "loss": 0.0062, "step": 20277 }, { "epoch": 4.613879408418658, "grad_norm": 0.6324657591202668, "learning_rate": 1.8324564797483e-08, "loss": 0.0035, "step": 20278 }, { "epoch": 4.614106939704209, "grad_norm": 1.447117621744514, "learning_rate": 1.830309338665924e-08, "loss": 0.0624, "step": 20279 }, { "epoch": 4.614334470989761, "grad_norm": 1.7345105566507997, "learning_rate": 1.828163437560905e-08, "loss": 0.0612, "step": 20280 }, { "epoch": 4.614562002275313, "grad_norm": 2.40528551207555, "learning_rate": 1.8260187764771105e-08, "loss": 0.0858, "step": 20281 }, { "epoch": 4.614789533560865, "grad_norm": 1.9026663322576105, "learning_rate": 1.8238753554583737e-08, "loss": 0.0096, "step": 20282 }, { "epoch": 4.615017064846416, "grad_norm": 1.1387291789599399, "learning_rate": 1.8217331745485138e-08, "loss": 0.0384, "step": 20283 }, { "epoch": 4.615244596131968, "grad_norm": 1.2729105540359442, "learning_rate": 1.819592233791294e-08, "loss": 0.0211, "step": 20284 }, { "epoch": 4.61547212741752, "grad_norm": 1.513323534024446, "learning_rate": 1.817452533230471e-08, "loss": 0.0055, "step": 20285 }, { "epoch": 4.615699658703072, "grad_norm": 1.4689639583457967, "learning_rate": 1.8153140729097948e-08, "loss": 0.0509, "step": 20286 }, { "epoch": 4.615927189988623, "grad_norm": 0.6742272980857721, "learning_rate": 1.8131768528729525e-08, "loss": 0.0495, "step": 20287 }, { "epoch": 4.616154721274175, "grad_norm": 0.7987390591550583, "learning_rate": 1.8110408731636318e-08, "loss": 0.0149, "step": 20288 }, { "epoch": 4.616382252559727, "grad_norm": 1.1355927928439218, "learning_rate": 1.8089061338254915e-08, "loss": 0.0152, "step": 20289 }, { "epoch": 4.616609783845279, "grad_norm": 0.8950875183765491, "learning_rate": 1.806772634902164e-08, "loss": 0.0092, "step": 20290 }, { "epoch": 4.61683731513083, "grad_norm": 0.8432397183971027, "learning_rate": 1.804640376437246e-08, "loss": 0.0123, "step": 20291 }, { "epoch": 4.617064846416382, "grad_norm": 0.506592870878432, "learning_rate": 1.8025093584743138e-08, "loss": 0.0021, "step": 20292 }, { "epoch": 4.617292377701934, "grad_norm": 1.9144366408737603, "learning_rate": 1.8003795810569434e-08, "loss": 0.051, "step": 20293 }, { "epoch": 4.617519908987486, "grad_norm": 0.7197256662614385, "learning_rate": 1.7982510442286355e-08, "loss": 0.0073, "step": 20294 }, { "epoch": 4.617747440273037, "grad_norm": 0.8915000854013267, "learning_rate": 1.7961237480329032e-08, "loss": 0.0179, "step": 20295 }, { "epoch": 4.617974971558589, "grad_norm": 2.294394743752931, "learning_rate": 1.79399769251324e-08, "loss": 0.0272, "step": 20296 }, { "epoch": 4.618202502844141, "grad_norm": 0.8237903317677373, "learning_rate": 1.791872877713076e-08, "loss": 0.0097, "step": 20297 }, { "epoch": 4.618430034129693, "grad_norm": 1.31139187973706, "learning_rate": 1.7897493036758556e-08, "loss": 0.0185, "step": 20298 }, { "epoch": 4.618657565415244, "grad_norm": 1.195970946904256, "learning_rate": 1.787626970444968e-08, "loss": 0.0457, "step": 20299 }, { "epoch": 4.618885096700796, "grad_norm": 0.5949554818236524, "learning_rate": 1.785505878063802e-08, "loss": 0.0141, "step": 20300 }, { "epoch": 4.619112627986349, "grad_norm": 1.26113883458098, "learning_rate": 1.783386026575698e-08, "loss": 0.0595, "step": 20301 }, { "epoch": 4.6193401592719, "grad_norm": 0.5743398149252963, "learning_rate": 1.7812674160239824e-08, "loss": 0.0023, "step": 20302 }, { "epoch": 4.619567690557451, "grad_norm": 0.0892098782858985, "learning_rate": 1.7791500464519675e-08, "loss": 0.0002, "step": 20303 }, { "epoch": 4.619795221843003, "grad_norm": 0.5668343607512084, "learning_rate": 1.7770339179029177e-08, "loss": 0.0032, "step": 20304 }, { "epoch": 4.620022753128556, "grad_norm": 1.1522968688605901, "learning_rate": 1.77491903042009e-08, "loss": 0.018, "step": 20305 }, { "epoch": 4.620250284414107, "grad_norm": 1.5232546487375582, "learning_rate": 1.7728053840466998e-08, "loss": 0.0551, "step": 20306 }, { "epoch": 4.620477815699658, "grad_norm": 1.8331944950712529, "learning_rate": 1.7706929788259553e-08, "loss": 0.0181, "step": 20307 }, { "epoch": 4.62070534698521, "grad_norm": 1.4870825308724813, "learning_rate": 1.768581814801017e-08, "loss": 0.0143, "step": 20308 }, { "epoch": 4.620932878270763, "grad_norm": 0.9769206808832736, "learning_rate": 1.766471892015044e-08, "loss": 0.0083, "step": 20309 }, { "epoch": 4.621160409556314, "grad_norm": 1.9610227067061932, "learning_rate": 1.764363210511169e-08, "loss": 0.0283, "step": 20310 }, { "epoch": 4.621387940841866, "grad_norm": 1.0145436154446759, "learning_rate": 1.762255770332462e-08, "loss": 0.0186, "step": 20311 }, { "epoch": 4.621615472127417, "grad_norm": 1.8924987348504783, "learning_rate": 1.760149571522027e-08, "loss": 0.1197, "step": 20312 }, { "epoch": 4.62184300341297, "grad_norm": 1.2571014046599112, "learning_rate": 1.758044614122878e-08, "loss": 0.0798, "step": 20313 }, { "epoch": 4.622070534698521, "grad_norm": 1.7767075996160393, "learning_rate": 1.7559408981780712e-08, "loss": 0.1181, "step": 20314 }, { "epoch": 4.622298065984073, "grad_norm": 1.185203864012537, "learning_rate": 1.753838423730572e-08, "loss": 0.0283, "step": 20315 }, { "epoch": 4.622525597269624, "grad_norm": 1.2848885516531645, "learning_rate": 1.7517371908233675e-08, "loss": 0.0801, "step": 20316 }, { "epoch": 4.622753128555177, "grad_norm": 1.8348344710209867, "learning_rate": 1.7496371994994015e-08, "loss": 0.0407, "step": 20317 }, { "epoch": 4.622980659840728, "grad_norm": 1.0311318408536487, "learning_rate": 1.747538449801585e-08, "loss": 0.0305, "step": 20318 }, { "epoch": 4.62320819112628, "grad_norm": 0.8853667870530915, "learning_rate": 1.7454409417728343e-08, "loss": 0.0109, "step": 20319 }, { "epoch": 4.623435722411831, "grad_norm": 0.3450508287748267, "learning_rate": 1.7433446754559907e-08, "loss": 0.0006, "step": 20320 }, { "epoch": 4.623663253697384, "grad_norm": 2.091014504885287, "learning_rate": 1.7412496508939154e-08, "loss": 0.1003, "step": 20321 }, { "epoch": 4.623890784982935, "grad_norm": 2.287054049421027, "learning_rate": 1.7391558681294218e-08, "loss": 0.0244, "step": 20322 }, { "epoch": 4.624118316268487, "grad_norm": 1.2389133428751276, "learning_rate": 1.7370633272053016e-08, "loss": 0.0202, "step": 20323 }, { "epoch": 4.624345847554038, "grad_norm": 2.6161036360013465, "learning_rate": 1.7349720281643335e-08, "loss": 0.0629, "step": 20324 }, { "epoch": 4.624573378839591, "grad_norm": 0.8170009427130016, "learning_rate": 1.7328819710492468e-08, "loss": 0.0369, "step": 20325 }, { "epoch": 4.624800910125142, "grad_norm": 3.0179839510733886, "learning_rate": 1.7307931559027643e-08, "loss": 0.041, "step": 20326 }, { "epoch": 4.625028441410694, "grad_norm": 0.8288991746215782, "learning_rate": 1.7287055827675815e-08, "loss": 0.063, "step": 20327 }, { "epoch": 4.625255972696245, "grad_norm": 0.40668107150721355, "learning_rate": 1.726619251686337e-08, "loss": 0.0021, "step": 20328 }, { "epoch": 4.625483503981798, "grad_norm": 1.4724938073787162, "learning_rate": 1.724534162701713e-08, "loss": 0.0191, "step": 20329 }, { "epoch": 4.625711035267349, "grad_norm": 1.200584827935912, "learning_rate": 1.7224503158563e-08, "loss": 0.0232, "step": 20330 }, { "epoch": 4.625938566552901, "grad_norm": 1.6317958135153767, "learning_rate": 1.7203677111927026e-08, "loss": 0.0243, "step": 20331 }, { "epoch": 4.6261660978384525, "grad_norm": 1.0052801374754963, "learning_rate": 1.7182863487534633e-08, "loss": 0.0547, "step": 20332 }, { "epoch": 4.626393629124005, "grad_norm": 1.4710834855082127, "learning_rate": 1.716206228581145e-08, "loss": 0.0745, "step": 20333 }, { "epoch": 4.626621160409556, "grad_norm": 1.2607393934919866, "learning_rate": 1.714127350718249e-08, "loss": 0.0139, "step": 20334 }, { "epoch": 4.626848691695108, "grad_norm": 1.4877240969773626, "learning_rate": 1.712049715207255e-08, "loss": 0.0126, "step": 20335 }, { "epoch": 4.6270762229806595, "grad_norm": 0.8012303834636113, "learning_rate": 1.7099733220906496e-08, "loss": 0.0164, "step": 20336 }, { "epoch": 4.627303754266212, "grad_norm": 2.041117129166798, "learning_rate": 1.7078981714108574e-08, "loss": 0.0591, "step": 20337 }, { "epoch": 4.627531285551763, "grad_norm": 0.9479655108762636, "learning_rate": 1.7058242632102887e-08, "loss": 0.0119, "step": 20338 }, { "epoch": 4.627758816837315, "grad_norm": 0.341257982196031, "learning_rate": 1.7037515975313266e-08, "loss": 0.0021, "step": 20339 }, { "epoch": 4.627986348122867, "grad_norm": 1.9311290256707268, "learning_rate": 1.7016801744163465e-08, "loss": 0.0311, "step": 20340 }, { "epoch": 4.628213879408419, "grad_norm": 1.3775646565623623, "learning_rate": 1.6996099939076762e-08, "loss": 0.0239, "step": 20341 }, { "epoch": 4.62844141069397, "grad_norm": 1.3360514868709725, "learning_rate": 1.697541056047622e-08, "loss": 0.0479, "step": 20342 }, { "epoch": 4.628668941979522, "grad_norm": 1.2679959485847658, "learning_rate": 1.695473360878476e-08, "loss": 0.0564, "step": 20343 }, { "epoch": 4.628896473265074, "grad_norm": 1.843921158761617, "learning_rate": 1.6934069084424965e-08, "loss": 0.0433, "step": 20344 }, { "epoch": 4.629124004550626, "grad_norm": 2.371365599515255, "learning_rate": 1.6913416987819204e-08, "loss": 0.0414, "step": 20345 }, { "epoch": 4.629351535836177, "grad_norm": 1.4366310737357815, "learning_rate": 1.6892777319389568e-08, "loss": 0.0941, "step": 20346 }, { "epoch": 4.629579067121729, "grad_norm": 0.8872152533808919, "learning_rate": 1.687215007955781e-08, "loss": 0.0338, "step": 20347 }, { "epoch": 4.629806598407281, "grad_norm": 1.5065043361140933, "learning_rate": 1.6851535268745596e-08, "loss": 0.0599, "step": 20348 }, { "epoch": 4.630034129692833, "grad_norm": 1.7030898402247268, "learning_rate": 1.6830932887374127e-08, "loss": 0.0981, "step": 20349 }, { "epoch": 4.630261660978385, "grad_norm": 1.0171277291722705, "learning_rate": 1.6810342935864726e-08, "loss": 0.0422, "step": 20350 }, { "epoch": 4.630489192263936, "grad_norm": 1.6888028570992077, "learning_rate": 1.678976541463803e-08, "loss": 0.0565, "step": 20351 }, { "epoch": 4.630716723549488, "grad_norm": 1.5243455741711323, "learning_rate": 1.676920032411461e-08, "loss": 0.0832, "step": 20352 }, { "epoch": 4.63094425483504, "grad_norm": 1.3085604800856403, "learning_rate": 1.6748647664714886e-08, "loss": 0.0162, "step": 20353 }, { "epoch": 4.631171786120592, "grad_norm": 0.8227930742701753, "learning_rate": 1.6728107436858808e-08, "loss": 0.0092, "step": 20354 }, { "epoch": 4.631399317406143, "grad_norm": 0.765899594551416, "learning_rate": 1.670757964096617e-08, "loss": 0.0067, "step": 20355 }, { "epoch": 4.631626848691695, "grad_norm": 0.8310540896285742, "learning_rate": 1.6687064277456574e-08, "loss": 0.0102, "step": 20356 }, { "epoch": 4.631854379977247, "grad_norm": 0.993322320078622, "learning_rate": 1.6666561346749337e-08, "loss": 0.0769, "step": 20357 }, { "epoch": 4.632081911262799, "grad_norm": 1.0819088593111357, "learning_rate": 1.6646070849263425e-08, "loss": 0.0133, "step": 20358 }, { "epoch": 4.63230944254835, "grad_norm": 1.6306164771161318, "learning_rate": 1.662559278541781e-08, "loss": 0.0456, "step": 20359 }, { "epoch": 4.632536973833902, "grad_norm": 1.6252924397317596, "learning_rate": 1.660512715563084e-08, "loss": 0.0341, "step": 20360 }, { "epoch": 4.632764505119454, "grad_norm": 1.247946415368116, "learning_rate": 1.658467396032079e-08, "loss": 0.0181, "step": 20361 }, { "epoch": 4.632992036405006, "grad_norm": 1.395079741824165, "learning_rate": 1.656423319990573e-08, "loss": 0.0488, "step": 20362 }, { "epoch": 4.633219567690557, "grad_norm": 1.4497615990782033, "learning_rate": 1.6543804874803448e-08, "loss": 0.0121, "step": 20363 }, { "epoch": 4.633447098976109, "grad_norm": 0.8161029202004823, "learning_rate": 1.652338898543153e-08, "loss": 0.0542, "step": 20364 }, { "epoch": 4.633674630261661, "grad_norm": 0.813171974676329, "learning_rate": 1.650298553220721e-08, "loss": 0.0057, "step": 20365 }, { "epoch": 4.633902161547213, "grad_norm": 0.985029786177951, "learning_rate": 1.6482594515547373e-08, "loss": 0.0341, "step": 20366 }, { "epoch": 4.634129692832764, "grad_norm": 2.0750989945114333, "learning_rate": 1.6462215935868914e-08, "loss": 0.0637, "step": 20367 }, { "epoch": 4.634357224118316, "grad_norm": 1.4792240929681866, "learning_rate": 1.6441849793588163e-08, "loss": 0.0163, "step": 20368 }, { "epoch": 4.634584755403868, "grad_norm": 3.1866138038329352, "learning_rate": 1.6421496089121526e-08, "loss": 0.016, "step": 20369 }, { "epoch": 4.63481228668942, "grad_norm": 0.747099040782804, "learning_rate": 1.6401154822884986e-08, "loss": 0.0032, "step": 20370 }, { "epoch": 4.635039817974971, "grad_norm": 0.9271150108858898, "learning_rate": 1.6380825995294253e-08, "loss": 0.0085, "step": 20371 }, { "epoch": 4.635267349260523, "grad_norm": 1.4341336849084552, "learning_rate": 1.6360509606764828e-08, "loss": 0.0153, "step": 20372 }, { "epoch": 4.635494880546075, "grad_norm": 1.6868693923197167, "learning_rate": 1.63402056577118e-08, "loss": 0.0059, "step": 20373 }, { "epoch": 4.635722411831627, "grad_norm": 1.5113081427481199, "learning_rate": 1.6319914148550387e-08, "loss": 0.0096, "step": 20374 }, { "epoch": 4.635949943117178, "grad_norm": 0.8401442207625389, "learning_rate": 1.629963507969512e-08, "loss": 0.003, "step": 20375 }, { "epoch": 4.63617747440273, "grad_norm": 0.5544302114576033, "learning_rate": 1.6279368451560528e-08, "loss": 0.0022, "step": 20376 }, { "epoch": 4.636405005688282, "grad_norm": 1.6379217017513314, "learning_rate": 1.6259114264560795e-08, "loss": 0.0544, "step": 20377 }, { "epoch": 4.636632536973834, "grad_norm": 1.0516596291391844, "learning_rate": 1.6238872519109965e-08, "loss": 0.0165, "step": 20378 }, { "epoch": 4.636860068259386, "grad_norm": 1.3797781168353558, "learning_rate": 1.6218643215621736e-08, "loss": 0.0245, "step": 20379 }, { "epoch": 4.637087599544937, "grad_norm": 1.8999329344398497, "learning_rate": 1.6198426354509385e-08, "loss": 0.0273, "step": 20380 }, { "epoch": 4.637315130830489, "grad_norm": 1.865306076689409, "learning_rate": 1.617822193618633e-08, "loss": 0.0618, "step": 20381 }, { "epoch": 4.637542662116041, "grad_norm": 2.058538807546302, "learning_rate": 1.61580299610653e-08, "loss": 0.0418, "step": 20382 }, { "epoch": 4.637770193401593, "grad_norm": 2.371598167460023, "learning_rate": 1.6137850429559234e-08, "loss": 0.1242, "step": 20383 }, { "epoch": 4.637997724687144, "grad_norm": 0.9252291652056936, "learning_rate": 1.6117683342080427e-08, "loss": 0.0333, "step": 20384 }, { "epoch": 4.638225255972696, "grad_norm": 0.9748086007536361, "learning_rate": 1.6097528699040924e-08, "loss": 0.0199, "step": 20385 }, { "epoch": 4.638452787258248, "grad_norm": 0.8675593705071192, "learning_rate": 1.6077386500852887e-08, "loss": 0.0353, "step": 20386 }, { "epoch": 4.6386803185438, "grad_norm": 0.9596199606495434, "learning_rate": 1.6057256747927867e-08, "loss": 0.01, "step": 20387 }, { "epoch": 4.6389078498293514, "grad_norm": 0.7318854179594423, "learning_rate": 1.6037139440677197e-08, "loss": 0.0259, "step": 20388 }, { "epoch": 4.639135381114904, "grad_norm": 1.7422507651135248, "learning_rate": 1.6017034579512217e-08, "loss": 0.024, "step": 20389 }, { "epoch": 4.639362912400455, "grad_norm": 1.3419325325461384, "learning_rate": 1.5996942164843777e-08, "loss": 0.0262, "step": 20390 }, { "epoch": 4.639590443686007, "grad_norm": 0.851316459204897, "learning_rate": 1.5976862197082528e-08, "loss": 0.0371, "step": 20391 }, { "epoch": 4.6398179749715585, "grad_norm": 1.4240215006718195, "learning_rate": 1.5956794676638756e-08, "loss": 0.0745, "step": 20392 }, { "epoch": 4.640045506257111, "grad_norm": 0.6117916305876073, "learning_rate": 1.5936739603922767e-08, "loss": 0.0014, "step": 20393 }, { "epoch": 4.640273037542662, "grad_norm": 1.491833283026242, "learning_rate": 1.5916696979344296e-08, "loss": 0.0709, "step": 20394 }, { "epoch": 4.640500568828214, "grad_norm": 2.2821507471521465, "learning_rate": 1.5896666803313086e-08, "loss": 0.0876, "step": 20395 }, { "epoch": 4.6407281001137655, "grad_norm": 1.897269360482833, "learning_rate": 1.58766490762384e-08, "loss": 0.006, "step": 20396 }, { "epoch": 4.640955631399318, "grad_norm": 1.4278688367576773, "learning_rate": 1.5856643798529624e-08, "loss": 0.0307, "step": 20397 }, { "epoch": 4.641183162684869, "grad_norm": 0.9157761045936945, "learning_rate": 1.583665097059539e-08, "loss": 0.008, "step": 20398 }, { "epoch": 4.641410693970421, "grad_norm": 1.6034521483798567, "learning_rate": 1.5816670592844335e-08, "loss": 0.0309, "step": 20399 }, { "epoch": 4.6416382252559725, "grad_norm": 1.6946069362712006, "learning_rate": 1.579670266568495e-08, "loss": 0.0769, "step": 20400 }, { "epoch": 4.641865756541525, "grad_norm": 0.3931387559314062, "learning_rate": 1.577674718952517e-08, "loss": 0.0012, "step": 20401 }, { "epoch": 4.642093287827076, "grad_norm": 2.084361038091107, "learning_rate": 1.5756804164772933e-08, "loss": 0.1367, "step": 20402 }, { "epoch": 4.642320819112628, "grad_norm": 0.4643364354489972, "learning_rate": 1.57368735918359e-08, "loss": 0.0025, "step": 20403 }, { "epoch": 4.6425483503981795, "grad_norm": 1.5777551971009105, "learning_rate": 1.5716955471121313e-08, "loss": 0.0667, "step": 20404 }, { "epoch": 4.642775881683732, "grad_norm": 1.6348316780978727, "learning_rate": 1.5697049803036353e-08, "loss": 0.0149, "step": 20405 }, { "epoch": 4.643003412969283, "grad_norm": 1.1770114534256562, "learning_rate": 1.5677156587987768e-08, "loss": 0.0476, "step": 20406 }, { "epoch": 4.643230944254835, "grad_norm": 1.3461473526888175, "learning_rate": 1.5657275826382255e-08, "loss": 0.0397, "step": 20407 }, { "epoch": 4.6434584755403865, "grad_norm": 1.2837237175182528, "learning_rate": 1.5637407518625942e-08, "loss": 0.0307, "step": 20408 }, { "epoch": 4.643686006825939, "grad_norm": 0.9351550003364982, "learning_rate": 1.56175516651251e-08, "loss": 0.0056, "step": 20409 }, { "epoch": 4.64391353811149, "grad_norm": 1.1898963003425704, "learning_rate": 1.5597708266285524e-08, "loss": 0.0251, "step": 20410 }, { "epoch": 4.644141069397042, "grad_norm": 1.910386387522616, "learning_rate": 1.557787732251258e-08, "loss": 0.0073, "step": 20411 }, { "epoch": 4.6443686006825935, "grad_norm": 1.098208261990318, "learning_rate": 1.555805883421184e-08, "loss": 0.0181, "step": 20412 }, { "epoch": 4.644596131968146, "grad_norm": 1.2378847379774793, "learning_rate": 1.5538252801788194e-08, "loss": 0.1253, "step": 20413 }, { "epoch": 4.644823663253697, "grad_norm": 1.001113541391499, "learning_rate": 1.5518459225646528e-08, "loss": 0.0531, "step": 20414 }, { "epoch": 4.645051194539249, "grad_norm": 1.5142341940815784, "learning_rate": 1.5498678106191308e-08, "loss": 0.1011, "step": 20415 }, { "epoch": 4.6452787258248005, "grad_norm": 1.8042752178946635, "learning_rate": 1.5478909443826795e-08, "loss": 0.0332, "step": 20416 }, { "epoch": 4.645506257110353, "grad_norm": 0.44485438471415994, "learning_rate": 1.5459153238957178e-08, "loss": 0.0024, "step": 20417 }, { "epoch": 4.645733788395905, "grad_norm": 1.015480603760404, "learning_rate": 1.5439409491986092e-08, "loss": 0.0052, "step": 20418 }, { "epoch": 4.645961319681456, "grad_norm": 1.3747566568918135, "learning_rate": 1.5419678203317175e-08, "loss": 0.0042, "step": 20419 }, { "epoch": 4.6461888509670075, "grad_norm": 1.251309125137257, "learning_rate": 1.5399959373353577e-08, "loss": 0.0327, "step": 20420 }, { "epoch": 4.64641638225256, "grad_norm": 1.6478258642384696, "learning_rate": 1.5380253002498443e-08, "loss": 0.0377, "step": 20421 }, { "epoch": 4.646643913538112, "grad_norm": 0.328943359129008, "learning_rate": 1.5360559091154443e-08, "loss": 0.0016, "step": 20422 }, { "epoch": 4.646871444823663, "grad_norm": 1.7702744453550852, "learning_rate": 1.5340877639724097e-08, "loss": 0.0555, "step": 20423 }, { "epoch": 4.6470989761092145, "grad_norm": 1.4532201428882467, "learning_rate": 1.5321208648609724e-08, "loss": 0.0435, "step": 20424 }, { "epoch": 4.647326507394767, "grad_norm": 1.435127640975213, "learning_rate": 1.5301552118213228e-08, "loss": 0.0535, "step": 20425 }, { "epoch": 4.647554038680319, "grad_norm": 1.2023730948353253, "learning_rate": 1.5281908048936436e-08, "loss": 0.0466, "step": 20426 }, { "epoch": 4.64778156996587, "grad_norm": 1.0906120390232277, "learning_rate": 1.5262276441180697e-08, "loss": 0.0044, "step": 20427 }, { "epoch": 4.648009101251422, "grad_norm": 0.327247950749988, "learning_rate": 1.524265729534749e-08, "loss": 0.0019, "step": 20428 }, { "epoch": 4.648236632536974, "grad_norm": 0.5809225800399102, "learning_rate": 1.5223050611837543e-08, "loss": 0.0043, "step": 20429 }, { "epoch": 4.648464163822526, "grad_norm": 1.0368893032858948, "learning_rate": 1.520345639105171e-08, "loss": 0.0423, "step": 20430 }, { "epoch": 4.648691695108077, "grad_norm": 0.754903625617015, "learning_rate": 1.5183874633390438e-08, "loss": 0.0091, "step": 20431 }, { "epoch": 4.648919226393629, "grad_norm": 1.483546245309101, "learning_rate": 1.516430533925396e-08, "loss": 0.0166, "step": 20432 }, { "epoch": 4.649146757679181, "grad_norm": 1.1571448829663316, "learning_rate": 1.514474850904224e-08, "loss": 0.0616, "step": 20433 }, { "epoch": 4.649374288964733, "grad_norm": 1.3029970210473696, "learning_rate": 1.512520414315495e-08, "loss": 0.0727, "step": 20434 }, { "epoch": 4.649601820250284, "grad_norm": 1.3668042426887732, "learning_rate": 1.5105672241991567e-08, "loss": 0.0234, "step": 20435 }, { "epoch": 4.649829351535836, "grad_norm": 0.8853825681600346, "learning_rate": 1.5086152805951216e-08, "loss": 0.0044, "step": 20436 }, { "epoch": 4.650056882821388, "grad_norm": 1.8999873508409215, "learning_rate": 1.5066645835432882e-08, "loss": 0.0049, "step": 20437 }, { "epoch": 4.65028441410694, "grad_norm": 1.0078065438812571, "learning_rate": 1.504715133083534e-08, "loss": 0.0637, "step": 20438 }, { "epoch": 4.650511945392491, "grad_norm": 1.9528550658979793, "learning_rate": 1.502766929255696e-08, "loss": 0.0941, "step": 20439 }, { "epoch": 4.650739476678043, "grad_norm": 1.2815967047757064, "learning_rate": 1.500819972099589e-08, "loss": 0.018, "step": 20440 }, { "epoch": 4.650967007963595, "grad_norm": 1.591423728010838, "learning_rate": 1.4988742616550074e-08, "loss": 0.0114, "step": 20441 }, { "epoch": 4.651194539249147, "grad_norm": 2.173258359815113, "learning_rate": 1.4969297979617117e-08, "loss": 0.0138, "step": 20442 }, { "epoch": 4.651422070534698, "grad_norm": 1.407448883701498, "learning_rate": 1.4949865810594544e-08, "loss": 0.0732, "step": 20443 }, { "epoch": 4.6516496018202504, "grad_norm": 1.675571143520316, "learning_rate": 1.49304461098794e-08, "loss": 0.1062, "step": 20444 }, { "epoch": 4.651877133105802, "grad_norm": 2.0140524239534043, "learning_rate": 1.4911038877868724e-08, "loss": 0.0977, "step": 20445 }, { "epoch": 4.652104664391354, "grad_norm": 1.2954023367757928, "learning_rate": 1.4891644114959011e-08, "loss": 0.0229, "step": 20446 }, { "epoch": 4.652332195676905, "grad_norm": 1.0999939767098097, "learning_rate": 1.4872261821546816e-08, "loss": 0.0142, "step": 20447 }, { "epoch": 4.6525597269624575, "grad_norm": 0.7423972997972113, "learning_rate": 1.485289199802821e-08, "loss": 0.0047, "step": 20448 }, { "epoch": 4.652787258248009, "grad_norm": 5.492959146085281, "learning_rate": 1.4833534644798919e-08, "loss": 0.0482, "step": 20449 }, { "epoch": 4.653014789533561, "grad_norm": 1.6390072728247356, "learning_rate": 1.4814189762254808e-08, "loss": 0.0648, "step": 20450 }, { "epoch": 4.653242320819112, "grad_norm": 0.9235562991798818, "learning_rate": 1.4794857350791113e-08, "loss": 0.0063, "step": 20451 }, { "epoch": 4.6534698521046645, "grad_norm": 1.0809688549264744, "learning_rate": 1.4775537410803079e-08, "loss": 0.0121, "step": 20452 }, { "epoch": 4.653697383390216, "grad_norm": 1.0572068051843668, "learning_rate": 1.4756229942685524e-08, "loss": 0.009, "step": 20453 }, { "epoch": 4.653924914675768, "grad_norm": 0.7457934853798925, "learning_rate": 1.4736934946832856e-08, "loss": 0.0062, "step": 20454 }, { "epoch": 4.654152445961319, "grad_norm": 2.1829881015304657, "learning_rate": 1.4717652423639692e-08, "loss": 0.0556, "step": 20455 }, { "epoch": 4.6543799772468715, "grad_norm": 1.1704471336932443, "learning_rate": 1.469838237349995e-08, "loss": 0.0196, "step": 20456 }, { "epoch": 4.654607508532424, "grad_norm": 0.9678094903464951, "learning_rate": 1.4679124796807692e-08, "loss": 0.0252, "step": 20457 }, { "epoch": 4.654835039817975, "grad_norm": 1.3801300202020965, "learning_rate": 1.4659879693956283e-08, "loss": 0.0981, "step": 20458 }, { "epoch": 4.655062571103526, "grad_norm": 2.23008718268243, "learning_rate": 1.4640647065339159e-08, "loss": 0.05, "step": 20459 }, { "epoch": 4.6552901023890785, "grad_norm": 1.227414512848251, "learning_rate": 1.4621426911349407e-08, "loss": 0.0136, "step": 20460 }, { "epoch": 4.655517633674631, "grad_norm": 2.797793719309937, "learning_rate": 1.4602219232379839e-08, "loss": 0.0156, "step": 20461 }, { "epoch": 4.655745164960182, "grad_norm": 0.9879761156389694, "learning_rate": 1.4583024028822988e-08, "loss": 0.0492, "step": 20462 }, { "epoch": 4.655972696245733, "grad_norm": 0.9610011624944856, "learning_rate": 1.4563841301071177e-08, "loss": 0.0329, "step": 20463 }, { "epoch": 4.6562002275312855, "grad_norm": 1.985866652247357, "learning_rate": 1.4544671049516527e-08, "loss": 0.0311, "step": 20464 }, { "epoch": 4.656427758816838, "grad_norm": 0.686452179981701, "learning_rate": 1.4525513274550803e-08, "loss": 0.007, "step": 20465 }, { "epoch": 4.656655290102389, "grad_norm": 0.6250880477376586, "learning_rate": 1.4506367976565572e-08, "loss": 0.0072, "step": 20466 }, { "epoch": 4.656882821387941, "grad_norm": 1.5362626236873371, "learning_rate": 1.4487235155952184e-08, "loss": 0.0644, "step": 20467 }, { "epoch": 4.6571103526734925, "grad_norm": 5.649880032556499, "learning_rate": 1.4468114813101439e-08, "loss": 0.0126, "step": 20468 }, { "epoch": 4.657337883959045, "grad_norm": 37.40123177747149, "learning_rate": 1.4449006948404412e-08, "loss": 0.0214, "step": 20469 }, { "epoch": 4.657565415244596, "grad_norm": 5.05284319157017, "learning_rate": 1.4429911562251417e-08, "loss": 0.0587, "step": 20470 }, { "epoch": 4.657792946530148, "grad_norm": 0.8497040307715616, "learning_rate": 1.4410828655032904e-08, "loss": 0.0231, "step": 20471 }, { "epoch": 4.6580204778156995, "grad_norm": 0.8228679162733232, "learning_rate": 1.4391758227138841e-08, "loss": 0.0147, "step": 20472 }, { "epoch": 4.658248009101252, "grad_norm": 2.20551679617477, "learning_rate": 1.4372700278958915e-08, "loss": 0.1139, "step": 20473 }, { "epoch": 4.658475540386803, "grad_norm": 1.4427168626067375, "learning_rate": 1.4353654810882676e-08, "loss": 0.0255, "step": 20474 }, { "epoch": 4.658703071672355, "grad_norm": 0.6123406410149835, "learning_rate": 1.4334621823299394e-08, "loss": 0.0044, "step": 20475 }, { "epoch": 4.6589306029579065, "grad_norm": 1.5477292447740454, "learning_rate": 1.4315601316597997e-08, "loss": 0.0258, "step": 20476 }, { "epoch": 4.659158134243459, "grad_norm": 0.29859052577361744, "learning_rate": 1.4296593291167337e-08, "loss": 0.0012, "step": 20477 }, { "epoch": 4.65938566552901, "grad_norm": 1.5113604512332868, "learning_rate": 1.4277597747395927e-08, "loss": 0.0661, "step": 20478 }, { "epoch": 4.659613196814562, "grad_norm": 0.9513544904735769, "learning_rate": 1.4258614685671927e-08, "loss": 0.0145, "step": 20479 }, { "epoch": 4.6598407281001135, "grad_norm": 0.7452161229861296, "learning_rate": 1.4239644106383218e-08, "loss": 0.0368, "step": 20480 }, { "epoch": 4.660068259385666, "grad_norm": 1.3248484029951912, "learning_rate": 1.4220686009917758e-08, "loss": 0.0109, "step": 20481 }, { "epoch": 4.660295790671217, "grad_norm": 1.502078224387207, "learning_rate": 1.4201740396662805e-08, "loss": 0.0433, "step": 20482 }, { "epoch": 4.660523321956769, "grad_norm": 0.8009294042616822, "learning_rate": 1.4182807267005618e-08, "loss": 0.0496, "step": 20483 }, { "epoch": 4.6607508532423205, "grad_norm": 2.9841653357697866, "learning_rate": 1.416388662133325e-08, "loss": 0.0162, "step": 20484 }, { "epoch": 4.660978384527873, "grad_norm": 1.5524407384431167, "learning_rate": 1.4144978460032335e-08, "loss": 0.0311, "step": 20485 }, { "epoch": 4.661205915813424, "grad_norm": 1.172228221352668, "learning_rate": 1.412608278348937e-08, "loss": 0.0326, "step": 20486 }, { "epoch": 4.661433447098976, "grad_norm": 0.8886617854874727, "learning_rate": 1.4107199592090434e-08, "loss": 0.0577, "step": 20487 }, { "epoch": 4.6616609783845275, "grad_norm": 0.8740941038238536, "learning_rate": 1.4088328886221678e-08, "loss": 0.0163, "step": 20488 }, { "epoch": 4.66188850967008, "grad_norm": 1.5988241744812952, "learning_rate": 1.4069470666268486e-08, "loss": 0.1316, "step": 20489 }, { "epoch": 4.662116040955631, "grad_norm": 1.3188893550861123, "learning_rate": 1.4050624932616525e-08, "loss": 0.0152, "step": 20490 }, { "epoch": 4.662343572241183, "grad_norm": 1.858367696797076, "learning_rate": 1.40317916856509e-08, "loss": 0.0542, "step": 20491 }, { "epoch": 4.6625711035267345, "grad_norm": 0.8693857620752878, "learning_rate": 1.4012970925756444e-08, "loss": 0.0139, "step": 20492 }, { "epoch": 4.662798634812287, "grad_norm": 1.2297250365058658, "learning_rate": 1.399416265331799e-08, "loss": 0.0139, "step": 20493 }, { "epoch": 4.663026166097838, "grad_norm": 1.1975478182889214, "learning_rate": 1.3975366868719808e-08, "loss": 0.0264, "step": 20494 }, { "epoch": 4.66325369738339, "grad_norm": 1.2675026848010713, "learning_rate": 1.3956583572346111e-08, "loss": 0.0264, "step": 20495 }, { "epoch": 4.663481228668942, "grad_norm": 0.80360817272877, "learning_rate": 1.3937812764580686e-08, "loss": 0.0042, "step": 20496 }, { "epoch": 4.663708759954494, "grad_norm": 0.2062697373684055, "learning_rate": 1.3919054445807323e-08, "loss": 0.0006, "step": 20497 }, { "epoch": 4.663936291240045, "grad_norm": 0.7791912969489592, "learning_rate": 1.3900308616409327e-08, "loss": 0.0099, "step": 20498 }, { "epoch": 4.664163822525597, "grad_norm": 2.2610405983866237, "learning_rate": 1.3881575276769865e-08, "loss": 0.0516, "step": 20499 }, { "epoch": 4.664391353811149, "grad_norm": 1.5083897764451313, "learning_rate": 1.3862854427271824e-08, "loss": 0.049, "step": 20500 }, { "epoch": 4.664618885096701, "grad_norm": 0.5175923259749832, "learning_rate": 1.3844146068297746e-08, "loss": 0.0018, "step": 20501 }, { "epoch": 4.664846416382253, "grad_norm": 1.9043469255434968, "learning_rate": 1.3825450200230032e-08, "loss": 0.0479, "step": 20502 }, { "epoch": 4.665073947667804, "grad_norm": 0.8190227627444645, "learning_rate": 1.3806766823450806e-08, "loss": 0.0058, "step": 20503 }, { "epoch": 4.6653014789533565, "grad_norm": 1.2854945648486287, "learning_rate": 1.3788095938341919e-08, "loss": 0.0353, "step": 20504 }, { "epoch": 4.665529010238908, "grad_norm": 0.9168506590285372, "learning_rate": 1.3769437545285078e-08, "loss": 0.0376, "step": 20505 }, { "epoch": 4.66575654152446, "grad_norm": 1.625849297739048, "learning_rate": 1.3750791644661366e-08, "loss": 0.0085, "step": 20506 }, { "epoch": 4.665984072810011, "grad_norm": 1.2594813058622731, "learning_rate": 1.3732158236852145e-08, "loss": 0.0144, "step": 20507 }, { "epoch": 4.6662116040955635, "grad_norm": 1.2218993297138265, "learning_rate": 1.3713537322238012e-08, "loss": 0.0434, "step": 20508 }, { "epoch": 4.666439135381115, "grad_norm": 1.336662380024066, "learning_rate": 1.3694928901199708e-08, "loss": 0.0269, "step": 20509 }, { "epoch": 4.666666666666667, "grad_norm": 1.1578962475699852, "learning_rate": 1.3676332974117552e-08, "loss": 0.0078, "step": 20510 }, { "epoch": 4.666894197952218, "grad_norm": 1.0191569080943932, "learning_rate": 1.3657749541371445e-08, "loss": 0.019, "step": 20511 }, { "epoch": 4.6671217292377705, "grad_norm": 0.9585441248413205, "learning_rate": 1.3639178603341435e-08, "loss": 0.0082, "step": 20512 }, { "epoch": 4.667349260523322, "grad_norm": 3.980635564008011, "learning_rate": 1.3620620160406938e-08, "loss": 0.0696, "step": 20513 }, { "epoch": 4.667576791808874, "grad_norm": 1.4969462085104002, "learning_rate": 1.3602074212947237e-08, "loss": 0.0501, "step": 20514 }, { "epoch": 4.667804323094425, "grad_norm": 0.9382204314292556, "learning_rate": 1.3583540761341468e-08, "loss": 0.0423, "step": 20515 }, { "epoch": 4.6680318543799775, "grad_norm": 0.6973706516229248, "learning_rate": 1.356501980596829e-08, "loss": 0.0057, "step": 20516 }, { "epoch": 4.668259385665529, "grad_norm": 2.3818002939794756, "learning_rate": 1.3546511347206359e-08, "loss": 0.0359, "step": 20517 }, { "epoch": 4.668486916951081, "grad_norm": 1.6529554343105868, "learning_rate": 1.3528015385433912e-08, "loss": 0.0147, "step": 20518 }, { "epoch": 4.668714448236632, "grad_norm": 1.205835792106377, "learning_rate": 1.3509531921029052e-08, "loss": 0.03, "step": 20519 }, { "epoch": 4.6689419795221845, "grad_norm": 1.3507285450602387, "learning_rate": 1.349106095436932e-08, "loss": 0.0285, "step": 20520 }, { "epoch": 4.669169510807736, "grad_norm": 1.6203205128507776, "learning_rate": 1.3472602485832472e-08, "loss": 0.0756, "step": 20521 }, { "epoch": 4.669397042093288, "grad_norm": 1.262804545250113, "learning_rate": 1.3454156515795639e-08, "loss": 0.0645, "step": 20522 }, { "epoch": 4.669624573378839, "grad_norm": 0.9043104201075844, "learning_rate": 1.3435723044635877e-08, "loss": 0.0051, "step": 20523 }, { "epoch": 4.6698521046643915, "grad_norm": 1.786469830492779, "learning_rate": 1.3417302072729899e-08, "loss": 0.0506, "step": 20524 }, { "epoch": 4.670079635949943, "grad_norm": 1.2411396067065212, "learning_rate": 1.3398893600454141e-08, "loss": 0.0207, "step": 20525 }, { "epoch": 4.670307167235495, "grad_norm": 1.3694224858026411, "learning_rate": 1.3380497628185037e-08, "loss": 0.0817, "step": 20526 }, { "epoch": 4.670534698521046, "grad_norm": 1.3079444930712099, "learning_rate": 1.336211415629833e-08, "loss": 0.0444, "step": 20527 }, { "epoch": 4.6707622298065985, "grad_norm": 0.9063732675853702, "learning_rate": 1.33437431851699e-08, "loss": 0.0043, "step": 20528 }, { "epoch": 4.67098976109215, "grad_norm": 1.1653872689469411, "learning_rate": 1.3325384715175138e-08, "loss": 0.0418, "step": 20529 }, { "epoch": 4.671217292377702, "grad_norm": 1.7949437134737476, "learning_rate": 1.3307038746689232e-08, "loss": 0.0378, "step": 20530 }, { "epoch": 4.671444823663253, "grad_norm": 1.3590412520701098, "learning_rate": 1.3288705280087299e-08, "loss": 0.0341, "step": 20531 }, { "epoch": 4.6716723549488055, "grad_norm": 1.1772883839174135, "learning_rate": 1.327038431574383e-08, "loss": 0.0071, "step": 20532 }, { "epoch": 4.671899886234357, "grad_norm": 1.675391722377016, "learning_rate": 1.3252075854033459e-08, "loss": 0.0092, "step": 20533 }, { "epoch": 4.672127417519909, "grad_norm": 0.8795770130010577, "learning_rate": 1.3233779895330257e-08, "loss": 0.0043, "step": 20534 }, { "epoch": 4.672354948805461, "grad_norm": 0.6693213747653292, "learning_rate": 1.3215496440008232e-08, "loss": 0.0043, "step": 20535 }, { "epoch": 4.6725824800910125, "grad_norm": 3.6741605823467047, "learning_rate": 1.3197225488440976e-08, "loss": 0.0508, "step": 20536 }, { "epoch": 4.672810011376564, "grad_norm": 1.305948129789468, "learning_rate": 1.317896704100194e-08, "loss": 0.0252, "step": 20537 }, { "epoch": 4.673037542662116, "grad_norm": 0.6430032105616101, "learning_rate": 1.3160721098064432e-08, "loss": 0.0205, "step": 20538 }, { "epoch": 4.673265073947668, "grad_norm": 1.8289887116945325, "learning_rate": 1.3142487660001147e-08, "loss": 0.014, "step": 20539 }, { "epoch": 4.6734926052332195, "grad_norm": 2.1857413205253517, "learning_rate": 1.3124266727184907e-08, "loss": 0.0189, "step": 20540 }, { "epoch": 4.673720136518772, "grad_norm": 2.952153427627594, "learning_rate": 1.3106058299988122e-08, "loss": 0.0042, "step": 20541 }, { "epoch": 4.673947667804323, "grad_norm": 2.42119464846248, "learning_rate": 1.308786237878272e-08, "loss": 0.0244, "step": 20542 }, { "epoch": 4.674175199089875, "grad_norm": 1.7165125525894558, "learning_rate": 1.3069678963940832e-08, "loss": 0.0399, "step": 20543 }, { "epoch": 4.6744027303754265, "grad_norm": 1.727236347026172, "learning_rate": 1.3051508055833967e-08, "loss": 0.0117, "step": 20544 }, { "epoch": 4.674630261660979, "grad_norm": 1.1215388942489561, "learning_rate": 1.3033349654833633e-08, "loss": 0.0715, "step": 20545 }, { "epoch": 4.67485779294653, "grad_norm": 1.1295129547603693, "learning_rate": 1.3015203761310782e-08, "loss": 0.0231, "step": 20546 }, { "epoch": 4.675085324232082, "grad_norm": 0.5985840781097399, "learning_rate": 1.2997070375636439e-08, "loss": 0.0024, "step": 20547 }, { "epoch": 4.6753128555176335, "grad_norm": 0.9395648245359327, "learning_rate": 1.297894949818114e-08, "loss": 0.0031, "step": 20548 }, { "epoch": 4.675540386803186, "grad_norm": 1.6898657716196435, "learning_rate": 1.2960841129315213e-08, "loss": 0.0914, "step": 20549 }, { "epoch": 4.675767918088737, "grad_norm": 1.3279441255066082, "learning_rate": 1.294274526940878e-08, "loss": 0.0458, "step": 20550 }, { "epoch": 4.675995449374289, "grad_norm": 1.6036446057612288, "learning_rate": 1.2924661918831683e-08, "loss": 0.0112, "step": 20551 }, { "epoch": 4.6762229806598405, "grad_norm": 1.7504747260857747, "learning_rate": 1.2906591077953626e-08, "loss": 0.0692, "step": 20552 }, { "epoch": 4.676450511945393, "grad_norm": 2.288753229428941, "learning_rate": 1.2888532747143758e-08, "loss": 0.075, "step": 20553 }, { "epoch": 4.676678043230944, "grad_norm": 0.5507716703679971, "learning_rate": 1.28704869267713e-08, "loss": 0.0014, "step": 20554 }, { "epoch": 4.676905574516496, "grad_norm": 0.9987416165234494, "learning_rate": 1.2852453617205051e-08, "loss": 0.0052, "step": 20555 }, { "epoch": 4.6771331058020476, "grad_norm": 0.9547684835951309, "learning_rate": 1.2834432818813538e-08, "loss": 0.0126, "step": 20556 }, { "epoch": 4.6773606370876, "grad_norm": 0.4913430794441236, "learning_rate": 1.2816424531965076e-08, "loss": 0.0024, "step": 20557 }, { "epoch": 4.677588168373151, "grad_norm": 2.046753849840016, "learning_rate": 1.2798428757027707e-08, "loss": 0.0648, "step": 20558 }, { "epoch": 4.677815699658703, "grad_norm": 0.658944178472181, "learning_rate": 1.2780445494369326e-08, "loss": 0.0034, "step": 20559 }, { "epoch": 4.678043230944255, "grad_norm": 1.5812151780760837, "learning_rate": 1.276247474435742e-08, "loss": 0.1026, "step": 20560 }, { "epoch": 4.678270762229807, "grad_norm": 1.3200441232712894, "learning_rate": 1.2744516507359195e-08, "loss": 0.0313, "step": 20561 }, { "epoch": 4.678498293515358, "grad_norm": 1.8893230683293138, "learning_rate": 1.2726570783741787e-08, "loss": 0.0948, "step": 20562 }, { "epoch": 4.67872582480091, "grad_norm": 0.9108391212553931, "learning_rate": 1.2708637573871919e-08, "loss": 0.0059, "step": 20563 }, { "epoch": 4.678953356086462, "grad_norm": 2.3672394495593068, "learning_rate": 1.269071687811617e-08, "loss": 0.0494, "step": 20564 }, { "epoch": 4.679180887372014, "grad_norm": 0.9916785882979796, "learning_rate": 1.2672808696840775e-08, "loss": 0.062, "step": 20565 }, { "epoch": 4.679408418657565, "grad_norm": 1.4735074584723535, "learning_rate": 1.2654913030411762e-08, "loss": 0.0114, "step": 20566 }, { "epoch": 4.679635949943117, "grad_norm": 1.6418826150533383, "learning_rate": 1.263702987919488e-08, "loss": 0.0186, "step": 20567 }, { "epoch": 4.679863481228669, "grad_norm": 0.9789495514274124, "learning_rate": 1.2619159243555599e-08, "loss": 0.0101, "step": 20568 }, { "epoch": 4.680091012514221, "grad_norm": 1.1951080086048969, "learning_rate": 1.2601301123859183e-08, "loss": 0.0068, "step": 20569 }, { "epoch": 4.680318543799773, "grad_norm": 1.0715369505232473, "learning_rate": 1.258345552047055e-08, "loss": 0.0246, "step": 20570 }, { "epoch": 4.680546075085324, "grad_norm": 1.6049487020718671, "learning_rate": 1.2565622433754616e-08, "loss": 0.0175, "step": 20571 }, { "epoch": 4.680773606370876, "grad_norm": 1.03271153418067, "learning_rate": 1.2547801864075601e-08, "loss": 0.0136, "step": 20572 }, { "epoch": 4.681001137656428, "grad_norm": 0.6519085997959145, "learning_rate": 1.2529993811798008e-08, "loss": 0.0074, "step": 20573 }, { "epoch": 4.68122866894198, "grad_norm": 1.1266133557343534, "learning_rate": 1.2512198277285642e-08, "loss": 0.0525, "step": 20574 }, { "epoch": 4.681456200227531, "grad_norm": 1.2710722308288664, "learning_rate": 1.2494415260902102e-08, "loss": 0.0195, "step": 20575 }, { "epoch": 4.681683731513083, "grad_norm": 0.9045146108804207, "learning_rate": 1.2476644763011053e-08, "loss": 0.0121, "step": 20576 }, { "epoch": 4.681911262798635, "grad_norm": 1.0686694595378337, "learning_rate": 1.245888678397554e-08, "loss": 0.0947, "step": 20577 }, { "epoch": 4.682138794084187, "grad_norm": 1.1803386927222135, "learning_rate": 1.2441141324158676e-08, "loss": 0.0426, "step": 20578 }, { "epoch": 4.682366325369738, "grad_norm": 0.9807076043964432, "learning_rate": 1.2423408383922947e-08, "loss": 0.0439, "step": 20579 }, { "epoch": 4.6825938566552905, "grad_norm": 1.0521358694695926, "learning_rate": 1.240568796363091e-08, "loss": 0.0366, "step": 20580 }, { "epoch": 4.682821387940842, "grad_norm": 1.1945891405558127, "learning_rate": 1.2387980063644709e-08, "loss": 0.0048, "step": 20581 }, { "epoch": 4.683048919226394, "grad_norm": 1.4150902559262892, "learning_rate": 1.2370284684326204e-08, "loss": 0.0423, "step": 20582 }, { "epoch": 4.683276450511945, "grad_norm": 1.1071434903724793, "learning_rate": 1.235260182603705e-08, "loss": 0.0102, "step": 20583 }, { "epoch": 4.6835039817974975, "grad_norm": 0.9704781080192229, "learning_rate": 1.2334931489138765e-08, "loss": 0.0282, "step": 20584 }, { "epoch": 4.683731513083049, "grad_norm": 3.8510097602731728, "learning_rate": 1.231727367399245e-08, "loss": 0.0139, "step": 20585 }, { "epoch": 4.683959044368601, "grad_norm": 0.582517073622628, "learning_rate": 1.2299628380958994e-08, "loss": 0.0037, "step": 20586 }, { "epoch": 4.684186575654152, "grad_norm": 0.5301759822112267, "learning_rate": 1.2281995610399014e-08, "loss": 0.0013, "step": 20587 }, { "epoch": 4.6844141069397045, "grad_norm": 0.770142541651034, "learning_rate": 1.2264375362672914e-08, "loss": 0.0038, "step": 20588 }, { "epoch": 4.684641638225256, "grad_norm": 2.0069965150818474, "learning_rate": 1.2246767638140755e-08, "loss": 0.1056, "step": 20589 }, { "epoch": 4.684869169510808, "grad_norm": 1.1498944005617913, "learning_rate": 1.2229172437162525e-08, "loss": 0.0793, "step": 20590 }, { "epoch": 4.685096700796359, "grad_norm": 1.4658923849146883, "learning_rate": 1.221158976009773e-08, "loss": 0.0237, "step": 20591 }, { "epoch": 4.6853242320819115, "grad_norm": 2.1927516172872012, "learning_rate": 1.2194019607305804e-08, "loss": 0.0261, "step": 20592 }, { "epoch": 4.685551763367463, "grad_norm": 1.213540653964873, "learning_rate": 1.2176461979145835e-08, "loss": 0.0966, "step": 20593 }, { "epoch": 4.685779294653015, "grad_norm": 0.8649436971445679, "learning_rate": 1.2158916875976562e-08, "loss": 0.0486, "step": 20594 }, { "epoch": 4.686006825938566, "grad_norm": 0.9949021156971071, "learning_rate": 1.2141384298156796e-08, "loss": 0.0187, "step": 20595 }, { "epoch": 4.6862343572241185, "grad_norm": 1.2387198016257945, "learning_rate": 1.2123864246044656e-08, "loss": 0.0088, "step": 20596 }, { "epoch": 4.68646188850967, "grad_norm": 0.6740035790907757, "learning_rate": 1.2106356719998255e-08, "loss": 0.0067, "step": 20597 }, { "epoch": 4.686689419795222, "grad_norm": 2.2867724517777432, "learning_rate": 1.2088861720375502e-08, "loss": 0.0273, "step": 20598 }, { "epoch": 4.686916951080773, "grad_norm": 1.5293128703947427, "learning_rate": 1.207137924753396e-08, "loss": 0.1211, "step": 20599 }, { "epoch": 4.6871444823663255, "grad_norm": 1.1272699969900335, "learning_rate": 1.205390930183091e-08, "loss": 0.0588, "step": 20600 }, { "epoch": 4.687372013651877, "grad_norm": 1.1504513027162087, "learning_rate": 1.203645188362329e-08, "loss": 0.0166, "step": 20601 }, { "epoch": 4.687599544937429, "grad_norm": 1.104245293205267, "learning_rate": 1.2019006993268107e-08, "loss": 0.013, "step": 20602 }, { "epoch": 4.68782707622298, "grad_norm": 1.1955484598435429, "learning_rate": 1.200157463112174e-08, "loss": 0.0288, "step": 20603 }, { "epoch": 4.6880546075085325, "grad_norm": 0.829419378843181, "learning_rate": 1.1984154797540573e-08, "loss": 0.0133, "step": 20604 }, { "epoch": 4.688282138794084, "grad_norm": 1.3058736533341553, "learning_rate": 1.196674749288057e-08, "loss": 0.0056, "step": 20605 }, { "epoch": 4.688509670079636, "grad_norm": 1.1558613722305002, "learning_rate": 1.1949352717497558e-08, "loss": 0.0064, "step": 20606 }, { "epoch": 4.688737201365187, "grad_norm": 1.867225392742214, "learning_rate": 1.1931970471747017e-08, "loss": 0.0516, "step": 20607 }, { "epoch": 4.6889647326507395, "grad_norm": 1.1553896401573618, "learning_rate": 1.1914600755984219e-08, "loss": 0.0634, "step": 20608 }, { "epoch": 4.689192263936292, "grad_norm": 0.8962854712104384, "learning_rate": 1.1897243570564155e-08, "loss": 0.0146, "step": 20609 }, { "epoch": 4.689419795221843, "grad_norm": 0.458519813780555, "learning_rate": 1.1879898915841611e-08, "loss": 0.0063, "step": 20610 }, { "epoch": 4.689647326507394, "grad_norm": 1.5285991994979589, "learning_rate": 1.186256679217096e-08, "loss": 0.0637, "step": 20611 }, { "epoch": 4.6898748577929465, "grad_norm": 0.43388521299031313, "learning_rate": 1.1845247199906706e-08, "loss": 0.0024, "step": 20612 }, { "epoch": 4.690102389078499, "grad_norm": 1.587277414383143, "learning_rate": 1.1827940139402528e-08, "loss": 0.0279, "step": 20613 }, { "epoch": 4.69032992036405, "grad_norm": 0.7008622045073151, "learning_rate": 1.1810645611012375e-08, "loss": 0.0034, "step": 20614 }, { "epoch": 4.690557451649601, "grad_norm": 0.7004888494184726, "learning_rate": 1.179336361508951e-08, "loss": 0.0067, "step": 20615 }, { "epoch": 4.690784982935154, "grad_norm": 1.838726448251741, "learning_rate": 1.1776094151987328e-08, "loss": 0.0971, "step": 20616 }, { "epoch": 4.691012514220706, "grad_norm": 0.5280273893123593, "learning_rate": 1.1758837222058742e-08, "loss": 0.0074, "step": 20617 }, { "epoch": 4.691240045506257, "grad_norm": 1.215923566433813, "learning_rate": 1.1741592825656316e-08, "loss": 0.0548, "step": 20618 }, { "epoch": 4.691467576791809, "grad_norm": 2.489156906558874, "learning_rate": 1.1724360963132758e-08, "loss": 0.0381, "step": 20619 }, { "epoch": 4.691695108077361, "grad_norm": 2.3446077326919506, "learning_rate": 1.1707141634839937e-08, "loss": 0.1032, "step": 20620 }, { "epoch": 4.691922639362913, "grad_norm": 1.5642574078629672, "learning_rate": 1.1689934841130069e-08, "loss": 0.0241, "step": 20621 }, { "epoch": 4.692150170648464, "grad_norm": 0.8923197257619718, "learning_rate": 1.167274058235468e-08, "loss": 0.0364, "step": 20622 }, { "epoch": 4.692377701934016, "grad_norm": 1.520521681430139, "learning_rate": 1.1655558858865227e-08, "loss": 0.0388, "step": 20623 }, { "epoch": 4.692605233219568, "grad_norm": 0.7412257420011177, "learning_rate": 1.1638389671012815e-08, "loss": 0.0148, "step": 20624 }, { "epoch": 4.69283276450512, "grad_norm": 1.1372370237322589, "learning_rate": 1.1621233019148414e-08, "loss": 0.011, "step": 20625 }, { "epoch": 4.693060295790671, "grad_norm": 1.26571348886696, "learning_rate": 1.1604088903622718e-08, "loss": 0.0395, "step": 20626 }, { "epoch": 4.693287827076223, "grad_norm": 1.530444142744767, "learning_rate": 1.1586957324786e-08, "loss": 0.0062, "step": 20627 }, { "epoch": 4.693515358361775, "grad_norm": 2.9043141864655797, "learning_rate": 1.1569838282988467e-08, "loss": 0.0061, "step": 20628 }, { "epoch": 4.693742889647327, "grad_norm": 1.2514459912237383, "learning_rate": 1.1552731778580045e-08, "loss": 0.0723, "step": 20629 }, { "epoch": 4.693970420932878, "grad_norm": 0.8937621808415289, "learning_rate": 1.153563781191018e-08, "loss": 0.0105, "step": 20630 }, { "epoch": 4.69419795221843, "grad_norm": 1.4397810723347078, "learning_rate": 1.1518556383328522e-08, "loss": 0.1264, "step": 20631 }, { "epoch": 4.694425483503982, "grad_norm": 1.848514306196892, "learning_rate": 1.1501487493183957e-08, "loss": 0.0117, "step": 20632 }, { "epoch": 4.694653014789534, "grad_norm": 0.7894538166000877, "learning_rate": 1.1484431141825445e-08, "loss": 0.0134, "step": 20633 }, { "epoch": 4.694880546075085, "grad_norm": 2.5181215539129376, "learning_rate": 1.1467387329601524e-08, "loss": 0.0918, "step": 20634 }, { "epoch": 4.695108077360637, "grad_norm": 0.9907481489433803, "learning_rate": 1.1450356056860664e-08, "loss": 0.0521, "step": 20635 }, { "epoch": 4.695335608646189, "grad_norm": 1.6122146664404322, "learning_rate": 1.1433337323950785e-08, "loss": 0.0625, "step": 20636 }, { "epoch": 4.695563139931741, "grad_norm": 1.6086497538098499, "learning_rate": 1.1416331131219802e-08, "loss": 0.0159, "step": 20637 }, { "epoch": 4.695790671217292, "grad_norm": 0.44340527913410754, "learning_rate": 1.1399337479015282e-08, "loss": 0.0023, "step": 20638 }, { "epoch": 4.696018202502844, "grad_norm": 0.6018648207954521, "learning_rate": 1.1382356367684588e-08, "loss": 0.0174, "step": 20639 }, { "epoch": 4.696245733788396, "grad_norm": 1.7945518584705238, "learning_rate": 1.1365387797574734e-08, "loss": 0.0107, "step": 20640 }, { "epoch": 4.696473265073948, "grad_norm": 1.893021161031942, "learning_rate": 1.1348431769032456e-08, "loss": 0.0565, "step": 20641 }, { "epoch": 4.696700796359499, "grad_norm": 1.7751603334986328, "learning_rate": 1.133148828240449e-08, "loss": 0.0362, "step": 20642 }, { "epoch": 4.696928327645051, "grad_norm": 1.5255958945129737, "learning_rate": 1.131455733803695e-08, "loss": 0.0452, "step": 20643 }, { "epoch": 4.697155858930603, "grad_norm": 0.3316465147051769, "learning_rate": 1.1297638936275945e-08, "loss": 0.0022, "step": 20644 }, { "epoch": 4.697383390216155, "grad_norm": 0.65169417015981, "learning_rate": 1.1280733077467312e-08, "loss": 0.0387, "step": 20645 }, { "epoch": 4.697610921501706, "grad_norm": 1.4193171299104654, "learning_rate": 1.1263839761956469e-08, "loss": 0.0184, "step": 20646 }, { "epoch": 4.697838452787258, "grad_norm": 1.2778236624307848, "learning_rate": 1.1246958990088833e-08, "loss": 0.051, "step": 20647 }, { "epoch": 4.6980659840728105, "grad_norm": 2.408722700745981, "learning_rate": 1.1230090762209267e-08, "loss": 0.0133, "step": 20648 }, { "epoch": 4.698293515358362, "grad_norm": 1.7812632392739765, "learning_rate": 1.1213235078662495e-08, "loss": 0.0304, "step": 20649 }, { "epoch": 4.698521046643913, "grad_norm": 0.9734958867126818, "learning_rate": 1.1196391939793175e-08, "loss": 0.0511, "step": 20650 }, { "epoch": 4.698748577929465, "grad_norm": 1.0334205978309554, "learning_rate": 1.1179561345945471e-08, "loss": 0.006, "step": 20651 }, { "epoch": 4.6989761092150175, "grad_norm": 1.4066784130853032, "learning_rate": 1.116274329746335e-08, "loss": 0.0403, "step": 20652 }, { "epoch": 4.699203640500569, "grad_norm": 1.5900584017841228, "learning_rate": 1.1145937794690559e-08, "loss": 0.0399, "step": 20653 }, { "epoch": 4.69943117178612, "grad_norm": 1.184481808803406, "learning_rate": 1.1129144837970645e-08, "loss": 0.0166, "step": 20654 }, { "epoch": 4.699658703071672, "grad_norm": 0.20080717019016311, "learning_rate": 1.1112364427646738e-08, "loss": 0.0006, "step": 20655 }, { "epoch": 4.6998862343572245, "grad_norm": 1.36916992163567, "learning_rate": 1.1095596564061826e-08, "loss": 0.0221, "step": 20656 }, { "epoch": 4.700113765642776, "grad_norm": 0.7649713496483963, "learning_rate": 1.107884124755855e-08, "loss": 0.004, "step": 20657 }, { "epoch": 4.700341296928328, "grad_norm": 1.133905382852225, "learning_rate": 1.1062098478479416e-08, "loss": 0.0393, "step": 20658 }, { "epoch": 4.700568828213879, "grad_norm": 1.2452129120910875, "learning_rate": 1.104536825716665e-08, "loss": 0.0722, "step": 20659 }, { "epoch": 4.7007963594994315, "grad_norm": 2.3323012330734336, "learning_rate": 1.1028650583962131e-08, "loss": 0.0241, "step": 20660 }, { "epoch": 4.701023890784983, "grad_norm": 1.3084941006087254, "learning_rate": 1.1011945459207598e-08, "loss": 0.011, "step": 20661 }, { "epoch": 4.701251422070535, "grad_norm": 0.555100505548018, "learning_rate": 1.0995252883244447e-08, "loss": 0.0039, "step": 20662 }, { "epoch": 4.701478953356086, "grad_norm": 1.8392403349604187, "learning_rate": 1.0978572856413794e-08, "loss": 0.0975, "step": 20663 }, { "epoch": 4.7017064846416385, "grad_norm": 1.2209405037672012, "learning_rate": 1.0961905379056545e-08, "loss": 0.0536, "step": 20664 }, { "epoch": 4.70193401592719, "grad_norm": 0.8710616828886034, "learning_rate": 1.094525045151347e-08, "loss": 0.018, "step": 20665 }, { "epoch": 4.702161547212742, "grad_norm": 0.8313613823647942, "learning_rate": 1.0928608074124853e-08, "loss": 0.0294, "step": 20666 }, { "epoch": 4.702389078498293, "grad_norm": 0.8669739525322356, "learning_rate": 1.0911978247230906e-08, "loss": 0.0026, "step": 20667 }, { "epoch": 4.7026166097838455, "grad_norm": 0.4604209045887348, "learning_rate": 1.0895360971171429e-08, "loss": 0.0036, "step": 20668 }, { "epoch": 4.702844141069397, "grad_norm": 0.6680122015430896, "learning_rate": 1.0878756246286078e-08, "loss": 0.002, "step": 20669 }, { "epoch": 4.703071672354949, "grad_norm": 0.4622297837128883, "learning_rate": 1.0862164072914238e-08, "loss": 0.0059, "step": 20670 }, { "epoch": 4.7032992036405, "grad_norm": 0.6289984996800023, "learning_rate": 1.084558445139508e-08, "loss": 0.0048, "step": 20671 }, { "epoch": 4.7035267349260526, "grad_norm": 0.7254125406505291, "learning_rate": 1.082901738206736e-08, "loss": 0.0254, "step": 20672 }, { "epoch": 4.703754266211604, "grad_norm": 0.7409779465828227, "learning_rate": 1.0812462865269768e-08, "loss": 0.0023, "step": 20673 }, { "epoch": 4.703981797497156, "grad_norm": 0.7507191567741046, "learning_rate": 1.0795920901340573e-08, "loss": 0.0157, "step": 20674 }, { "epoch": 4.704209328782707, "grad_norm": 1.253957011990175, "learning_rate": 1.0779391490617839e-08, "loss": 0.0102, "step": 20675 }, { "epoch": 4.70443686006826, "grad_norm": 1.7257870598031944, "learning_rate": 1.0762874633439557e-08, "loss": 0.0101, "step": 20676 }, { "epoch": 4.704664391353811, "grad_norm": 1.3834036809529875, "learning_rate": 1.07463703301431e-08, "loss": 0.0511, "step": 20677 }, { "epoch": 4.704891922639363, "grad_norm": 1.3698820814172217, "learning_rate": 1.0729878581065902e-08, "loss": 0.0098, "step": 20678 }, { "epoch": 4.705119453924914, "grad_norm": 1.2715701539348963, "learning_rate": 1.071339938654506e-08, "loss": 0.0106, "step": 20679 }, { "epoch": 4.705346985210467, "grad_norm": 1.8987909785603567, "learning_rate": 1.0696932746917314e-08, "loss": 0.0385, "step": 20680 }, { "epoch": 4.705574516496018, "grad_norm": 0.5665292040962863, "learning_rate": 1.06804786625192e-08, "loss": 0.0015, "step": 20681 }, { "epoch": 4.70580204778157, "grad_norm": 1.3636411841751155, "learning_rate": 1.066403713368698e-08, "loss": 0.0363, "step": 20682 }, { "epoch": 4.706029579067121, "grad_norm": 0.7288534517204176, "learning_rate": 1.0647608160756842e-08, "loss": 0.0184, "step": 20683 }, { "epoch": 4.706257110352674, "grad_norm": 0.6049457945542824, "learning_rate": 1.063119174406435e-08, "loss": 0.0029, "step": 20684 }, { "epoch": 4.706484641638225, "grad_norm": 0.6356200092291147, "learning_rate": 1.061478788394521e-08, "loss": 0.04, "step": 20685 }, { "epoch": 4.706712172923777, "grad_norm": 1.3831378176369136, "learning_rate": 1.0598396580734569e-08, "loss": 0.0722, "step": 20686 }, { "epoch": 4.706939704209329, "grad_norm": 1.219102345031922, "learning_rate": 1.0582017834767505e-08, "loss": 0.0593, "step": 20687 }, { "epoch": 4.707167235494881, "grad_norm": 1.109258922057637, "learning_rate": 1.0565651646378753e-08, "loss": 0.0227, "step": 20688 }, { "epoch": 4.707394766780432, "grad_norm": 0.600553865406797, "learning_rate": 1.0549298015902765e-08, "loss": 0.0106, "step": 20689 }, { "epoch": 4.707622298065984, "grad_norm": 1.600951352097216, "learning_rate": 1.0532956943673858e-08, "loss": 0.0359, "step": 20690 }, { "epoch": 4.707849829351536, "grad_norm": 0.6677450805514816, "learning_rate": 1.0516628430025864e-08, "loss": 0.0014, "step": 20691 }, { "epoch": 4.708077360637088, "grad_norm": 0.5358709763631853, "learning_rate": 1.050031247529268e-08, "loss": 0.0064, "step": 20692 }, { "epoch": 4.708304891922639, "grad_norm": 0.4200582292474751, "learning_rate": 1.048400907980772e-08, "loss": 0.0027, "step": 20693 }, { "epoch": 4.708532423208191, "grad_norm": 1.1601860200870062, "learning_rate": 1.046771824390419e-08, "loss": 0.0178, "step": 20694 }, { "epoch": 4.708759954493743, "grad_norm": 0.33145805044498894, "learning_rate": 1.0451439967915019e-08, "loss": 0.0015, "step": 20695 }, { "epoch": 4.708987485779295, "grad_norm": 0.8576982794081124, "learning_rate": 1.0435174252172928e-08, "loss": 0.0038, "step": 20696 }, { "epoch": 4.709215017064847, "grad_norm": 0.9890670466635858, "learning_rate": 1.0418921097010287e-08, "loss": 0.0145, "step": 20697 }, { "epoch": 4.709442548350398, "grad_norm": 1.4423049225966658, "learning_rate": 1.0402680502759401e-08, "loss": 0.1113, "step": 20698 }, { "epoch": 4.70967007963595, "grad_norm": 5.201015682941786, "learning_rate": 1.0386452469752159e-08, "loss": 0.0959, "step": 20699 }, { "epoch": 4.709897610921502, "grad_norm": 1.1256674742716144, "learning_rate": 1.0370236998320238e-08, "loss": 0.0186, "step": 20700 }, { "epoch": 4.710125142207054, "grad_norm": 0.1901846804013112, "learning_rate": 1.035403408879497e-08, "loss": 0.0006, "step": 20701 }, { "epoch": 4.710352673492605, "grad_norm": 3.976173160414004, "learning_rate": 1.033784374150769e-08, "loss": 0.0311, "step": 20702 }, { "epoch": 4.710580204778157, "grad_norm": 2.867255258696235, "learning_rate": 1.0321665956789032e-08, "loss": 0.0296, "step": 20703 }, { "epoch": 4.710807736063709, "grad_norm": 1.5261424814455138, "learning_rate": 1.0305500734969846e-08, "loss": 0.1036, "step": 20704 }, { "epoch": 4.711035267349261, "grad_norm": 1.8193847235806127, "learning_rate": 1.028934807638049e-08, "loss": 0.1332, "step": 20705 }, { "epoch": 4.711262798634812, "grad_norm": 1.6327787737391861, "learning_rate": 1.027320798135105e-08, "loss": 0.1376, "step": 20706 }, { "epoch": 4.711490329920364, "grad_norm": 1.5525607641898362, "learning_rate": 1.0257080450211468e-08, "loss": 0.0555, "step": 20707 }, { "epoch": 4.711717861205916, "grad_norm": 0.8562852112520086, "learning_rate": 1.0240965483291274e-08, "loss": 0.0127, "step": 20708 }, { "epoch": 4.711945392491468, "grad_norm": 1.1208876787843782, "learning_rate": 1.0224863080919855e-08, "loss": 0.0239, "step": 20709 }, { "epoch": 4.712172923777019, "grad_norm": 1.6505480307921085, "learning_rate": 1.0208773243426326e-08, "loss": 0.0177, "step": 20710 }, { "epoch": 4.712400455062571, "grad_norm": 0.9656156789526886, "learning_rate": 1.019269597113952e-08, "loss": 0.0579, "step": 20711 }, { "epoch": 4.712627986348123, "grad_norm": 1.134828123536079, "learning_rate": 1.0176631264388128e-08, "loss": 0.0552, "step": 20712 }, { "epoch": 4.712855517633675, "grad_norm": 1.5672217845814183, "learning_rate": 1.0160579123500298e-08, "loss": 0.0249, "step": 20713 }, { "epoch": 4.713083048919226, "grad_norm": 0.6828277229785756, "learning_rate": 1.0144539548804233e-08, "loss": 0.0096, "step": 20714 }, { "epoch": 4.713310580204778, "grad_norm": 1.09294337648834, "learning_rate": 1.012851254062773e-08, "loss": 0.0058, "step": 20715 }, { "epoch": 4.71353811149033, "grad_norm": 1.2789293373454573, "learning_rate": 1.0112498099298374e-08, "loss": 0.0057, "step": 20716 }, { "epoch": 4.713765642775882, "grad_norm": 2.370469720323149, "learning_rate": 1.0096496225143401e-08, "loss": 0.0333, "step": 20717 }, { "epoch": 4.713993174061433, "grad_norm": 3.3083655862718695, "learning_rate": 1.0080506918489913e-08, "loss": 0.0059, "step": 20718 }, { "epoch": 4.714220705346985, "grad_norm": 1.034779015675533, "learning_rate": 1.0064530179664731e-08, "loss": 0.0054, "step": 20719 }, { "epoch": 4.714448236632537, "grad_norm": 1.3930448356798195, "learning_rate": 1.0048566008994329e-08, "loss": 0.0555, "step": 20720 }, { "epoch": 4.714675767918089, "grad_norm": 1.245986298897247, "learning_rate": 1.0032614406805044e-08, "loss": 0.0721, "step": 20721 }, { "epoch": 4.71490329920364, "grad_norm": 2.3318201557458886, "learning_rate": 1.0016675373422796e-08, "loss": 0.0087, "step": 20722 }, { "epoch": 4.715130830489192, "grad_norm": 1.4767040378924268, "learning_rate": 1.0000748909173435e-08, "loss": 0.0555, "step": 20723 }, { "epoch": 4.715358361774744, "grad_norm": 1.4024625168246942, "learning_rate": 9.984835014382466e-09, "loss": 0.0243, "step": 20724 }, { "epoch": 4.715585893060296, "grad_norm": 0.4333673719071379, "learning_rate": 9.968933689375044e-09, "loss": 0.0032, "step": 20725 }, { "epoch": 4.715813424345848, "grad_norm": 1.4276301017607091, "learning_rate": 9.953044934476325e-09, "loss": 0.0102, "step": 20726 }, { "epoch": 4.716040955631399, "grad_norm": 1.1864533211350752, "learning_rate": 9.937168750010912e-09, "loss": 0.0172, "step": 20727 }, { "epoch": 4.716268486916951, "grad_norm": 1.085424831526504, "learning_rate": 9.921305136303405e-09, "loss": 0.0727, "step": 20728 }, { "epoch": 4.716496018202503, "grad_norm": 1.4872588225196928, "learning_rate": 9.90545409367792e-09, "loss": 0.13, "step": 20729 }, { "epoch": 4.716723549488055, "grad_norm": 0.8422892587091768, "learning_rate": 9.889615622458507e-09, "loss": 0.017, "step": 20730 }, { "epoch": 4.716951080773606, "grad_norm": 1.1393806104561084, "learning_rate": 9.873789722968722e-09, "loss": 0.0409, "step": 20731 }, { "epoch": 4.717178612059158, "grad_norm": 1.1091642125480092, "learning_rate": 9.857976395532196e-09, "loss": 0.07, "step": 20732 }, { "epoch": 4.71740614334471, "grad_norm": 0.9806692907376873, "learning_rate": 9.842175640472074e-09, "loss": 0.0427, "step": 20733 }, { "epoch": 4.717633674630262, "grad_norm": 2.0757990902243946, "learning_rate": 9.826387458111153e-09, "loss": 0.0037, "step": 20734 }, { "epoch": 4.717861205915813, "grad_norm": 1.9623034789681775, "learning_rate": 9.81061184877237e-09, "loss": 0.0266, "step": 20735 }, { "epoch": 4.718088737201366, "grad_norm": 0.956716623212342, "learning_rate": 9.794848812778035e-09, "loss": 0.041, "step": 20736 }, { "epoch": 4.718316268486917, "grad_norm": 0.5609092722128071, "learning_rate": 9.779098350450183e-09, "loss": 0.0019, "step": 20737 }, { "epoch": 4.718543799772469, "grad_norm": 1.3735814327593299, "learning_rate": 9.763360462110848e-09, "loss": 0.0201, "step": 20738 }, { "epoch": 4.71877133105802, "grad_norm": 1.028100420818374, "learning_rate": 9.74763514808172e-09, "loss": 0.0078, "step": 20739 }, { "epoch": 4.718998862343573, "grad_norm": 1.4411684313285815, "learning_rate": 9.731922408684133e-09, "loss": 0.0493, "step": 20740 }, { "epoch": 4.719226393629124, "grad_norm": 1.2702421128429668, "learning_rate": 9.716222244239223e-09, "loss": 0.0296, "step": 20741 }, { "epoch": 4.719453924914676, "grad_norm": 0.8914369003585648, "learning_rate": 9.700534655067914e-09, "loss": 0.0041, "step": 20742 }, { "epoch": 4.719681456200227, "grad_norm": 1.212847021974521, "learning_rate": 9.684859641490852e-09, "loss": 0.0404, "step": 20743 }, { "epoch": 4.71990898748578, "grad_norm": 1.4250566125331423, "learning_rate": 9.669197203828265e-09, "loss": 0.0951, "step": 20744 }, { "epoch": 4.720136518771331, "grad_norm": 1.0720181926022108, "learning_rate": 9.653547342400454e-09, "loss": 0.0745, "step": 20745 }, { "epoch": 4.720364050056883, "grad_norm": 1.4012019602520713, "learning_rate": 9.637910057527094e-09, "loss": 0.018, "step": 20746 }, { "epoch": 4.720591581342434, "grad_norm": 1.40159347722681, "learning_rate": 9.622285349527929e-09, "loss": 0.1221, "step": 20747 }, { "epoch": 4.720819112627987, "grad_norm": 2.0068666118651075, "learning_rate": 9.606673218722217e-09, "loss": 0.0416, "step": 20748 }, { "epoch": 4.721046643913538, "grad_norm": 1.5587359560237213, "learning_rate": 9.59107366542908e-09, "loss": 0.0555, "step": 20749 }, { "epoch": 4.72127417519909, "grad_norm": 1.698231748969673, "learning_rate": 9.575486689967356e-09, "loss": 0.0558, "step": 20750 }, { "epoch": 4.721501706484641, "grad_norm": 0.802778066178176, "learning_rate": 9.559912292655474e-09, "loss": 0.0023, "step": 20751 }, { "epoch": 4.721729237770194, "grad_norm": 1.0636983397451152, "learning_rate": 9.544350473811998e-09, "loss": 0.0325, "step": 20752 }, { "epoch": 4.721956769055745, "grad_norm": 1.7159456709506553, "learning_rate": 9.528801233754797e-09, "loss": 0.0552, "step": 20753 }, { "epoch": 4.722184300341297, "grad_norm": 1.0892320995524611, "learning_rate": 9.513264572801675e-09, "loss": 0.0627, "step": 20754 }, { "epoch": 4.722411831626848, "grad_norm": 1.3605416665801842, "learning_rate": 9.497740491270293e-09, "loss": 0.0239, "step": 20755 }, { "epoch": 4.722639362912401, "grad_norm": 1.9826644916008105, "learning_rate": 9.48222898947783e-09, "loss": 0.0333, "step": 20756 }, { "epoch": 4.722866894197952, "grad_norm": 0.48017462596077176, "learning_rate": 9.466730067741251e-09, "loss": 0.004, "step": 20757 }, { "epoch": 4.723094425483504, "grad_norm": 0.3795679328426222, "learning_rate": 9.451243726377458e-09, "loss": 0.0013, "step": 20758 }, { "epoch": 4.723321956769055, "grad_norm": 5.477219337620861, "learning_rate": 9.435769965703001e-09, "loss": 0.0123, "step": 20759 }, { "epoch": 4.723549488054608, "grad_norm": 1.375086335127381, "learning_rate": 9.420308786033949e-09, "loss": 0.031, "step": 20760 }, { "epoch": 4.723777019340159, "grad_norm": 1.6488379819450836, "learning_rate": 9.404860187686507e-09, "loss": 0.0282, "step": 20761 }, { "epoch": 4.724004550625711, "grad_norm": 1.4568385273327473, "learning_rate": 9.389424170976256e-09, "loss": 0.0146, "step": 20762 }, { "epoch": 4.724232081911262, "grad_norm": 0.7181763211007295, "learning_rate": 9.374000736218706e-09, "loss": 0.0036, "step": 20763 }, { "epoch": 4.724459613196815, "grad_norm": 1.2141143484747672, "learning_rate": 9.35858988372916e-09, "loss": 0.0075, "step": 20764 }, { "epoch": 4.724687144482367, "grad_norm": 2.1139853610172383, "learning_rate": 9.343191613822509e-09, "loss": 0.0775, "step": 20765 }, { "epoch": 4.724914675767918, "grad_norm": 1.233838508421053, "learning_rate": 9.327805926813566e-09, "loss": 0.0852, "step": 20766 }, { "epoch": 4.725142207053469, "grad_norm": 1.8054695727353365, "learning_rate": 9.312432823016595e-09, "loss": 0.0241, "step": 20767 }, { "epoch": 4.725369738339022, "grad_norm": 1.5429830078636508, "learning_rate": 9.297072302746068e-09, "loss": 0.0245, "step": 20768 }, { "epoch": 4.725597269624574, "grad_norm": 1.244190532644251, "learning_rate": 9.281724366315692e-09, "loss": 0.0332, "step": 20769 }, { "epoch": 4.725824800910125, "grad_norm": 1.4038975706650823, "learning_rate": 9.266389014039242e-09, "loss": 0.0506, "step": 20770 }, { "epoch": 4.726052332195676, "grad_norm": 2.5715526770682393, "learning_rate": 9.251066246230151e-09, "loss": 0.0443, "step": 20771 }, { "epoch": 4.726279863481229, "grad_norm": 0.7374050007430502, "learning_rate": 9.235756063201498e-09, "loss": 0.0038, "step": 20772 }, { "epoch": 4.726507394766781, "grad_norm": 2.2462340296416956, "learning_rate": 9.220458465266368e-09, "loss": 0.0788, "step": 20773 }, { "epoch": 4.726734926052332, "grad_norm": 1.2387627188024455, "learning_rate": 9.20517345273736e-09, "loss": 0.0692, "step": 20774 }, { "epoch": 4.726962457337884, "grad_norm": 0.757883269494665, "learning_rate": 9.18990102592672e-09, "loss": 0.0032, "step": 20775 }, { "epoch": 4.727189988623436, "grad_norm": 0.49530856114861466, "learning_rate": 9.17464118514677e-09, "loss": 0.0019, "step": 20776 }, { "epoch": 4.727417519908988, "grad_norm": 1.2684946197895082, "learning_rate": 9.159393930709276e-09, "loss": 0.0867, "step": 20777 }, { "epoch": 4.727645051194539, "grad_norm": 2.4572079668685056, "learning_rate": 9.14415926292593e-09, "loss": 0.0109, "step": 20778 }, { "epoch": 4.727872582480091, "grad_norm": 1.0545069346855094, "learning_rate": 9.128937182108083e-09, "loss": 0.0116, "step": 20779 }, { "epoch": 4.728100113765643, "grad_norm": 0.9998763962787425, "learning_rate": 9.113727688566872e-09, "loss": 0.0203, "step": 20780 }, { "epoch": 4.728327645051195, "grad_norm": 1.0047889277308466, "learning_rate": 9.098530782613094e-09, "loss": 0.0182, "step": 20781 }, { "epoch": 4.728555176336746, "grad_norm": 0.34278392742108016, "learning_rate": 9.083346464557399e-09, "loss": 0.0011, "step": 20782 }, { "epoch": 4.728782707622298, "grad_norm": 1.8688490249471605, "learning_rate": 9.068174734710097e-09, "loss": 0.0311, "step": 20783 }, { "epoch": 4.72901023890785, "grad_norm": 1.0061326859620112, "learning_rate": 9.053015593381286e-09, "loss": 0.0162, "step": 20784 }, { "epoch": 4.729237770193402, "grad_norm": 0.6869695715281567, "learning_rate": 9.037869040880721e-09, "loss": 0.0193, "step": 20785 }, { "epoch": 4.729465301478953, "grad_norm": 1.683741643214279, "learning_rate": 9.02273507751808e-09, "loss": 0.1451, "step": 20786 }, { "epoch": 4.729692832764505, "grad_norm": 0.8865300541697474, "learning_rate": 9.007613703602633e-09, "loss": 0.0092, "step": 20787 }, { "epoch": 4.729920364050057, "grad_norm": 0.6108768507778705, "learning_rate": 8.992504919443437e-09, "loss": 0.0034, "step": 20788 }, { "epoch": 4.730147895335609, "grad_norm": 1.4429711950778854, "learning_rate": 8.977408725349204e-09, "loss": 0.0062, "step": 20789 }, { "epoch": 4.73037542662116, "grad_norm": 1.228175190948666, "learning_rate": 8.962325121628646e-09, "loss": 0.0142, "step": 20790 }, { "epoch": 4.730602957906712, "grad_norm": 0.24073015882934087, "learning_rate": 8.947254108589848e-09, "loss": 0.0007, "step": 20791 }, { "epoch": 4.730830489192264, "grad_norm": 0.8208103499107031, "learning_rate": 8.932195686540967e-09, "loss": 0.0276, "step": 20792 }, { "epoch": 4.731058020477816, "grad_norm": 0.7064726469806885, "learning_rate": 8.917149855789745e-09, "loss": 0.0067, "step": 20793 }, { "epoch": 4.731285551763367, "grad_norm": 1.6269339005914494, "learning_rate": 8.902116616643711e-09, "loss": 0.0758, "step": 20794 }, { "epoch": 4.731513083048919, "grad_norm": 2.479387350802735, "learning_rate": 8.88709596941005e-09, "loss": 0.0331, "step": 20795 }, { "epoch": 4.731740614334471, "grad_norm": 0.810192566646584, "learning_rate": 8.872087914395742e-09, "loss": 0.0356, "step": 20796 }, { "epoch": 4.731968145620023, "grad_norm": 2.3831170890795415, "learning_rate": 8.85709245190762e-09, "loss": 0.0867, "step": 20797 }, { "epoch": 4.732195676905574, "grad_norm": 0.8633991677940391, "learning_rate": 8.84210958225211e-09, "loss": 0.0387, "step": 20798 }, { "epoch": 4.732423208191126, "grad_norm": 1.001305762469721, "learning_rate": 8.827139305735422e-09, "loss": 0.0066, "step": 20799 }, { "epoch": 4.732650739476678, "grad_norm": 2.185547726265097, "learning_rate": 8.812181622663562e-09, "loss": 0.0137, "step": 20800 }, { "epoch": 4.73287827076223, "grad_norm": 0.8584801285914646, "learning_rate": 8.79723653334219e-09, "loss": 0.005, "step": 20801 }, { "epoch": 4.733105802047781, "grad_norm": 0.8396458590738572, "learning_rate": 8.782304038076824e-09, "loss": 0.0128, "step": 20802 }, { "epoch": 4.733333333333333, "grad_norm": 1.416264672250773, "learning_rate": 8.767384137172569e-09, "loss": 0.0274, "step": 20803 }, { "epoch": 4.733560864618886, "grad_norm": 1.0752546569349446, "learning_rate": 8.752476830934457e-09, "loss": 0.0187, "step": 20804 }, { "epoch": 4.733788395904437, "grad_norm": 0.7050403129073516, "learning_rate": 8.737582119667034e-09, "loss": 0.0054, "step": 20805 }, { "epoch": 4.734015927189988, "grad_norm": 1.2507589210766505, "learning_rate": 8.722700003674853e-09, "loss": 0.0193, "step": 20806 }, { "epoch": 4.73424345847554, "grad_norm": 1.1219933466780876, "learning_rate": 8.707830483262042e-09, "loss": 0.0351, "step": 20807 }, { "epoch": 4.734470989761093, "grad_norm": 1.3737629255175758, "learning_rate": 8.692973558732456e-09, "loss": 0.0339, "step": 20808 }, { "epoch": 4.734698521046644, "grad_norm": 1.6041986634169694, "learning_rate": 8.678129230389812e-09, "loss": 0.0227, "step": 20809 }, { "epoch": 4.734926052332195, "grad_norm": 0.8419437243210675, "learning_rate": 8.663297498537407e-09, "loss": 0.0116, "step": 20810 }, { "epoch": 4.735153583617747, "grad_norm": 0.7007609954491961, "learning_rate": 8.648478363478541e-09, "loss": 0.0035, "step": 20811 }, { "epoch": 4.7353811149033, "grad_norm": 4.2603660354796835, "learning_rate": 8.633671825515888e-09, "loss": 0.012, "step": 20812 }, { "epoch": 4.735608646188851, "grad_norm": 0.18097513215625025, "learning_rate": 8.618877884952192e-09, "loss": 0.0006, "step": 20813 }, { "epoch": 4.735836177474403, "grad_norm": 1.363142878835634, "learning_rate": 8.604096542089782e-09, "loss": 0.0174, "step": 20814 }, { "epoch": 4.736063708759954, "grad_norm": 3.3334351536915383, "learning_rate": 8.589327797230707e-09, "loss": 0.0208, "step": 20815 }, { "epoch": 4.736291240045507, "grad_norm": 1.1374857586547888, "learning_rate": 8.574571650676947e-09, "loss": 0.0653, "step": 20816 }, { "epoch": 4.736518771331058, "grad_norm": 1.653379002454865, "learning_rate": 8.559828102729997e-09, "loss": 0.1199, "step": 20817 }, { "epoch": 4.73674630261661, "grad_norm": 0.7483246001482723, "learning_rate": 8.545097153691145e-09, "loss": 0.0034, "step": 20818 }, { "epoch": 4.736973833902161, "grad_norm": 1.0915272131785942, "learning_rate": 8.530378803861608e-09, "loss": 0.0558, "step": 20819 }, { "epoch": 4.737201365187714, "grad_norm": 0.413136028866989, "learning_rate": 8.515673053542048e-09, "loss": 0.0033, "step": 20820 }, { "epoch": 4.737428896473265, "grad_norm": 1.6321998186863875, "learning_rate": 8.500979903033197e-09, "loss": 0.0497, "step": 20821 }, { "epoch": 4.737656427758817, "grad_norm": 0.31920107704396655, "learning_rate": 8.48629935263516e-09, "loss": 0.0008, "step": 20822 }, { "epoch": 4.737883959044368, "grad_norm": 0.7802132429941928, "learning_rate": 8.471631402648117e-09, "loss": 0.0039, "step": 20823 }, { "epoch": 4.738111490329921, "grad_norm": 0.975267241698348, "learning_rate": 8.456976053371758e-09, "loss": 0.0475, "step": 20824 }, { "epoch": 4.738339021615472, "grad_norm": 0.6832025973124243, "learning_rate": 8.442333305105702e-09, "loss": 0.0038, "step": 20825 }, { "epoch": 4.738566552901024, "grad_norm": 1.9399413706268738, "learning_rate": 8.427703158149155e-09, "loss": 0.0164, "step": 20826 }, { "epoch": 4.738794084186575, "grad_norm": 1.2775958200637798, "learning_rate": 8.413085612801186e-09, "loss": 0.0074, "step": 20827 }, { "epoch": 4.739021615472128, "grad_norm": 2.3605518097975042, "learning_rate": 8.398480669360512e-09, "loss": 0.018, "step": 20828 }, { "epoch": 4.739249146757679, "grad_norm": 2.182619087437284, "learning_rate": 8.383888328125645e-09, "loss": 0.0831, "step": 20829 }, { "epoch": 4.739476678043231, "grad_norm": 0.8664935546982268, "learning_rate": 8.369308589394818e-09, "loss": 0.0299, "step": 20830 }, { "epoch": 4.7397042093287824, "grad_norm": 0.6719831585847604, "learning_rate": 8.354741453465987e-09, "loss": 0.0029, "step": 20831 }, { "epoch": 4.739931740614335, "grad_norm": 1.466504600855876, "learning_rate": 8.3401869206369e-09, "loss": 0.0121, "step": 20832 }, { "epoch": 4.740159271899886, "grad_norm": 0.6202978955911269, "learning_rate": 8.325644991205099e-09, "loss": 0.0126, "step": 20833 }, { "epoch": 4.740386803185438, "grad_norm": 2.3129763751887333, "learning_rate": 8.311115665467704e-09, "loss": 0.0163, "step": 20834 }, { "epoch": 4.7406143344709895, "grad_norm": 1.2219766211564713, "learning_rate": 8.296598943721701e-09, "loss": 0.01, "step": 20835 }, { "epoch": 4.740841865756542, "grad_norm": 1.5203614699046006, "learning_rate": 8.282094826263729e-09, "loss": 0.006, "step": 20836 }, { "epoch": 4.741069397042093, "grad_norm": 0.3531743707673246, "learning_rate": 8.267603313390354e-09, "loss": 0.0013, "step": 20837 }, { "epoch": 4.741296928327645, "grad_norm": 1.1745777856486848, "learning_rate": 8.253124405397591e-09, "loss": 0.0181, "step": 20838 }, { "epoch": 4.7415244596131965, "grad_norm": 0.49954718784360214, "learning_rate": 8.238658102581454e-09, "loss": 0.0022, "step": 20839 }, { "epoch": 4.741751990898749, "grad_norm": 1.9089777308630103, "learning_rate": 8.224204405237677e-09, "loss": 0.0289, "step": 20840 }, { "epoch": 4.7419795221843, "grad_norm": 1.189187422632601, "learning_rate": 8.20976331366151e-09, "loss": 0.0457, "step": 20841 }, { "epoch": 4.742207053469852, "grad_norm": 1.4323423461006652, "learning_rate": 8.195334828148272e-09, "loss": 0.0228, "step": 20842 }, { "epoch": 4.742434584755404, "grad_norm": 0.9631954467269603, "learning_rate": 8.180918948992728e-09, "loss": 0.011, "step": 20843 }, { "epoch": 4.742662116040956, "grad_norm": 1.8302152983070064, "learning_rate": 8.166515676489503e-09, "loss": 0.0237, "step": 20844 }, { "epoch": 4.742889647326507, "grad_norm": 0.6328882181786448, "learning_rate": 8.152125010933016e-09, "loss": 0.0048, "step": 20845 }, { "epoch": 4.743117178612059, "grad_norm": 2.327379504037635, "learning_rate": 8.137746952617404e-09, "loss": 0.0255, "step": 20846 }, { "epoch": 4.743344709897611, "grad_norm": 2.4335563909506286, "learning_rate": 8.1233815018366e-09, "loss": 0.0475, "step": 20847 }, { "epoch": 4.743572241183163, "grad_norm": 1.1064437136688297, "learning_rate": 8.109028658884049e-09, "loss": 0.0087, "step": 20848 }, { "epoch": 4.743799772468714, "grad_norm": 1.5459733772718998, "learning_rate": 8.094688424053199e-09, "loss": 0.045, "step": 20849 }, { "epoch": 4.744027303754266, "grad_norm": 1.3863862427243265, "learning_rate": 8.080360797637077e-09, "loss": 0.0299, "step": 20850 }, { "epoch": 4.744254835039818, "grad_norm": 3.1985373321531427, "learning_rate": 8.066045779928574e-09, "loss": 0.0131, "step": 20851 }, { "epoch": 4.74448236632537, "grad_norm": 2.3951945276673934, "learning_rate": 8.051743371220167e-09, "loss": 0.0304, "step": 20852 }, { "epoch": 4.744709897610922, "grad_norm": 1.3411175094608432, "learning_rate": 8.037453571804257e-09, "loss": 0.0111, "step": 20853 }, { "epoch": 4.744937428896473, "grad_norm": 1.5107427547692236, "learning_rate": 8.023176381972975e-09, "loss": 0.0802, "step": 20854 }, { "epoch": 4.745164960182025, "grad_norm": 0.4427401143939142, "learning_rate": 8.008911802017891e-09, "loss": 0.0024, "step": 20855 }, { "epoch": 4.745392491467577, "grad_norm": 2.3825831757443408, "learning_rate": 7.994659832230785e-09, "loss": 0.112, "step": 20856 }, { "epoch": 4.745620022753129, "grad_norm": 1.0611314214020775, "learning_rate": 7.980420472902886e-09, "loss": 0.0271, "step": 20857 }, { "epoch": 4.74584755403868, "grad_norm": 0.8488351088050908, "learning_rate": 7.96619372432507e-09, "loss": 0.0169, "step": 20858 }, { "epoch": 4.746075085324232, "grad_norm": 1.2148508907803797, "learning_rate": 7.951979586788214e-09, "loss": 0.018, "step": 20859 }, { "epoch": 4.746302616609784, "grad_norm": 1.0878935848210611, "learning_rate": 7.937778060582852e-09, "loss": 0.038, "step": 20860 }, { "epoch": 4.746530147895336, "grad_norm": 0.4310848575699016, "learning_rate": 7.923589145999236e-09, "loss": 0.0011, "step": 20861 }, { "epoch": 4.746757679180887, "grad_norm": 1.3152837006765448, "learning_rate": 7.909412843327344e-09, "loss": 0.0589, "step": 20862 }, { "epoch": 4.746985210466439, "grad_norm": 0.8977833072040884, "learning_rate": 7.895249152856804e-09, "loss": 0.0319, "step": 20863 }, { "epoch": 4.747212741751991, "grad_norm": 0.7577561611565603, "learning_rate": 7.881098074877317e-09, "loss": 0.0165, "step": 20864 }, { "epoch": 4.747440273037543, "grad_norm": 1.1242584164764398, "learning_rate": 7.866959609677885e-09, "loss": 0.054, "step": 20865 }, { "epoch": 4.747667804323094, "grad_norm": 2.4432819778780535, "learning_rate": 7.852833757547654e-09, "loss": 0.015, "step": 20866 }, { "epoch": 4.747895335608646, "grad_norm": 1.1970912988167441, "learning_rate": 7.838720518775142e-09, "loss": 0.0613, "step": 20867 }, { "epoch": 4.748122866894198, "grad_norm": 0.9282248325229351, "learning_rate": 7.824619893649008e-09, "loss": 0.0259, "step": 20868 }, { "epoch": 4.74835039817975, "grad_norm": 1.3452437435566962, "learning_rate": 7.810531882457353e-09, "loss": 0.0497, "step": 20869 }, { "epoch": 4.748577929465301, "grad_norm": 1.052099785806787, "learning_rate": 7.796456485488075e-09, "loss": 0.0432, "step": 20870 }, { "epoch": 4.748805460750853, "grad_norm": 0.9336031829247396, "learning_rate": 7.782393703028857e-09, "loss": 0.0032, "step": 20871 }, { "epoch": 4.749032992036405, "grad_norm": 1.044898389923847, "learning_rate": 7.76834353536704e-09, "loss": 0.0424, "step": 20872 }, { "epoch": 4.749260523321957, "grad_norm": 1.5341880781329966, "learning_rate": 7.754305982790034e-09, "loss": 0.0594, "step": 20873 }, { "epoch": 4.749488054607508, "grad_norm": 0.282332180801861, "learning_rate": 7.740281045584483e-09, "loss": 0.0017, "step": 20874 }, { "epoch": 4.74971558589306, "grad_norm": 1.3449304694688444, "learning_rate": 7.726268724037173e-09, "loss": 0.0124, "step": 20875 }, { "epoch": 4.749943117178612, "grad_norm": 1.4920291383361073, "learning_rate": 7.71226901843447e-09, "loss": 0.0369, "step": 20876 }, { "epoch": 4.750170648464164, "grad_norm": 0.6483849099222753, "learning_rate": 7.698281929062398e-09, "loss": 0.0139, "step": 20877 }, { "epoch": 4.750398179749715, "grad_norm": 0.7753564363708225, "learning_rate": 7.684307456206908e-09, "loss": 0.0343, "step": 20878 }, { "epoch": 4.750625711035267, "grad_norm": 1.6313796196762906, "learning_rate": 7.670345600153673e-09, "loss": 0.0418, "step": 20879 }, { "epoch": 4.750853242320819, "grad_norm": 0.8533207682394067, "learning_rate": 7.656396361187951e-09, "loss": 0.0107, "step": 20880 }, { "epoch": 4.751080773606371, "grad_norm": 0.48397732987025993, "learning_rate": 7.642459739594931e-09, "loss": 0.0189, "step": 20881 }, { "epoch": 4.751308304891923, "grad_norm": 1.1074592663965497, "learning_rate": 7.628535735659387e-09, "loss": 0.074, "step": 20882 }, { "epoch": 4.751535836177474, "grad_norm": 1.3007282639302842, "learning_rate": 7.614624349665881e-09, "loss": 0.0096, "step": 20883 }, { "epoch": 4.751763367463026, "grad_norm": 1.8655570076850108, "learning_rate": 7.600725581898769e-09, "loss": 0.1733, "step": 20884 }, { "epoch": 4.751990898748578, "grad_norm": 2.868767633886001, "learning_rate": 7.58683943264206e-09, "loss": 0.092, "step": 20885 }, { "epoch": 4.75221843003413, "grad_norm": 1.4138476436056442, "learning_rate": 7.572965902179694e-09, "loss": 0.0194, "step": 20886 }, { "epoch": 4.7524459613196814, "grad_norm": 1.7551530327779057, "learning_rate": 7.559104990795125e-09, "loss": 0.0713, "step": 20887 }, { "epoch": 4.752673492605233, "grad_norm": 0.81654772939842, "learning_rate": 7.545256698771666e-09, "loss": 0.0107, "step": 20888 }, { "epoch": 4.752901023890785, "grad_norm": 1.2361092339062247, "learning_rate": 7.531421026392288e-09, "loss": 0.0127, "step": 20889 }, { "epoch": 4.753128555176337, "grad_norm": 1.2300462953251252, "learning_rate": 7.517597973939889e-09, "loss": 0.0457, "step": 20890 }, { "epoch": 4.7533560864618885, "grad_norm": 0.5084741618545187, "learning_rate": 7.50378754169688e-09, "loss": 0.002, "step": 20891 }, { "epoch": 4.753583617747441, "grad_norm": 1.088725980292482, "learning_rate": 7.489989729945538e-09, "loss": 0.0134, "step": 20892 }, { "epoch": 4.753811149032992, "grad_norm": 0.80542806548878, "learning_rate": 7.476204538967927e-09, "loss": 0.0033, "step": 20893 }, { "epoch": 4.754038680318544, "grad_norm": 1.401383486067283, "learning_rate": 7.462431969045766e-09, "loss": 0.0387, "step": 20894 }, { "epoch": 4.7542662116040955, "grad_norm": 1.5289925478668294, "learning_rate": 7.448672020460568e-09, "loss": 0.0416, "step": 20895 }, { "epoch": 4.754493742889648, "grad_norm": 0.6932138014985277, "learning_rate": 7.434924693493495e-09, "loss": 0.0053, "step": 20896 }, { "epoch": 4.754721274175199, "grad_norm": 1.3005286792701036, "learning_rate": 7.421189988425503e-09, "loss": 0.0172, "step": 20897 }, { "epoch": 4.754948805460751, "grad_norm": 0.7981753141434559, "learning_rate": 7.4074679055373414e-09, "loss": 0.0031, "step": 20898 }, { "epoch": 4.7551763367463025, "grad_norm": 1.5962613225500901, "learning_rate": 7.393758445109478e-09, "loss": 0.0535, "step": 20899 }, { "epoch": 4.755403868031855, "grad_norm": 1.8729997931769118, "learning_rate": 7.380061607422176e-09, "loss": 0.0973, "step": 20900 }, { "epoch": 4.755631399317406, "grad_norm": 0.7377869169251945, "learning_rate": 7.366377392755211e-09, "loss": 0.0099, "step": 20901 }, { "epoch": 4.755858930602958, "grad_norm": 1.1415370576535029, "learning_rate": 7.35270580138836e-09, "loss": 0.0223, "step": 20902 }, { "epoch": 4.7560864618885095, "grad_norm": 1.0039838496991926, "learning_rate": 7.339046833601051e-09, "loss": 0.0308, "step": 20903 }, { "epoch": 4.756313993174062, "grad_norm": 1.0077307257611503, "learning_rate": 7.325400489672438e-09, "loss": 0.013, "step": 20904 }, { "epoch": 4.756541524459613, "grad_norm": 1.338060324915401, "learning_rate": 7.311766769881392e-09, "loss": 0.0339, "step": 20905 }, { "epoch": 4.756769055745165, "grad_norm": 1.0444012189879694, "learning_rate": 7.298145674506651e-09, "loss": 0.0089, "step": 20906 }, { "epoch": 4.7569965870307165, "grad_norm": 0.9139527520660191, "learning_rate": 7.2845372038265335e-09, "loss": 0.0081, "step": 20907 }, { "epoch": 4.757224118316269, "grad_norm": 1.0401377402215488, "learning_rate": 7.270941358119079e-09, "loss": 0.0094, "step": 20908 }, { "epoch": 4.75745164960182, "grad_norm": 1.130748337530987, "learning_rate": 7.257358137662401e-09, "loss": 0.0134, "step": 20909 }, { "epoch": 4.757679180887372, "grad_norm": 1.226397943040716, "learning_rate": 7.243787542733915e-09, "loss": 0.0153, "step": 20910 }, { "epoch": 4.7579067121729235, "grad_norm": 0.7515763615752079, "learning_rate": 7.230229573611039e-09, "loss": 0.0027, "step": 20911 }, { "epoch": 4.758134243458476, "grad_norm": 0.8662485347893064, "learning_rate": 7.216684230570911e-09, "loss": 0.0397, "step": 20912 }, { "epoch": 4.758361774744027, "grad_norm": 1.095945331775845, "learning_rate": 7.203151513890325e-09, "loss": 0.0074, "step": 20913 }, { "epoch": 4.758589306029579, "grad_norm": 1.199862877701876, "learning_rate": 7.189631423845864e-09, "loss": 0.0127, "step": 20914 }, { "epoch": 4.7588168373151305, "grad_norm": 1.862651562062404, "learning_rate": 7.1761239607139045e-09, "loss": 0.0146, "step": 20915 }, { "epoch": 4.759044368600683, "grad_norm": 1.069140861926515, "learning_rate": 7.162629124770476e-09, "loss": 0.0047, "step": 20916 }, { "epoch": 4.759271899886234, "grad_norm": 1.1645697090904166, "learning_rate": 7.14914691629133e-09, "loss": 0.0058, "step": 20917 }, { "epoch": 4.759499431171786, "grad_norm": 1.4677738778365301, "learning_rate": 7.135677335552149e-09, "loss": 0.05, "step": 20918 }, { "epoch": 4.7597269624573375, "grad_norm": 0.962364553320853, "learning_rate": 7.122220382828129e-09, "loss": 0.0084, "step": 20919 }, { "epoch": 4.75995449374289, "grad_norm": 1.2246751832566474, "learning_rate": 7.108776058394329e-09, "loss": 0.0285, "step": 20920 }, { "epoch": 4.760182025028442, "grad_norm": 1.0769123247133494, "learning_rate": 7.095344362525528e-09, "loss": 0.0224, "step": 20921 }, { "epoch": 4.760409556313993, "grad_norm": 1.0432985576518947, "learning_rate": 7.081925295496297e-09, "loss": 0.0073, "step": 20922 }, { "epoch": 4.7606370875995445, "grad_norm": 0.8390270251324375, "learning_rate": 7.068518857580794e-09, "loss": 0.0069, "step": 20923 }, { "epoch": 4.760864618885097, "grad_norm": 0.6676502638804184, "learning_rate": 7.055125049053105e-09, "loss": 0.0054, "step": 20924 }, { "epoch": 4.761092150170649, "grad_norm": 0.8613839286227878, "learning_rate": 7.041743870186968e-09, "loss": 0.0155, "step": 20925 }, { "epoch": 4.7613196814562, "grad_norm": 0.2606214079259513, "learning_rate": 7.028375321255845e-09, "loss": 0.0007, "step": 20926 }, { "epoch": 4.7615472127417515, "grad_norm": 1.3370092850169735, "learning_rate": 7.015019402532921e-09, "loss": 0.0265, "step": 20927 }, { "epoch": 4.761774744027304, "grad_norm": 1.5145047319243488, "learning_rate": 7.001676114291242e-09, "loss": 0.0316, "step": 20928 }, { "epoch": 4.762002275312856, "grad_norm": 1.562080250065725, "learning_rate": 6.9883454568035055e-09, "loss": 0.0102, "step": 20929 }, { "epoch": 4.762229806598407, "grad_norm": 1.4409735422986552, "learning_rate": 6.975027430342132e-09, "loss": 0.0557, "step": 20930 }, { "epoch": 4.762457337883959, "grad_norm": 1.1243729618681253, "learning_rate": 6.961722035179336e-09, "loss": 0.0196, "step": 20931 }, { "epoch": 4.762684869169511, "grad_norm": 0.9548671141545649, "learning_rate": 6.94842927158712e-09, "loss": 0.0071, "step": 20932 }, { "epoch": 4.762912400455063, "grad_norm": 0.9704830862478179, "learning_rate": 6.935149139837005e-09, "loss": 0.0092, "step": 20933 }, { "epoch": 4.763139931740614, "grad_norm": 0.6511247387770357, "learning_rate": 6.921881640200509e-09, "loss": 0.0129, "step": 20934 }, { "epoch": 4.763367463026166, "grad_norm": 0.369165585574001, "learning_rate": 6.908626772948873e-09, "loss": 0.0012, "step": 20935 }, { "epoch": 4.763594994311718, "grad_norm": 0.7455800521560187, "learning_rate": 6.895384538352923e-09, "loss": 0.0034, "step": 20936 }, { "epoch": 4.76382252559727, "grad_norm": 0.8541775546546596, "learning_rate": 6.8821549366832745e-09, "loss": 0.0056, "step": 20937 }, { "epoch": 4.764050056882821, "grad_norm": 0.67201727958714, "learning_rate": 6.868937968210407e-09, "loss": 0.0152, "step": 20938 }, { "epoch": 4.764277588168373, "grad_norm": 1.4751443535257822, "learning_rate": 6.855733633204242e-09, "loss": 0.0119, "step": 20939 }, { "epoch": 4.764505119453925, "grad_norm": 0.8254299618142082, "learning_rate": 6.842541931934912e-09, "loss": 0.0065, "step": 20940 }, { "epoch": 4.764732650739477, "grad_norm": 1.0041498704206617, "learning_rate": 6.8293628646719235e-09, "loss": 0.0134, "step": 20941 }, { "epoch": 4.764960182025028, "grad_norm": 0.7884016483625848, "learning_rate": 6.816196431684643e-09, "loss": 0.0086, "step": 20942 }, { "epoch": 4.76518771331058, "grad_norm": 1.5919985952244355, "learning_rate": 6.8030426332420935e-09, "loss": 0.0148, "step": 20943 }, { "epoch": 4.765415244596132, "grad_norm": 1.1329276920326121, "learning_rate": 6.789901469613294e-09, "loss": 0.0098, "step": 20944 }, { "epoch": 4.765642775881684, "grad_norm": 0.7697393478268064, "learning_rate": 6.776772941066573e-09, "loss": 0.0034, "step": 20945 }, { "epoch": 4.765870307167235, "grad_norm": 0.7869286525001553, "learning_rate": 6.763657047870464e-09, "loss": 0.0263, "step": 20946 }, { "epoch": 4.7660978384527874, "grad_norm": 1.3770060468000915, "learning_rate": 6.750553790292949e-09, "loss": 0.0042, "step": 20947 }, { "epoch": 4.766325369738339, "grad_norm": 1.1083430200914648, "learning_rate": 6.737463168601868e-09, "loss": 0.0655, "step": 20948 }, { "epoch": 4.766552901023891, "grad_norm": 1.2998174090437926, "learning_rate": 6.724385183064716e-09, "loss": 0.022, "step": 20949 }, { "epoch": 4.766780432309442, "grad_norm": 0.619508690099885, "learning_rate": 6.711319833948848e-09, "loss": 0.0035, "step": 20950 }, { "epoch": 4.7670079635949945, "grad_norm": 1.2353285157516787, "learning_rate": 6.6982671215212044e-09, "loss": 0.0209, "step": 20951 }, { "epoch": 4.767235494880546, "grad_norm": 0.8798223717104544, "learning_rate": 6.685227046048654e-09, "loss": 0.0086, "step": 20952 }, { "epoch": 4.767463026166098, "grad_norm": 0.68301055449424, "learning_rate": 6.67219960779765e-09, "loss": 0.0023, "step": 20953 }, { "epoch": 4.767690557451649, "grad_norm": 2.410199336287274, "learning_rate": 6.659184807034508e-09, "loss": 0.0294, "step": 20954 }, { "epoch": 4.7679180887372015, "grad_norm": 1.6400324084625955, "learning_rate": 6.646182644025126e-09, "loss": 0.0357, "step": 20955 }, { "epoch": 4.768145620022753, "grad_norm": 1.105671779964904, "learning_rate": 6.633193119035403e-09, "loss": 0.0653, "step": 20956 }, { "epoch": 4.768373151308305, "grad_norm": 2.31794498853963, "learning_rate": 6.620216232330681e-09, "loss": 0.0556, "step": 20957 }, { "epoch": 4.768600682593856, "grad_norm": 1.0289485277381187, "learning_rate": 6.6072519841761665e-09, "loss": 0.1131, "step": 20958 }, { "epoch": 4.7688282138794085, "grad_norm": 1.5933733355176656, "learning_rate": 6.594300374836923e-09, "loss": 0.0133, "step": 20959 }, { "epoch": 4.769055745164961, "grad_norm": 1.572595196307567, "learning_rate": 6.581361404577671e-09, "loss": 0.0861, "step": 20960 }, { "epoch": 4.769283276450512, "grad_norm": 0.5244887378837956, "learning_rate": 6.568435073662782e-09, "loss": 0.0086, "step": 20961 }, { "epoch": 4.769510807736063, "grad_norm": 0.7984515544491195, "learning_rate": 6.555521382356489e-09, "loss": 0.0103, "step": 20962 }, { "epoch": 4.7697383390216155, "grad_norm": 0.9444202692716119, "learning_rate": 6.542620330922677e-09, "loss": 0.0188, "step": 20963 }, { "epoch": 4.769965870307168, "grad_norm": 0.4973723559962199, "learning_rate": 6.529731919625165e-09, "loss": 0.0035, "step": 20964 }, { "epoch": 4.770193401592719, "grad_norm": 1.2939717151960082, "learning_rate": 6.516856148727144e-09, "loss": 0.0534, "step": 20965 }, { "epoch": 4.77042093287827, "grad_norm": 1.1482571599699196, "learning_rate": 6.503993018491875e-09, "loss": 0.0146, "step": 20966 }, { "epoch": 4.7706484641638225, "grad_norm": 1.6048285127125923, "learning_rate": 6.4911425291822735e-09, "loss": 0.0408, "step": 20967 }, { "epoch": 4.770875995449375, "grad_norm": 1.5803062431020618, "learning_rate": 6.478304681061045e-09, "loss": 0.0151, "step": 20968 }, { "epoch": 4.771103526734926, "grad_norm": 1.1580626215986332, "learning_rate": 6.465479474390482e-09, "loss": 0.0496, "step": 20969 }, { "epoch": 4.771331058020478, "grad_norm": 1.0449167375788433, "learning_rate": 6.4526669094326644e-09, "loss": 0.0043, "step": 20970 }, { "epoch": 4.7715585893060295, "grad_norm": 1.2331799279530604, "learning_rate": 6.439866986449605e-09, "loss": 0.0517, "step": 20971 }, { "epoch": 4.771786120591582, "grad_norm": 2.012767592005488, "learning_rate": 6.427079705702763e-09, "loss": 0.0665, "step": 20972 }, { "epoch": 4.772013651877133, "grad_norm": 0.9308957281637655, "learning_rate": 6.414305067453524e-09, "loss": 0.0128, "step": 20973 }, { "epoch": 4.772241183162685, "grad_norm": 1.0540889319758264, "learning_rate": 6.4015430719630704e-09, "loss": 0.0913, "step": 20974 }, { "epoch": 4.7724687144482365, "grad_norm": 1.1432503544261314, "learning_rate": 6.388793719492164e-09, "loss": 0.0383, "step": 20975 }, { "epoch": 4.772696245733789, "grad_norm": 0.6591475773228381, "learning_rate": 6.376057010301362e-09, "loss": 0.0079, "step": 20976 }, { "epoch": 4.77292377701934, "grad_norm": 1.5115014620297733, "learning_rate": 6.3633329446509405e-09, "loss": 0.0495, "step": 20977 }, { "epoch": 4.773151308304892, "grad_norm": 1.6202725782755265, "learning_rate": 6.350621522801109e-09, "loss": 0.0901, "step": 20978 }, { "epoch": 4.7733788395904435, "grad_norm": 1.3819624198971958, "learning_rate": 6.337922745011521e-09, "loss": 0.0452, "step": 20979 }, { "epoch": 4.773606370875996, "grad_norm": 1.0881910807071102, "learning_rate": 6.32523661154176e-09, "loss": 0.0326, "step": 20980 }, { "epoch": 4.773833902161547, "grad_norm": 1.669642716283583, "learning_rate": 6.312563122651133e-09, "loss": 0.0228, "step": 20981 }, { "epoch": 4.774061433447099, "grad_norm": 1.782330482780306, "learning_rate": 6.299902278598599e-09, "loss": 0.0958, "step": 20982 }, { "epoch": 4.7742889647326505, "grad_norm": 0.9372606923973515, "learning_rate": 6.287254079643049e-09, "loss": 0.01, "step": 20983 }, { "epoch": 4.774516496018203, "grad_norm": 1.1492208047996175, "learning_rate": 6.274618526042886e-09, "loss": 0.0102, "step": 20984 }, { "epoch": 4.774744027303754, "grad_norm": 2.524041798739827, "learning_rate": 6.261995618056377e-09, "loss": 0.0354, "step": 20985 }, { "epoch": 4.774971558589306, "grad_norm": 0.8018101681206471, "learning_rate": 6.24938535594144e-09, "loss": 0.0084, "step": 20986 }, { "epoch": 4.7751990898748575, "grad_norm": 1.0319659887479335, "learning_rate": 6.236787739955924e-09, "loss": 0.0046, "step": 20987 }, { "epoch": 4.77542662116041, "grad_norm": 1.4693935072264825, "learning_rate": 6.2242027703572624e-09, "loss": 0.0261, "step": 20988 }, { "epoch": 4.775654152445961, "grad_norm": 1.1675633590712418, "learning_rate": 6.21163044740268e-09, "loss": 0.0292, "step": 20989 }, { "epoch": 4.775881683731513, "grad_norm": 1.385902810456603, "learning_rate": 6.199070771349055e-09, "loss": 0.0422, "step": 20990 }, { "epoch": 4.7761092150170645, "grad_norm": 1.1705997279969027, "learning_rate": 6.186523742453196e-09, "loss": 0.0067, "step": 20991 }, { "epoch": 4.776336746302617, "grad_norm": 1.0066146689098456, "learning_rate": 6.173989360971494e-09, "loss": 0.0176, "step": 20992 }, { "epoch": 4.776564277588168, "grad_norm": 1.860653099115971, "learning_rate": 6.161467627160064e-09, "loss": 0.0306, "step": 20993 }, { "epoch": 4.77679180887372, "grad_norm": 0.7024413028466723, "learning_rate": 6.148958541274952e-09, "loss": 0.0059, "step": 20994 }, { "epoch": 4.7770193401592715, "grad_norm": 1.8168970756945646, "learning_rate": 6.136462103571717e-09, "loss": 0.1193, "step": 20995 }, { "epoch": 4.777246871444824, "grad_norm": 1.2785369990824222, "learning_rate": 6.123978314305778e-09, "loss": 0.018, "step": 20996 }, { "epoch": 4.777474402730375, "grad_norm": 3.1601899689027717, "learning_rate": 6.11150717373242e-09, "loss": 0.0283, "step": 20997 }, { "epoch": 4.777701934015927, "grad_norm": 0.32374777045082537, "learning_rate": 6.099048682106299e-09, "loss": 0.0013, "step": 20998 }, { "epoch": 4.777929465301479, "grad_norm": 1.0422399706684313, "learning_rate": 6.0866028396821404e-09, "loss": 0.0281, "step": 20999 }, { "epoch": 4.778156996587031, "grad_norm": 1.710120896948869, "learning_rate": 6.074169646714395e-09, "loss": 0.0345, "step": 21000 }, { "epoch": 4.778384527872582, "grad_norm": 1.1738836368516434, "learning_rate": 6.061749103457165e-09, "loss": 0.0077, "step": 21001 }, { "epoch": 4.778612059158134, "grad_norm": 0.8332738019240311, "learning_rate": 6.049341210164206e-09, "loss": 0.0065, "step": 21002 }, { "epoch": 4.7788395904436864, "grad_norm": 1.8327146335098143, "learning_rate": 6.036945967089134e-09, "loss": 0.0052, "step": 21003 }, { "epoch": 4.779067121729238, "grad_norm": 1.5259476995224202, "learning_rate": 6.024563374485287e-09, "loss": 0.0744, "step": 21004 }, { "epoch": 4.779294653014789, "grad_norm": 0.8301736214439476, "learning_rate": 6.012193432605798e-09, "loss": 0.0078, "step": 21005 }, { "epoch": 4.779522184300341, "grad_norm": 2.367644916336436, "learning_rate": 5.9998361417034494e-09, "loss": 0.0089, "step": 21006 }, { "epoch": 4.7797497155858935, "grad_norm": 0.9888969617206773, "learning_rate": 5.987491502030817e-09, "loss": 0.0144, "step": 21007 }, { "epoch": 4.779977246871445, "grad_norm": 0.9923175190263852, "learning_rate": 5.975159513840131e-09, "loss": 0.0173, "step": 21008 }, { "epoch": 4.780204778156997, "grad_norm": 0.9147079441793687, "learning_rate": 5.9628401773836195e-09, "loss": 0.0493, "step": 21009 }, { "epoch": 4.780432309442548, "grad_norm": 1.8387393355828556, "learning_rate": 5.950533492912816e-09, "loss": 0.0178, "step": 21010 }, { "epoch": 4.7806598407281005, "grad_norm": 1.1440094350998438, "learning_rate": 5.938239460679465e-09, "loss": 0.0417, "step": 21011 }, { "epoch": 4.780887372013652, "grad_norm": 0.7416576756500636, "learning_rate": 5.925958080934685e-09, "loss": 0.0245, "step": 21012 }, { "epoch": 4.781114903299204, "grad_norm": 1.0392004487668, "learning_rate": 5.913689353929525e-09, "loss": 0.0057, "step": 21013 }, { "epoch": 4.781342434584755, "grad_norm": 0.8027543978799065, "learning_rate": 5.901433279914826e-09, "loss": 0.0547, "step": 21014 }, { "epoch": 4.7815699658703075, "grad_norm": 0.4814478981529177, "learning_rate": 5.889189859140942e-09, "loss": 0.0034, "step": 21015 }, { "epoch": 4.781797497155859, "grad_norm": 1.5091014665368345, "learning_rate": 5.876959091858162e-09, "loss": 0.0118, "step": 21016 }, { "epoch": 4.782025028441411, "grad_norm": 1.676452777891929, "learning_rate": 5.864740978316491e-09, "loss": 0.1018, "step": 21017 }, { "epoch": 4.782252559726962, "grad_norm": 1.9260653018325844, "learning_rate": 5.852535518765662e-09, "loss": 0.009, "step": 21018 }, { "epoch": 4.7824800910125145, "grad_norm": 1.3150428390556075, "learning_rate": 5.840342713455058e-09, "loss": 0.005, "step": 21019 }, { "epoch": 4.782707622298066, "grad_norm": 1.44107205866938, "learning_rate": 5.8281625626338544e-09, "loss": 0.0383, "step": 21020 }, { "epoch": 4.782935153583618, "grad_norm": 0.8348885057568137, "learning_rate": 5.8159950665511585e-09, "loss": 0.0136, "step": 21021 }, { "epoch": 4.783162684869169, "grad_norm": 1.549062724665689, "learning_rate": 5.803840225455451e-09, "loss": 0.0818, "step": 21022 }, { "epoch": 4.7833902161547215, "grad_norm": 1.2325374090918415, "learning_rate": 5.791698039595283e-09, "loss": 0.0328, "step": 21023 }, { "epoch": 4.783617747440273, "grad_norm": 0.806685927686801, "learning_rate": 5.7795685092187915e-09, "loss": 0.0485, "step": 21024 }, { "epoch": 4.783845278725825, "grad_norm": 0.9750181081936294, "learning_rate": 5.7674516345738315e-09, "loss": 0.0483, "step": 21025 }, { "epoch": 4.784072810011376, "grad_norm": 1.0383406987476773, "learning_rate": 5.755347415908122e-09, "loss": 0.0076, "step": 21026 }, { "epoch": 4.7843003412969285, "grad_norm": 0.45307952599325985, "learning_rate": 5.743255853468965e-09, "loss": 0.0013, "step": 21027 }, { "epoch": 4.78452787258248, "grad_norm": 0.19009086746416934, "learning_rate": 5.7311769475036635e-09, "loss": 0.0007, "step": 21028 }, { "epoch": 4.784755403868032, "grad_norm": 1.1310261397683274, "learning_rate": 5.719110698258826e-09, "loss": 0.0721, "step": 21029 }, { "epoch": 4.784982935153583, "grad_norm": 1.5394367406922997, "learning_rate": 5.707057105981337e-09, "loss": 0.05, "step": 21030 }, { "epoch": 4.7852104664391355, "grad_norm": 1.4928067682115673, "learning_rate": 5.69501617091732e-09, "loss": 0.023, "step": 21031 }, { "epoch": 4.785437997724687, "grad_norm": 1.783094104211226, "learning_rate": 5.682987893313035e-09, "loss": 0.0763, "step": 21032 }, { "epoch": 4.785665529010239, "grad_norm": 1.9845007620990502, "learning_rate": 5.670972273414191e-09, "loss": 0.0181, "step": 21033 }, { "epoch": 4.78589306029579, "grad_norm": 0.8274826526884386, "learning_rate": 5.658969311466422e-09, "loss": 0.0089, "step": 21034 }, { "epoch": 4.7861205915813425, "grad_norm": 0.8773516762876074, "learning_rate": 5.646979007715159e-09, "loss": 0.0344, "step": 21035 }, { "epoch": 4.786348122866894, "grad_norm": 0.939515800739607, "learning_rate": 5.635001362405273e-09, "loss": 0.0166, "step": 21036 }, { "epoch": 4.786575654152446, "grad_norm": 1.3988764814029293, "learning_rate": 5.62303637578164e-09, "loss": 0.008, "step": 21037 }, { "epoch": 4.786803185437998, "grad_norm": 0.44448258281889547, "learning_rate": 5.611084048088786e-09, "loss": 0.0019, "step": 21038 }, { "epoch": 4.7870307167235495, "grad_norm": 1.1284146154388415, "learning_rate": 5.599144379571097e-09, "loss": 0.0073, "step": 21039 }, { "epoch": 4.787258248009101, "grad_norm": 0.548447301312321, "learning_rate": 5.587217370472478e-09, "loss": 0.0022, "step": 21040 }, { "epoch": 4.787485779294653, "grad_norm": 0.8588991018118011, "learning_rate": 5.57530302103669e-09, "loss": 0.0494, "step": 21041 }, { "epoch": 4.787713310580205, "grad_norm": 1.2088796294385955, "learning_rate": 5.56340133150736e-09, "loss": 0.0322, "step": 21042 }, { "epoch": 4.7879408418657565, "grad_norm": 0.46173275603608255, "learning_rate": 5.551512302127623e-09, "loss": 0.0011, "step": 21043 }, { "epoch": 4.788168373151308, "grad_norm": 0.2171677740703181, "learning_rate": 5.539635933140483e-09, "loss": 0.0011, "step": 21044 }, { "epoch": 4.78839590443686, "grad_norm": 0.8768992937949949, "learning_rate": 5.527772224788799e-09, "loss": 0.0126, "step": 21045 }, { "epoch": 4.788623435722412, "grad_norm": 1.2315971247602844, "learning_rate": 5.515921177314809e-09, "loss": 0.0614, "step": 21046 }, { "epoch": 4.7888509670079635, "grad_norm": 1.061294011598532, "learning_rate": 5.504082790960888e-09, "loss": 0.0606, "step": 21047 }, { "epoch": 4.789078498293516, "grad_norm": 2.2473358549864053, "learning_rate": 5.492257065968995e-09, "loss": 0.039, "step": 21048 }, { "epoch": 4.789306029579067, "grad_norm": 0.639551014253732, "learning_rate": 5.480444002580812e-09, "loss": 0.0224, "step": 21049 }, { "epoch": 4.789533560864619, "grad_norm": 1.8305008102147644, "learning_rate": 5.468643601037743e-09, "loss": 0.0737, "step": 21050 }, { "epoch": 4.7897610921501705, "grad_norm": 2.0865847839216425, "learning_rate": 5.456855861580984e-09, "loss": 0.0248, "step": 21051 }, { "epoch": 4.789988623435723, "grad_norm": 1.1496383918910211, "learning_rate": 5.445080784451454e-09, "loss": 0.0206, "step": 21052 }, { "epoch": 4.790216154721274, "grad_norm": 0.6267620395176153, "learning_rate": 5.433318369889792e-09, "loss": 0.0017, "step": 21053 }, { "epoch": 4.790443686006826, "grad_norm": 0.8996619233436077, "learning_rate": 5.421568618136364e-09, "loss": 0.0192, "step": 21054 }, { "epoch": 4.7906712172923775, "grad_norm": 1.293050238187436, "learning_rate": 5.409831529431461e-09, "loss": 0.0125, "step": 21055 }, { "epoch": 4.79089874857793, "grad_norm": 2.167600448837176, "learning_rate": 5.3981071040148245e-09, "loss": 0.0677, "step": 21056 }, { "epoch": 4.791126279863481, "grad_norm": 1.2044338480898464, "learning_rate": 5.386395342126191e-09, "loss": 0.0343, "step": 21057 }, { "epoch": 4.791353811149033, "grad_norm": 1.6731849643587935, "learning_rate": 5.374696244004815e-09, "loss": 0.018, "step": 21058 }, { "epoch": 4.791581342434585, "grad_norm": 1.3289578491156957, "learning_rate": 5.363009809889879e-09, "loss": 0.0823, "step": 21059 }, { "epoch": 4.791808873720137, "grad_norm": 0.292231632212553, "learning_rate": 5.351336040020153e-09, "loss": 0.0012, "step": 21060 }, { "epoch": 4.792036405005688, "grad_norm": 1.042063281063634, "learning_rate": 5.3396749346344004e-09, "loss": 0.0146, "step": 21061 }, { "epoch": 4.79226393629124, "grad_norm": 0.8669076509709692, "learning_rate": 5.328026493970698e-09, "loss": 0.0022, "step": 21062 }, { "epoch": 4.792491467576792, "grad_norm": 1.9004851950802109, "learning_rate": 5.3163907182673945e-09, "loss": 0.0996, "step": 21063 }, { "epoch": 4.792718998862344, "grad_norm": 1.2606840583948409, "learning_rate": 5.3047676077621484e-09, "loss": 0.0291, "step": 21064 }, { "epoch": 4.792946530147895, "grad_norm": 1.0672907577369348, "learning_rate": 5.293157162692478e-09, "loss": 0.0103, "step": 21065 }, { "epoch": 4.793174061433447, "grad_norm": 0.8972344135702391, "learning_rate": 5.281559383295692e-09, "loss": 0.005, "step": 21066 }, { "epoch": 4.793401592718999, "grad_norm": 0.8356118802254844, "learning_rate": 5.269974269808895e-09, "loss": 0.0037, "step": 21067 }, { "epoch": 4.793629124004551, "grad_norm": 0.7988062838967375, "learning_rate": 5.2584018224689085e-09, "loss": 0.0032, "step": 21068 }, { "epoch": 4.793856655290102, "grad_norm": 2.3192189304883555, "learning_rate": 5.246842041512143e-09, "loss": 0.0093, "step": 21069 }, { "epoch": 4.794084186575654, "grad_norm": 0.44222662065100876, "learning_rate": 5.235294927174936e-09, "loss": 0.0024, "step": 21070 }, { "epoch": 4.794311717861206, "grad_norm": 1.3984005946868658, "learning_rate": 5.223760479693279e-09, "loss": 0.0571, "step": 21071 }, { "epoch": 4.794539249146758, "grad_norm": 2.379526516744529, "learning_rate": 5.212238699302818e-09, "loss": 0.0224, "step": 21072 }, { "epoch": 4.794766780432309, "grad_norm": 0.8663911588855094, "learning_rate": 5.200729586239128e-09, "loss": 0.005, "step": 21073 }, { "epoch": 4.794994311717861, "grad_norm": 1.3523167189980774, "learning_rate": 5.189233140737368e-09, "loss": 0.0672, "step": 21074 }, { "epoch": 4.795221843003413, "grad_norm": 1.1404268360398415, "learning_rate": 5.177749363032627e-09, "loss": 0.041, "step": 21075 }, { "epoch": 4.795449374288965, "grad_norm": 2.2490694164619036, "learning_rate": 5.16627825335958e-09, "loss": 0.0259, "step": 21076 }, { "epoch": 4.795676905574517, "grad_norm": 1.116813857087072, "learning_rate": 5.154819811952552e-09, "loss": 0.0074, "step": 21077 }, { "epoch": 4.795904436860068, "grad_norm": 1.3217995761049972, "learning_rate": 5.14337403904587e-09, "loss": 0.0157, "step": 21078 }, { "epoch": 4.79613196814562, "grad_norm": 1.104745626202287, "learning_rate": 5.131940934873375e-09, "loss": 0.0245, "step": 21079 }, { "epoch": 4.796359499431172, "grad_norm": 1.6871798770470507, "learning_rate": 5.1205204996686994e-09, "loss": 0.013, "step": 21080 }, { "epoch": 4.796587030716724, "grad_norm": 1.2147995605044264, "learning_rate": 5.1091127336654055e-09, "loss": 0.0168, "step": 21081 }, { "epoch": 4.796814562002275, "grad_norm": 0.9560751770444891, "learning_rate": 5.097717637096572e-09, "loss": 0.0254, "step": 21082 }, { "epoch": 4.797042093287827, "grad_norm": 1.4214095707394432, "learning_rate": 5.086335210195137e-09, "loss": 0.016, "step": 21083 }, { "epoch": 4.797269624573379, "grad_norm": 0.9046338859002726, "learning_rate": 5.0749654531936224e-09, "loss": 0.0121, "step": 21084 }, { "epoch": 4.797497155858931, "grad_norm": 1.3486255964234786, "learning_rate": 5.063608366324552e-09, "loss": 0.0072, "step": 21085 }, { "epoch": 4.797724687144482, "grad_norm": 0.8998086649483905, "learning_rate": 5.0522639498198915e-09, "loss": 0.0043, "step": 21086 }, { "epoch": 4.7979522184300345, "grad_norm": 0.37252339302121856, "learning_rate": 5.040932203911539e-09, "loss": 0.002, "step": 21087 }, { "epoch": 4.798179749715586, "grad_norm": 1.2415741902448623, "learning_rate": 5.029613128831184e-09, "loss": 0.0406, "step": 21088 }, { "epoch": 4.798407281001138, "grad_norm": 0.8803286191126711, "learning_rate": 5.018306724810171e-09, "loss": 0.0178, "step": 21089 }, { "epoch": 4.798634812286689, "grad_norm": 1.6113974100163369, "learning_rate": 5.007012992079494e-09, "loss": 0.0291, "step": 21090 }, { "epoch": 4.7988623435722415, "grad_norm": 0.6446880784470094, "learning_rate": 4.995731930869941e-09, "loss": 0.0163, "step": 21091 }, { "epoch": 4.799089874857793, "grad_norm": 1.3503493860358993, "learning_rate": 4.98446354141223e-09, "loss": 0.0735, "step": 21092 }, { "epoch": 4.799317406143345, "grad_norm": 0.9015339387587082, "learning_rate": 4.973207823936524e-09, "loss": 0.0338, "step": 21093 }, { "epoch": 4.799544937428896, "grad_norm": 1.1794952048991811, "learning_rate": 4.961964778672918e-09, "loss": 0.016, "step": 21094 }, { "epoch": 4.7997724687144485, "grad_norm": 0.7289887972534999, "learning_rate": 4.950734405851226e-09, "loss": 0.0042, "step": 21095 }, { "epoch": 4.8, "grad_norm": 1.201890618489287, "learning_rate": 4.939516705700919e-09, "loss": 0.0508, "step": 21096 }, { "epoch": 4.800227531285552, "grad_norm": 1.2361267396083722, "learning_rate": 4.928311678451397e-09, "loss": 0.0155, "step": 21097 }, { "epoch": 4.800455062571103, "grad_norm": 0.7350433916970859, "learning_rate": 4.917119324331504e-09, "loss": 0.01, "step": 21098 }, { "epoch": 4.8006825938566555, "grad_norm": 0.6378490120731323, "learning_rate": 4.905939643570084e-09, "loss": 0.0132, "step": 21099 }, { "epoch": 4.800910125142207, "grad_norm": 1.3444505194652487, "learning_rate": 4.894772636395567e-09, "loss": 0.0516, "step": 21100 }, { "epoch": 4.801137656427759, "grad_norm": 0.5777222718111987, "learning_rate": 4.883618303036242e-09, "loss": 0.0025, "step": 21101 }, { "epoch": 4.80136518771331, "grad_norm": 1.4319245948006973, "learning_rate": 4.8724766437201195e-09, "loss": 0.0509, "step": 21102 }, { "epoch": 4.8015927189988625, "grad_norm": 1.4322806171742686, "learning_rate": 4.861347658674867e-09, "loss": 0.057, "step": 21103 }, { "epoch": 4.801820250284414, "grad_norm": 0.3629679661459507, "learning_rate": 4.85023134812794e-09, "loss": 0.0012, "step": 21104 }, { "epoch": 4.802047781569966, "grad_norm": 1.3899087787037712, "learning_rate": 4.839127712306449e-09, "loss": 0.0131, "step": 21105 }, { "epoch": 4.802275312855517, "grad_norm": 0.8660243513896424, "learning_rate": 4.828036751437504e-09, "loss": 0.0034, "step": 21106 }, { "epoch": 4.8025028441410695, "grad_norm": 1.4859541380577328, "learning_rate": 4.8169584657476596e-09, "loss": 0.047, "step": 21107 }, { "epoch": 4.802730375426621, "grad_norm": 1.0017011833876397, "learning_rate": 4.805892855463332e-09, "loss": 0.0018, "step": 21108 }, { "epoch": 4.802957906712173, "grad_norm": 1.6158782603551116, "learning_rate": 4.794839920810798e-09, "loss": 0.103, "step": 21109 }, { "epoch": 4.803185437997724, "grad_norm": 2.802353303822707, "learning_rate": 4.7837996620158504e-09, "loss": 0.0107, "step": 21110 }, { "epoch": 4.8034129692832765, "grad_norm": 1.5052501072257076, "learning_rate": 4.772772079304211e-09, "loss": 0.0132, "step": 21111 }, { "epoch": 4.803640500568828, "grad_norm": 0.9527224003250861, "learning_rate": 4.761757172901116e-09, "loss": 0.0122, "step": 21112 }, { "epoch": 4.80386803185438, "grad_norm": 1.6403854571981447, "learning_rate": 4.750754943031871e-09, "loss": 0.0196, "step": 21113 }, { "epoch": 4.804095563139931, "grad_norm": 1.1523874255778714, "learning_rate": 4.739765389921227e-09, "loss": 0.0567, "step": 21114 }, { "epoch": 4.8043230944254836, "grad_norm": 1.5570902407267395, "learning_rate": 4.728788513793866e-09, "loss": 0.0075, "step": 21115 }, { "epoch": 4.804550625711036, "grad_norm": 1.3703998852226753, "learning_rate": 4.717824314874053e-09, "loss": 0.0065, "step": 21116 }, { "epoch": 4.804778156996587, "grad_norm": 1.3482576528856398, "learning_rate": 4.706872793385914e-09, "loss": 0.009, "step": 21117 }, { "epoch": 4.805005688282138, "grad_norm": 1.3841153144275709, "learning_rate": 4.695933949553297e-09, "loss": 0.0079, "step": 21118 }, { "epoch": 4.805233219567691, "grad_norm": 1.1730796730184276, "learning_rate": 4.685007783599774e-09, "loss": 0.0103, "step": 21119 }, { "epoch": 4.805460750853243, "grad_norm": 1.256716495068045, "learning_rate": 4.674094295748638e-09, "loss": 0.0136, "step": 21120 }, { "epoch": 4.805688282138794, "grad_norm": 1.9455146598003354, "learning_rate": 4.6631934862229055e-09, "loss": 0.0099, "step": 21121 }, { "epoch": 4.805915813424345, "grad_norm": 1.064546227809149, "learning_rate": 4.652305355245454e-09, "loss": 0.1045, "step": 21122 }, { "epoch": 4.806143344709898, "grad_norm": 0.5598501045180814, "learning_rate": 4.641429903038744e-09, "loss": 0.0024, "step": 21123 }, { "epoch": 4.80637087599545, "grad_norm": 0.4740866090572232, "learning_rate": 4.630567129825028e-09, "loss": 0.0013, "step": 21124 }, { "epoch": 4.806598407281001, "grad_norm": 1.0114346577344493, "learning_rate": 4.61971703582649e-09, "loss": 0.0041, "step": 21125 }, { "epoch": 4.806825938566553, "grad_norm": 1.7517404916889676, "learning_rate": 4.60887962126462e-09, "loss": 0.0465, "step": 21126 }, { "epoch": 4.807053469852105, "grad_norm": 1.3561536388860227, "learning_rate": 4.598054886361114e-09, "loss": 0.0251, "step": 21127 }, { "epoch": 4.807281001137657, "grad_norm": 0.7262638815873556, "learning_rate": 4.587242831337185e-09, "loss": 0.0045, "step": 21128 }, { "epoch": 4.807508532423208, "grad_norm": 1.5789034100387465, "learning_rate": 4.576443456413768e-09, "loss": 0.0341, "step": 21129 }, { "epoch": 4.80773606370876, "grad_norm": 1.0424790415770713, "learning_rate": 4.565656761811587e-09, "loss": 0.0523, "step": 21130 }, { "epoch": 4.807963594994312, "grad_norm": 1.5082762673449683, "learning_rate": 4.5548827477510935e-09, "loss": 0.0434, "step": 21131 }, { "epoch": 4.808191126279864, "grad_norm": 0.6442047919268817, "learning_rate": 4.544121414452527e-09, "loss": 0.0079, "step": 21132 }, { "epoch": 4.808418657565415, "grad_norm": 0.8198458149914135, "learning_rate": 4.533372762135779e-09, "loss": 0.002, "step": 21133 }, { "epoch": 4.808646188850967, "grad_norm": 0.5119613510165429, "learning_rate": 4.522636791020607e-09, "loss": 0.0013, "step": 21134 }, { "epoch": 4.808873720136519, "grad_norm": 0.7097825723961199, "learning_rate": 4.511913501326348e-09, "loss": 0.0038, "step": 21135 }, { "epoch": 4.809101251422071, "grad_norm": 1.4679930572646787, "learning_rate": 4.501202893272202e-09, "loss": 0.0889, "step": 21136 }, { "epoch": 4.809328782707622, "grad_norm": 0.9075798364610885, "learning_rate": 4.490504967077091e-09, "loss": 0.0102, "step": 21137 }, { "epoch": 4.809556313993174, "grad_norm": 0.9101798980749469, "learning_rate": 4.479819722959661e-09, "loss": 0.0188, "step": 21138 }, { "epoch": 4.809783845278726, "grad_norm": 1.5858031400763668, "learning_rate": 4.469147161138276e-09, "loss": 0.0754, "step": 21139 }, { "epoch": 4.810011376564278, "grad_norm": 0.8309014906310674, "learning_rate": 4.458487281831098e-09, "loss": 0.0055, "step": 21140 }, { "epoch": 4.810238907849829, "grad_norm": 1.5987774971163213, "learning_rate": 4.447840085255936e-09, "loss": 0.0245, "step": 21141 }, { "epoch": 4.810466439135381, "grad_norm": 1.8313075776831098, "learning_rate": 4.437205571630465e-09, "loss": 0.0602, "step": 21142 }, { "epoch": 4.810693970420933, "grad_norm": 1.8831320096442774, "learning_rate": 4.426583741172011e-09, "loss": 0.0863, "step": 21143 }, { "epoch": 4.810921501706485, "grad_norm": 0.9162494841295776, "learning_rate": 4.415974594097622e-09, "loss": 0.0367, "step": 21144 }, { "epoch": 4.811149032992036, "grad_norm": 0.7246754601827735, "learning_rate": 4.4053781306242075e-09, "loss": 0.0062, "step": 21145 }, { "epoch": 4.811376564277588, "grad_norm": 1.3956855661033996, "learning_rate": 4.3947943509682625e-09, "loss": 0.0376, "step": 21146 }, { "epoch": 4.81160409556314, "grad_norm": 0.7657214643206403, "learning_rate": 4.38422325534614e-09, "loss": 0.0242, "step": 21147 }, { "epoch": 4.811831626848692, "grad_norm": 0.3083898137397797, "learning_rate": 4.373664843973918e-09, "loss": 0.0017, "step": 21148 }, { "epoch": 4.812059158134243, "grad_norm": 1.7512184890261835, "learning_rate": 4.3631191170673274e-09, "loss": 0.0114, "step": 21149 }, { "epoch": 4.812286689419795, "grad_norm": 1.318725686201643, "learning_rate": 4.352586074841958e-09, "loss": 0.0183, "step": 21150 }, { "epoch": 4.812514220705347, "grad_norm": 0.8421320275996477, "learning_rate": 4.342065717513055e-09, "loss": 0.0054, "step": 21151 }, { "epoch": 4.812741751990899, "grad_norm": 1.2635610273572102, "learning_rate": 4.3315580452957255e-09, "loss": 0.0435, "step": 21152 }, { "epoch": 4.81296928327645, "grad_norm": 1.706998340542797, "learning_rate": 4.321063058404518e-09, "loss": 0.0369, "step": 21153 }, { "epoch": 4.813196814562002, "grad_norm": 1.9315334499921473, "learning_rate": 4.310580757054123e-09, "loss": 0.0282, "step": 21154 }, { "epoch": 4.8134243458475545, "grad_norm": 1.2373755351759654, "learning_rate": 4.3001111414587434e-09, "loss": 0.0099, "step": 21155 }, { "epoch": 4.813651877133106, "grad_norm": 1.3187424434611614, "learning_rate": 4.289654211832306e-09, "loss": 0.0597, "step": 21156 }, { "epoch": 4.813879408418657, "grad_norm": 1.070232347183839, "learning_rate": 4.279209968388528e-09, "loss": 0.0519, "step": 21157 }, { "epoch": 4.814106939704209, "grad_norm": 0.9563604771788673, "learning_rate": 4.26877841134099e-09, "loss": 0.0069, "step": 21158 }, { "epoch": 4.8143344709897615, "grad_norm": 1.0873841190849285, "learning_rate": 4.258359540902785e-09, "loss": 0.052, "step": 21159 }, { "epoch": 4.814562002275313, "grad_norm": 0.08984173615298913, "learning_rate": 4.247953357286796e-09, "loss": 0.0003, "step": 21160 }, { "epoch": 4.814789533560864, "grad_norm": 2.4539580955631894, "learning_rate": 4.237559860705842e-09, "loss": 0.0229, "step": 21161 }, { "epoch": 4.815017064846416, "grad_norm": 0.3295514284621471, "learning_rate": 4.227179051372252e-09, "loss": 0.001, "step": 21162 }, { "epoch": 4.8152445961319685, "grad_norm": 1.0528359727288583, "learning_rate": 4.2168109294982854e-09, "loss": 0.0109, "step": 21163 }, { "epoch": 4.81547212741752, "grad_norm": 1.150980786358721, "learning_rate": 4.2064554952957885e-09, "loss": 0.064, "step": 21164 }, { "epoch": 4.815699658703072, "grad_norm": 1.3235758235065445, "learning_rate": 4.196112748976397e-09, "loss": 0.025, "step": 21165 }, { "epoch": 4.815927189988623, "grad_norm": 0.6394841799240103, "learning_rate": 4.185782690751539e-09, "loss": 0.0047, "step": 21166 }, { "epoch": 4.8161547212741755, "grad_norm": 2.440564751239858, "learning_rate": 4.175465320832295e-09, "loss": 0.0265, "step": 21167 }, { "epoch": 4.816382252559727, "grad_norm": 1.3582596403019207, "learning_rate": 4.165160639429469e-09, "loss": 0.045, "step": 21168 }, { "epoch": 4.816609783845279, "grad_norm": 1.3920680345439653, "learning_rate": 4.1548686467538655e-09, "loss": 0.0475, "step": 21169 }, { "epoch": 4.81683731513083, "grad_norm": 3.5052971082368254, "learning_rate": 4.144589343015662e-09, "loss": 0.0012, "step": 21170 }, { "epoch": 4.8170648464163826, "grad_norm": 1.1625081655641496, "learning_rate": 4.134322728425108e-09, "loss": 0.0252, "step": 21171 }, { "epoch": 4.817292377701934, "grad_norm": 0.6115575573336709, "learning_rate": 4.124068803191828e-09, "loss": 0.023, "step": 21172 }, { "epoch": 4.817519908987486, "grad_norm": 1.476136216925864, "learning_rate": 4.113827567525514e-09, "loss": 0.0242, "step": 21173 }, { "epoch": 4.817747440273037, "grad_norm": 1.013480643279469, "learning_rate": 4.103599021635512e-09, "loss": 0.0099, "step": 21174 }, { "epoch": 4.81797497155859, "grad_norm": 2.321264882627982, "learning_rate": 4.093383165730755e-09, "loss": 0.078, "step": 21175 }, { "epoch": 4.818202502844141, "grad_norm": 2.2667375377616255, "learning_rate": 4.08318000002017e-09, "loss": 0.0517, "step": 21176 }, { "epoch": 4.818430034129693, "grad_norm": 0.7474172616331871, "learning_rate": 4.072989524712204e-09, "loss": 0.0133, "step": 21177 }, { "epoch": 4.818657565415244, "grad_norm": 4.04871821442273, "learning_rate": 4.0628117400152295e-09, "loss": 0.0158, "step": 21178 }, { "epoch": 4.818885096700797, "grad_norm": 1.0900429763418682, "learning_rate": 4.0526466461371375e-09, "loss": 0.052, "step": 21179 }, { "epoch": 4.819112627986348, "grad_norm": 0.9188396797502191, "learning_rate": 4.042494243285746e-09, "loss": 0.0029, "step": 21180 }, { "epoch": 4.8193401592719, "grad_norm": 0.1309970678295312, "learning_rate": 4.0323545316684615e-09, "loss": 0.0005, "step": 21181 }, { "epoch": 4.819567690557451, "grad_norm": 0.9512527344540636, "learning_rate": 4.022227511492685e-09, "loss": 0.0086, "step": 21182 }, { "epoch": 4.819795221843004, "grad_norm": 1.4916939184345515, "learning_rate": 4.012113182965336e-09, "loss": 0.0486, "step": 21183 }, { "epoch": 4.820022753128555, "grad_norm": 1.3134459480577911, "learning_rate": 4.0020115462930525e-09, "loss": 0.0194, "step": 21184 }, { "epoch": 4.820250284414107, "grad_norm": 1.2457994041742373, "learning_rate": 3.9919226016823385e-09, "loss": 0.1054, "step": 21185 }, { "epoch": 4.820477815699658, "grad_norm": 1.7109212676723895, "learning_rate": 3.981846349339416e-09, "loss": 0.1082, "step": 21186 }, { "epoch": 4.820705346985211, "grad_norm": 0.9581380310918113, "learning_rate": 3.9717827894702325e-09, "loss": 0.0076, "step": 21187 }, { "epoch": 4.820932878270762, "grad_norm": 0.5512156061501855, "learning_rate": 3.961731922280388e-09, "loss": 0.004, "step": 21188 }, { "epoch": 4.821160409556314, "grad_norm": 0.26553405469776453, "learning_rate": 3.95169374797548e-09, "loss": 0.0009, "step": 21189 }, { "epoch": 4.821387940841865, "grad_norm": 1.4649419367457521, "learning_rate": 3.941668266760484e-09, "loss": 0.0495, "step": 21190 }, { "epoch": 4.821615472127418, "grad_norm": 1.1725050585606154, "learning_rate": 3.931655478840377e-09, "loss": 0.0058, "step": 21191 }, { "epoch": 4.821843003412969, "grad_norm": 2.0160282439597976, "learning_rate": 3.9216553844198535e-09, "loss": 0.0209, "step": 21192 }, { "epoch": 4.822070534698521, "grad_norm": 1.2067186742221587, "learning_rate": 3.911667983703127e-09, "loss": 0.0064, "step": 21193 }, { "epoch": 4.822298065984073, "grad_norm": 1.0955975071851207, "learning_rate": 3.901693276894547e-09, "loss": 0.0546, "step": 21194 }, { "epoch": 4.822525597269625, "grad_norm": 1.8191371870372646, "learning_rate": 3.891731264197771e-09, "loss": 0.0396, "step": 21195 }, { "epoch": 4.822753128555176, "grad_norm": 1.5599169604339396, "learning_rate": 3.881781945816593e-09, "loss": 0.0041, "step": 21196 }, { "epoch": 4.822980659840728, "grad_norm": 0.931858453024929, "learning_rate": 3.871845321954254e-09, "loss": 0.0117, "step": 21197 }, { "epoch": 4.82320819112628, "grad_norm": 1.3536609882645083, "learning_rate": 3.861921392813856e-09, "loss": 0.0381, "step": 21198 }, { "epoch": 4.823435722411832, "grad_norm": 0.6683314853103228, "learning_rate": 3.8520101585981515e-09, "loss": 0.0034, "step": 21199 }, { "epoch": 4.823663253697383, "grad_norm": 0.88636624462381, "learning_rate": 3.842111619509828e-09, "loss": 0.0028, "step": 21200 }, { "epoch": 4.823890784982935, "grad_norm": 1.5484151860802675, "learning_rate": 3.832225775751152e-09, "loss": 0.0932, "step": 21201 }, { "epoch": 4.824118316268487, "grad_norm": 2.02928329188988, "learning_rate": 3.822352627524187e-09, "loss": 0.0226, "step": 21202 }, { "epoch": 4.824345847554039, "grad_norm": 3.3821646948526807, "learning_rate": 3.8124921750306435e-09, "loss": 0.0241, "step": 21203 }, { "epoch": 4.824573378839591, "grad_norm": 0.9648005117886963, "learning_rate": 3.8026444184721675e-09, "loss": 0.0124, "step": 21204 }, { "epoch": 4.824800910125142, "grad_norm": 1.0056847489579868, "learning_rate": 3.7928093580499175e-09, "loss": 0.0434, "step": 21205 }, { "epoch": 4.825028441410694, "grad_norm": 1.3290879605292991, "learning_rate": 3.782986993965051e-09, "loss": 0.0546, "step": 21206 }, { "epoch": 4.825255972696246, "grad_norm": 1.4238731131029114, "learning_rate": 3.7731773264181035e-09, "loss": 0.0116, "step": 21207 }, { "epoch": 4.825483503981798, "grad_norm": 1.5568924867460983, "learning_rate": 3.763380355609747e-09, "loss": 0.0438, "step": 21208 }, { "epoch": 4.825711035267349, "grad_norm": 1.814966597296964, "learning_rate": 3.753596081740169e-09, "loss": 0.0331, "step": 21209 }, { "epoch": 4.825938566552901, "grad_norm": 1.207305608122443, "learning_rate": 3.743824505009277e-09, "loss": 0.033, "step": 21210 }, { "epoch": 4.826166097838453, "grad_norm": 2.1375934262930745, "learning_rate": 3.734065625616914e-09, "loss": 0.0559, "step": 21211 }, { "epoch": 4.826393629124005, "grad_norm": 1.3957436226612092, "learning_rate": 3.7243194437623642e-09, "loss": 0.052, "step": 21212 }, { "epoch": 4.826621160409556, "grad_norm": 1.3102552040201478, "learning_rate": 3.7145859596449814e-09, "loss": 0.0213, "step": 21213 }, { "epoch": 4.826848691695108, "grad_norm": 1.9882263624108372, "learning_rate": 3.7048651734636344e-09, "loss": 0.0563, "step": 21214 }, { "epoch": 4.82707622298066, "grad_norm": 1.85546560959495, "learning_rate": 3.6951570854169144e-09, "loss": 0.0232, "step": 21215 }, { "epoch": 4.827303754266212, "grad_norm": 0.6021191043402251, "learning_rate": 3.685461695703413e-09, "loss": 0.0055, "step": 21216 }, { "epoch": 4.827531285551763, "grad_norm": 1.1989554955668007, "learning_rate": 3.6757790045210962e-09, "loss": 0.0475, "step": 21217 }, { "epoch": 4.827758816837315, "grad_norm": 0.7505206997657039, "learning_rate": 3.6661090120680696e-09, "loss": 0.0147, "step": 21218 }, { "epoch": 4.827986348122867, "grad_norm": 1.3526716292098266, "learning_rate": 3.6564517185417454e-09, "loss": 0.0194, "step": 21219 }, { "epoch": 4.828213879408419, "grad_norm": 0.9076082634458129, "learning_rate": 3.646807124139673e-09, "loss": 0.005, "step": 21220 }, { "epoch": 4.82844141069397, "grad_norm": 1.1881344543512768, "learning_rate": 3.6371752290589175e-09, "loss": 0.029, "step": 21221 }, { "epoch": 4.828668941979522, "grad_norm": 1.552598095386763, "learning_rate": 3.6275560334962655e-09, "loss": 0.062, "step": 21222 }, { "epoch": 4.828896473265074, "grad_norm": 0.43410928584486275, "learning_rate": 3.6179495376484355e-09, "loss": 0.0022, "step": 21223 }, { "epoch": 4.829124004550626, "grad_norm": 0.8931163906356918, "learning_rate": 3.6083557417117276e-09, "loss": 0.0211, "step": 21224 }, { "epoch": 4.829351535836177, "grad_norm": 1.6903135343253683, "learning_rate": 3.5987746458821665e-09, "loss": 0.037, "step": 21225 }, { "epoch": 4.829579067121729, "grad_norm": 0.5318439228843967, "learning_rate": 3.5892062503555676e-09, "loss": 0.003, "step": 21226 }, { "epoch": 4.829806598407281, "grad_norm": 0.8993884338620517, "learning_rate": 3.579650555327607e-09, "loss": 0.0497, "step": 21227 }, { "epoch": 4.830034129692833, "grad_norm": 0.47898995385957216, "learning_rate": 3.570107560993477e-09, "loss": 0.0055, "step": 21228 }, { "epoch": 4.830261660978384, "grad_norm": 0.39411561230615, "learning_rate": 3.5605772675482294e-09, "loss": 0.0017, "step": 21229 }, { "epoch": 4.830489192263936, "grad_norm": 0.4159983034862431, "learning_rate": 3.5510596751867077e-09, "loss": 0.0015, "step": 21230 }, { "epoch": 4.830716723549488, "grad_norm": 1.7391124010378194, "learning_rate": 3.54155478410341e-09, "loss": 0.0234, "step": 21231 }, { "epoch": 4.83094425483504, "grad_norm": 0.7369858023565069, "learning_rate": 3.5320625944925553e-09, "loss": 0.0061, "step": 21232 }, { "epoch": 4.831171786120592, "grad_norm": 1.233712786137149, "learning_rate": 3.522583106548155e-09, "loss": 0.0321, "step": 21233 }, { "epoch": 4.831399317406143, "grad_norm": 1.6213991813926905, "learning_rate": 3.5131163204640133e-09, "loss": 0.0482, "step": 21234 }, { "epoch": 4.831626848691695, "grad_norm": 1.3202478572807774, "learning_rate": 3.503662236433586e-09, "loss": 0.0312, "step": 21235 }, { "epoch": 4.831854379977247, "grad_norm": 0.8983793033607773, "learning_rate": 3.494220854650052e-09, "loss": 0.0043, "step": 21236 }, { "epoch": 4.832081911262799, "grad_norm": 1.3639906650644538, "learning_rate": 3.4847921753063813e-09, "loss": 0.0396, "step": 21237 }, { "epoch": 4.83230944254835, "grad_norm": 0.9144382016738809, "learning_rate": 3.475376198595337e-09, "loss": 0.0217, "step": 21238 }, { "epoch": 4.832536973833902, "grad_norm": 1.0574718705773085, "learning_rate": 3.4659729247094043e-09, "loss": 0.0401, "step": 21239 }, { "epoch": 4.832764505119454, "grad_norm": 1.1122748492287275, "learning_rate": 3.4565823538406513e-09, "loss": 0.063, "step": 21240 }, { "epoch": 4.832992036405006, "grad_norm": 1.3784522527480472, "learning_rate": 3.4472044861809383e-09, "loss": 0.0925, "step": 21241 }, { "epoch": 4.833219567690557, "grad_norm": 0.8498777535144622, "learning_rate": 3.4378393219221958e-09, "loss": 0.0288, "step": 21242 }, { "epoch": 4.83344709897611, "grad_norm": 0.9935776860957386, "learning_rate": 3.4284868612555895e-09, "loss": 0.0084, "step": 21243 }, { "epoch": 4.833674630261661, "grad_norm": 1.529530704769881, "learning_rate": 3.419147104372425e-09, "loss": 0.0929, "step": 21244 }, { "epoch": 4.833902161547213, "grad_norm": 0.9227291077624745, "learning_rate": 3.409820051463522e-09, "loss": 0.0564, "step": 21245 }, { "epoch": 4.834129692832764, "grad_norm": 1.543356270503993, "learning_rate": 3.400505702719492e-09, "loss": 0.0321, "step": 21246 }, { "epoch": 4.834357224118317, "grad_norm": 0.5167866342437173, "learning_rate": 3.391204058330669e-09, "loss": 0.0034, "step": 21247 }, { "epoch": 4.834584755403868, "grad_norm": 1.233020917920661, "learning_rate": 3.381915118487247e-09, "loss": 0.0646, "step": 21248 }, { "epoch": 4.83481228668942, "grad_norm": 0.912203277691146, "learning_rate": 3.372638883379076e-09, "loss": 0.0033, "step": 21249 }, { "epoch": 4.835039817974971, "grad_norm": 1.3173466399479277, "learning_rate": 3.3633753531956553e-09, "loss": 0.0156, "step": 21250 }, { "epoch": 4.835267349260524, "grad_norm": 1.4900742263799105, "learning_rate": 3.354124528126418e-09, "loss": 0.035, "step": 21251 }, { "epoch": 4.835494880546075, "grad_norm": 1.26720730288505, "learning_rate": 3.3448864083603096e-09, "loss": 0.0115, "step": 21252 }, { "epoch": 4.835722411831627, "grad_norm": 1.6488850875353047, "learning_rate": 3.335660994086276e-09, "loss": 0.0157, "step": 21253 }, { "epoch": 4.835949943117178, "grad_norm": 1.3680364795799, "learning_rate": 3.3264482854927783e-09, "loss": 0.0607, "step": 21254 }, { "epoch": 4.836177474402731, "grad_norm": 2.305176940150293, "learning_rate": 3.3172482827682067e-09, "loss": 0.0154, "step": 21255 }, { "epoch": 4.836405005688282, "grad_norm": 0.7762098841042624, "learning_rate": 3.308060986100467e-09, "loss": 0.0052, "step": 21256 }, { "epoch": 4.836632536973834, "grad_norm": 0.9452141932269319, "learning_rate": 3.298886395677395e-09, "loss": 0.0072, "step": 21257 }, { "epoch": 4.836860068259385, "grad_norm": 0.4411631941100663, "learning_rate": 3.289724511686479e-09, "loss": 0.0016, "step": 21258 }, { "epoch": 4.837087599544938, "grad_norm": 1.168282172046169, "learning_rate": 3.2805753343150004e-09, "loss": 0.028, "step": 21259 }, { "epoch": 4.837315130830489, "grad_norm": 1.839205139342041, "learning_rate": 3.271438863749893e-09, "loss": 0.0221, "step": 21260 }, { "epoch": 4.837542662116041, "grad_norm": 0.668568746464592, "learning_rate": 3.2623151001779514e-09, "loss": 0.0299, "step": 21261 }, { "epoch": 4.837770193401592, "grad_norm": 2.1267014121909353, "learning_rate": 3.253204043785693e-09, "loss": 0.0929, "step": 21262 }, { "epoch": 4.837997724687145, "grad_norm": 1.386759926247947, "learning_rate": 3.24410569475922e-09, "loss": 0.0145, "step": 21263 }, { "epoch": 4.838225255972696, "grad_norm": 0.9686904155166112, "learning_rate": 3.2350200532844934e-09, "loss": 0.0202, "step": 21264 }, { "epoch": 4.838452787258248, "grad_norm": 1.451530570622252, "learning_rate": 3.2259471195473374e-09, "loss": 0.0017, "step": 21265 }, { "epoch": 4.838680318543799, "grad_norm": 1.1737698008514026, "learning_rate": 3.21688689373309e-09, "loss": 0.0076, "step": 21266 }, { "epoch": 4.838907849829352, "grad_norm": 1.271059952018834, "learning_rate": 3.2078393760268804e-09, "loss": 0.0099, "step": 21267 }, { "epoch": 4.839135381114903, "grad_norm": 0.7246315883183181, "learning_rate": 3.198804566613631e-09, "loss": 0.0056, "step": 21268 }, { "epoch": 4.839362912400455, "grad_norm": 1.1751833737149193, "learning_rate": 3.1897824656781233e-09, "loss": 0.0329, "step": 21269 }, { "epoch": 4.839590443686006, "grad_norm": 1.0214618262914437, "learning_rate": 3.1807730734046553e-09, "loss": 0.0194, "step": 21270 }, { "epoch": 4.839817974971559, "grad_norm": 0.7340102031849443, "learning_rate": 3.171776389977385e-09, "loss": 0.0379, "step": 21271 }, { "epoch": 4.840045506257111, "grad_norm": 1.1966858391921562, "learning_rate": 3.1627924155801238e-09, "loss": 0.0405, "step": 21272 }, { "epoch": 4.840273037542662, "grad_norm": 1.3312770734043406, "learning_rate": 3.1538211503966134e-09, "loss": 0.06, "step": 21273 }, { "epoch": 4.8405005688282134, "grad_norm": 2.011475721979492, "learning_rate": 3.1448625946101096e-09, "loss": 0.0359, "step": 21274 }, { "epoch": 4.840728100113766, "grad_norm": 0.739672288004134, "learning_rate": 3.1359167484037307e-09, "loss": 0.0089, "step": 21275 }, { "epoch": 4.840955631399318, "grad_norm": 1.3879993231352945, "learning_rate": 3.126983611960316e-09, "loss": 0.0141, "step": 21276 }, { "epoch": 4.841183162684869, "grad_norm": 1.0918849979102565, "learning_rate": 3.1180631854624973e-09, "loss": 0.0036, "step": 21277 }, { "epoch": 4.8414106939704205, "grad_norm": 1.1571486991799311, "learning_rate": 3.1091554690925592e-09, "loss": 0.0367, "step": 21278 }, { "epoch": 4.841638225255973, "grad_norm": 1.8338589328119976, "learning_rate": 3.1002604630324394e-09, "loss": 0.0178, "step": 21279 }, { "epoch": 4.841865756541525, "grad_norm": 1.195198667962037, "learning_rate": 3.091378167464146e-09, "loss": 0.028, "step": 21280 }, { "epoch": 4.842093287827076, "grad_norm": 1.542636051374237, "learning_rate": 3.0825085825690603e-09, "loss": 0.0108, "step": 21281 }, { "epoch": 4.842320819112628, "grad_norm": 1.1458718961802838, "learning_rate": 3.0736517085284966e-09, "loss": 0.0108, "step": 21282 }, { "epoch": 4.84254835039818, "grad_norm": 1.0441104236398548, "learning_rate": 3.0648075455234904e-09, "loss": 0.0533, "step": 21283 }, { "epoch": 4.842775881683732, "grad_norm": 1.7789038359869833, "learning_rate": 3.055976093734869e-09, "loss": 0.0393, "step": 21284 }, { "epoch": 4.843003412969283, "grad_norm": 0.7134882405528428, "learning_rate": 3.047157353342975e-09, "loss": 0.0026, "step": 21285 }, { "epoch": 4.843230944254835, "grad_norm": 2.1056132341629086, "learning_rate": 3.0383513245281493e-09, "loss": 0.0396, "step": 21286 }, { "epoch": 4.843458475540387, "grad_norm": 0.8185633998755543, "learning_rate": 3.0295580074703874e-09, "loss": 0.0126, "step": 21287 }, { "epoch": 4.843686006825939, "grad_norm": 1.2953824106614165, "learning_rate": 3.020777402349337e-09, "loss": 0.0167, "step": 21288 }, { "epoch": 4.84391353811149, "grad_norm": 2.0848516123141163, "learning_rate": 3.0120095093445073e-09, "loss": 0.036, "step": 21289 }, { "epoch": 4.844141069397042, "grad_norm": 1.4086328174945348, "learning_rate": 3.00325432863513e-09, "loss": 0.0493, "step": 21290 }, { "epoch": 4.844368600682594, "grad_norm": 1.3730892757430777, "learning_rate": 2.9945118604000207e-09, "loss": 0.0237, "step": 21291 }, { "epoch": 4.844596131968146, "grad_norm": 1.1992478830006117, "learning_rate": 2.985782104817994e-09, "loss": 0.0114, "step": 21292 }, { "epoch": 4.844823663253697, "grad_norm": 1.4328268518947944, "learning_rate": 2.9770650620674497e-09, "loss": 0.0182, "step": 21293 }, { "epoch": 4.845051194539249, "grad_norm": 1.4068962228498458, "learning_rate": 2.968360732326439e-09, "loss": 0.0323, "step": 21294 }, { "epoch": 4.845278725824801, "grad_norm": 1.0916749626009756, "learning_rate": 2.959669115772945e-09, "loss": 0.0099, "step": 21295 }, { "epoch": 4.845506257110353, "grad_norm": 1.4729677362382068, "learning_rate": 2.950990212584673e-09, "loss": 0.0935, "step": 21296 }, { "epoch": 4.845733788395904, "grad_norm": 1.2775784204502019, "learning_rate": 2.9423240229388416e-09, "loss": 0.0204, "step": 21297 }, { "epoch": 4.845961319681456, "grad_norm": 0.8382564123889293, "learning_rate": 2.9336705470127402e-09, "loss": 0.0244, "step": 21298 }, { "epoch": 4.846188850967008, "grad_norm": 0.9697678186803349, "learning_rate": 2.9250297849831023e-09, "loss": 0.0092, "step": 21299 }, { "epoch": 4.84641638225256, "grad_norm": 0.6030213039804856, "learning_rate": 2.9164017370265916e-09, "loss": 0.0056, "step": 21300 }, { "epoch": 4.846643913538111, "grad_norm": 0.866769379965856, "learning_rate": 2.9077864033195263e-09, "loss": 0.0116, "step": 21301 }, { "epoch": 4.846871444823663, "grad_norm": 0.9195537924506562, "learning_rate": 2.899183784038015e-09, "loss": 0.0043, "step": 21302 }, { "epoch": 4.847098976109215, "grad_norm": 1.5331747268004405, "learning_rate": 2.890593879357889e-09, "loss": 0.1043, "step": 21303 }, { "epoch": 4.847326507394767, "grad_norm": 2.0079215670531485, "learning_rate": 2.8820166894546335e-09, "loss": 0.0668, "step": 21304 }, { "epoch": 4.847554038680318, "grad_norm": 1.2440939482655369, "learning_rate": 2.8734522145035943e-09, "loss": 0.0109, "step": 21305 }, { "epoch": 4.84778156996587, "grad_norm": 1.5867050899934485, "learning_rate": 2.8649004546799088e-09, "loss": 0.0106, "step": 21306 }, { "epoch": 4.848009101251422, "grad_norm": 1.3171573510901722, "learning_rate": 2.8563614101581595e-09, "loss": 0.0273, "step": 21307 }, { "epoch": 4.848236632536974, "grad_norm": 0.7922589967380564, "learning_rate": 2.847835081113068e-09, "loss": 0.0055, "step": 21308 }, { "epoch": 4.848464163822525, "grad_norm": 0.7808918509643916, "learning_rate": 2.8393214677188003e-09, "loss": 0.0056, "step": 21309 }, { "epoch": 4.848691695108077, "grad_norm": 0.5341409310301114, "learning_rate": 2.8308205701493146e-09, "loss": 0.0056, "step": 21310 }, { "epoch": 4.84891922639363, "grad_norm": 2.1079405354066556, "learning_rate": 2.8223323885784304e-09, "loss": 0.0362, "step": 21311 }, { "epoch": 4.849146757679181, "grad_norm": 1.2347354209664347, "learning_rate": 2.8138569231796198e-09, "loss": 0.0049, "step": 21312 }, { "epoch": 4.849374288964732, "grad_norm": 1.2111652388661245, "learning_rate": 2.8053941741260775e-09, "loss": 0.0405, "step": 21313 }, { "epoch": 4.849601820250284, "grad_norm": 1.2140284033729778, "learning_rate": 2.7969441415907906e-09, "loss": 0.0518, "step": 21314 }, { "epoch": 4.849829351535837, "grad_norm": 3.117736470089328, "learning_rate": 2.7885068257464678e-09, "loss": 0.0139, "step": 21315 }, { "epoch": 4.850056882821388, "grad_norm": 1.1782760967474248, "learning_rate": 2.78008222676554e-09, "loss": 0.0236, "step": 21316 }, { "epoch": 4.850284414106939, "grad_norm": 0.9665716707295525, "learning_rate": 2.7716703448201627e-09, "loss": 0.041, "step": 21317 }, { "epoch": 4.850511945392491, "grad_norm": 0.8007335388549944, "learning_rate": 2.7632711800822805e-09, "loss": 0.0187, "step": 21318 }, { "epoch": 4.850739476678044, "grad_norm": 0.8674169546440604, "learning_rate": 2.7548847327235617e-09, "loss": 0.0107, "step": 21319 }, { "epoch": 4.850967007963595, "grad_norm": 0.44446941888349695, "learning_rate": 2.746511002915467e-09, "loss": 0.0033, "step": 21320 }, { "epoch": 4.851194539249147, "grad_norm": 1.0924613681754107, "learning_rate": 2.738149990829039e-09, "loss": 0.0093, "step": 21321 }, { "epoch": 4.851422070534698, "grad_norm": 1.3440658350761403, "learning_rate": 2.7298016966351843e-09, "loss": 0.01, "step": 21322 }, { "epoch": 4.851649601820251, "grad_norm": 1.4328205021461204, "learning_rate": 2.721466120504598e-09, "loss": 0.0163, "step": 21323 }, { "epoch": 4.851877133105802, "grad_norm": 1.0771198428553708, "learning_rate": 2.713143262607562e-09, "loss": 0.0104, "step": 21324 }, { "epoch": 4.852104664391354, "grad_norm": 2.146856519590455, "learning_rate": 2.7048331231142865e-09, "loss": 0.0776, "step": 21325 }, { "epoch": 4.852332195676905, "grad_norm": 0.9394331055386304, "learning_rate": 2.6965357021944276e-09, "loss": 0.0231, "step": 21326 }, { "epoch": 4.852559726962458, "grad_norm": 0.8086129973909205, "learning_rate": 2.6882510000177803e-09, "loss": 0.0054, "step": 21327 }, { "epoch": 4.852787258248009, "grad_norm": 0.304259922693713, "learning_rate": 2.6799790167535145e-09, "loss": 0.0009, "step": 21328 }, { "epoch": 4.853014789533561, "grad_norm": 1.3568747623436137, "learning_rate": 2.6717197525707312e-09, "loss": 0.011, "step": 21329 }, { "epoch": 4.853242320819112, "grad_norm": 0.8094824986083294, "learning_rate": 2.663473207638323e-09, "loss": 0.0425, "step": 21330 }, { "epoch": 4.853469852104665, "grad_norm": 1.4995502425775258, "learning_rate": 2.655239382124766e-09, "loss": 0.1188, "step": 21331 }, { "epoch": 4.853697383390216, "grad_norm": 1.313055072883757, "learning_rate": 2.647018276198329e-09, "loss": 0.0935, "step": 21332 }, { "epoch": 4.853924914675768, "grad_norm": 1.760893249084216, "learning_rate": 2.6388098900270025e-09, "loss": 0.0039, "step": 21333 }, { "epoch": 4.8541524459613195, "grad_norm": 1.2425634984003777, "learning_rate": 2.630614223778638e-09, "loss": 0.0346, "step": 21334 }, { "epoch": 4.854379977246872, "grad_norm": 2.2135011594730734, "learning_rate": 2.6224312776207406e-09, "loss": 0.0501, "step": 21335 }, { "epoch": 4.854607508532423, "grad_norm": 1.4803646105652144, "learning_rate": 2.6142610517204682e-09, "loss": 0.1006, "step": 21336 }, { "epoch": 4.854835039817975, "grad_norm": 0.25169577380225433, "learning_rate": 2.6061035462448397e-09, "loss": 0.0008, "step": 21337 }, { "epoch": 4.8550625711035265, "grad_norm": 1.1614647316510693, "learning_rate": 2.5979587613606665e-09, "loss": 0.0361, "step": 21338 }, { "epoch": 4.855290102389079, "grad_norm": 1.3570020687932307, "learning_rate": 2.5898266972342738e-09, "loss": 0.0196, "step": 21339 }, { "epoch": 4.85551763367463, "grad_norm": 1.0105502473110766, "learning_rate": 2.581707354031987e-09, "loss": 0.0112, "step": 21340 }, { "epoch": 4.855745164960182, "grad_norm": 0.9558692261737823, "learning_rate": 2.5736007319196455e-09, "loss": 0.0135, "step": 21341 }, { "epoch": 4.8559726962457335, "grad_norm": 0.24901039655599064, "learning_rate": 2.5655068310630192e-09, "loss": 0.0011, "step": 21342 }, { "epoch": 4.856200227531286, "grad_norm": 0.4983674308454119, "learning_rate": 2.5574256516274627e-09, "loss": 0.0044, "step": 21343 }, { "epoch": 4.856427758816837, "grad_norm": 1.1525870044468187, "learning_rate": 2.54935719377819e-09, "loss": 0.038, "step": 21344 }, { "epoch": 4.856655290102389, "grad_norm": 1.163524925818927, "learning_rate": 2.5413014576800704e-09, "loss": 0.028, "step": 21345 }, { "epoch": 4.8568828213879405, "grad_norm": 0.8804035632495189, "learning_rate": 2.5332584434978326e-09, "loss": 0.0032, "step": 21346 }, { "epoch": 4.857110352673493, "grad_norm": 1.4738535351889122, "learning_rate": 2.5252281513957893e-09, "loss": 0.0556, "step": 21347 }, { "epoch": 4.857337883959044, "grad_norm": 2.5781679818293326, "learning_rate": 2.517210581538046e-09, "loss": 0.0098, "step": 21348 }, { "epoch": 4.857565415244596, "grad_norm": 0.9538765681705049, "learning_rate": 2.509205734088499e-09, "loss": 0.0031, "step": 21349 }, { "epoch": 4.857792946530148, "grad_norm": 0.7699450097331638, "learning_rate": 2.501213609210698e-09, "loss": 0.0134, "step": 21350 }, { "epoch": 4.8580204778157, "grad_norm": 0.7595133238809193, "learning_rate": 2.4932342070681237e-09, "loss": 0.008, "step": 21351 }, { "epoch": 4.858248009101251, "grad_norm": 0.6095003238916098, "learning_rate": 2.485267527823701e-09, "loss": 0.0024, "step": 21352 }, { "epoch": 4.858475540386803, "grad_norm": 1.253884679493043, "learning_rate": 2.4773135716404245e-09, "loss": 0.0177, "step": 21353 }, { "epoch": 4.858703071672355, "grad_norm": 1.4766524443869413, "learning_rate": 2.469372338680734e-09, "loss": 0.0745, "step": 21354 }, { "epoch": 4.858930602957907, "grad_norm": 0.4714121701599457, "learning_rate": 2.4614438291068605e-09, "loss": 0.0061, "step": 21355 }, { "epoch": 4.859158134243458, "grad_norm": 1.225474681736517, "learning_rate": 2.4535280430810355e-09, "loss": 0.0404, "step": 21356 }, { "epoch": 4.85938566552901, "grad_norm": 1.4715857142189477, "learning_rate": 2.445624980764935e-09, "loss": 0.0041, "step": 21357 }, { "epoch": 4.859613196814562, "grad_norm": 1.8884203359637293, "learning_rate": 2.437734642320097e-09, "loss": 0.0209, "step": 21358 }, { "epoch": 4.859840728100114, "grad_norm": 1.1171442155913025, "learning_rate": 2.4298570279078505e-09, "loss": 0.0355, "step": 21359 }, { "epoch": 4.860068259385666, "grad_norm": 1.0604334576347216, "learning_rate": 2.4219921376891083e-09, "loss": 0.0107, "step": 21360 }, { "epoch": 4.860295790671217, "grad_norm": 1.707952459840213, "learning_rate": 2.4141399718246444e-09, "loss": 0.0467, "step": 21361 }, { "epoch": 4.860523321956769, "grad_norm": 1.5110339776272115, "learning_rate": 2.4063005304748864e-09, "loss": 0.0526, "step": 21362 }, { "epoch": 4.860750853242321, "grad_norm": 0.7649506125584652, "learning_rate": 2.3984738138001923e-09, "loss": 0.0269, "step": 21363 }, { "epoch": 4.860978384527873, "grad_norm": 0.9800516065042784, "learning_rate": 2.390659821960434e-09, "loss": 0.0038, "step": 21364 }, { "epoch": 4.861205915813424, "grad_norm": 1.6613674518618928, "learning_rate": 2.382858555115344e-09, "loss": 0.111, "step": 21365 }, { "epoch": 4.861433447098976, "grad_norm": 1.1974280307382432, "learning_rate": 2.3750700134243797e-09, "loss": 0.0129, "step": 21366 }, { "epoch": 4.861660978384528, "grad_norm": 0.42381284525469365, "learning_rate": 2.367294197046649e-09, "loss": 0.0026, "step": 21367 }, { "epoch": 4.86188850967008, "grad_norm": 1.2855440879582662, "learning_rate": 2.3595311061411915e-09, "loss": 0.084, "step": 21368 }, { "epoch": 4.862116040955631, "grad_norm": 0.7790811339225383, "learning_rate": 2.3517807408665605e-09, "loss": 0.049, "step": 21369 }, { "epoch": 4.862343572241183, "grad_norm": 1.2057349726707307, "learning_rate": 2.3440431013811714e-09, "loss": 0.06, "step": 21370 }, { "epoch": 4.862571103526735, "grad_norm": 0.7569294817720487, "learning_rate": 2.3363181878432312e-09, "loss": 0.0069, "step": 21371 }, { "epoch": 4.862798634812287, "grad_norm": 0.8775642852669115, "learning_rate": 2.328606000410599e-09, "loss": 0.0584, "step": 21372 }, { "epoch": 4.863026166097838, "grad_norm": 1.1910098754866112, "learning_rate": 2.3209065392409273e-09, "loss": 0.0056, "step": 21373 }, { "epoch": 4.86325369738339, "grad_norm": 1.341564502724964, "learning_rate": 2.3132198044915204e-09, "loss": 0.035, "step": 21374 }, { "epoch": 4.863481228668942, "grad_norm": 0.7813772275083661, "learning_rate": 2.305545796319475e-09, "loss": 0.003, "step": 21375 }, { "epoch": 4.863708759954494, "grad_norm": 0.925232177230986, "learning_rate": 2.297884514881679e-09, "loss": 0.0037, "step": 21376 }, { "epoch": 4.863936291240045, "grad_norm": 1.5579196236769663, "learning_rate": 2.2902359603346747e-09, "loss": 0.0129, "step": 21377 }, { "epoch": 4.864163822525597, "grad_norm": 1.1667839502847652, "learning_rate": 2.282600132834864e-09, "loss": 0.0333, "step": 21378 }, { "epoch": 4.864391353811149, "grad_norm": 0.8282426715427704, "learning_rate": 2.2749770325382336e-09, "loss": 0.0104, "step": 21379 }, { "epoch": 4.864618885096701, "grad_norm": 1.292346699360389, "learning_rate": 2.2673666596005617e-09, "loss": 0.0332, "step": 21380 }, { "epoch": 4.864846416382252, "grad_norm": 1.0240593225719985, "learning_rate": 2.2597690141774874e-09, "loss": 0.011, "step": 21381 }, { "epoch": 4.865073947667804, "grad_norm": 1.2948044846726585, "learning_rate": 2.2521840964242343e-09, "loss": 0.0742, "step": 21382 }, { "epoch": 4.865301478953356, "grad_norm": 1.0579628897823263, "learning_rate": 2.244611906495747e-09, "loss": 0.0675, "step": 21383 }, { "epoch": 4.865529010238908, "grad_norm": 0.7328969140986424, "learning_rate": 2.237052444546972e-09, "loss": 0.0039, "step": 21384 }, { "epoch": 4.865756541524459, "grad_norm": 1.3599207457051987, "learning_rate": 2.229505710732299e-09, "loss": 0.1138, "step": 21385 }, { "epoch": 4.865984072810011, "grad_norm": 0.5602685387070898, "learning_rate": 2.221971705205911e-09, "loss": 0.004, "step": 21386 }, { "epoch": 4.866211604095563, "grad_norm": 1.3384295719455062, "learning_rate": 2.21445042812192e-09, "loss": 0.0289, "step": 21387 }, { "epoch": 4.866439135381115, "grad_norm": 0.9098421414323739, "learning_rate": 2.206941879633953e-09, "loss": 0.0516, "step": 21388 }, { "epoch": 4.866666666666667, "grad_norm": 1.4977861799291747, "learning_rate": 2.199446059895499e-09, "loss": 0.0541, "step": 21389 }, { "epoch": 4.8668941979522184, "grad_norm": 2.201762694272321, "learning_rate": 2.1919629690597688e-09, "loss": 0.0387, "step": 21390 }, { "epoch": 4.86712172923777, "grad_norm": 0.5906605274156796, "learning_rate": 2.184492607279695e-09, "loss": 0.0101, "step": 21391 }, { "epoch": 4.867349260523322, "grad_norm": 1.2617431959852665, "learning_rate": 2.1770349747080034e-09, "loss": 0.0541, "step": 21392 }, { "epoch": 4.867576791808874, "grad_norm": 1.0765400415747555, "learning_rate": 2.169590071497002e-09, "loss": 0.0186, "step": 21393 }, { "epoch": 4.8678043230944255, "grad_norm": 0.8933123639627514, "learning_rate": 2.162157897799e-09, "loss": 0.0133, "step": 21394 }, { "epoch": 4.868031854379977, "grad_norm": 2.4329624781227057, "learning_rate": 2.1547384537657508e-09, "loss": 0.0127, "step": 21395 }, { "epoch": 4.868259385665529, "grad_norm": 2.2715968050460424, "learning_rate": 2.147331739549008e-09, "loss": 0.0026, "step": 21396 }, { "epoch": 4.868486916951081, "grad_norm": 1.1081864286683702, "learning_rate": 2.1399377553000395e-09, "loss": 0.0095, "step": 21397 }, { "epoch": 4.8687144482366325, "grad_norm": 1.590812777799342, "learning_rate": 2.1325565011700437e-09, "loss": 0.0083, "step": 21398 }, { "epoch": 4.868941979522185, "grad_norm": 1.6070692273070284, "learning_rate": 2.125187977309942e-09, "loss": 0.0059, "step": 21399 }, { "epoch": 4.869169510807736, "grad_norm": 1.2902895887196686, "learning_rate": 2.1178321838701693e-09, "loss": 0.0667, "step": 21400 }, { "epoch": 4.869397042093288, "grad_norm": 1.1626336537883406, "learning_rate": 2.11048912100123e-09, "loss": 0.0742, "step": 21401 }, { "epoch": 4.8696245733788395, "grad_norm": 0.6948910351374473, "learning_rate": 2.1031587888530047e-09, "loss": 0.0071, "step": 21402 }, { "epoch": 4.869852104664392, "grad_norm": 0.8326145906974947, "learning_rate": 2.095841187575512e-09, "loss": 0.0133, "step": 21403 }, { "epoch": 4.870079635949943, "grad_norm": 0.9602951191142095, "learning_rate": 2.088536317318285e-09, "loss": 0.0725, "step": 21404 }, { "epoch": 4.870307167235495, "grad_norm": 0.8510700915871662, "learning_rate": 2.0812441782304404e-09, "loss": 0.0145, "step": 21405 }, { "epoch": 4.8705346985210465, "grad_norm": 0.8776840922713969, "learning_rate": 2.0739647704612344e-09, "loss": 0.0158, "step": 21406 }, { "epoch": 4.870762229806599, "grad_norm": 1.5771275159766842, "learning_rate": 2.0666980941592973e-09, "loss": 0.0725, "step": 21407 }, { "epoch": 4.87098976109215, "grad_norm": 1.1897367815873774, "learning_rate": 2.059444149473261e-09, "loss": 0.0124, "step": 21408 }, { "epoch": 4.871217292377702, "grad_norm": 3.4813649640479705, "learning_rate": 2.05220293655127e-09, "loss": 0.0085, "step": 21409 }, { "epoch": 4.8714448236632535, "grad_norm": 0.49392762434757703, "learning_rate": 2.044974455541401e-09, "loss": 0.0021, "step": 21410 }, { "epoch": 4.871672354948806, "grad_norm": 1.0544043883505683, "learning_rate": 2.0377587065913835e-09, "loss": 0.042, "step": 21411 }, { "epoch": 4.871899886234357, "grad_norm": 2.2515770555340837, "learning_rate": 2.030555689848668e-09, "loss": 0.0894, "step": 21412 }, { "epoch": 4.872127417519909, "grad_norm": 0.3679504506304232, "learning_rate": 2.023365405460498e-09, "loss": 0.0026, "step": 21413 }, { "epoch": 4.8723549488054605, "grad_norm": 2.091091067474693, "learning_rate": 2.01618785357377e-09, "loss": 0.0943, "step": 21414 }, { "epoch": 4.872582480091013, "grad_norm": 0.6678010066075505, "learning_rate": 2.009023034335242e-09, "loss": 0.0025, "step": 21415 }, { "epoch": 4.872810011376564, "grad_norm": 0.9380205725512127, "learning_rate": 2.001870947891324e-09, "loss": 0.0464, "step": 21416 }, { "epoch": 4.873037542662116, "grad_norm": 1.3356022884504737, "learning_rate": 1.9947315943881494e-09, "loss": 0.0363, "step": 21417 }, { "epoch": 4.8732650739476675, "grad_norm": 1.8130883905681305, "learning_rate": 1.9876049739717822e-09, "loss": 0.0473, "step": 21418 }, { "epoch": 4.87349260523322, "grad_norm": 2.3601837976643654, "learning_rate": 1.9804910867877307e-09, "loss": 0.0346, "step": 21419 }, { "epoch": 4.873720136518771, "grad_norm": 0.8450141638091698, "learning_rate": 1.9733899329814342e-09, "loss": 0.007, "step": 21420 }, { "epoch": 4.873947667804323, "grad_norm": 1.6047468932326163, "learning_rate": 1.9663015126979853e-09, "loss": 0.0335, "step": 21421 }, { "epoch": 4.8741751990898745, "grad_norm": 0.7019035014156548, "learning_rate": 1.9592258260823364e-09, "loss": 0.004, "step": 21422 }, { "epoch": 4.874402730375427, "grad_norm": 0.7581649326428548, "learning_rate": 1.9521628732790955e-09, "loss": 0.0049, "step": 21423 }, { "epoch": 4.874630261660978, "grad_norm": 0.27411587397269066, "learning_rate": 1.945112654432521e-09, "loss": 0.0009, "step": 21424 }, { "epoch": 4.87485779294653, "grad_norm": 2.744791945615283, "learning_rate": 1.938075169686804e-09, "loss": 0.0542, "step": 21425 }, { "epoch": 4.8750853242320815, "grad_norm": 1.0768164472288435, "learning_rate": 1.9310504191857177e-09, "loss": 0.0234, "step": 21426 }, { "epoch": 4.875312855517634, "grad_norm": 1.2918285565463135, "learning_rate": 1.924038403072967e-09, "loss": 0.0074, "step": 21427 }, { "epoch": 4.875540386803186, "grad_norm": 1.1291765509070304, "learning_rate": 1.9170391214916317e-09, "loss": 0.065, "step": 21428 }, { "epoch": 4.875767918088737, "grad_norm": 1.5561292486455127, "learning_rate": 1.9100525745850002e-09, "loss": 0.0901, "step": 21429 }, { "epoch": 4.8759954493742885, "grad_norm": 1.1701929300646923, "learning_rate": 1.9030787624956666e-09, "loss": 0.0241, "step": 21430 }, { "epoch": 4.876222980659841, "grad_norm": 0.6577452268787262, "learning_rate": 1.8961176853662944e-09, "loss": 0.0025, "step": 21431 }, { "epoch": 4.876450511945393, "grad_norm": 0.9761575212958361, "learning_rate": 1.8891693433391312e-09, "loss": 0.0151, "step": 21432 }, { "epoch": 4.876678043230944, "grad_norm": 0.9472561476589796, "learning_rate": 1.882233736556216e-09, "loss": 0.0046, "step": 21433 }, { "epoch": 4.8769055745164955, "grad_norm": 1.1327593902778479, "learning_rate": 1.8753108651591717e-09, "loss": 0.0043, "step": 21434 }, { "epoch": 4.877133105802048, "grad_norm": 2.076429378968444, "learning_rate": 1.868400729289621e-09, "loss": 0.1082, "step": 21435 }, { "epoch": 4.8773606370876, "grad_norm": 0.9078616243342102, "learning_rate": 1.8615033290887013e-09, "loss": 0.0415, "step": 21436 }, { "epoch": 4.877588168373151, "grad_norm": 1.0440776247189054, "learning_rate": 1.85461866469748e-09, "loss": 0.0342, "step": 21437 }, { "epoch": 4.877815699658703, "grad_norm": 2.242683871922092, "learning_rate": 1.8477467362566087e-09, "loss": 0.0337, "step": 21438 }, { "epoch": 4.878043230944255, "grad_norm": 1.475877374175449, "learning_rate": 1.8408875439065305e-09, "loss": 0.0473, "step": 21439 }, { "epoch": 4.878270762229807, "grad_norm": 1.4270117574689165, "learning_rate": 1.8340410877874804e-09, "loss": 0.0168, "step": 21440 }, { "epoch": 4.878498293515358, "grad_norm": 1.6214298404275762, "learning_rate": 1.8272073680393464e-09, "loss": 0.0643, "step": 21441 }, { "epoch": 4.87872582480091, "grad_norm": 2.7476911491026166, "learning_rate": 1.8203863848017394e-09, "loss": 0.0188, "step": 21442 }, { "epoch": 4.878953356086462, "grad_norm": 0.9031819173631265, "learning_rate": 1.8135781382142004e-09, "loss": 0.0043, "step": 21443 }, { "epoch": 4.879180887372014, "grad_norm": 1.40411312510898, "learning_rate": 1.8067826284157847e-09, "loss": 0.1193, "step": 21444 }, { "epoch": 4.879408418657565, "grad_norm": 1.5478801419509358, "learning_rate": 1.799999855545409e-09, "loss": 0.0785, "step": 21445 }, { "epoch": 4.8796359499431174, "grad_norm": 1.1861589675765447, "learning_rate": 1.7932298197417125e-09, "loss": 0.0365, "step": 21446 }, { "epoch": 4.879863481228669, "grad_norm": 1.1320475056317987, "learning_rate": 1.7864725211430567e-09, "loss": 0.0503, "step": 21447 }, { "epoch": 4.880091012514221, "grad_norm": 0.8074080034699884, "learning_rate": 1.7797279598875256e-09, "loss": 0.0038, "step": 21448 }, { "epoch": 4.880318543799772, "grad_norm": 0.9304512072746698, "learning_rate": 1.772996136112995e-09, "loss": 0.0379, "step": 21449 }, { "epoch": 4.8805460750853245, "grad_norm": 0.6341953895995003, "learning_rate": 1.766277049957063e-09, "loss": 0.0038, "step": 21450 }, { "epoch": 4.880773606370876, "grad_norm": 1.8575303030460433, "learning_rate": 1.7595707015569814e-09, "loss": 0.0194, "step": 21451 }, { "epoch": 4.881001137656428, "grad_norm": 1.3404703081009073, "learning_rate": 1.752877091049862e-09, "loss": 0.0087, "step": 21452 }, { "epoch": 4.881228668941979, "grad_norm": 1.5791934845022664, "learning_rate": 1.7461962185725406e-09, "loss": 0.1022, "step": 21453 }, { "epoch": 4.8814562002275315, "grad_norm": 1.772291656500364, "learning_rate": 1.7395280842615047e-09, "loss": 0.0617, "step": 21454 }, { "epoch": 4.881683731513083, "grad_norm": 1.5352575061843707, "learning_rate": 1.7328726882531038e-09, "loss": 0.084, "step": 21455 }, { "epoch": 4.881911262798635, "grad_norm": 1.2376635950160964, "learning_rate": 1.7262300306832707e-09, "loss": 0.0082, "step": 21456 }, { "epoch": 4.882138794084186, "grad_norm": 1.9769835353474945, "learning_rate": 1.719600111687869e-09, "loss": 0.0529, "step": 21457 }, { "epoch": 4.8823663253697385, "grad_norm": 1.7799057461725631, "learning_rate": 1.712982931402346e-09, "loss": 0.0364, "step": 21458 }, { "epoch": 4.88259385665529, "grad_norm": 1.2565921882527842, "learning_rate": 1.7063784899619407e-09, "loss": 0.0334, "step": 21459 }, { "epoch": 4.882821387940842, "grad_norm": 2.131530995908429, "learning_rate": 1.6997867875016837e-09, "loss": 0.1372, "step": 21460 }, { "epoch": 4.883048919226393, "grad_norm": 1.0691244323150615, "learning_rate": 1.6932078241562594e-09, "loss": 0.0139, "step": 21461 }, { "epoch": 4.8832764505119455, "grad_norm": 1.1810608208143192, "learning_rate": 1.6866416000601432e-09, "loss": 0.0655, "step": 21462 }, { "epoch": 4.883503981797497, "grad_norm": 0.6640346465512286, "learning_rate": 1.6800881153474642e-09, "loss": 0.0022, "step": 21463 }, { "epoch": 4.883731513083049, "grad_norm": 0.9983647069194843, "learning_rate": 1.6735473701522814e-09, "loss": 0.0321, "step": 21464 }, { "epoch": 4.8839590443686, "grad_norm": 0.9440257259349564, "learning_rate": 1.6670193646082382e-09, "loss": 0.0082, "step": 21465 }, { "epoch": 4.8841865756541525, "grad_norm": 0.886972552201152, "learning_rate": 1.6605040988487003e-09, "loss": 0.008, "step": 21466 }, { "epoch": 4.884414106939705, "grad_norm": 0.9229165823508015, "learning_rate": 1.654001573006825e-09, "loss": 0.0044, "step": 21467 }, { "epoch": 4.884641638225256, "grad_norm": 1.7619825930365092, "learning_rate": 1.6475117872156304e-09, "loss": 0.0229, "step": 21468 }, { "epoch": 4.884869169510807, "grad_norm": 1.1574930466985427, "learning_rate": 1.64103474160765e-09, "loss": 0.064, "step": 21469 }, { "epoch": 4.8850967007963595, "grad_norm": 1.0006610810210739, "learning_rate": 1.6345704363152776e-09, "loss": 0.0376, "step": 21470 }, { "epoch": 4.885324232081912, "grad_norm": 3.847992432144887, "learning_rate": 1.628118871470699e-09, "loss": 0.049, "step": 21471 }, { "epoch": 4.885551763367463, "grad_norm": 0.8406013819933321, "learning_rate": 1.6216800472056844e-09, "loss": 0.0342, "step": 21472 }, { "epoch": 4.885779294653014, "grad_norm": 1.558201536156299, "learning_rate": 1.6152539636518639e-09, "loss": 0.0117, "step": 21473 }, { "epoch": 4.8860068259385665, "grad_norm": 1.2190442630779326, "learning_rate": 1.608840620940591e-09, "loss": 0.0085, "step": 21474 }, { "epoch": 4.886234357224119, "grad_norm": 1.4331473589429415, "learning_rate": 1.602440019202872e-09, "loss": 0.0067, "step": 21475 }, { "epoch": 4.88646188850967, "grad_norm": 1.152420951858474, "learning_rate": 1.5960521585696442e-09, "loss": 0.0705, "step": 21476 }, { "epoch": 4.886689419795222, "grad_norm": 1.1228327618537648, "learning_rate": 1.5896770391713585e-09, "loss": 0.0179, "step": 21477 }, { "epoch": 4.8869169510807735, "grad_norm": 0.8498322674535213, "learning_rate": 1.5833146611383964e-09, "loss": 0.0161, "step": 21478 }, { "epoch": 4.887144482366326, "grad_norm": 0.9704349268346042, "learning_rate": 1.576965024600724e-09, "loss": 0.0259, "step": 21479 }, { "epoch": 4.887372013651877, "grad_norm": 0.363427502521533, "learning_rate": 1.5706281296881675e-09, "loss": 0.0024, "step": 21480 }, { "epoch": 4.887599544937429, "grad_norm": 0.8723160420063334, "learning_rate": 1.5643039765301376e-09, "loss": 0.0052, "step": 21481 }, { "epoch": 4.8878270762229805, "grad_norm": 0.5483221058786172, "learning_rate": 1.5579925652560445e-09, "loss": 0.0016, "step": 21482 }, { "epoch": 4.888054607508533, "grad_norm": 0.757324709356603, "learning_rate": 1.5516938959947435e-09, "loss": 0.0053, "step": 21483 }, { "epoch": 4.888282138794084, "grad_norm": 1.3143832719833501, "learning_rate": 1.54540796887509e-09, "loss": 0.0115, "step": 21484 }, { "epoch": 4.888509670079636, "grad_norm": 1.5312108430473896, "learning_rate": 1.5391347840254534e-09, "loss": 0.0448, "step": 21485 }, { "epoch": 4.8887372013651875, "grad_norm": 2.0613581700157533, "learning_rate": 1.5328743415741338e-09, "loss": 0.1205, "step": 21486 }, { "epoch": 4.88896473265074, "grad_norm": 1.3655074507011644, "learning_rate": 1.5266266416489455e-09, "loss": 0.0172, "step": 21487 }, { "epoch": 4.889192263936291, "grad_norm": 1.298761270905793, "learning_rate": 1.5203916843777727e-09, "loss": 0.024, "step": 21488 }, { "epoch": 4.889419795221843, "grad_norm": 1.605297213891384, "learning_rate": 1.5141694698878745e-09, "loss": 0.081, "step": 21489 }, { "epoch": 4.8896473265073945, "grad_norm": 1.487616960540035, "learning_rate": 1.5079599983065103e-09, "loss": 0.0077, "step": 21490 }, { "epoch": 4.889874857792947, "grad_norm": 1.4714369896814974, "learning_rate": 1.501763269760592e-09, "loss": 0.0222, "step": 21491 }, { "epoch": 4.890102389078498, "grad_norm": 1.4732498004129588, "learning_rate": 1.4955792843767553e-09, "loss": 0.0104, "step": 21492 }, { "epoch": 4.89032992036405, "grad_norm": 1.6060563432197321, "learning_rate": 1.4894080422813568e-09, "loss": 0.014, "step": 21493 }, { "epoch": 4.8905574516496015, "grad_norm": 1.109263481835439, "learning_rate": 1.4832495436005456e-09, "loss": 0.0593, "step": 21494 }, { "epoch": 4.890784982935154, "grad_norm": 1.304258682202185, "learning_rate": 1.4771037884601935e-09, "loss": 0.0636, "step": 21495 }, { "epoch": 4.891012514220705, "grad_norm": 0.9661138256419626, "learning_rate": 1.4709707769859638e-09, "loss": 0.0186, "step": 21496 }, { "epoch": 4.891240045506257, "grad_norm": 1.745168094958318, "learning_rate": 1.4648505093031034e-09, "loss": 0.0097, "step": 21497 }, { "epoch": 4.8914675767918085, "grad_norm": 1.5010103702249988, "learning_rate": 1.4587429855367207e-09, "loss": 0.0868, "step": 21498 }, { "epoch": 4.891695108077361, "grad_norm": 1.1948506281602238, "learning_rate": 1.4526482058117852e-09, "loss": 0.0645, "step": 21499 }, { "epoch": 4.891922639362912, "grad_norm": 1.1825115329305997, "learning_rate": 1.4465661702526418e-09, "loss": 0.0674, "step": 21500 }, { "epoch": 4.892150170648464, "grad_norm": 1.0581003980232828, "learning_rate": 1.440496878983705e-09, "loss": 0.0053, "step": 21501 }, { "epoch": 4.8923777019340156, "grad_norm": 0.8113118911524537, "learning_rate": 1.4344403321290418e-09, "loss": 0.0151, "step": 21502 }, { "epoch": 4.892605233219568, "grad_norm": 2.530151158080951, "learning_rate": 1.4283965298124424e-09, "loss": 0.0858, "step": 21503 }, { "epoch": 4.892832764505119, "grad_norm": 2.155838905297652, "learning_rate": 1.42236547215735e-09, "loss": 0.0172, "step": 21504 }, { "epoch": 4.893060295790671, "grad_norm": 1.684217964138259, "learning_rate": 1.4163471592871375e-09, "loss": 0.0501, "step": 21505 }, { "epoch": 4.8932878270762235, "grad_norm": 2.0261564902694125, "learning_rate": 1.4103415913246931e-09, "loss": 0.0145, "step": 21506 }, { "epoch": 4.893515358361775, "grad_norm": 1.8766573026009834, "learning_rate": 1.4043487683928353e-09, "loss": 0.0948, "step": 21507 }, { "epoch": 4.893742889647326, "grad_norm": 1.2557168651032025, "learning_rate": 1.3983686906140354e-09, "loss": 0.0087, "step": 21508 }, { "epoch": 4.893970420932878, "grad_norm": 0.8595167850292128, "learning_rate": 1.3924013581104872e-09, "loss": 0.0146, "step": 21509 }, { "epoch": 4.8941979522184305, "grad_norm": 1.222363515865549, "learning_rate": 1.3864467710041768e-09, "loss": 0.0326, "step": 21510 }, { "epoch": 4.894425483503982, "grad_norm": 0.914140848285237, "learning_rate": 1.3805049294168122e-09, "loss": 0.0038, "step": 21511 }, { "epoch": 4.894653014789533, "grad_norm": 1.025201187852578, "learning_rate": 1.374575833469824e-09, "loss": 0.0047, "step": 21512 }, { "epoch": 4.894880546075085, "grad_norm": 1.7172605688092228, "learning_rate": 1.3686594832843654e-09, "loss": 0.0597, "step": 21513 }, { "epoch": 4.8951080773606375, "grad_norm": 1.1798729642771915, "learning_rate": 1.362755878981381e-09, "loss": 0.0122, "step": 21514 }, { "epoch": 4.895335608646189, "grad_norm": 1.222375590587719, "learning_rate": 1.356865020681608e-09, "loss": 0.0256, "step": 21515 }, { "epoch": 4.895563139931741, "grad_norm": 1.600889948674164, "learning_rate": 1.3509869085052974e-09, "loss": 0.0686, "step": 21516 }, { "epoch": 4.895790671217292, "grad_norm": 1.8537110654969717, "learning_rate": 1.3451215425726306e-09, "loss": 0.067, "step": 21517 }, { "epoch": 4.8960182025028445, "grad_norm": 1.4695995006305138, "learning_rate": 1.3392689230035811e-09, "loss": 0.0188, "step": 21518 }, { "epoch": 4.896245733788396, "grad_norm": 1.6173441401463662, "learning_rate": 1.3334290499176367e-09, "loss": 0.0128, "step": 21519 }, { "epoch": 4.896473265073948, "grad_norm": 0.6321342124100295, "learning_rate": 1.3276019234342158e-09, "loss": 0.0021, "step": 21520 }, { "epoch": 4.896700796359499, "grad_norm": 0.9047586341805943, "learning_rate": 1.3217875436723898e-09, "loss": 0.011, "step": 21521 }, { "epoch": 4.8969283276450515, "grad_norm": 0.8150915660606836, "learning_rate": 1.3159859107510915e-09, "loss": 0.0433, "step": 21522 }, { "epoch": 4.897155858930603, "grad_norm": 1.565345847214059, "learning_rate": 1.3101970247887674e-09, "loss": 0.0318, "step": 21523 }, { "epoch": 4.897383390216155, "grad_norm": 1.7239590196256953, "learning_rate": 1.3044208859037954e-09, "loss": 0.0453, "step": 21524 }, { "epoch": 4.897610921501706, "grad_norm": 0.8551426563256664, "learning_rate": 1.298657494214206e-09, "loss": 0.0046, "step": 21525 }, { "epoch": 4.8978384527872585, "grad_norm": 1.4811284143882621, "learning_rate": 1.2929068498377523e-09, "loss": 0.051, "step": 21526 }, { "epoch": 4.89806598407281, "grad_norm": 1.472708487286846, "learning_rate": 1.2871689528921178e-09, "loss": 0.0292, "step": 21527 }, { "epoch": 4.898293515358362, "grad_norm": 2.1329983500718264, "learning_rate": 1.2814438034943617e-09, "loss": 0.0248, "step": 21528 }, { "epoch": 4.898521046643913, "grad_norm": 1.3245718360946164, "learning_rate": 1.2757314017616818e-09, "loss": 0.0199, "step": 21529 }, { "epoch": 4.8987485779294655, "grad_norm": 1.2326839554963338, "learning_rate": 1.2700317478107904e-09, "loss": 0.0767, "step": 21530 }, { "epoch": 4.898976109215017, "grad_norm": 1.3926510572917552, "learning_rate": 1.264344841758053e-09, "loss": 0.0618, "step": 21531 }, { "epoch": 4.899203640500569, "grad_norm": 1.6903900300553503, "learning_rate": 1.2586706837198342e-09, "loss": 0.0345, "step": 21532 }, { "epoch": 4.89943117178612, "grad_norm": 1.13959097819045, "learning_rate": 1.2530092738120836e-09, "loss": 0.0125, "step": 21533 }, { "epoch": 4.8996587030716725, "grad_norm": 0.8427770663151151, "learning_rate": 1.2473606121504723e-09, "loss": 0.0129, "step": 21534 }, { "epoch": 4.899886234357224, "grad_norm": 0.5945523753761638, "learning_rate": 1.2417246988503939e-09, "loss": 0.003, "step": 21535 }, { "epoch": 4.900113765642776, "grad_norm": 1.5218407178108613, "learning_rate": 1.2361015340271732e-09, "loss": 0.0041, "step": 21536 }, { "epoch": 4.900341296928327, "grad_norm": 1.6679572781066518, "learning_rate": 1.2304911177956486e-09, "loss": 0.0691, "step": 21537 }, { "epoch": 4.9005688282138795, "grad_norm": 1.1633408664602338, "learning_rate": 1.22489345027052e-09, "loss": 0.0882, "step": 21538 }, { "epoch": 4.900796359499431, "grad_norm": 1.4613603265770472, "learning_rate": 1.2193085315661406e-09, "loss": 0.0203, "step": 21539 }, { "epoch": 4.901023890784983, "grad_norm": 1.7786328754316805, "learning_rate": 1.2137363617967247e-09, "loss": 0.0122, "step": 21540 }, { "epoch": 4.901251422070534, "grad_norm": 0.5649003772400564, "learning_rate": 1.208176941076139e-09, "loss": 0.0045, "step": 21541 }, { "epoch": 4.9014789533560865, "grad_norm": 1.5597341737330361, "learning_rate": 1.2026302695179737e-09, "loss": 0.0526, "step": 21542 }, { "epoch": 4.901706484641638, "grad_norm": 0.9097578959875764, "learning_rate": 1.1970963472356102e-09, "loss": 0.0122, "step": 21543 }, { "epoch": 4.90193401592719, "grad_norm": 0.9282437364917648, "learning_rate": 1.1915751743421522e-09, "loss": 0.0052, "step": 21544 }, { "epoch": 4.902161547212742, "grad_norm": 0.7469193602412754, "learning_rate": 1.1860667509504959e-09, "loss": 0.0028, "step": 21545 }, { "epoch": 4.9023890784982935, "grad_norm": 0.3565162763366131, "learning_rate": 1.1805710771731204e-09, "loss": 0.0017, "step": 21546 }, { "epoch": 4.902616609783845, "grad_norm": 1.2365376356997444, "learning_rate": 1.1750881531224362e-09, "loss": 0.044, "step": 21547 }, { "epoch": 4.902844141069397, "grad_norm": 1.8906574732520183, "learning_rate": 1.1696179789104367e-09, "loss": 0.0723, "step": 21548 }, { "epoch": 4.903071672354949, "grad_norm": 0.5681306813217898, "learning_rate": 1.164160554648977e-09, "loss": 0.0021, "step": 21549 }, { "epoch": 4.9032992036405005, "grad_norm": 1.5217472161568188, "learning_rate": 1.1587158804495652e-09, "loss": 0.0204, "step": 21550 }, { "epoch": 4.903526734926052, "grad_norm": 1.012979631117039, "learning_rate": 1.153283956423501e-09, "loss": 0.0072, "step": 21551 }, { "epoch": 4.903754266211604, "grad_norm": 4.437361370632698, "learning_rate": 1.1478647826817373e-09, "loss": 0.0047, "step": 21552 }, { "epoch": 4.903981797497156, "grad_norm": 0.704840579138685, "learning_rate": 1.1424583593351579e-09, "loss": 0.0042, "step": 21553 }, { "epoch": 4.9042093287827075, "grad_norm": 1.292350338024223, "learning_rate": 1.1370646864941604e-09, "loss": 0.0233, "step": 21554 }, { "epoch": 4.90443686006826, "grad_norm": 2.864131314686202, "learning_rate": 1.1316837642690036e-09, "loss": 0.0345, "step": 21555 }, { "epoch": 4.904664391353811, "grad_norm": 0.8387554595243387, "learning_rate": 1.1263155927696694e-09, "loss": 0.0072, "step": 21556 }, { "epoch": 4.904891922639363, "grad_norm": 1.9549462557524144, "learning_rate": 1.1209601721058616e-09, "loss": 0.027, "step": 21557 }, { "epoch": 4.9051194539249146, "grad_norm": 1.558116639134629, "learning_rate": 1.1156175023870757e-09, "loss": 0.071, "step": 21558 }, { "epoch": 4.905346985210467, "grad_norm": 1.9814304725152843, "learning_rate": 1.110287583722461e-09, "loss": 0.0173, "step": 21559 }, { "epoch": 4.905574516496018, "grad_norm": 0.9761439222516446, "learning_rate": 1.1049704162209573e-09, "loss": 0.0097, "step": 21560 }, { "epoch": 4.90580204778157, "grad_norm": 1.8190337006865316, "learning_rate": 1.0996659999912978e-09, "loss": 0.0218, "step": 21561 }, { "epoch": 4.906029579067122, "grad_norm": 1.6049037863263056, "learning_rate": 1.0943743351417984e-09, "loss": 0.0637, "step": 21562 }, { "epoch": 4.906257110352674, "grad_norm": 1.7943025072537209, "learning_rate": 1.089095421780706e-09, "loss": 0.0558, "step": 21563 }, { "epoch": 4.906484641638225, "grad_norm": 0.8687161655732513, "learning_rate": 1.0838292600157813e-09, "loss": 0.0218, "step": 21564 }, { "epoch": 4.906712172923777, "grad_norm": 1.63996561083686, "learning_rate": 1.0785758499548549e-09, "loss": 0.0083, "step": 21565 }, { "epoch": 4.906939704209329, "grad_norm": 1.3957331212520276, "learning_rate": 1.0733351917050633e-09, "loss": 0.0449, "step": 21566 }, { "epoch": 4.907167235494881, "grad_norm": 1.2814260047010226, "learning_rate": 1.0681072853736819e-09, "loss": 0.0455, "step": 21567 }, { "epoch": 4.907394766780432, "grad_norm": 1.8588615018687078, "learning_rate": 1.0628921310675694e-09, "loss": 0.0775, "step": 21568 }, { "epoch": 4.907622298065984, "grad_norm": 0.70478306426439, "learning_rate": 1.0576897288931686e-09, "loss": 0.0261, "step": 21569 }, { "epoch": 4.907849829351536, "grad_norm": 0.8737425071032769, "learning_rate": 1.0525000789569223e-09, "loss": 0.0125, "step": 21570 }, { "epoch": 4.908077360637088, "grad_norm": 1.1936055493776554, "learning_rate": 1.0473231813648567e-09, "loss": 0.0597, "step": 21571 }, { "epoch": 4.908304891922639, "grad_norm": 1.1965361252299174, "learning_rate": 1.0421590362227897e-09, "loss": 0.0299, "step": 21572 }, { "epoch": 4.908532423208191, "grad_norm": 1.8542028916218345, "learning_rate": 1.037007643636262e-09, "loss": 0.1526, "step": 21573 }, { "epoch": 4.908759954493743, "grad_norm": 1.4521566828504635, "learning_rate": 1.031869003710606e-09, "loss": 0.0358, "step": 21574 }, { "epoch": 4.908987485779295, "grad_norm": 1.413871673404006, "learning_rate": 1.0267431165508073e-09, "loss": 0.0647, "step": 21575 }, { "epoch": 4.909215017064846, "grad_norm": 0.992783867283737, "learning_rate": 1.0216299822615739e-09, "loss": 0.0501, "step": 21576 }, { "epoch": 4.909442548350398, "grad_norm": 0.914393608592245, "learning_rate": 1.0165296009474745e-09, "loss": 0.0289, "step": 21577 }, { "epoch": 4.90967007963595, "grad_norm": 0.8756780038818852, "learning_rate": 1.0114419727127316e-09, "loss": 0.0081, "step": 21578 }, { "epoch": 4.909897610921502, "grad_norm": 1.305234376961605, "learning_rate": 1.0063670976613592e-09, "loss": 0.0472, "step": 21579 }, { "epoch": 4.910125142207053, "grad_norm": 1.513780808997868, "learning_rate": 1.001304975897094e-09, "loss": 0.0884, "step": 21580 }, { "epoch": 4.910352673492605, "grad_norm": 1.0028730595073183, "learning_rate": 9.962556075232555e-10, "loss": 0.0112, "step": 21581 }, { "epoch": 4.910580204778157, "grad_norm": 0.9580896980796662, "learning_rate": 9.912189926432336e-10, "loss": 0.05, "step": 21582 }, { "epoch": 4.910807736063709, "grad_norm": 0.7559909192307531, "learning_rate": 9.861951313597933e-10, "loss": 0.0074, "step": 21583 }, { "epoch": 4.911035267349261, "grad_norm": 1.557711991153893, "learning_rate": 9.811840237757691e-10, "loss": 0.0115, "step": 21584 }, { "epoch": 4.911262798634812, "grad_norm": 1.2768122636768968, "learning_rate": 9.761856699934403e-10, "loss": 0.0708, "step": 21585 }, { "epoch": 4.911490329920364, "grad_norm": 1.348530462139812, "learning_rate": 9.712000701150859e-10, "loss": 0.005, "step": 21586 }, { "epoch": 4.911717861205916, "grad_norm": 1.7511164672360624, "learning_rate": 9.662272242425691e-10, "loss": 0.0588, "step": 21587 }, { "epoch": 4.911945392491468, "grad_norm": 1.6555438123243464, "learning_rate": 9.612671324774752e-10, "loss": 0.0096, "step": 21588 }, { "epoch": 4.912172923777019, "grad_norm": 0.8955417781350822, "learning_rate": 9.563197949211816e-10, "loss": 0.0332, "step": 21589 }, { "epoch": 4.912400455062571, "grad_norm": 0.5395195326081599, "learning_rate": 9.513852116748572e-10, "loss": 0.0025, "step": 21590 }, { "epoch": 4.912627986348123, "grad_norm": 0.9906651179688897, "learning_rate": 9.464633828393244e-10, "loss": 0.0627, "step": 21591 }, { "epoch": 4.912855517633675, "grad_norm": 1.9203133948255093, "learning_rate": 9.415543085151273e-10, "loss": 0.0073, "step": 21592 }, { "epoch": 4.913083048919226, "grad_norm": 3.9363588189244547, "learning_rate": 9.366579888027416e-10, "loss": 0.0471, "step": 21593 }, { "epoch": 4.9133105802047785, "grad_norm": 1.660583184376991, "learning_rate": 9.317744238020871e-10, "loss": 0.0886, "step": 21594 }, { "epoch": 4.91353811149033, "grad_norm": 2.092149890694213, "learning_rate": 9.269036136130144e-10, "loss": 0.0287, "step": 21595 }, { "epoch": 4.913765642775882, "grad_norm": 0.7760408726220915, "learning_rate": 9.220455583351662e-10, "loss": 0.0171, "step": 21596 }, { "epoch": 4.913993174061433, "grad_norm": 1.6257225736128058, "learning_rate": 9.172002580677686e-10, "loss": 0.0202, "step": 21597 }, { "epoch": 4.9142207053469855, "grad_norm": 1.0442542207103798, "learning_rate": 9.123677129097702e-10, "loss": 0.0051, "step": 21598 }, { "epoch": 4.914448236632537, "grad_norm": 0.6953258111384765, "learning_rate": 9.075479229600504e-10, "loss": 0.0075, "step": 21599 }, { "epoch": 4.914675767918089, "grad_norm": 1.6804560768678463, "learning_rate": 9.027408883170718e-10, "loss": 0.0074, "step": 21600 }, { "epoch": 4.91490329920364, "grad_norm": 0.4270594284817604, "learning_rate": 8.979466090790895e-10, "loss": 0.0011, "step": 21601 }, { "epoch": 4.9151308304891925, "grad_norm": 1.422749895676555, "learning_rate": 8.931650853440804e-10, "loss": 0.0162, "step": 21602 }, { "epoch": 4.915358361774744, "grad_norm": 0.7494024156535787, "learning_rate": 8.883963172098137e-10, "loss": 0.008, "step": 21603 }, { "epoch": 4.915585893060296, "grad_norm": 1.010039999140944, "learning_rate": 8.836403047737113e-10, "loss": 0.0199, "step": 21604 }, { "epoch": 4.915813424345847, "grad_norm": 1.3012913911141548, "learning_rate": 8.788970481329873e-10, "loss": 0.0663, "step": 21605 }, { "epoch": 4.9160409556313995, "grad_norm": 0.9524363461942525, "learning_rate": 8.741665473845779e-10, "loss": 0.0063, "step": 21606 }, { "epoch": 4.916268486916951, "grad_norm": 1.1198490318919678, "learning_rate": 8.694488026251419e-10, "loss": 0.0217, "step": 21607 }, { "epoch": 4.916496018202503, "grad_norm": 1.0990808981611486, "learning_rate": 8.647438139511993e-10, "loss": 0.0233, "step": 21608 }, { "epoch": 4.916723549488054, "grad_norm": 0.4866731313117844, "learning_rate": 8.600515814587846e-10, "loss": 0.0021, "step": 21609 }, { "epoch": 4.9169510807736065, "grad_norm": 6.330418618475183, "learning_rate": 8.553721052438623e-10, "loss": 0.0155, "step": 21610 }, { "epoch": 4.917178612059158, "grad_norm": 1.6934200008560896, "learning_rate": 8.507053854020508e-10, "loss": 0.0572, "step": 21611 }, { "epoch": 4.91740614334471, "grad_norm": 1.6037971499003012, "learning_rate": 8.460514220287597e-10, "loss": 0.0916, "step": 21612 }, { "epoch": 4.917633674630261, "grad_norm": 0.8488898770955983, "learning_rate": 8.414102152191211e-10, "loss": 0.0088, "step": 21613 }, { "epoch": 4.9178612059158135, "grad_norm": 0.2889847660881758, "learning_rate": 8.367817650679899e-10, "loss": 0.0007, "step": 21614 }, { "epoch": 4.918088737201365, "grad_norm": 1.533298788146413, "learning_rate": 8.321660716698738e-10, "loss": 0.0033, "step": 21615 }, { "epoch": 4.918316268486917, "grad_norm": 1.9625938389765645, "learning_rate": 8.27563135119211e-10, "loss": 0.0467, "step": 21616 }, { "epoch": 4.918543799772468, "grad_norm": 0.3969426939043318, "learning_rate": 8.229729555100236e-10, "loss": 0.0017, "step": 21617 }, { "epoch": 4.918771331058021, "grad_norm": 2.040024072461589, "learning_rate": 8.183955329361254e-10, "loss": 0.0154, "step": 21618 }, { "epoch": 4.918998862343572, "grad_norm": 1.1502515064173333, "learning_rate": 8.138308674911221e-10, "loss": 0.0588, "step": 21619 }, { "epoch": 4.919226393629124, "grad_norm": 1.409815820700038, "learning_rate": 8.092789592682726e-10, "loss": 0.0274, "step": 21620 }, { "epoch": 4.919453924914675, "grad_norm": 1.9226243995119585, "learning_rate": 8.047398083605578e-10, "loss": 0.0302, "step": 21621 }, { "epoch": 4.919681456200228, "grad_norm": 0.9582929667574507, "learning_rate": 8.002134148608898e-10, "loss": 0.0074, "step": 21622 }, { "epoch": 4.91990898748578, "grad_norm": 1.2298105549508016, "learning_rate": 7.956997788616249e-10, "loss": 0.0307, "step": 21623 }, { "epoch": 4.920136518771331, "grad_norm": 1.0747568453699194, "learning_rate": 7.911989004551201e-10, "loss": 0.0087, "step": 21624 }, { "epoch": 4.920364050056882, "grad_norm": 1.2085329919230283, "learning_rate": 7.867107797333157e-10, "loss": 0.021, "step": 21625 }, { "epoch": 4.920591581342435, "grad_norm": 1.279890168510083, "learning_rate": 7.822354167878743e-10, "loss": 0.0455, "step": 21626 }, { "epoch": 4.920819112627987, "grad_norm": 1.5637904331962875, "learning_rate": 7.777728117104589e-10, "loss": 0.027, "step": 21627 }, { "epoch": 4.921046643913538, "grad_norm": 1.7159846734803086, "learning_rate": 7.733229645921076e-10, "loss": 0.0161, "step": 21628 }, { "epoch": 4.921274175199089, "grad_norm": 1.7296532699644314, "learning_rate": 7.688858755237893e-10, "loss": 0.0123, "step": 21629 }, { "epoch": 4.921501706484642, "grad_norm": 1.4186184299893296, "learning_rate": 7.64461544596265e-10, "loss": 0.0477, "step": 21630 }, { "epoch": 4.921729237770194, "grad_norm": 0.9657384895433307, "learning_rate": 7.600499718998788e-10, "loss": 0.0073, "step": 21631 }, { "epoch": 4.921956769055745, "grad_norm": 5.169691409204472, "learning_rate": 7.556511575248365e-10, "loss": 0.0443, "step": 21632 }, { "epoch": 4.922184300341297, "grad_norm": 1.1318314094732649, "learning_rate": 7.512651015610661e-10, "loss": 0.0365, "step": 21633 }, { "epoch": 4.922411831626849, "grad_norm": 0.5043318442432426, "learning_rate": 7.468918040981487e-10, "loss": 0.0033, "step": 21634 }, { "epoch": 4.922639362912401, "grad_norm": 1.5978813075245497, "learning_rate": 7.425312652254573e-10, "loss": 0.0678, "step": 21635 }, { "epoch": 4.922866894197952, "grad_norm": 0.9497737687899703, "learning_rate": 7.381834850322262e-10, "loss": 0.0215, "step": 21636 }, { "epoch": 4.923094425483504, "grad_norm": 1.7198013808895334, "learning_rate": 7.338484636072035e-10, "loss": 0.0192, "step": 21637 }, { "epoch": 4.923321956769056, "grad_norm": 1.5021388069168922, "learning_rate": 7.295262010390686e-10, "loss": 0.006, "step": 21638 }, { "epoch": 4.923549488054608, "grad_norm": 1.0830606191588728, "learning_rate": 7.25216697416084e-10, "loss": 0.0236, "step": 21639 }, { "epoch": 4.923777019340159, "grad_norm": 1.1901319061365605, "learning_rate": 7.209199528263738e-10, "loss": 0.0208, "step": 21640 }, { "epoch": 4.924004550625711, "grad_norm": 1.2947725914424486, "learning_rate": 7.16635967357715e-10, "loss": 0.0116, "step": 21641 }, { "epoch": 4.924232081911263, "grad_norm": 3.285378916717538, "learning_rate": 7.123647410977457e-10, "loss": 0.0109, "step": 21642 }, { "epoch": 4.924459613196815, "grad_norm": 1.1100850574663217, "learning_rate": 7.081062741336881e-10, "loss": 0.007, "step": 21643 }, { "epoch": 4.924687144482366, "grad_norm": 1.2035369455973202, "learning_rate": 7.03860566552625e-10, "loss": 0.013, "step": 21644 }, { "epoch": 4.924914675767918, "grad_norm": 1.4880234904810297, "learning_rate": 6.996276184412925e-10, "loss": 0.0081, "step": 21645 }, { "epoch": 4.92514220705347, "grad_norm": 1.1481129756835027, "learning_rate": 6.95407429886219e-10, "loss": 0.0096, "step": 21646 }, { "epoch": 4.925369738339022, "grad_norm": 1.3252560064287706, "learning_rate": 6.912000009736547e-10, "loss": 0.0161, "step": 21647 }, { "epoch": 4.925597269624573, "grad_norm": 1.1749396572102255, "learning_rate": 6.870053317895725e-10, "loss": 0.0138, "step": 21648 }, { "epoch": 4.925824800910125, "grad_norm": 2.2260269910598853, "learning_rate": 6.828234224198066e-10, "loss": 0.0802, "step": 21649 }, { "epoch": 4.926052332195677, "grad_norm": 1.5703252934043963, "learning_rate": 6.786542729496359e-10, "loss": 0.0902, "step": 21650 }, { "epoch": 4.926279863481229, "grad_norm": 1.1365008918759236, "learning_rate": 6.744978834644784e-10, "loss": 0.0979, "step": 21651 }, { "epoch": 4.92650739476678, "grad_norm": 0.15425577627689016, "learning_rate": 6.70354254049127e-10, "loss": 0.0005, "step": 21652 }, { "epoch": 4.926734926052332, "grad_norm": 1.0542519790599054, "learning_rate": 6.66223384788306e-10, "loss": 0.0368, "step": 21653 }, { "epoch": 4.926962457337884, "grad_norm": 1.720256701724962, "learning_rate": 6.621052757665308e-10, "loss": 0.0693, "step": 21654 }, { "epoch": 4.927189988623436, "grad_norm": 1.7687459783985304, "learning_rate": 6.579999270679011e-10, "loss": 0.0155, "step": 21655 }, { "epoch": 4.927417519908987, "grad_norm": 0.8976119406205922, "learning_rate": 6.539073387763079e-10, "loss": 0.0427, "step": 21656 }, { "epoch": 4.927645051194539, "grad_norm": 1.261928006760202, "learning_rate": 6.498275109753649e-10, "loss": 0.0755, "step": 21657 }, { "epoch": 4.927872582480091, "grad_norm": 1.4483372841596853, "learning_rate": 6.45760443748547e-10, "loss": 0.0972, "step": 21658 }, { "epoch": 4.928100113765643, "grad_norm": 1.0710271542025984, "learning_rate": 6.41706137178913e-10, "loss": 0.0611, "step": 21659 }, { "epoch": 4.928327645051194, "grad_norm": 1.146778268428534, "learning_rate": 6.376645913493823e-10, "loss": 0.0459, "step": 21660 }, { "epoch": 4.928555176336746, "grad_norm": 1.6243052041882098, "learning_rate": 6.33635806342528e-10, "loss": 0.0177, "step": 21661 }, { "epoch": 4.9287827076222985, "grad_norm": 2.0561284521781045, "learning_rate": 6.296197822406452e-10, "loss": 0.0579, "step": 21662 }, { "epoch": 4.92901023890785, "grad_norm": 0.3750632749004006, "learning_rate": 6.256165191258906e-10, "loss": 0.0019, "step": 21663 }, { "epoch": 4.929237770193401, "grad_norm": 1.3552766068762003, "learning_rate": 6.216260170800043e-10, "loss": 0.0286, "step": 21664 }, { "epoch": 4.929465301478953, "grad_norm": 1.7611707380029467, "learning_rate": 6.17648276184657e-10, "loss": 0.011, "step": 21665 }, { "epoch": 4.9296928327645055, "grad_norm": 1.3718072651959379, "learning_rate": 6.136832965209644e-10, "loss": 0.0469, "step": 21666 }, { "epoch": 4.929920364050057, "grad_norm": 1.5610856823824029, "learning_rate": 6.097310781701809e-10, "loss": 0.053, "step": 21667 }, { "epoch": 4.930147895335608, "grad_norm": 1.2966123359806876, "learning_rate": 6.057916212129366e-10, "loss": 0.0262, "step": 21668 }, { "epoch": 4.93037542662116, "grad_norm": 1.0695160259094065, "learning_rate": 6.01864925729792e-10, "loss": 0.0091, "step": 21669 }, { "epoch": 4.9306029579067125, "grad_norm": 1.6819514252470695, "learning_rate": 5.979509918010301e-10, "loss": 0.0199, "step": 21670 }, { "epoch": 4.930830489192264, "grad_norm": 1.3619556489815117, "learning_rate": 5.940498195065175e-10, "loss": 0.0647, "step": 21671 }, { "epoch": 4.931058020477816, "grad_norm": 0.3661849791436827, "learning_rate": 5.901614089261904e-10, "loss": 0.0017, "step": 21672 }, { "epoch": 4.931285551763367, "grad_norm": 2.0382615641600155, "learning_rate": 5.862857601393602e-10, "loss": 0.0493, "step": 21673 }, { "epoch": 4.9315130830489196, "grad_norm": 1.9034363826876568, "learning_rate": 5.82422873225269e-10, "loss": 0.0089, "step": 21674 }, { "epoch": 4.931740614334471, "grad_norm": 1.0443533712546822, "learning_rate": 5.785727482628817e-10, "loss": 0.0048, "step": 21675 }, { "epoch": 4.931968145620023, "grad_norm": 0.8638409420684818, "learning_rate": 5.747353853309545e-10, "loss": 0.0283, "step": 21676 }, { "epoch": 4.932195676905574, "grad_norm": 0.502788036027402, "learning_rate": 5.709107845078277e-10, "loss": 0.0074, "step": 21677 }, { "epoch": 4.932423208191127, "grad_norm": 0.8551414577733178, "learning_rate": 5.670989458716331e-10, "loss": 0.0105, "step": 21678 }, { "epoch": 4.932650739476678, "grad_norm": 0.7880705865492122, "learning_rate": 5.632998695004332e-10, "loss": 0.0112, "step": 21679 }, { "epoch": 4.93287827076223, "grad_norm": 0.9279772033046166, "learning_rate": 5.595135554717357e-10, "loss": 0.0165, "step": 21680 }, { "epoch": 4.933105802047781, "grad_norm": 1.1859650255715937, "learning_rate": 5.557400038629784e-10, "loss": 0.0551, "step": 21681 }, { "epoch": 4.933333333333334, "grad_norm": 1.167650634052499, "learning_rate": 5.51979214751322e-10, "loss": 0.0529, "step": 21682 }, { "epoch": 4.933560864618885, "grad_norm": 1.2297706530522812, "learning_rate": 5.482311882135105e-10, "loss": 0.0377, "step": 21683 }, { "epoch": 4.933788395904437, "grad_norm": 1.5021694243590156, "learning_rate": 5.444959243262881e-10, "loss": 0.0889, "step": 21684 }, { "epoch": 4.934015927189988, "grad_norm": 0.532874416063125, "learning_rate": 5.407734231659135e-10, "loss": 0.0024, "step": 21685 }, { "epoch": 4.934243458475541, "grad_norm": 1.2800186351329728, "learning_rate": 5.370636848085059e-10, "loss": 0.0119, "step": 21686 }, { "epoch": 4.934470989761092, "grad_norm": 1.1178964780586915, "learning_rate": 5.333667093298383e-10, "loss": 0.0145, "step": 21687 }, { "epoch": 4.934698521046644, "grad_norm": 1.5884784383551334, "learning_rate": 5.296824968054754e-10, "loss": 0.0942, "step": 21688 }, { "epoch": 4.934926052332195, "grad_norm": 1.0086392707444147, "learning_rate": 5.260110473107732e-10, "loss": 0.0494, "step": 21689 }, { "epoch": 4.935153583617748, "grad_norm": 0.9267482026275418, "learning_rate": 5.223523609207415e-10, "loss": 0.0086, "step": 21690 }, { "epoch": 4.935381114903299, "grad_norm": 1.8103593790337067, "learning_rate": 5.18706437710112e-10, "loss": 0.1339, "step": 21691 }, { "epoch": 4.935608646188851, "grad_norm": 1.1133956513404248, "learning_rate": 5.150732777534778e-10, "loss": 0.0573, "step": 21692 }, { "epoch": 4.935836177474402, "grad_norm": 1.1966918505217337, "learning_rate": 5.114528811250158e-10, "loss": 0.0161, "step": 21693 }, { "epoch": 4.936063708759955, "grad_norm": 1.3120012045835863, "learning_rate": 5.078452478988332e-10, "loss": 0.0415, "step": 21694 }, { "epoch": 4.936291240045506, "grad_norm": 0.5491685207543312, "learning_rate": 5.04250378148552e-10, "loss": 0.0026, "step": 21695 }, { "epoch": 4.936518771331058, "grad_norm": 0.8848342875796572, "learning_rate": 5.006682719476547e-10, "loss": 0.0468, "step": 21696 }, { "epoch": 4.936746302616609, "grad_norm": 1.5257185526594668, "learning_rate": 4.970989293694162e-10, "loss": 0.0203, "step": 21697 }, { "epoch": 4.936973833902162, "grad_norm": 1.3621114175881137, "learning_rate": 4.935423504866948e-10, "loss": 0.0475, "step": 21698 }, { "epoch": 4.937201365187713, "grad_norm": 1.623737224230848, "learning_rate": 4.899985353722797e-10, "loss": 0.0593, "step": 21699 }, { "epoch": 4.937428896473265, "grad_norm": 1.3902467502548832, "learning_rate": 4.864674840986127e-10, "loss": 0.0127, "step": 21700 }, { "epoch": 4.937656427758817, "grad_norm": 1.3130722533383674, "learning_rate": 4.829491967377197e-10, "loss": 0.0208, "step": 21701 }, { "epoch": 4.937883959044369, "grad_norm": 1.21827109832493, "learning_rate": 4.794436733616264e-10, "loss": 0.0162, "step": 21702 }, { "epoch": 4.93811149032992, "grad_norm": 1.1062631292314036, "learning_rate": 4.759509140420116e-10, "loss": 0.0136, "step": 21703 }, { "epoch": 4.938339021615472, "grad_norm": 1.1072279152458182, "learning_rate": 4.724709188501376e-10, "loss": 0.0026, "step": 21704 }, { "epoch": 4.938566552901024, "grad_norm": 1.1154019278255636, "learning_rate": 4.690036878571974e-10, "loss": 0.0096, "step": 21705 }, { "epoch": 4.938794084186576, "grad_norm": 0.6599337869356864, "learning_rate": 4.655492211340373e-10, "loss": 0.0038, "step": 21706 }, { "epoch": 4.939021615472127, "grad_norm": 5.939688062555205, "learning_rate": 4.621075187512952e-10, "loss": 0.056, "step": 21707 }, { "epoch": 4.939249146757679, "grad_norm": 1.0039798998247729, "learning_rate": 4.586785807792621e-10, "loss": 0.003, "step": 21708 }, { "epoch": 4.939476678043231, "grad_norm": 1.6165139522012777, "learning_rate": 4.5526240728802076e-10, "loss": 0.0174, "step": 21709 }, { "epoch": 4.939704209328783, "grad_norm": 1.9459415805704428, "learning_rate": 4.518589983474458e-10, "loss": 0.1272, "step": 21710 }, { "epoch": 4.939931740614335, "grad_norm": 0.7950614078099689, "learning_rate": 4.484683540270651e-10, "loss": 0.006, "step": 21711 }, { "epoch": 4.940159271899886, "grad_norm": 0.9193627988918505, "learning_rate": 4.450904743961981e-10, "loss": 0.0232, "step": 21712 }, { "epoch": 4.940386803185438, "grad_norm": 1.3898076531772394, "learning_rate": 4.4172535952388683e-10, "loss": 0.0093, "step": 21713 }, { "epoch": 4.94061433447099, "grad_norm": 1.4567486098780202, "learning_rate": 4.3837300947882635e-10, "loss": 0.0745, "step": 21714 }, { "epoch": 4.940841865756542, "grad_norm": 2.6773032895953905, "learning_rate": 4.3503342432964224e-10, "loss": 0.0735, "step": 21715 }, { "epoch": 4.941069397042093, "grad_norm": 1.1090782196690554, "learning_rate": 4.317066041444745e-10, "loss": 0.0126, "step": 21716 }, { "epoch": 4.941296928327645, "grad_norm": 1.0524753279080132, "learning_rate": 4.283925489913937e-10, "loss": 0.0304, "step": 21717 }, { "epoch": 4.941524459613197, "grad_norm": 1.326617732173901, "learning_rate": 4.250912589381928e-10, "loss": 0.0478, "step": 21718 }, { "epoch": 4.941751990898749, "grad_norm": 1.6332129635980535, "learning_rate": 4.218027340521791e-10, "loss": 0.0094, "step": 21719 }, { "epoch": 4.9419795221843, "grad_norm": 1.5812315863425592, "learning_rate": 4.185269744007292e-10, "loss": 0.0654, "step": 21720 }, { "epoch": 4.942207053469852, "grad_norm": 0.9429741395289534, "learning_rate": 4.1526398005066473e-10, "loss": 0.038, "step": 21721 }, { "epoch": 4.942434584755404, "grad_norm": 0.9609426607412168, "learning_rate": 4.1201375106873773e-10, "loss": 0.0089, "step": 21722 }, { "epoch": 4.942662116040956, "grad_norm": 1.9733933020167165, "learning_rate": 4.0877628752142296e-10, "loss": 0.0772, "step": 21723 }, { "epoch": 4.942889647326507, "grad_norm": 1.3863801578991297, "learning_rate": 4.055515894747786e-10, "loss": 0.0694, "step": 21724 }, { "epoch": 4.943117178612059, "grad_norm": 0.5126676817364424, "learning_rate": 4.023396569947935e-10, "loss": 0.0038, "step": 21725 }, { "epoch": 4.943344709897611, "grad_norm": 0.8239607976090076, "learning_rate": 3.9914049014710967e-10, "loss": 0.0404, "step": 21726 }, { "epoch": 4.943572241183163, "grad_norm": 1.5080510790620802, "learning_rate": 3.959540889970914e-10, "loss": 0.0415, "step": 21727 }, { "epoch": 4.943799772468714, "grad_norm": 1.3674909801057507, "learning_rate": 3.9278045360982566e-10, "loss": 0.017, "step": 21728 }, { "epoch": 4.944027303754266, "grad_norm": 2.255630306725204, "learning_rate": 3.8961958405026033e-10, "loss": 0.0192, "step": 21729 }, { "epoch": 4.944254835039818, "grad_norm": 1.8330459078274528, "learning_rate": 3.8647148038285784e-10, "loss": 0.0542, "step": 21730 }, { "epoch": 4.94448236632537, "grad_norm": 1.6250339494486314, "learning_rate": 3.833361426721499e-10, "loss": 0.0271, "step": 21731 }, { "epoch": 4.944709897610921, "grad_norm": 1.410995272507485, "learning_rate": 3.802135709821131e-10, "loss": 0.0538, "step": 21732 }, { "epoch": 4.944937428896473, "grad_norm": 1.2727371586240503, "learning_rate": 3.771037653765158e-10, "loss": 0.0363, "step": 21733 }, { "epoch": 4.945164960182025, "grad_norm": 1.4448894863593313, "learning_rate": 3.740067259189878e-10, "loss": 0.0354, "step": 21734 }, { "epoch": 4.945392491467577, "grad_norm": 0.5883336905478374, "learning_rate": 3.7092245267288107e-10, "loss": 0.0025, "step": 21735 }, { "epoch": 4.945620022753128, "grad_norm": 1.3010456451220316, "learning_rate": 3.6785094570106204e-10, "loss": 0.0225, "step": 21736 }, { "epoch": 4.94584755403868, "grad_norm": 0.24159617799690244, "learning_rate": 3.647922050664665e-10, "loss": 0.0008, "step": 21737 }, { "epoch": 4.946075085324232, "grad_norm": 1.3219440378638039, "learning_rate": 3.617462308315445e-10, "loss": 0.014, "step": 21738 }, { "epoch": 4.946302616609784, "grad_norm": 2.054620193675462, "learning_rate": 3.5871302305860723e-10, "loss": 0.1109, "step": 21739 }, { "epoch": 4.946530147895336, "grad_norm": 1.0769961397903796, "learning_rate": 3.5569258180954967e-10, "loss": 0.0146, "step": 21740 }, { "epoch": 4.946757679180887, "grad_norm": 0.5244801470536041, "learning_rate": 3.52684907146128e-10, "loss": 0.0022, "step": 21741 }, { "epoch": 4.946985210466439, "grad_norm": 1.702719429823508, "learning_rate": 3.496899991298902e-10, "loss": 0.0911, "step": 21742 }, { "epoch": 4.947212741751991, "grad_norm": 0.6147515036289214, "learning_rate": 3.467078578219679e-10, "loss": 0.0078, "step": 21743 }, { "epoch": 4.947440273037543, "grad_norm": 1.4901906582885915, "learning_rate": 3.43738483283354e-10, "loss": 0.0078, "step": 21744 }, { "epoch": 4.947667804323094, "grad_norm": 1.7350643125367768, "learning_rate": 3.4078187557469445e-10, "loss": 0.076, "step": 21745 }, { "epoch": 4.947895335608646, "grad_norm": 1.1382264651921683, "learning_rate": 3.3783803475649636e-10, "loss": 0.028, "step": 21746 }, { "epoch": 4.948122866894198, "grad_norm": 1.0524615877156807, "learning_rate": 3.3490696088885053e-10, "loss": 0.0144, "step": 21747 }, { "epoch": 4.94835039817975, "grad_norm": 0.5848168206811122, "learning_rate": 3.319886540317091e-10, "loss": 0.0055, "step": 21748 }, { "epoch": 4.948577929465301, "grad_norm": 1.2632511549847214, "learning_rate": 3.290831142446077e-10, "loss": 0.0148, "step": 21749 }, { "epoch": 4.948805460750854, "grad_norm": 0.3173195171241955, "learning_rate": 3.261903415870821e-10, "loss": 0.0018, "step": 21750 }, { "epoch": 4.949032992036405, "grad_norm": 0.8459415085413321, "learning_rate": 3.2331033611818217e-10, "loss": 0.0078, "step": 21751 }, { "epoch": 4.949260523321957, "grad_norm": 1.2397903669214494, "learning_rate": 3.2044309789681925e-10, "loss": 0.0597, "step": 21752 }, { "epoch": 4.949488054607508, "grad_norm": 1.3313230251392725, "learning_rate": 3.1758862698148816e-10, "loss": 0.0556, "step": 21753 }, { "epoch": 4.949715585893061, "grad_norm": 0.5135221887198759, "learning_rate": 3.147469234306144e-10, "loss": 0.0017, "step": 21754 }, { "epoch": 4.949943117178612, "grad_norm": 2.897811835043701, "learning_rate": 3.119179873022765e-10, "loss": 0.0418, "step": 21755 }, { "epoch": 4.950170648464164, "grad_norm": 2.2588016021705966, "learning_rate": 3.091018186542061e-10, "loss": 0.0945, "step": 21756 }, { "epoch": 4.950398179749715, "grad_norm": 1.563185400722578, "learning_rate": 3.062984175441347e-10, "loss": 0.0428, "step": 21757 }, { "epoch": 4.950625711035268, "grad_norm": 0.9964062806311414, "learning_rate": 3.0350778402916947e-10, "loss": 0.052, "step": 21758 }, { "epoch": 4.950853242320819, "grad_norm": 1.3751494278493939, "learning_rate": 3.007299181664175e-10, "loss": 0.0065, "step": 21759 }, { "epoch": 4.951080773606371, "grad_norm": 1.336876697779593, "learning_rate": 2.9796482001270833e-10, "loss": 0.0516, "step": 21760 }, { "epoch": 4.951308304891922, "grad_norm": 1.9547052789941548, "learning_rate": 2.9521248962452456e-10, "loss": 0.0976, "step": 21761 }, { "epoch": 4.951535836177475, "grad_norm": 0.9356629705433802, "learning_rate": 2.924729270580712e-10, "loss": 0.0064, "step": 21762 }, { "epoch": 4.951763367463026, "grad_norm": 0.8924157625923782, "learning_rate": 2.897461323693451e-10, "loss": 0.0273, "step": 21763 }, { "epoch": 4.951990898748578, "grad_norm": 1.3756700258725294, "learning_rate": 2.870321056141351e-10, "loss": 0.0546, "step": 21764 }, { "epoch": 4.952218430034129, "grad_norm": 1.7537915803617523, "learning_rate": 2.843308468478828e-10, "loss": 0.0376, "step": 21765 }, { "epoch": 4.952445961319682, "grad_norm": 0.5746187389335121, "learning_rate": 2.8164235612582194e-10, "loss": 0.0025, "step": 21766 }, { "epoch": 4.952673492605233, "grad_norm": 1.3324672067502288, "learning_rate": 2.7896663350283904e-10, "loss": 0.0598, "step": 21767 }, { "epoch": 4.952901023890785, "grad_norm": 1.1431418010704446, "learning_rate": 2.7630367903368205e-10, "loss": 0.0608, "step": 21768 }, { "epoch": 4.953128555176336, "grad_norm": 0.5938428014216035, "learning_rate": 2.736534927727519e-10, "loss": 0.0029, "step": 21769 }, { "epoch": 4.953356086461889, "grad_norm": 1.4022365336517935, "learning_rate": 2.710160747741719e-10, "loss": 0.0365, "step": 21770 }, { "epoch": 4.95358361774744, "grad_norm": 1.228360764782196, "learning_rate": 2.6839142509192673e-10, "loss": 0.0277, "step": 21771 }, { "epoch": 4.953811149032992, "grad_norm": 2.0699777342774506, "learning_rate": 2.657795437795846e-10, "loss": 0.0608, "step": 21772 }, { "epoch": 4.954038680318543, "grad_norm": 0.7805000298384838, "learning_rate": 2.63180430890575e-10, "loss": 0.0049, "step": 21773 }, { "epoch": 4.954266211604096, "grad_norm": 1.6074323833934518, "learning_rate": 2.605940864780498e-10, "loss": 0.0303, "step": 21774 }, { "epoch": 4.954493742889647, "grad_norm": 1.5825523796073877, "learning_rate": 2.580205105947448e-10, "loss": 0.1129, "step": 21775 }, { "epoch": 4.954721274175199, "grad_norm": 0.9790041756941196, "learning_rate": 2.5545970329339533e-10, "loss": 0.0087, "step": 21776 }, { "epoch": 4.9549488054607504, "grad_norm": 0.30025455070726875, "learning_rate": 2.529116646262514e-10, "loss": 0.0014, "step": 21777 }, { "epoch": 4.955176336746303, "grad_norm": 1.1059915043561337, "learning_rate": 2.503763946454935e-10, "loss": 0.042, "step": 21778 }, { "epoch": 4.955403868031855, "grad_norm": 0.8283887532180529, "learning_rate": 2.478538934028163e-10, "loss": 0.0127, "step": 21779 }, { "epoch": 4.955631399317406, "grad_norm": 2.0386605030081655, "learning_rate": 2.453441609497759e-10, "loss": 0.056, "step": 21780 }, { "epoch": 4.9558589306029575, "grad_norm": 1.0105839238376564, "learning_rate": 2.428471973377894e-10, "loss": 0.0247, "step": 21781 }, { "epoch": 4.95608646188851, "grad_norm": 1.0781739942264768, "learning_rate": 2.403630026177883e-10, "loss": 0.0838, "step": 21782 }, { "epoch": 4.956313993174062, "grad_norm": 0.9832006118341667, "learning_rate": 2.378915768405654e-10, "loss": 0.0443, "step": 21783 }, { "epoch": 4.956541524459613, "grad_norm": 1.3427510061711447, "learning_rate": 2.354329200566358e-10, "loss": 0.016, "step": 21784 }, { "epoch": 4.9567690557451645, "grad_norm": 1.4228274509091445, "learning_rate": 2.3298703231630635e-10, "loss": 0.0362, "step": 21785 }, { "epoch": 4.956996587030717, "grad_norm": 0.9109680225655851, "learning_rate": 2.305539136694679e-10, "loss": 0.0046, "step": 21786 }, { "epoch": 4.957224118316269, "grad_norm": 1.4269372352391294, "learning_rate": 2.2813356416594156e-10, "loss": 0.0402, "step": 21787 }, { "epoch": 4.95745164960182, "grad_norm": 0.9234416009571959, "learning_rate": 2.2572598385513233e-10, "loss": 0.0067, "step": 21788 }, { "epoch": 4.957679180887372, "grad_norm": 1.719358797796503, "learning_rate": 2.2333117278623695e-10, "loss": 0.0641, "step": 21789 }, { "epoch": 4.957906712172924, "grad_norm": 0.9463323499845239, "learning_rate": 2.20949131008244e-10, "loss": 0.0498, "step": 21790 }, { "epoch": 4.958134243458476, "grad_norm": 0.596591177963559, "learning_rate": 2.1857985856979514e-10, "loss": 0.0028, "step": 21791 }, { "epoch": 4.958361774744027, "grad_norm": 1.1699870494963438, "learning_rate": 2.1622335551939323e-10, "loss": 0.0638, "step": 21792 }, { "epoch": 4.958589306029579, "grad_norm": 2.582745113503745, "learning_rate": 2.1387962190512478e-10, "loss": 0.0083, "step": 21793 }, { "epoch": 4.958816837315131, "grad_norm": 1.4048883192902686, "learning_rate": 2.1154865777486821e-10, "loss": 0.0163, "step": 21794 }, { "epoch": 4.959044368600683, "grad_norm": 1.54178335384342, "learning_rate": 2.0923046317636307e-10, "loss": 0.0435, "step": 21795 }, { "epoch": 4.959271899886234, "grad_norm": 0.6272576230787992, "learning_rate": 2.0692503815693265e-10, "loss": 0.0182, "step": 21796 }, { "epoch": 4.959499431171786, "grad_norm": 1.359614972779478, "learning_rate": 2.04632382763692e-10, "loss": 0.01, "step": 21797 }, { "epoch": 4.959726962457338, "grad_norm": 1.715523461197189, "learning_rate": 2.023524970434787e-10, "loss": 0.0133, "step": 21798 }, { "epoch": 4.95995449374289, "grad_norm": 2.351565631202689, "learning_rate": 2.0008538104299147e-10, "loss": 0.0493, "step": 21799 }, { "epoch": 4.960182025028441, "grad_norm": 1.2666558211696797, "learning_rate": 1.9783103480844334e-10, "loss": 0.0117, "step": 21800 }, { "epoch": 4.960409556313993, "grad_norm": 1.4766728709676986, "learning_rate": 1.95589458385978e-10, "loss": 0.0333, "step": 21801 }, { "epoch": 4.960637087599545, "grad_norm": 1.3408718880269068, "learning_rate": 1.9336065182139207e-10, "loss": 0.0723, "step": 21802 }, { "epoch": 4.960864618885097, "grad_norm": 1.3694858273154802, "learning_rate": 1.9114461516020478e-10, "loss": 0.0149, "step": 21803 }, { "epoch": 4.961092150170648, "grad_norm": 0.6369075525709583, "learning_rate": 1.8894134844772704e-10, "loss": 0.0084, "step": 21804 }, { "epoch": 4.9613196814562, "grad_norm": 2.0655996267073298, "learning_rate": 1.8675085172906172e-10, "loss": 0.0134, "step": 21805 }, { "epoch": 4.961547212741752, "grad_norm": 1.7556035491765483, "learning_rate": 1.8457312504882586e-10, "loss": 0.014, "step": 21806 }, { "epoch": 4.961774744027304, "grad_norm": 2.429603616914737, "learning_rate": 1.8240816845170596e-10, "loss": 0.1038, "step": 21807 }, { "epoch": 4.962002275312855, "grad_norm": 0.8122583707569474, "learning_rate": 1.8025598198183336e-10, "loss": 0.0303, "step": 21808 }, { "epoch": 4.962229806598407, "grad_norm": 0.8558589858875921, "learning_rate": 1.7811656568320068e-10, "loss": 0.0025, "step": 21809 }, { "epoch": 4.962457337883959, "grad_norm": 1.0546301855597704, "learning_rate": 1.7598991959959233e-10, "loss": 0.0269, "step": 21810 }, { "epoch": 4.962684869169511, "grad_norm": 0.8110796211451843, "learning_rate": 1.7387604377444578e-10, "loss": 0.0174, "step": 21811 }, { "epoch": 4.962912400455062, "grad_norm": 1.759603534101428, "learning_rate": 1.7177493825092096e-10, "loss": 0.0303, "step": 21812 }, { "epoch": 4.963139931740614, "grad_norm": 1.0039555879868172, "learning_rate": 1.69686603072039e-10, "loss": 0.0261, "step": 21813 }, { "epoch": 4.963367463026166, "grad_norm": 1.2299455345313124, "learning_rate": 1.676110382804047e-10, "loss": 0.0383, "step": 21814 }, { "epoch": 4.963594994311718, "grad_norm": 0.737938180707936, "learning_rate": 1.6554824391848412e-10, "loss": 0.0044, "step": 21815 }, { "epoch": 4.963822525597269, "grad_norm": 1.210127599743137, "learning_rate": 1.6349822002846573e-10, "loss": 0.0287, "step": 21816 }, { "epoch": 4.964050056882821, "grad_norm": 2.176160579605264, "learning_rate": 1.6146096665212162e-10, "loss": 0.0219, "step": 21817 }, { "epoch": 4.964277588168374, "grad_norm": 0.7213643723662608, "learning_rate": 1.5943648383122401e-10, "loss": 0.0107, "step": 21818 }, { "epoch": 4.964505119453925, "grad_norm": 1.3321376660946445, "learning_rate": 1.5742477160712865e-10, "loss": 0.0159, "step": 21819 }, { "epoch": 4.964732650739476, "grad_norm": 1.8247763510292998, "learning_rate": 1.5542583002091382e-10, "loss": 0.0488, "step": 21820 }, { "epoch": 4.964960182025028, "grad_norm": 1.3390688381749336, "learning_rate": 1.534396591134496e-10, "loss": 0.0388, "step": 21821 }, { "epoch": 4.965187713310581, "grad_norm": 2.002809599830293, "learning_rate": 1.514662589253285e-10, "loss": 0.074, "step": 21822 }, { "epoch": 4.965415244596132, "grad_norm": 1.183537038639788, "learning_rate": 1.4950562949686554e-10, "loss": 0.0399, "step": 21823 }, { "epoch": 4.965642775881683, "grad_norm": 2.246979074648316, "learning_rate": 1.4755777086816746e-10, "loss": 0.0129, "step": 21824 }, { "epoch": 4.965870307167235, "grad_norm": 1.1690215971811428, "learning_rate": 1.4562268307899419e-10, "loss": 0.0173, "step": 21825 }, { "epoch": 4.966097838452788, "grad_norm": 1.2921951032889973, "learning_rate": 1.4370036616889738e-10, "loss": 0.1017, "step": 21826 }, { "epoch": 4.966325369738339, "grad_norm": 1.4887105367677058, "learning_rate": 1.4179082017728995e-10, "loss": 0.0173, "step": 21827 }, { "epoch": 4.966552901023891, "grad_norm": 1.1613628192900953, "learning_rate": 1.3989404514302974e-10, "loss": 0.0939, "step": 21828 }, { "epoch": 4.966780432309442, "grad_norm": 1.2251225962254875, "learning_rate": 1.380100411049745e-10, "loss": 0.0214, "step": 21829 }, { "epoch": 4.967007963594995, "grad_norm": 1.1062664716739268, "learning_rate": 1.3613880810163516e-10, "loss": 0.0325, "step": 21830 }, { "epoch": 4.967235494880546, "grad_norm": 0.945378936216029, "learning_rate": 1.3428034617124498e-10, "loss": 0.0111, "step": 21831 }, { "epoch": 4.967463026166098, "grad_norm": 1.3616649535783814, "learning_rate": 1.3243465535169032e-10, "loss": 0.0421, "step": 21832 }, { "epoch": 4.9676905574516494, "grad_norm": 1.105218740212072, "learning_rate": 1.3060173568085755e-10, "loss": 0.0157, "step": 21833 }, { "epoch": 4.967918088737202, "grad_norm": 1.6194004728317264, "learning_rate": 1.287815871961473e-10, "loss": 0.0058, "step": 21834 }, { "epoch": 4.968145620022753, "grad_norm": 0.7547241562038925, "learning_rate": 1.2697420993468268e-10, "loss": 0.0023, "step": 21835 }, { "epoch": 4.968373151308305, "grad_norm": 1.5604658289109603, "learning_rate": 1.2517960393351735e-10, "loss": 0.0642, "step": 21836 }, { "epoch": 4.9686006825938565, "grad_norm": 1.4214054617230814, "learning_rate": 1.233977692292193e-10, "loss": 0.0197, "step": 21837 }, { "epoch": 4.968828213879409, "grad_norm": 0.8469284839320924, "learning_rate": 1.216287058583565e-10, "loss": 0.0065, "step": 21838 }, { "epoch": 4.96905574516496, "grad_norm": 1.2771702868359833, "learning_rate": 1.1987241385687243e-10, "loss": 0.0478, "step": 21839 }, { "epoch": 4.969283276450512, "grad_norm": 1.6361575093301137, "learning_rate": 1.181288932608493e-10, "loss": 0.0099, "step": 21840 }, { "epoch": 4.9695108077360635, "grad_norm": 0.6455049265046995, "learning_rate": 1.1639814410588368e-10, "loss": 0.0159, "step": 21841 }, { "epoch": 4.969738339021616, "grad_norm": 1.2767596203665907, "learning_rate": 1.1468016642729451e-10, "loss": 0.0773, "step": 21842 }, { "epoch": 4.969965870307167, "grad_norm": 1.420517589916515, "learning_rate": 1.1297496026019261e-10, "loss": 0.0193, "step": 21843 }, { "epoch": 4.970193401592719, "grad_norm": 0.5210331148677849, "learning_rate": 1.1128252563948061e-10, "loss": 0.0018, "step": 21844 }, { "epoch": 4.9704209328782705, "grad_norm": 0.36652667706723285, "learning_rate": 1.096028625997142e-10, "loss": 0.0015, "step": 21845 }, { "epoch": 4.970648464163823, "grad_norm": 0.2441797027098087, "learning_rate": 1.079359711752409e-10, "loss": 0.0009, "step": 21846 }, { "epoch": 4.970875995449374, "grad_norm": 1.8488555933044593, "learning_rate": 1.0628185140006131e-10, "loss": 0.0075, "step": 21847 }, { "epoch": 4.971103526734926, "grad_norm": 1.75923083748694, "learning_rate": 1.046405033081066e-10, "loss": 0.1588, "step": 21848 }, { "epoch": 4.9713310580204775, "grad_norm": 1.3539955313079328, "learning_rate": 1.0301192693289163e-10, "loss": 0.0511, "step": 21849 }, { "epoch": 4.97155858930603, "grad_norm": 0.9887316928449036, "learning_rate": 1.0139612230758434e-10, "loss": 0.0533, "step": 21850 }, { "epoch": 4.971786120591581, "grad_norm": 1.0927191760348425, "learning_rate": 9.97930894653526e-11, "loss": 0.0582, "step": 21851 }, { "epoch": 4.972013651877133, "grad_norm": 1.2211286783716617, "learning_rate": 9.820282843887862e-11, "loss": 0.0614, "step": 21852 }, { "epoch": 4.9722411831626845, "grad_norm": 0.5417475391765557, "learning_rate": 9.662533926070583e-11, "loss": 0.007, "step": 21853 }, { "epoch": 4.972468714448237, "grad_norm": 1.6908640698396296, "learning_rate": 9.506062196303068e-11, "loss": 0.024, "step": 21854 }, { "epoch": 4.972696245733788, "grad_norm": 2.087359287598625, "learning_rate": 9.350867657784146e-11, "loss": 0.033, "step": 21855 }, { "epoch": 4.97292377701934, "grad_norm": 1.777796697939175, "learning_rate": 9.196950313684894e-11, "loss": 0.0588, "step": 21856 }, { "epoch": 4.973151308304892, "grad_norm": 0.6741127180207108, "learning_rate": 9.044310167162507e-11, "loss": 0.004, "step": 21857 }, { "epoch": 4.973378839590444, "grad_norm": 1.0599058341170973, "learning_rate": 8.89294722131867e-11, "loss": 0.0266, "step": 21858 }, { "epoch": 4.973606370875995, "grad_norm": 1.2780450002597616, "learning_rate": 8.74286147926201e-11, "loss": 0.0189, "step": 21859 }, { "epoch": 4.973833902161547, "grad_norm": 0.9397614023060061, "learning_rate": 8.594052944052578e-11, "loss": 0.0051, "step": 21860 }, { "epoch": 4.974061433447099, "grad_norm": 1.5698155835322458, "learning_rate": 8.446521618729609e-11, "loss": 0.082, "step": 21861 }, { "epoch": 4.974288964732651, "grad_norm": 1.9143675403381706, "learning_rate": 8.30026750631846e-11, "loss": 0.0942, "step": 21862 }, { "epoch": 4.974516496018202, "grad_norm": 1.931730064455477, "learning_rate": 8.155290609795919e-11, "loss": 0.02, "step": 21863 }, { "epoch": 4.974744027303754, "grad_norm": 0.7621036857107729, "learning_rate": 8.01159093213183e-11, "loss": 0.0034, "step": 21864 }, { "epoch": 4.974971558589306, "grad_norm": 0.35669836973994945, "learning_rate": 7.869168476268285e-11, "loss": 0.0011, "step": 21865 }, { "epoch": 4.975199089874858, "grad_norm": 1.3171056539886066, "learning_rate": 7.728023245098803e-11, "loss": 0.0221, "step": 21866 }, { "epoch": 4.97542662116041, "grad_norm": 0.40431519354403384, "learning_rate": 7.588155241530781e-11, "loss": 0.0033, "step": 21867 }, { "epoch": 4.975654152445961, "grad_norm": 1.5890005655884012, "learning_rate": 7.449564468402227e-11, "loss": 0.0274, "step": 21868 }, { "epoch": 4.975881683731513, "grad_norm": 1.5530011551306002, "learning_rate": 7.312250928558085e-11, "loss": 0.0587, "step": 21869 }, { "epoch": 4.976109215017065, "grad_norm": 1.5589499047144524, "learning_rate": 7.17621462480167e-11, "loss": 0.0186, "step": 21870 }, { "epoch": 4.976336746302617, "grad_norm": 1.291812971980054, "learning_rate": 7.041455559915478e-11, "loss": 0.0538, "step": 21871 }, { "epoch": 4.976564277588168, "grad_norm": 1.1948872040694565, "learning_rate": 6.90797373665425e-11, "loss": 0.0525, "step": 21872 }, { "epoch": 4.97679180887372, "grad_norm": 0.5625717457325073, "learning_rate": 6.775769157738033e-11, "loss": 0.0048, "step": 21873 }, { "epoch": 4.977019340159272, "grad_norm": 1.66838683498533, "learning_rate": 6.644841825872994e-11, "loss": 0.0923, "step": 21874 }, { "epoch": 4.977246871444824, "grad_norm": 2.020060529147694, "learning_rate": 6.515191743737548e-11, "loss": 0.048, "step": 21875 }, { "epoch": 4.977474402730375, "grad_norm": 1.8796257314175373, "learning_rate": 6.386818913982351e-11, "loss": 0.0559, "step": 21876 }, { "epoch": 4.977701934015927, "grad_norm": 1.2511385853066739, "learning_rate": 6.259723339230305e-11, "loss": 0.0058, "step": 21877 }, { "epoch": 4.977929465301479, "grad_norm": 2.5819812650901466, "learning_rate": 6.13390502206962e-11, "loss": 0.1051, "step": 21878 }, { "epoch": 4.978156996587031, "grad_norm": 1.130519914825783, "learning_rate": 6.009363965088499e-11, "loss": 0.0687, "step": 21879 }, { "epoch": 4.978384527872582, "grad_norm": 1.0308723418040346, "learning_rate": 5.886100170819641e-11, "loss": 0.0069, "step": 21880 }, { "epoch": 4.978612059158134, "grad_norm": 1.4360992761812705, "learning_rate": 5.7641136417888043e-11, "loss": 0.0206, "step": 21881 }, { "epoch": 4.978839590443686, "grad_norm": 1.538163384865803, "learning_rate": 5.6434043804801106e-11, "loss": 0.0651, "step": 21882 }, { "epoch": 4.979067121729238, "grad_norm": 1.8904304329705077, "learning_rate": 5.523972389370746e-11, "loss": 0.0808, "step": 21883 }, { "epoch": 4.979294653014789, "grad_norm": 0.8271208016216993, "learning_rate": 5.405817670903202e-11, "loss": 0.0054, "step": 21884 }, { "epoch": 4.979522184300341, "grad_norm": 1.3117654932897704, "learning_rate": 5.288940227485273e-11, "loss": 0.0183, "step": 21885 }, { "epoch": 4.979749715585893, "grad_norm": 1.0249023004180569, "learning_rate": 5.1733400615039394e-11, "loss": 0.004, "step": 21886 }, { "epoch": 4.979977246871445, "grad_norm": 1.4126527055593667, "learning_rate": 5.0590171753253644e-11, "loss": 0.0476, "step": 21887 }, { "epoch": 4.980204778156996, "grad_norm": 1.5376465509660553, "learning_rate": 4.9459715712879555e-11, "loss": 0.0573, "step": 21888 }, { "epoch": 4.9804323094425484, "grad_norm": 0.3241867290156327, "learning_rate": 4.834203251702363e-11, "loss": 0.0013, "step": 21889 }, { "epoch": 4.9806598407281, "grad_norm": 1.6379689299235793, "learning_rate": 4.723712218851484e-11, "loss": 0.0308, "step": 21890 }, { "epoch": 4.980887372013652, "grad_norm": 0.5279978976051928, "learning_rate": 4.614498474990459e-11, "loss": 0.0012, "step": 21891 }, { "epoch": 4.981114903299203, "grad_norm": 1.7767413160093175, "learning_rate": 4.506562022353611e-11, "loss": 0.1289, "step": 21892 }, { "epoch": 4.9813424345847555, "grad_norm": 0.33407194316048755, "learning_rate": 4.3999028631544486e-11, "loss": 0.0015, "step": 21893 }, { "epoch": 4.981569965870307, "grad_norm": 1.5574310231772506, "learning_rate": 4.2945209995579054e-11, "loss": 0.0246, "step": 21894 }, { "epoch": 4.981797497155859, "grad_norm": 1.033286853233603, "learning_rate": 4.190416433728917e-11, "loss": 0.0167, "step": 21895 }, { "epoch": 4.982025028441411, "grad_norm": 0.6477166989396768, "learning_rate": 4.087589167790784e-11, "loss": 0.0028, "step": 21896 }, { "epoch": 4.9822525597269625, "grad_norm": 2.1161391339026276, "learning_rate": 3.9860392038529316e-11, "loss": 0.0901, "step": 21897 }, { "epoch": 4.982480091012514, "grad_norm": 0.7071111204637381, "learning_rate": 3.8857665439762105e-11, "loss": 0.0234, "step": 21898 }, { "epoch": 4.982707622298066, "grad_norm": 1.0858738501206513, "learning_rate": 3.786771190221472e-11, "loss": 0.0136, "step": 21899 }, { "epoch": 4.982935153583618, "grad_norm": 1.6975275998212778, "learning_rate": 3.689053144607935e-11, "loss": 0.0136, "step": 21900 }, { "epoch": 4.9831626848691695, "grad_norm": 1.074825390627585, "learning_rate": 3.5926124091339995e-11, "loss": 0.041, "step": 21901 }, { "epoch": 4.983390216154721, "grad_norm": 0.9577762835333035, "learning_rate": 3.4974489857703134e-11, "loss": 0.0095, "step": 21902 }, { "epoch": 4.983617747440273, "grad_norm": 1.3305301134012713, "learning_rate": 3.403562876459765e-11, "loss": 0.0982, "step": 21903 }, { "epoch": 4.983845278725825, "grad_norm": 0.4639770129899262, "learning_rate": 3.310954083131368e-11, "loss": 0.0058, "step": 21904 }, { "epoch": 4.9840728100113765, "grad_norm": 1.2671583280901006, "learning_rate": 3.219622607658623e-11, "loss": 0.061, "step": 21905 }, { "epoch": 4.984300341296929, "grad_norm": 1.6016266867041655, "learning_rate": 3.1295684519289107e-11, "loss": 0.024, "step": 21906 }, { "epoch": 4.98452787258248, "grad_norm": 1.4845069724394806, "learning_rate": 3.040791617767158e-11, "loss": 0.1195, "step": 21907 }, { "epoch": 4.984755403868032, "grad_norm": 1.5305293816763486, "learning_rate": 2.9532921069982956e-11, "loss": 0.0592, "step": 21908 }, { "epoch": 4.9849829351535835, "grad_norm": 1.3847296244398344, "learning_rate": 2.8670699213986796e-11, "loss": 0.0018, "step": 21909 }, { "epoch": 4.985210466439136, "grad_norm": 0.8336804649229347, "learning_rate": 2.7821250627446672e-11, "loss": 0.004, "step": 21910 }, { "epoch": 4.985437997724687, "grad_norm": 1.5202598473131559, "learning_rate": 2.698457532764043e-11, "loss": 0.0465, "step": 21911 }, { "epoch": 4.985665529010239, "grad_norm": 0.8141689023370506, "learning_rate": 2.6160673331707133e-11, "loss": 0.0076, "step": 21912 }, { "epoch": 4.9858930602957905, "grad_norm": 1.5849685449675428, "learning_rate": 2.534954465643891e-11, "loss": 0.0261, "step": 21913 }, { "epoch": 4.986120591581343, "grad_norm": 2.002567248275687, "learning_rate": 2.455118931841971e-11, "loss": 0.0205, "step": 21914 }, { "epoch": 4.986348122866894, "grad_norm": 1.2844468308898263, "learning_rate": 2.376560733402533e-11, "loss": 0.0055, "step": 21915 }, { "epoch": 4.986575654152446, "grad_norm": 2.1870123350420116, "learning_rate": 2.2992798719284614e-11, "loss": 0.0228, "step": 21916 }, { "epoch": 4.9868031854379975, "grad_norm": 1.22383404091575, "learning_rate": 2.223276348994885e-11, "loss": 0.0682, "step": 21917 }, { "epoch": 4.98703071672355, "grad_norm": 1.6367938173188439, "learning_rate": 2.148550166156116e-11, "loss": 0.0224, "step": 21918 }, { "epoch": 4.987258248009101, "grad_norm": 0.3354714937248174, "learning_rate": 2.0751013249456498e-11, "loss": 0.001, "step": 21919 }, { "epoch": 4.987485779294653, "grad_norm": 1.738086613401021, "learning_rate": 2.0029298268553488e-11, "loss": 0.0547, "step": 21920 }, { "epoch": 4.9877133105802045, "grad_norm": 1.3943144936004501, "learning_rate": 1.932035673370136e-11, "loss": 0.018, "step": 21921 }, { "epoch": 4.987940841865757, "grad_norm": 1.492287946642316, "learning_rate": 1.862418865933302e-11, "loss": 0.063, "step": 21922 }, { "epoch": 4.988168373151308, "grad_norm": 1.2217540524972486, "learning_rate": 1.7940794059673195e-11, "loss": 0.021, "step": 21923 }, { "epoch": 4.98839590443686, "grad_norm": 0.7300419483512963, "learning_rate": 1.727017294873845e-11, "loss": 0.0044, "step": 21924 }, { "epoch": 4.9886234357224115, "grad_norm": 0.412017800619884, "learning_rate": 1.661232534019841e-11, "loss": 0.0023, "step": 21925 }, { "epoch": 4.988850967007964, "grad_norm": 1.792875678923058, "learning_rate": 1.5967251247445135e-11, "loss": 0.0145, "step": 21926 }, { "epoch": 4.989078498293515, "grad_norm": 1.3941141051381152, "learning_rate": 1.5334950683731918e-11, "loss": 0.0051, "step": 21927 }, { "epoch": 4.989306029579067, "grad_norm": 1.1129666896096067, "learning_rate": 1.471542366203449e-11, "loss": 0.0204, "step": 21928 }, { "epoch": 4.9895335608646185, "grad_norm": 1.1024558535763958, "learning_rate": 1.4108670194842855e-11, "loss": 0.0032, "step": 21929 }, { "epoch": 4.989761092150171, "grad_norm": 1.1626433926035118, "learning_rate": 1.3514690294716415e-11, "loss": 0.0191, "step": 21930 }, { "epoch": 4.989988623435722, "grad_norm": 1.7336316935594243, "learning_rate": 1.2933483973728845e-11, "loss": 0.0736, "step": 21931 }, { "epoch": 4.990216154721274, "grad_norm": 1.3560720837373252, "learning_rate": 1.2365051243815041e-11, "loss": 0.0361, "step": 21932 }, { "epoch": 4.9904436860068255, "grad_norm": 1.6523419073242651, "learning_rate": 1.1809392116493568e-11, "loss": 0.0081, "step": 21933 }, { "epoch": 4.990671217292378, "grad_norm": 1.4171036978806, "learning_rate": 1.12665066032136e-11, "loss": 0.0693, "step": 21934 }, { "epoch": 4.99089874857793, "grad_norm": 1.7769155982416065, "learning_rate": 1.073639471500798e-11, "loss": 0.0129, "step": 21935 }, { "epoch": 4.991126279863481, "grad_norm": 1.5215140133149199, "learning_rate": 1.0219056462770771e-11, "loss": 0.0184, "step": 21936 }, { "epoch": 4.9913538111490325, "grad_norm": 1.2123689011731298, "learning_rate": 9.71449185704909e-12, "loss": 0.0293, "step": 21937 }, { "epoch": 4.991581342434585, "grad_norm": 0.999153120768275, "learning_rate": 9.222700908112503e-12, "loss": 0.0308, "step": 21938 }, { "epoch": 4.991808873720137, "grad_norm": 1.1872063047393953, "learning_rate": 8.743683626022403e-12, "loss": 0.0491, "step": 21939 }, { "epoch": 4.992036405005688, "grad_norm": 1.1366820993294504, "learning_rate": 8.277440020632022e-12, "loss": 0.0046, "step": 21940 }, { "epoch": 4.99226393629124, "grad_norm": 1.1197793489925603, "learning_rate": 7.823970101447641e-12, "loss": 0.013, "step": 21941 }, { "epoch": 4.992491467576792, "grad_norm": 1.3806235120249446, "learning_rate": 7.383273877697993e-12, "loss": 0.1046, "step": 21942 }, { "epoch": 4.992718998862344, "grad_norm": 1.2477790276808822, "learning_rate": 6.955351358403639e-12, "loss": 0.0204, "step": 21943 }, { "epoch": 4.992946530147895, "grad_norm": 2.0092859839715236, "learning_rate": 6.540202552307584e-12, "loss": 0.0611, "step": 21944 }, { "epoch": 4.993174061433447, "grad_norm": 0.4241225282556457, "learning_rate": 6.1378274679446684e-12, "loss": 0.0025, "step": 21945 }, { "epoch": 4.993401592718999, "grad_norm": 1.1247780576769706, "learning_rate": 5.748226113502786e-12, "loss": 0.0353, "step": 21946 }, { "epoch": 4.993629124004551, "grad_norm": 1.7300096875619573, "learning_rate": 5.371398496892277e-12, "loss": 0.1095, "step": 21947 }, { "epoch": 4.993856655290102, "grad_norm": 1.4116266448426495, "learning_rate": 5.007344625954092e-12, "loss": 0.012, "step": 21948 }, { "epoch": 4.9940841865756544, "grad_norm": 1.3636898672878544, "learning_rate": 4.6560645079740676e-12, "loss": 0.0705, "step": 21949 }, { "epoch": 4.994311717861206, "grad_norm": 1.0209240817342802, "learning_rate": 4.317558150238044e-12, "loss": 0.0029, "step": 21950 }, { "epoch": 4.994539249146758, "grad_norm": 1.0471794359570106, "learning_rate": 3.991825559615526e-12, "loss": 0.0444, "step": 21951 }, { "epoch": 4.994766780432309, "grad_norm": 2.6928284913011593, "learning_rate": 3.678866742767851e-12, "loss": 0.0286, "step": 21952 }, { "epoch": 4.9949943117178615, "grad_norm": 1.209228039877937, "learning_rate": 3.3786817060788014e-12, "loss": 0.0234, "step": 21953 }, { "epoch": 4.995221843003413, "grad_norm": 0.9249526306883964, "learning_rate": 3.0912704557239936e-12, "loss": 0.0046, "step": 21954 }, { "epoch": 4.995449374288965, "grad_norm": 0.867589243314405, "learning_rate": 2.816632997532098e-12, "loss": 0.0427, "step": 21955 }, { "epoch": 4.995676905574516, "grad_norm": 0.6096698057753664, "learning_rate": 2.5547693371930082e-12, "loss": 0.0049, "step": 21956 }, { "epoch": 4.9959044368600685, "grad_norm": 1.2410453794234793, "learning_rate": 2.305679479980283e-12, "loss": 0.0722, "step": 21957 }, { "epoch": 4.99613196814562, "grad_norm": 10.809744272423558, "learning_rate": 2.069363430959315e-12, "loss": 0.0132, "step": 21958 }, { "epoch": 4.996359499431172, "grad_norm": 2.560763018458557, "learning_rate": 1.8458211950567184e-12, "loss": 0.0217, "step": 21959 }, { "epoch": 4.996587030716723, "grad_norm": 1.195711222465255, "learning_rate": 1.6350527767827751e-12, "loss": 0.024, "step": 21960 }, { "epoch": 4.9968145620022755, "grad_norm": 1.535001456493351, "learning_rate": 1.4370581805089878e-12, "loss": 0.0728, "step": 21961 }, { "epoch": 4.997042093287827, "grad_norm": 1.1288939043336987, "learning_rate": 1.2518374101905262e-12, "loss": 0.0052, "step": 21962 }, { "epoch": 4.997269624573379, "grad_norm": 1.0352134641100466, "learning_rate": 1.0793904696437818e-12, "loss": 0.0054, "step": 21963 }, { "epoch": 4.99749715585893, "grad_norm": 0.669824971245679, "learning_rate": 9.197173624075906e-13, "loss": 0.0038, "step": 21964 }, { "epoch": 4.9977246871444825, "grad_norm": 1.407852187545592, "learning_rate": 7.728180917432326e-13, "loss": 0.0126, "step": 21965 }, { "epoch": 4.997952218430034, "grad_norm": 1.4355773287481643, "learning_rate": 6.386926606344324e-13, "loss": 0.0077, "step": 21966 }, { "epoch": 4.998179749715586, "grad_norm": 1.5638136765412318, "learning_rate": 5.173410718567473e-13, "loss": 0.0075, "step": 21967 }, { "epoch": 4.998407281001137, "grad_norm": 0.7079659818823961, "learning_rate": 4.087633278387904e-13, "loss": 0.0176, "step": 21968 }, { "epoch": 4.9986348122866895, "grad_norm": 1.023814490481492, "learning_rate": 3.129594308703965e-13, "loss": 0.0439, "step": 21969 }, { "epoch": 4.998862343572241, "grad_norm": 0.7347274192804285, "learning_rate": 2.2992938289445597e-13, "loss": 0.0094, "step": 21970 }, { "epoch": 4.999089874857793, "grad_norm": 0.36073843306736875, "learning_rate": 1.596731855763034e-13, "loss": 0.0015, "step": 21971 }, { "epoch": 4.999317406143344, "grad_norm": 1.3544867273591497, "learning_rate": 1.0219084030371751e-13, "loss": 0.0111, "step": 21972 }, { "epoch": 4.9995449374288965, "grad_norm": 1.5122087420736827, "learning_rate": 5.748234839508815e-14, "loss": 0.0073, "step": 21973 }, { "epoch": 4.999772468714449, "grad_norm": 0.1440402691919318, "learning_rate": 2.5547710613693654e-14, "loss": 0.0003, "step": 21974 }, { "epoch": 5.0, "grad_norm": 0.8369046376111358, "learning_rate": 6.386927653423414e-15, "loss": 0.0046, "step": 21975 }, { "epoch": 5.0, "step": 21975, "total_flos": 97517022511104.0, "train_loss": 0.07617780651347497, "train_runtime": 29171.336, "train_samples_per_second": 1.506, "train_steps_per_second": 0.753 } ], "logging_steps": 1, "max_steps": 21975, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 1110, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 97517022511104.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }